diff options
Diffstat (limited to 'drivers')
128 files changed, 18449 insertions, 1982 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig index 46b4a8e0f859..5a89e409ad18 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -188,4 +188,10 @@ source "drivers/nvdimm/Kconfig" source "drivers/nvmem/Kconfig" +source "drivers/hwtracing/stm/Kconfig" + +source "drivers/hwtracing/intel_th/Kconfig" + +source "drivers/fpga/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index b250b36b54f2..7064bf476c2a 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -165,5 +165,8 @@ obj-$(CONFIG_PERF_EVENTS) += perf/ obj-$(CONFIG_RAS) += ras/ obj-$(CONFIG_THUNDERBOLT) += thunderbolt/ obj-$(CONFIG_CORESIGHT) += hwtracing/coresight/ +obj-y += hwtracing/intel_th/ +obj-$(CONFIG_STM) += hwtracing/stm/ obj-$(CONFIG_ANDROID) += android/ obj-$(CONFIG_NVMEM) += nvmem/ +obj-$(CONFIG_FPGA) += fpga/ diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c index e39e7402e623..dc62568b7dde 100644 --- a/drivers/char/efirtc.c +++ b/drivers/char/efirtc.c @@ -30,7 +30,6 @@ #include <linux/types.h> #include <linux/errno.h> #include <linux/miscdevice.h> -#include <linux/module.h> #include <linux/init.h> #include <linux/rtc.h> #include <linux/proc_fs.h> @@ -395,14 +394,8 @@ efi_rtc_init(void) } return 0; } +device_initcall(efi_rtc_init); -static void __exit -efi_rtc_exit(void) -{ - /* not yet used */ -} - -module_init(efi_rtc_init); -module_exit(efi_rtc_exit); - +/* MODULE_LICENSE("GPL"); +*/ diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 5c0baa9ffc64..240b6cf1d97c 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -12,7 +12,6 @@ */ #include <linux/interrupt.h> -#include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/miscdevice.h> @@ -1043,24 +1042,16 @@ static int hpet_acpi_add(struct acpi_device *device) return hpet_alloc(&data); } -static int hpet_acpi_remove(struct acpi_device *device) -{ - /* XXX need to unregister clocksource, dealloc mem, etc */ - return -EINVAL; -} - static const struct acpi_device_id hpet_device_ids[] = { {"PNP0103", 0}, {"", 0}, }; -MODULE_DEVICE_TABLE(acpi, hpet_device_ids); static struct acpi_driver hpet_acpi_driver = { .name = "hpet", .ids = hpet_device_ids, .ops = { .add = hpet_acpi_add, - .remove = hpet_acpi_remove, }, }; @@ -1086,19 +1077,9 @@ static int __init hpet_init(void) return 0; } +device_initcall(hpet_init); -static void __exit hpet_exit(void) -{ - acpi_bus_unregister_driver(&hpet_acpi_driver); - - if (sysctl_header) - unregister_sysctl_table(sysctl_header); - misc_deregister(&hpet_misc); - - return; -} - -module_init(hpet_init); -module_exit(hpet_exit); +/* MODULE_AUTHOR("Bob Picco <Robert.Picco@hp.com>"); MODULE_LICENSE("GPL"); +*/ diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c index 8a80ead8d316..94006f9c2e43 100644 --- a/drivers/char/snsc.c +++ b/drivers/char/snsc.c @@ -19,7 +19,7 @@ #include <linux/sched.h> #include <linux/device.h> #include <linux/poll.h> -#include <linux/module.h> +#include <linux/init.h> #include <linux/slab.h> #include <linux/mutex.h> #include <asm/sn/io.h> @@ -461,5 +461,4 @@ scdrv_init(void) } return 0; } - -module_init(scdrv_init); +device_initcall(scdrv_init); diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c index 74d9db05a5ad..068e920ecb68 100644 --- a/drivers/dma/mic_x100_dma.c +++ b/drivers/dma/mic_x100_dma.c @@ -193,8 +193,16 @@ static void mic_dma_prog_intr(struct mic_dma_chan *ch) static int mic_dma_do_dma(struct mic_dma_chan *ch, int flags, dma_addr_t src, dma_addr_t dst, size_t len) { - if (-ENOMEM == mic_dma_prog_memcpy_desc(ch, src, dst, len)) + if (len && -ENOMEM == mic_dma_prog_memcpy_desc(ch, src, dst, len)) { return -ENOMEM; + } else { + /* 3 is the maximum number of status descriptors */ + int ret = mic_dma_avail_desc_ring_space(ch, 3); + + if (ret < 0) + return ret; + } + /* Above mic_dma_prog_memcpy_desc() makes sure we have enough space */ if (flags & DMA_PREP_FENCE) { mic_dma_prep_status_desc(&ch->desc_ring[ch->head], 0, @@ -270,6 +278,33 @@ allocate_tx(struct mic_dma_chan *ch) return tx; } +/* Program a status descriptor with dst as address and value to be written */ +static struct dma_async_tx_descriptor * +mic_dma_prep_status_lock(struct dma_chan *ch, dma_addr_t dst, u64 src_val, + unsigned long flags) +{ + struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch); + int result; + + spin_lock(&mic_ch->prep_lock); + result = mic_dma_avail_desc_ring_space(mic_ch, 4); + if (result < 0) + goto error; + mic_dma_prep_status_desc(&mic_ch->desc_ring[mic_ch->head], src_val, dst, + false); + mic_dma_hw_ring_inc_head(mic_ch); + result = mic_dma_do_dma(mic_ch, flags, 0, 0, 0); + if (result < 0) + goto error; + + return allocate_tx(mic_ch); +error: + dev_err(mic_dma_ch_to_device(mic_ch), + "Error enqueueing dma status descriptor, error=%d\n", result); + spin_unlock(&mic_ch->prep_lock); + return NULL; +} + /* * Prepare a memcpy descriptor to be added to the ring. * Note that the temporary descriptor adds an extra overhead of copying the @@ -587,6 +622,8 @@ static int mic_dma_register_dma_device(struct mic_dma_device *mic_dma_dev, mic_dma_free_chan_resources; mic_dma_dev->dma_dev.device_tx_status = mic_dma_tx_status; mic_dma_dev->dma_dev.device_prep_dma_memcpy = mic_dma_prep_memcpy_lock; + mic_dma_dev->dma_dev.device_prep_dma_imm_data = + mic_dma_prep_status_lock; mic_dma_dev->dma_dev.device_prep_dma_interrupt = mic_dma_prep_interrupt_lock; mic_dma_dev->dma_dev.device_issue_pending = mic_dma_issue_pending; diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig new file mode 100644 index 000000000000..dfc1f1ec093b --- /dev/null +++ b/drivers/fpga/Kconfig @@ -0,0 +1,24 @@ +# +# FPGA framework configuration +# + +menu "FPGA Configuration Support" + +config FPGA + tristate "FPGA Configuration Framework" + help + Say Y here if you want support for configuring FPGAs from the + kernel. The FPGA framework adds a FPGA manager class and FPGA + manager drivers. + +if FPGA + +config FPGA_MGR_SOCFPGA + tristate "Altera SOCFPGA FPGA Manager" + depends on ARCH_SOCFPGA + help + FPGA manager driver support for Altera SOCFPGA. + +endif # FPGA + +endmenu diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile new file mode 100644 index 000000000000..ba6c5c597a4d --- /dev/null +++ b/drivers/fpga/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for the fpga framework and fpga manager drivers. +# + +# Core FPGA Manager Framework +obj-$(CONFIG_FPGA) += fpga-mgr.o + +# FPGA Manager Drivers +obj-$(CONFIG_FPGA_MGR_SOCFPGA) += socfpga.o diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c new file mode 100644 index 000000000000..25261636687c --- /dev/null +++ b/drivers/fpga/fpga-mgr.c @@ -0,0 +1,382 @@ +/* + * FPGA Manager Core + * + * Copyright (C) 2013-2015 Altera Corporation + * + * With code from the mailing list: + * Copyright (C) 2013 Xilinx, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/firmware.h> +#include <linux/fpga/fpga-mgr.h> +#include <linux/idr.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/mutex.h> +#include <linux/slab.h> + +static DEFINE_IDA(fpga_mgr_ida); +static struct class *fpga_mgr_class; + +/** + * fpga_mgr_buf_load - load fpga from image in buffer + * @mgr: fpga manager + * @flags: flags setting fpga confuration modes + * @buf: buffer contain fpga image + * @count: byte count of buf + * + * Step the low level fpga manager through the device-specific steps of getting + * an FPGA ready to be configured, writing the image to it, then doing whatever + * post-configuration steps necessary. + * + * Return: 0 on success, negative error code otherwise. + */ +int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, const char *buf, + size_t count) +{ + struct device *dev = &mgr->dev; + int ret; + + if (!mgr) + return -ENODEV; + + /* + * Call the low level driver's write_init function. This will do the + * device-specific things to get the FPGA into the state where it is + * ready to receive an FPGA image. + */ + mgr->state = FPGA_MGR_STATE_WRITE_INIT; + ret = mgr->mops->write_init(mgr, flags, buf, count); + if (ret) { + dev_err(dev, "Error preparing FPGA for writing\n"); + mgr->state = FPGA_MGR_STATE_WRITE_INIT_ERR; + return ret; + } + + /* + * Write the FPGA image to the FPGA. + */ + mgr->state = FPGA_MGR_STATE_WRITE; + ret = mgr->mops->write(mgr, buf, count); + if (ret) { + dev_err(dev, "Error while writing image data to FPGA\n"); + mgr->state = FPGA_MGR_STATE_WRITE_ERR; + return ret; + } + + /* + * After all the FPGA image has been written, do the device specific + * steps to finish and set the FPGA into operating mode. + */ + mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE; + ret = mgr->mops->write_complete(mgr, flags); + if (ret) { + dev_err(dev, "Error after writing image data to FPGA\n"); + mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE_ERR; + return ret; + } + mgr->state = FPGA_MGR_STATE_OPERATING; + + return 0; +} +EXPORT_SYMBOL_GPL(fpga_mgr_buf_load); + +/** + * fpga_mgr_firmware_load - request firmware and load to fpga + * @mgr: fpga manager + * @flags: flags setting fpga confuration modes + * @image_name: name of image file on the firmware search path + * + * Request an FPGA image using the firmware class, then write out to the FPGA. + * Update the state before each step to provide info on what step failed if + * there is a failure. + * + * Return: 0 on success, negative error code otherwise. + */ +int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags, + const char *image_name) +{ + struct device *dev = &mgr->dev; + const struct firmware *fw; + int ret; + + if (!mgr) + return -ENODEV; + + dev_info(dev, "writing %s to %s\n", image_name, mgr->name); + + mgr->state = FPGA_MGR_STATE_FIRMWARE_REQ; + + ret = request_firmware(&fw, image_name, dev); + if (ret) { + mgr->state = FPGA_MGR_STATE_FIRMWARE_REQ_ERR; + dev_err(dev, "Error requesting firmware %s\n", image_name); + return ret; + } + + ret = fpga_mgr_buf_load(mgr, flags, fw->data, fw->size); + if (ret) + return ret; + + release_firmware(fw); + + return 0; +} +EXPORT_SYMBOL_GPL(fpga_mgr_firmware_load); + +static const char * const state_str[] = { + [FPGA_MGR_STATE_UNKNOWN] = "unknown", + [FPGA_MGR_STATE_POWER_OFF] = "power off", + [FPGA_MGR_STATE_POWER_UP] = "power up", + [FPGA_MGR_STATE_RESET] = "reset", + + /* requesting FPGA image from firmware */ + [FPGA_MGR_STATE_FIRMWARE_REQ] = "firmware request", + [FPGA_MGR_STATE_FIRMWARE_REQ_ERR] = "firmware request error", + + /* Preparing FPGA to receive image */ + [FPGA_MGR_STATE_WRITE_INIT] = "write init", + [FPGA_MGR_STATE_WRITE_INIT_ERR] = "write init error", + + /* Writing image to FPGA */ + [FPGA_MGR_STATE_WRITE] = "write", + [FPGA_MGR_STATE_WRITE_ERR] = "write error", + + /* Finishing configuration after image has been written */ + [FPGA_MGR_STATE_WRITE_COMPLETE] = "write complete", + [FPGA_MGR_STATE_WRITE_COMPLETE_ERR] = "write complete error", + + /* FPGA reports to be in normal operating mode */ + [FPGA_MGR_STATE_OPERATING] = "operating", +}; + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fpga_manager *mgr = to_fpga_manager(dev); + + return sprintf(buf, "%s\n", mgr->name); +} + +static ssize_t state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fpga_manager *mgr = to_fpga_manager(dev); + + return sprintf(buf, "%s\n", state_str[mgr->state]); +} + +static DEVICE_ATTR_RO(name); +static DEVICE_ATTR_RO(state); + +static struct attribute *fpga_mgr_attrs[] = { + &dev_attr_name.attr, + &dev_attr_state.attr, + NULL, +}; +ATTRIBUTE_GROUPS(fpga_mgr); + +static int fpga_mgr_of_node_match(struct device *dev, const void *data) +{ + return dev->of_node == data; +} + +/** + * of_fpga_mgr_get - get an exclusive reference to a fpga mgr + * @node: device node + * + * Given a device node, get an exclusive reference to a fpga mgr. + * + * Return: fpga manager struct or IS_ERR() condition containing error code. + */ +struct fpga_manager *of_fpga_mgr_get(struct device_node *node) +{ + struct fpga_manager *mgr; + struct device *dev; + + if (!node) + return ERR_PTR(-EINVAL); + + dev = class_find_device(fpga_mgr_class, NULL, node, + fpga_mgr_of_node_match); + if (!dev) + return ERR_PTR(-ENODEV); + + mgr = to_fpga_manager(dev); + put_device(dev); + if (!mgr) + return ERR_PTR(-ENODEV); + + /* Get exclusive use of fpga manager */ + if (!mutex_trylock(&mgr->ref_mutex)) + return ERR_PTR(-EBUSY); + + if (!try_module_get(THIS_MODULE)) { + mutex_unlock(&mgr->ref_mutex); + return ERR_PTR(-ENODEV); + } + + return mgr; +} +EXPORT_SYMBOL_GPL(of_fpga_mgr_get); + +/** + * fpga_mgr_put - release a reference to a fpga manager + * @mgr: fpga manager structure + */ +void fpga_mgr_put(struct fpga_manager *mgr) +{ + if (mgr) { + module_put(THIS_MODULE); + mutex_unlock(&mgr->ref_mutex); + } +} +EXPORT_SYMBOL_GPL(fpga_mgr_put); + +/** + * fpga_mgr_register - register a low level fpga manager driver + * @dev: fpga manager device from pdev + * @name: fpga manager name + * @mops: pointer to structure of fpga manager ops + * @priv: fpga manager private data + * + * Return: 0 on success, negative error code otherwise. + */ +int fpga_mgr_register(struct device *dev, const char *name, + const struct fpga_manager_ops *mops, + void *priv) +{ + struct fpga_manager *mgr; + const char *dt_label; + int id, ret; + + if (!mops || !mops->write_init || !mops->write || + !mops->write_complete || !mops->state) { + dev_err(dev, "Attempt to register without fpga_manager_ops\n"); + return -EINVAL; + } + + if (!name || !strlen(name)) { + dev_err(dev, "Attempt to register with no name!\n"); + return -EINVAL; + } + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return -ENOMEM; + + id = ida_simple_get(&fpga_mgr_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + ret = id; + goto error_kfree; + } + + mutex_init(&mgr->ref_mutex); + + mgr->name = name; + mgr->mops = mops; + mgr->priv = priv; + + /* + * Initialize framework state by requesting low level driver read state + * from device. FPGA may be in reset mode or may have been programmed + * by bootloader or EEPROM. + */ + mgr->state = mgr->mops->state(mgr); + + device_initialize(&mgr->dev); + mgr->dev.class = fpga_mgr_class; + mgr->dev.parent = dev; + mgr->dev.of_node = dev->of_node; + mgr->dev.id = id; + dev_set_drvdata(dev, mgr); + + dt_label = of_get_property(mgr->dev.of_node, "label", NULL); + if (dt_label) + ret = dev_set_name(&mgr->dev, "%s", dt_label); + else + ret = dev_set_name(&mgr->dev, "fpga%d", id); + + ret = device_add(&mgr->dev); + if (ret) + goto error_device; + + dev_info(&mgr->dev, "%s registered\n", mgr->name); + + return 0; + +error_device: + ida_simple_remove(&fpga_mgr_ida, id); +error_kfree: + kfree(mgr); + + return ret; +} +EXPORT_SYMBOL_GPL(fpga_mgr_register); + +/** + * fpga_mgr_unregister - unregister a low level fpga manager driver + * @dev: fpga manager device from pdev + */ +void fpga_mgr_unregister(struct device *dev) +{ + struct fpga_manager *mgr = dev_get_drvdata(dev); + + dev_info(&mgr->dev, "%s %s\n", __func__, mgr->name); + + /* + * If the low level driver provides a method for putting fpga into + * a desired state upon unregister, do it. + */ + if (mgr->mops->fpga_remove) + mgr->mops->fpga_remove(mgr); + + device_unregister(&mgr->dev); +} +EXPORT_SYMBOL_GPL(fpga_mgr_unregister); + +static void fpga_mgr_dev_release(struct device *dev) +{ + struct fpga_manager *mgr = to_fpga_manager(dev); + + ida_simple_remove(&fpga_mgr_ida, mgr->dev.id); + kfree(mgr); +} + +static int __init fpga_mgr_class_init(void) +{ + pr_info("FPGA manager framework\n"); + + fpga_mgr_class = class_create(THIS_MODULE, "fpga_manager"); + if (IS_ERR(fpga_mgr_class)) + return PTR_ERR(fpga_mgr_class); + + fpga_mgr_class->dev_groups = fpga_mgr_groups; + fpga_mgr_class->dev_release = fpga_mgr_dev_release; + + return 0; +} + +static void __exit fpga_mgr_class_exit(void) +{ + class_destroy(fpga_mgr_class); + ida_destroy(&fpga_mgr_ida); +} + +MODULE_AUTHOR("Alan Tull <atull@opensource.altera.com>"); +MODULE_DESCRIPTION("FPGA manager framework"); +MODULE_LICENSE("GPL v2"); + +subsys_initcall(fpga_mgr_class_init); +module_exit(fpga_mgr_class_exit); diff --git a/drivers/fpga/socfpga.c b/drivers/fpga/socfpga.c new file mode 100644 index 000000000000..706b80d25266 --- /dev/null +++ b/drivers/fpga/socfpga.c @@ -0,0 +1,616 @@ +/* + * FPGA Manager Driver for Altera SOCFPGA + * + * Copyright (C) 2013-2015 Altera Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/fpga/fpga-mgr.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/pm.h> + +/* Register offsets */ +#define SOCFPGA_FPGMGR_STAT_OFST 0x0 +#define SOCFPGA_FPGMGR_CTL_OFST 0x4 +#define SOCFPGA_FPGMGR_DCLKCNT_OFST 0x8 +#define SOCFPGA_FPGMGR_DCLKSTAT_OFST 0xc +#define SOCFPGA_FPGMGR_GPIO_INTEN_OFST 0x830 +#define SOCFPGA_FPGMGR_GPIO_INTMSK_OFST 0x834 +#define SOCFPGA_FPGMGR_GPIO_INTTYPE_LEVEL_OFST 0x838 +#define SOCFPGA_FPGMGR_GPIO_INT_POL_OFST 0x83c +#define SOCFPGA_FPGMGR_GPIO_INTSTAT_OFST 0x840 +#define SOCFPGA_FPGMGR_GPIO_RAW_INTSTAT_OFST 0x844 +#define SOCFPGA_FPGMGR_GPIO_PORTA_EOI_OFST 0x84c +#define SOCFPGA_FPGMGR_GPIO_EXT_PORTA_OFST 0x850 + +/* Register bit defines */ +/* SOCFPGA_FPGMGR_STAT register mode field values */ +#define SOCFPGA_FPGMGR_STAT_POWER_UP 0x0 /*ramping*/ +#define SOCFPGA_FPGMGR_STAT_RESET 0x1 +#define SOCFPGA_FPGMGR_STAT_CFG 0x2 +#define SOCFPGA_FPGMGR_STAT_INIT 0x3 +#define SOCFPGA_FPGMGR_STAT_USER_MODE 0x4 +#define SOCFPGA_FPGMGR_STAT_UNKNOWN 0x5 +#define SOCFPGA_FPGMGR_STAT_STATE_MASK 0x7 +/* This is a flag value that doesn't really happen in this register field */ +#define SOCFPGA_FPGMGR_STAT_POWER_OFF 0x0 + +#define MSEL_PP16_FAST_NOAES_NODC 0x0 +#define MSEL_PP16_FAST_AES_NODC 0x1 +#define MSEL_PP16_FAST_AESOPT_DC 0x2 +#define MSEL_PP16_SLOW_NOAES_NODC 0x4 +#define MSEL_PP16_SLOW_AES_NODC 0x5 +#define MSEL_PP16_SLOW_AESOPT_DC 0x6 +#define MSEL_PP32_FAST_NOAES_NODC 0x8 +#define MSEL_PP32_FAST_AES_NODC 0x9 +#define MSEL_PP32_FAST_AESOPT_DC 0xa +#define MSEL_PP32_SLOW_NOAES_NODC 0xc +#define MSEL_PP32_SLOW_AES_NODC 0xd +#define MSEL_PP32_SLOW_AESOPT_DC 0xe +#define SOCFPGA_FPGMGR_STAT_MSEL_MASK 0x000000f8 +#define SOCFPGA_FPGMGR_STAT_MSEL_SHIFT 3 + +/* SOCFPGA_FPGMGR_CTL register */ +#define SOCFPGA_FPGMGR_CTL_EN 0x00000001 +#define SOCFPGA_FPGMGR_CTL_NCE 0x00000002 +#define SOCFPGA_FPGMGR_CTL_NCFGPULL 0x00000004 + +#define CDRATIO_X1 0x00000000 +#define CDRATIO_X2 0x00000040 +#define CDRATIO_X4 0x00000080 +#define CDRATIO_X8 0x000000c0 +#define SOCFPGA_FPGMGR_CTL_CDRATIO_MASK 0x000000c0 + +#define SOCFPGA_FPGMGR_CTL_AXICFGEN 0x00000100 + +#define CFGWDTH_16 0x00000000 +#define CFGWDTH_32 0x00000200 +#define SOCFPGA_FPGMGR_CTL_CFGWDTH_MASK 0x00000200 + +/* SOCFPGA_FPGMGR_DCLKSTAT register */ +#define SOCFPGA_FPGMGR_DCLKSTAT_DCNTDONE_E_DONE 0x1 + +/* SOCFPGA_FPGMGR_GPIO_* registers share the same bit positions */ +#define SOCFPGA_FPGMGR_MON_NSTATUS 0x0001 +#define SOCFPGA_FPGMGR_MON_CONF_DONE 0x0002 +#define SOCFPGA_FPGMGR_MON_INIT_DONE 0x0004 +#define SOCFPGA_FPGMGR_MON_CRC_ERROR 0x0008 +#define SOCFPGA_FPGMGR_MON_CVP_CONF_DONE 0x0010 +#define SOCFPGA_FPGMGR_MON_PR_READY 0x0020 +#define SOCFPGA_FPGMGR_MON_PR_ERROR 0x0040 +#define SOCFPGA_FPGMGR_MON_PR_DONE 0x0080 +#define SOCFPGA_FPGMGR_MON_NCONFIG_PIN 0x0100 +#define SOCFPGA_FPGMGR_MON_NSTATUS_PIN 0x0200 +#define SOCFPGA_FPGMGR_MON_CONF_DONE_PIN 0x0400 +#define SOCFPGA_FPGMGR_MON_FPGA_POWER_ON 0x0800 +#define SOCFPGA_FPGMGR_MON_STATUS_MASK 0x0fff + +#define SOCFPGA_FPGMGR_NUM_SUPPLIES 3 +#define SOCFPGA_RESUME_TIMEOUT 3 + +/* In power-up order. Reverse for power-down. */ +static const char *supply_names[SOCFPGA_FPGMGR_NUM_SUPPLIES] __maybe_unused = { + "FPGA-1.5V", + "FPGA-1.1V", + "FPGA-2.5V", +}; + +struct socfpga_fpga_priv { + void __iomem *fpga_base_addr; + void __iomem *fpga_data_addr; + struct completion status_complete; + int irq; +}; + +struct cfgmgr_mode { + /* Values to set in the CTRL register */ + u32 ctrl; + + /* flag that this table entry is a valid mode */ + bool valid; +}; + +/* For SOCFPGA_FPGMGR_STAT_MSEL field */ +static struct cfgmgr_mode cfgmgr_modes[] = { + [MSEL_PP16_FAST_NOAES_NODC] = { CFGWDTH_16 | CDRATIO_X1, 1 }, + [MSEL_PP16_FAST_AES_NODC] = { CFGWDTH_16 | CDRATIO_X2, 1 }, + [MSEL_PP16_FAST_AESOPT_DC] = { CFGWDTH_16 | CDRATIO_X4, 1 }, + [MSEL_PP16_SLOW_NOAES_NODC] = { CFGWDTH_16 | CDRATIO_X1, 1 }, + [MSEL_PP16_SLOW_AES_NODC] = { CFGWDTH_16 | CDRATIO_X2, 1 }, + [MSEL_PP16_SLOW_AESOPT_DC] = { CFGWDTH_16 | CDRATIO_X4, 1 }, + [MSEL_PP32_FAST_NOAES_NODC] = { CFGWDTH_32 | CDRATIO_X1, 1 }, + [MSEL_PP32_FAST_AES_NODC] = { CFGWDTH_32 | CDRATIO_X4, 1 }, + [MSEL_PP32_FAST_AESOPT_DC] = { CFGWDTH_32 | CDRATIO_X8, 1 }, + [MSEL_PP32_SLOW_NOAES_NODC] = { CFGWDTH_32 | CDRATIO_X1, 1 }, + [MSEL_PP32_SLOW_AES_NODC] = { CFGWDTH_32 | CDRATIO_X4, 1 }, + [MSEL_PP32_SLOW_AESOPT_DC] = { CFGWDTH_32 | CDRATIO_X8, 1 }, +}; + +static u32 socfpga_fpga_readl(struct socfpga_fpga_priv *priv, u32 reg_offset) +{ + return readl(priv->fpga_base_addr + reg_offset); +} + +static void socfpga_fpga_writel(struct socfpga_fpga_priv *priv, u32 reg_offset, + u32 value) +{ + writel(value, priv->fpga_base_addr + reg_offset); +} + +static u32 socfpga_fpga_raw_readl(struct socfpga_fpga_priv *priv, + u32 reg_offset) +{ + return __raw_readl(priv->fpga_base_addr + reg_offset); +} + +static void socfpga_fpga_raw_writel(struct socfpga_fpga_priv *priv, + u32 reg_offset, u32 value) +{ + __raw_writel(value, priv->fpga_base_addr + reg_offset); +} + +static void socfpga_fpga_data_writel(struct socfpga_fpga_priv *priv, u32 value) +{ + writel(value, priv->fpga_data_addr); +} + +static inline void socfpga_fpga_set_bitsl(struct socfpga_fpga_priv *priv, + u32 offset, u32 bits) +{ + u32 val; + + val = socfpga_fpga_readl(priv, offset); + val |= bits; + socfpga_fpga_writel(priv, offset, val); +} + +static inline void socfpga_fpga_clr_bitsl(struct socfpga_fpga_priv *priv, + u32 offset, u32 bits) +{ + u32 val; + + val = socfpga_fpga_readl(priv, offset); + val &= ~bits; + socfpga_fpga_writel(priv, offset, val); +} + +static u32 socfpga_fpga_mon_status_get(struct socfpga_fpga_priv *priv) +{ + return socfpga_fpga_readl(priv, SOCFPGA_FPGMGR_GPIO_EXT_PORTA_OFST) & + SOCFPGA_FPGMGR_MON_STATUS_MASK; +} + +static u32 socfpga_fpga_state_get(struct socfpga_fpga_priv *priv) +{ + u32 status = socfpga_fpga_mon_status_get(priv); + + if ((status & SOCFPGA_FPGMGR_MON_FPGA_POWER_ON) == 0) + return SOCFPGA_FPGMGR_STAT_POWER_OFF; + + return socfpga_fpga_readl(priv, SOCFPGA_FPGMGR_STAT_OFST) & + SOCFPGA_FPGMGR_STAT_STATE_MASK; +} + +static void socfpga_fpga_clear_done_status(struct socfpga_fpga_priv *priv) +{ + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_DCLKSTAT_OFST, + SOCFPGA_FPGMGR_DCLKSTAT_DCNTDONE_E_DONE); +} + +/* + * Set the DCLKCNT, wait for DCLKSTAT to report the count completed, and clear + * the complete status. + */ +static int socfpga_fpga_dclk_set_and_wait_clear(struct socfpga_fpga_priv *priv, + u32 count) +{ + int timeout = 2; + u32 done; + + /* Clear any existing DONE status. */ + if (socfpga_fpga_readl(priv, SOCFPGA_FPGMGR_DCLKSTAT_OFST)) + socfpga_fpga_clear_done_status(priv); + + /* Issue the DCLK count. */ + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_DCLKCNT_OFST, count); + + /* Poll DCLKSTAT to see if it completed in the timeout period. */ + do { + done = socfpga_fpga_readl(priv, SOCFPGA_FPGMGR_DCLKSTAT_OFST); + if (done == SOCFPGA_FPGMGR_DCLKSTAT_DCNTDONE_E_DONE) { + socfpga_fpga_clear_done_status(priv); + return 0; + } + udelay(1); + } while (timeout--); + + return -ETIMEDOUT; +} + +static int socfpga_fpga_wait_for_state(struct socfpga_fpga_priv *priv, + u32 state) +{ + int timeout = 2; + + /* + * HW doesn't support an interrupt for changes in state, so poll to see + * if it matches the requested state within the timeout period. + */ + do { + if ((socfpga_fpga_state_get(priv) & state) != 0) + return 0; + msleep(20); + } while (timeout--); + + return -ETIMEDOUT; +} + +static void socfpga_fpga_enable_irqs(struct socfpga_fpga_priv *priv, u32 irqs) +{ + /* set irqs to level sensitive */ + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_GPIO_INTTYPE_LEVEL_OFST, 0); + + /* set interrupt polarity */ + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_GPIO_INT_POL_OFST, irqs); + + /* clear irqs */ + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_GPIO_PORTA_EOI_OFST, irqs); + + /* unmask interrupts */ + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_GPIO_INTMSK_OFST, 0); + + /* enable interrupts */ + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_GPIO_INTEN_OFST, irqs); +} + +static void socfpga_fpga_disable_irqs(struct socfpga_fpga_priv *priv) +{ + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_GPIO_INTEN_OFST, 0); +} + +static irqreturn_t socfpga_fpga_isr(int irq, void *dev_id) +{ + struct socfpga_fpga_priv *priv = dev_id; + u32 irqs, st; + bool conf_done, nstatus; + + /* clear irqs */ + irqs = socfpga_fpga_raw_readl(priv, SOCFPGA_FPGMGR_GPIO_INTSTAT_OFST); + + socfpga_fpga_raw_writel(priv, SOCFPGA_FPGMGR_GPIO_PORTA_EOI_OFST, irqs); + + st = socfpga_fpga_raw_readl(priv, SOCFPGA_FPGMGR_GPIO_EXT_PORTA_OFST); + conf_done = (st & SOCFPGA_FPGMGR_MON_CONF_DONE) != 0; + nstatus = (st & SOCFPGA_FPGMGR_MON_NSTATUS) != 0; + + /* success */ + if (conf_done && nstatus) { + /* disable irqs */ + socfpga_fpga_raw_writel(priv, + SOCFPGA_FPGMGR_GPIO_INTEN_OFST, 0); + complete(&priv->status_complete); + } + + return IRQ_HANDLED; +} + +static int socfpga_fpga_wait_for_config_done(struct socfpga_fpga_priv *priv) +{ + int timeout, ret = 0; + + socfpga_fpga_disable_irqs(priv); + init_completion(&priv->status_complete); + socfpga_fpga_enable_irqs(priv, SOCFPGA_FPGMGR_MON_CONF_DONE); + + timeout = wait_for_completion_interruptible_timeout( + &priv->status_complete, + msecs_to_jiffies(10)); + if (timeout == 0) + ret = -ETIMEDOUT; + + socfpga_fpga_disable_irqs(priv); + return ret; +} + +static int socfpga_fpga_cfg_mode_get(struct socfpga_fpga_priv *priv) +{ + u32 msel; + + msel = socfpga_fpga_readl(priv, SOCFPGA_FPGMGR_STAT_OFST); + msel &= SOCFPGA_FPGMGR_STAT_MSEL_MASK; + msel >>= SOCFPGA_FPGMGR_STAT_MSEL_SHIFT; + + /* Check that this MSEL setting is supported */ + if ((msel >= ARRAY_SIZE(cfgmgr_modes)) || !cfgmgr_modes[msel].valid) + return -EINVAL; + + return msel; +} + +static int socfpga_fpga_cfg_mode_set(struct socfpga_fpga_priv *priv) +{ + u32 ctrl_reg; + int mode; + + /* get value from MSEL pins */ + mode = socfpga_fpga_cfg_mode_get(priv); + if (mode < 0) + return mode; + + /* Adjust CTRL for the CDRATIO */ + ctrl_reg = socfpga_fpga_readl(priv, SOCFPGA_FPGMGR_CTL_OFST); + ctrl_reg &= ~SOCFPGA_FPGMGR_CTL_CDRATIO_MASK; + ctrl_reg &= ~SOCFPGA_FPGMGR_CTL_CFGWDTH_MASK; + ctrl_reg |= cfgmgr_modes[mode].ctrl; + + /* Set NCE to 0. */ + ctrl_reg &= ~SOCFPGA_FPGMGR_CTL_NCE; + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_CTL_OFST, ctrl_reg); + + return 0; +} + +static int socfpga_fpga_reset(struct fpga_manager *mgr) +{ + struct socfpga_fpga_priv *priv = mgr->priv; + u32 ctrl_reg, status; + int ret; + + /* + * Step 1: + * - Set CTRL.CFGWDTH, CTRL.CDRATIO to match cfg mode + * - Set CTRL.NCE to 0 + */ + ret = socfpga_fpga_cfg_mode_set(priv); + if (ret) + return ret; + + /* Step 2: Set CTRL.EN to 1 */ + socfpga_fpga_set_bitsl(priv, SOCFPGA_FPGMGR_CTL_OFST, + SOCFPGA_FPGMGR_CTL_EN); + + /* Step 3: Set CTRL.NCONFIGPULL to 1 to put FPGA in reset */ + ctrl_reg = socfpga_fpga_readl(priv, SOCFPGA_FPGMGR_CTL_OFST); + ctrl_reg |= SOCFPGA_FPGMGR_CTL_NCFGPULL; + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_CTL_OFST, ctrl_reg); + + /* Step 4: Wait for STATUS.MODE to report FPGA is in reset phase */ + status = socfpga_fpga_wait_for_state(priv, SOCFPGA_FPGMGR_STAT_RESET); + + /* Step 5: Set CONTROL.NCONFIGPULL to 0 to release FPGA from reset */ + ctrl_reg &= ~SOCFPGA_FPGMGR_CTL_NCFGPULL; + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_CTL_OFST, ctrl_reg); + + /* Timeout waiting for reset */ + if (status) + return -ETIMEDOUT; + + return 0; +} + +/* + * Prepare the FPGA to receive the configuration data. + */ +static int socfpga_fpga_ops_configure_init(struct fpga_manager *mgr, u32 flags, + const char *buf, size_t count) +{ + struct socfpga_fpga_priv *priv = mgr->priv; + int ret; + + if (flags & FPGA_MGR_PARTIAL_RECONFIG) { + dev_err(&mgr->dev, "Partial reconfiguration not supported.\n"); + return -EINVAL; + } + /* Steps 1 - 5: Reset the FPGA */ + ret = socfpga_fpga_reset(mgr); + if (ret) + return ret; + + /* Step 6: Wait for FPGA to enter configuration phase */ + if (socfpga_fpga_wait_for_state(priv, SOCFPGA_FPGMGR_STAT_CFG)) + return -ETIMEDOUT; + + /* Step 7: Clear nSTATUS interrupt */ + socfpga_fpga_writel(priv, SOCFPGA_FPGMGR_GPIO_PORTA_EOI_OFST, + SOCFPGA_FPGMGR_MON_NSTATUS); + + /* Step 8: Set CTRL.AXICFGEN to 1 to enable transfer of config data */ + socfpga_fpga_set_bitsl(priv, SOCFPGA_FPGMGR_CTL_OFST, + SOCFPGA_FPGMGR_CTL_AXICFGEN); + + return 0; +} + +/* + * Step 9: write data to the FPGA data register + */ +static int socfpga_fpga_ops_configure_write(struct fpga_manager *mgr, + const char *buf, size_t count) +{ + struct socfpga_fpga_priv *priv = mgr->priv; + u32 *buffer_32 = (u32 *)buf; + size_t i = 0; + + if (count <= 0) + return -EINVAL; + + /* Write out the complete 32-bit chunks. */ + while (count >= sizeof(u32)) { + socfpga_fpga_data_writel(priv, buffer_32[i++]); + count -= sizeof(u32); + } + + /* Write out remaining non 32-bit chunks. */ + switch (count) { + case 3: + socfpga_fpga_data_writel(priv, buffer_32[i++] & 0x00ffffff); + break; + case 2: + socfpga_fpga_data_writel(priv, buffer_32[i++] & 0x0000ffff); + break; + case 1: + socfpga_fpga_data_writel(priv, buffer_32[i++] & 0x000000ff); + break; + case 0: + break; + default: + /* This will never happen. */ + return -EFAULT; + } + + return 0; +} + +static int socfpga_fpga_ops_configure_complete(struct fpga_manager *mgr, + u32 flags) +{ + struct socfpga_fpga_priv *priv = mgr->priv; + u32 status; + + /* + * Step 10: + * - Observe CONF_DONE and nSTATUS (active low) + * - if CONF_DONE = 1 and nSTATUS = 1, configuration was successful + * - if CONF_DONE = 0 and nSTATUS = 0, configuration failed + */ + status = socfpga_fpga_wait_for_config_done(priv); + if (status) + return status; + + /* Step 11: Clear CTRL.AXICFGEN to disable transfer of config data */ + socfpga_fpga_clr_bitsl(priv, SOCFPGA_FPGMGR_CTL_OFST, + SOCFPGA_FPGMGR_CTL_AXICFGEN); + + /* + * Step 12: + * - Write 4 to DCLKCNT + * - Wait for STATUS.DCNTDONE = 1 + * - Clear W1C bit in STATUS.DCNTDONE + */ + if (socfpga_fpga_dclk_set_and_wait_clear(priv, 4)) + return -ETIMEDOUT; + + /* Step 13: Wait for STATUS.MODE to report USER MODE */ + if (socfpga_fpga_wait_for_state(priv, SOCFPGA_FPGMGR_STAT_USER_MODE)) + return -ETIMEDOUT; + + /* Step 14: Set CTRL.EN to 0 */ + socfpga_fpga_clr_bitsl(priv, SOCFPGA_FPGMGR_CTL_OFST, + SOCFPGA_FPGMGR_CTL_EN); + + return 0; +} + +/* Translate state register values to FPGA framework state */ +static const enum fpga_mgr_states socfpga_state_to_framework_state[] = { + [SOCFPGA_FPGMGR_STAT_POWER_OFF] = FPGA_MGR_STATE_POWER_OFF, + [SOCFPGA_FPGMGR_STAT_RESET] = FPGA_MGR_STATE_RESET, + [SOCFPGA_FPGMGR_STAT_CFG] = FPGA_MGR_STATE_WRITE_INIT, + [SOCFPGA_FPGMGR_STAT_INIT] = FPGA_MGR_STATE_WRITE_INIT, + [SOCFPGA_FPGMGR_STAT_USER_MODE] = FPGA_MGR_STATE_OPERATING, + [SOCFPGA_FPGMGR_STAT_UNKNOWN] = FPGA_MGR_STATE_UNKNOWN, +}; + +static enum fpga_mgr_states socfpga_fpga_ops_state(struct fpga_manager *mgr) +{ + struct socfpga_fpga_priv *priv = mgr->priv; + enum fpga_mgr_states ret; + u32 state; + + state = socfpga_fpga_state_get(priv); + + if (state < ARRAY_SIZE(socfpga_state_to_framework_state)) + ret = socfpga_state_to_framework_state[state]; + else + ret = FPGA_MGR_STATE_UNKNOWN; + + return ret; +} + +static const struct fpga_manager_ops socfpga_fpga_ops = { + .state = socfpga_fpga_ops_state, + .write_init = socfpga_fpga_ops_configure_init, + .write = socfpga_fpga_ops_configure_write, + .write_complete = socfpga_fpga_ops_configure_complete, +}; + +static int socfpga_fpga_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct socfpga_fpga_priv *priv; + struct resource *res; + int ret; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->fpga_base_addr = devm_ioremap_resource(dev, res); + if (IS_ERR(priv->fpga_base_addr)) + return PTR_ERR(priv->fpga_base_addr); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + priv->fpga_data_addr = devm_ioremap_resource(dev, res); + if (IS_ERR(priv->fpga_data_addr)) + return PTR_ERR(priv->fpga_data_addr); + + priv->irq = platform_get_irq(pdev, 0); + if (priv->irq < 0) + return priv->irq; + + ret = devm_request_irq(dev, priv->irq, socfpga_fpga_isr, 0, + dev_name(dev), priv); + if (IS_ERR_VALUE(ret)) + return ret; + + return fpga_mgr_register(dev, "Altera SOCFPGA FPGA Manager", + &socfpga_fpga_ops, priv); +} + +static int socfpga_fpga_remove(struct platform_device *pdev) +{ + fpga_mgr_unregister(&pdev->dev); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id socfpga_fpga_of_match[] = { + { .compatible = "altr,socfpga-fpga-mgr", }, + {}, +}; + +MODULE_DEVICE_TABLE(of, socfpga_fpga_of_match); +#endif + +static struct platform_driver socfpga_fpga_driver = { + .probe = socfpga_fpga_probe, + .remove = socfpga_fpga_remove, + .driver = { + .name = "socfpga_fpga_manager", + .of_match_table = of_match_ptr(socfpga_fpga_of_match), + }, +}; + +module_platform_driver(socfpga_fpga_driver); + +MODULE_AUTHOR("Alan Tull <atull@opensource.altera.com>"); +MODULE_DESCRIPTION("Altera SOCFPGA FPGA Manager"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index bf2476ed9356..d630b7ece735 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -191,7 +191,8 @@ static void etm_set_prog(struct etm_drvdata *drvdata) isb(); if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 1)) { dev_err(drvdata->dev, - "timeout observed when probing at offset %#x\n", ETMSR); + "%s: timeout observed when probing at offset %#x\n", + __func__, ETMSR); } } @@ -209,7 +210,8 @@ static void etm_clr_prog(struct etm_drvdata *drvdata) isb(); if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 0)) { dev_err(drvdata->dev, - "timeout observed when probing at offset %#x\n", ETMSR); + "%s: timeout observed when probing at offset %#x\n", + __func__, ETMSR); } } @@ -313,14 +315,6 @@ static void etm_enable_hw(void *info) dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); } -static int etm_trace_id_simple(struct etm_drvdata *drvdata) -{ - if (!drvdata->enable) - return drvdata->traceid; - - return (etm_readl(drvdata, ETMTRACEIDR) & ETM_TRACEID_MASK); -} - static int etm_trace_id(struct coresight_device *csdev) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -1506,44 +1500,17 @@ static ssize_t timestamp_event_store(struct device *dev, } static DEVICE_ATTR_RW(timestamp_event); -static ssize_t status_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t cpu_show(struct device *dev, + struct device_attribute *attr, char *buf) { - int ret; - unsigned long flags; + int val; struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); - pm_runtime_get_sync(drvdata->dev); - spin_lock_irqsave(&drvdata->spinlock, flags); - - CS_UNLOCK(drvdata->base); - ret = sprintf(buf, - "ETMCCR: 0x%08x\n" - "ETMCCER: 0x%08x\n" - "ETMSCR: 0x%08x\n" - "ETMIDR: 0x%08x\n" - "ETMCR: 0x%08x\n" - "ETMTRACEIDR: 0x%08x\n" - "Enable event: 0x%08x\n" - "Enable start/stop: 0x%08x\n" - "Enable control: CR1 0x%08x CR2 0x%08x\n" - "CPU affinity: %d\n", - drvdata->etmccr, drvdata->etmccer, - etm_readl(drvdata, ETMSCR), etm_readl(drvdata, ETMIDR), - etm_readl(drvdata, ETMCR), etm_trace_id_simple(drvdata), - etm_readl(drvdata, ETMTEEVR), - etm_readl(drvdata, ETMTSSCR), - etm_readl(drvdata, ETMTECR1), - etm_readl(drvdata, ETMTECR2), - drvdata->cpu); - CS_LOCK(drvdata->base); - - spin_unlock_irqrestore(&drvdata->spinlock, flags); - pm_runtime_put(drvdata->dev); + val = drvdata->cpu; + return scnprintf(buf, PAGE_SIZE, "%d\n", val); - return ret; } -static DEVICE_ATTR_RO(status); +static DEVICE_ATTR_RO(cpu); static ssize_t traceid_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1619,11 +1586,61 @@ static struct attribute *coresight_etm_attrs[] = { &dev_attr_ctxid_mask.attr, &dev_attr_sync_freq.attr, &dev_attr_timestamp_event.attr, - &dev_attr_status.attr, &dev_attr_traceid.attr, + &dev_attr_cpu.attr, + NULL, +}; + +#define coresight_simple_func(name, offset) \ +static ssize_t name##_show(struct device *_dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct etm_drvdata *drvdata = dev_get_drvdata(_dev->parent); \ + return scnprintf(buf, PAGE_SIZE, "0x%x\n", \ + readl_relaxed(drvdata->base + offset)); \ +} \ +DEVICE_ATTR_RO(name) + +coresight_simple_func(etmccr, ETMCCR); +coresight_simple_func(etmccer, ETMCCER); +coresight_simple_func(etmscr, ETMSCR); +coresight_simple_func(etmidr, ETMIDR); +coresight_simple_func(etmcr, ETMCR); +coresight_simple_func(etmtraceidr, ETMTRACEIDR); +coresight_simple_func(etmteevr, ETMTEEVR); +coresight_simple_func(etmtssvr, ETMTSSCR); +coresight_simple_func(etmtecr1, ETMTECR1); +coresight_simple_func(etmtecr2, ETMTECR2); + +static struct attribute *coresight_etm_mgmt_attrs[] = { + &dev_attr_etmccr.attr, + &dev_attr_etmccer.attr, + &dev_attr_etmscr.attr, + &dev_attr_etmidr.attr, + &dev_attr_etmcr.attr, + &dev_attr_etmtraceidr.attr, + &dev_attr_etmteevr.attr, + &dev_attr_etmtssvr.attr, + &dev_attr_etmtecr1.attr, + &dev_attr_etmtecr2.attr, + NULL, +}; + +static const struct attribute_group coresight_etm_group = { + .attrs = coresight_etm_attrs, +}; + + +static const struct attribute_group coresight_etm_mgmt_group = { + .attrs = coresight_etm_mgmt_attrs, + .name = "mgmt", +}; + +static const struct attribute_group *coresight_etm_groups[] = { + &coresight_etm_group, + &coresight_etm_mgmt_group, NULL, }; -ATTRIBUTE_GROUPS(coresight_etm); static int etm_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 254a81a4e6f4..a6707642bb23 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -136,7 +136,9 @@ static void etm4_enable_hw(void *info) writel_relaxed(drvdata->cntr_val[i], drvdata->base + TRCCNTVRn(i)); } - for (i = 0; i < drvdata->nr_resource; i++) + + /* Resource selector pair 0 is always implemented and reserved */ + for (i = 2; i < drvdata->nr_resource * 2; i++) writel_relaxed(drvdata->res_ctrl[i], drvdata->base + TRCRSCTLRn(i)); @@ -489,8 +491,9 @@ static ssize_t reset_store(struct device *dev, drvdata->cntr_val[i] = 0x0; } - drvdata->res_idx = 0x0; - for (i = 0; i < drvdata->nr_resource; i++) + /* Resource selector pair 0 is always implemented and reserved */ + drvdata->res_idx = 0x2; + for (i = 2; i < drvdata->nr_resource * 2; i++) drvdata->res_ctrl[i] = 0x0; for (i = 0; i < drvdata->nr_ss_cmp; i++) { @@ -1732,7 +1735,7 @@ static ssize_t res_idx_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; /* Resource selector pair 0 is always implemented and reserved */ - if ((val == 0) || (val >= drvdata->nr_resource)) + if (val < 2 || val >= drvdata->nr_resource * 2) return -EINVAL; /* @@ -2416,8 +2419,13 @@ static void etm4_init_arch_data(void *info) drvdata->nr_addr_cmp = BMVAL(etmidr4, 0, 3); /* NUMPC, bits[15:12] number of PE comparator inputs for tracing */ drvdata->nr_pe_cmp = BMVAL(etmidr4, 12, 15); - /* NUMRSPAIR, bits[19:16] the number of resource pairs for tracing */ - drvdata->nr_resource = BMVAL(etmidr4, 16, 19); + /* + * NUMRSPAIR, bits[19:16] + * The number of resource pairs conveyed by the HW starts at 0, i.e a + * value of 0x0 indicate 1 resource pair, 0x1 indicate two and so on. + * As such add 1 to the value of NUMRSPAIR for a better representation. + */ + drvdata->nr_resource = BMVAL(etmidr4, 16, 19) + 1; /* * NUMSSCC, bits[23:20] the number of single-shot * comparator control for tracing @@ -2504,6 +2512,8 @@ static void etm4_init_default_data(struct etmv4_drvdata *drvdata) drvdata->cntr_val[i] = 0x0; } + /* Resource selector pair 0 is always implemented and reserved */ + drvdata->res_idx = 0x2; for (i = 2; i < drvdata->nr_resource * 2; i++) drvdata->res_ctrl[i] = 0x0; diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index 894531d315b8..e25492137d8b 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -240,6 +240,11 @@ static int coresight_enable_path(struct list_head *path) int ret = 0; struct coresight_device *cd; + /* + * At this point we have a full @path, from source to sink. The + * sink is the first entry and the source the last one. Go through + * all the components and enable them one by one. + */ list_for_each_entry(cd, path, path_link) { if (cd == list_first_entry(path, struct coresight_device, path_link)) { diff --git a/drivers/hwtracing/intel_th/Kconfig b/drivers/hwtracing/intel_th/Kconfig new file mode 100644 index 000000000000..b7a9073d968b --- /dev/null +++ b/drivers/hwtracing/intel_th/Kconfig @@ -0,0 +1,72 @@ +config INTEL_TH + tristate "Intel(R) Trace Hub controller" + help + Intel(R) Trace Hub (TH) is a set of hardware blocks (subdevices) that + produce, switch and output trace data from multiple hardware and + software sources over several types of trace output ports encoded + in System Trace Protocol (MIPI STPv2) and is intended to perform + full system debugging. + + This option enables intel_th bus and common code used by TH + subdevices to interact with each other and hardware and for + platform glue layers to drive Intel TH devices. + + Say Y here to enable Intel(R) Trace Hub controller support. + +if INTEL_TH + +config INTEL_TH_PCI + tristate "Intel(R) Trace Hub PCI controller" + depends on PCI + help + Intel(R) Trace Hub may exist as a PCI device. This option enables + support glue layer for PCI-based Intel TH. + + Say Y here to enable PCI Intel TH support. + +config INTEL_TH_GTH + tristate "Intel(R) Trace Hub Global Trace Hub" + help + Global Trace Hub (GTH) is the central component of the + Intel TH infrastructure and acts as a switch for source + and output devices. This driver is required for other + Intel TH subdevices to initialize. + + Say Y here to enable GTH subdevice of Intel(R) Trace Hub. + +config INTEL_TH_STH + tristate "Intel(R) Trace Hub Software Trace Hub support" + depends on STM + help + Software Trace Hub (STH) enables trace data from software + trace sources to be sent out via Intel(R) Trace Hub. It + uses stm class device to interface with its sources. + + Say Y here to enable STH subdevice of Intel(R) Trace Hub. + +config INTEL_TH_MSU + tristate "Intel(R) Trace Hub Memory Storage Unit" + help + Memory Storage Unit (MSU) trace output device enables + storing STP traces to system memory. It supports single + and multiblock modes of operation and provides read() + and mmap() access to the collected data. + + Say Y here to enable MSU output device for Intel TH. + +config INTEL_TH_PTI + tristate "Intel(R) Trace Hub PTI output" + help + Parallel Trace Interface unit (PTI) is a trace output device + of Intel TH architecture that facilitates STP trace output via + a PTI port. + + Say Y to enable PTI output of Intel TH data. + +config INTEL_TH_DEBUG + bool "Intel(R) Trace Hub debugging" + depends on DEBUG_FS + help + Say Y here to enable debugging. + +endif diff --git a/drivers/hwtracing/intel_th/Makefile b/drivers/hwtracing/intel_th/Makefile new file mode 100644 index 000000000000..81d42fe918f7 --- /dev/null +++ b/drivers/hwtracing/intel_th/Makefile @@ -0,0 +1,18 @@ +obj-$(CONFIG_INTEL_TH) += intel_th.o +intel_th-y := core.o +intel_th-$(CONFIG_INTEL_TH_DEBUG) += debug.o + +obj-$(CONFIG_INTEL_TH_PCI) += intel_th_pci.o +intel_th_pci-y := pci.o + +obj-$(CONFIG_INTEL_TH_GTH) += intel_th_gth.o +intel_th_gth-y := gth.o + +obj-$(CONFIG_INTEL_TH_STH) += intel_th_sth.o +intel_th_sth-y := sth.o + +obj-$(CONFIG_INTEL_TH_MSU) += intel_th_msu.o +intel_th_msu-y := msu.o + +obj-$(CONFIG_INTEL_TH_PTI) += intel_th_pti.o +intel_th_pti-y := pti.o diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c new file mode 100644 index 000000000000..165d3001c301 --- /dev/null +++ b/drivers/hwtracing/intel_th/core.c @@ -0,0 +1,692 @@ +/* + * Intel(R) Trace Hub driver core + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/sysfs.h> +#include <linux/kdev_t.h> +#include <linux/debugfs.h> +#include <linux/idr.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> + +#include "intel_th.h" +#include "debug.h" + +static DEFINE_IDA(intel_th_ida); + +static int intel_th_match(struct device *dev, struct device_driver *driver) +{ + struct intel_th_driver *thdrv = to_intel_th_driver(driver); + struct intel_th_device *thdev = to_intel_th_device(dev); + + if (thdev->type == INTEL_TH_SWITCH && + (!thdrv->enable || !thdrv->disable)) + return 0; + + return !strcmp(thdev->name, driver->name); +} + +static int intel_th_child_remove(struct device *dev, void *data) +{ + device_release_driver(dev); + + return 0; +} + +static int intel_th_probe(struct device *dev) +{ + struct intel_th_driver *thdrv = to_intel_th_driver(dev->driver); + struct intel_th_device *thdev = to_intel_th_device(dev); + struct intel_th_driver *hubdrv; + struct intel_th_device *hub = NULL; + int ret; + + if (thdev->type == INTEL_TH_SWITCH) + hub = thdev; + else if (dev->parent) + hub = to_intel_th_device(dev->parent); + + if (!hub || !hub->dev.driver) + return -EPROBE_DEFER; + + hubdrv = to_intel_th_driver(hub->dev.driver); + + ret = thdrv->probe(to_intel_th_device(dev)); + if (ret) + return ret; + + if (thdev->type == INTEL_TH_OUTPUT && + !intel_th_output_assigned(thdev)) + ret = hubdrv->assign(hub, thdev); + + return ret; +} + +static int intel_th_remove(struct device *dev) +{ + struct intel_th_driver *thdrv = to_intel_th_driver(dev->driver); + struct intel_th_device *thdev = to_intel_th_device(dev); + struct intel_th_device *hub = to_intel_th_device(dev->parent); + int err; + + if (thdev->type == INTEL_TH_SWITCH) { + err = device_for_each_child(dev, thdev, intel_th_child_remove); + if (err) + return err; + } + + thdrv->remove(thdev); + + if (intel_th_output_assigned(thdev)) { + struct intel_th_driver *hubdrv = + to_intel_th_driver(dev->parent->driver); + + if (hub->dev.driver) + hubdrv->unassign(hub, thdev); + } + + return 0; +} + +static struct bus_type intel_th_bus = { + .name = "intel_th", + .dev_attrs = NULL, + .match = intel_th_match, + .probe = intel_th_probe, + .remove = intel_th_remove, +}; + +static void intel_th_device_free(struct intel_th_device *thdev); + +static void intel_th_device_release(struct device *dev) +{ + intel_th_device_free(to_intel_th_device(dev)); +} + +static struct device_type intel_th_source_device_type = { + .name = "intel_th_source_device", + .release = intel_th_device_release, +}; + +static char *intel_th_output_devnode(struct device *dev, umode_t *mode, + kuid_t *uid, kgid_t *gid) +{ + struct intel_th_device *thdev = to_intel_th_device(dev); + char *node; + + if (thdev->id >= 0) + node = kasprintf(GFP_KERNEL, "intel_th%d/%s%d", 0, thdev->name, + thdev->id); + else + node = kasprintf(GFP_KERNEL, "intel_th%d/%s", 0, thdev->name); + + return node; +} + +static ssize_t port_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct intel_th_device *thdev = to_intel_th_device(dev); + + if (thdev->output.port >= 0) + return scnprintf(buf, PAGE_SIZE, "%u\n", thdev->output.port); + + return scnprintf(buf, PAGE_SIZE, "unassigned\n"); +} + +static DEVICE_ATTR_RO(port); + +static int intel_th_output_activate(struct intel_th_device *thdev) +{ + struct intel_th_driver *thdrv = to_intel_th_driver(thdev->dev.driver); + + if (thdrv->activate) + return thdrv->activate(thdev); + + intel_th_trace_enable(thdev); + + return 0; +} + +static void intel_th_output_deactivate(struct intel_th_device *thdev) +{ + struct intel_th_driver *thdrv = to_intel_th_driver(thdev->dev.driver); + + if (thdrv->deactivate) + thdrv->deactivate(thdev); + else + intel_th_trace_disable(thdev); +} + +static ssize_t active_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct intel_th_device *thdev = to_intel_th_device(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", thdev->output.active); +} + +static ssize_t active_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct intel_th_device *thdev = to_intel_th_device(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (!!val != thdev->output.active) { + if (val) + ret = intel_th_output_activate(thdev); + else + intel_th_output_deactivate(thdev); + } + + return ret ? ret : size; +} + +static DEVICE_ATTR_RW(active); + +static struct attribute *intel_th_output_attrs[] = { + &dev_attr_port.attr, + &dev_attr_active.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(intel_th_output); + +static struct device_type intel_th_output_device_type = { + .name = "intel_th_output_device", + .groups = intel_th_output_groups, + .release = intel_th_device_release, + .devnode = intel_th_output_devnode, +}; + +static struct device_type intel_th_switch_device_type = { + .name = "intel_th_switch_device", + .release = intel_th_device_release, +}; + +static struct device_type *intel_th_device_type[] = { + [INTEL_TH_SOURCE] = &intel_th_source_device_type, + [INTEL_TH_OUTPUT] = &intel_th_output_device_type, + [INTEL_TH_SWITCH] = &intel_th_switch_device_type, +}; + +int intel_th_driver_register(struct intel_th_driver *thdrv) +{ + if (!thdrv->probe || !thdrv->remove) + return -EINVAL; + + thdrv->driver.bus = &intel_th_bus; + + return driver_register(&thdrv->driver); +} +EXPORT_SYMBOL_GPL(intel_th_driver_register); + +void intel_th_driver_unregister(struct intel_th_driver *thdrv) +{ + driver_unregister(&thdrv->driver); +} +EXPORT_SYMBOL_GPL(intel_th_driver_unregister); + +static struct intel_th_device * +intel_th_device_alloc(struct intel_th *th, unsigned int type, const char *name, + int id) +{ + struct device *parent; + struct intel_th_device *thdev; + + if (type == INTEL_TH_SWITCH) + parent = th->dev; + else + parent = &th->hub->dev; + + thdev = kzalloc(sizeof(*thdev) + strlen(name) + 1, GFP_KERNEL); + if (!thdev) + return NULL; + + thdev->id = id; + thdev->type = type; + + strcpy(thdev->name, name); + device_initialize(&thdev->dev); + thdev->dev.bus = &intel_th_bus; + thdev->dev.type = intel_th_device_type[type]; + thdev->dev.parent = parent; + thdev->dev.dma_mask = parent->dma_mask; + thdev->dev.dma_parms = parent->dma_parms; + dma_set_coherent_mask(&thdev->dev, parent->coherent_dma_mask); + if (id >= 0) + dev_set_name(&thdev->dev, "%d-%s%d", th->id, name, id); + else + dev_set_name(&thdev->dev, "%d-%s", th->id, name); + + return thdev; +} + +static int intel_th_device_add_resources(struct intel_th_device *thdev, + struct resource *res, int nres) +{ + struct resource *r; + + r = kmemdup(res, sizeof(*res) * nres, GFP_KERNEL); + if (!r) + return -ENOMEM; + + thdev->resource = r; + thdev->num_resources = nres; + + return 0; +} + +static void intel_th_device_remove(struct intel_th_device *thdev) +{ + device_del(&thdev->dev); + put_device(&thdev->dev); +} + +static void intel_th_device_free(struct intel_th_device *thdev) +{ + kfree(thdev->resource); + kfree(thdev); +} + +/* + * Intel(R) Trace Hub subdevices + */ +static struct intel_th_subdevice { + const char *name; + struct resource res[3]; + unsigned nres; + unsigned type; + unsigned otype; + int id; +} intel_th_subdevices[TH_SUBDEVICE_MAX] = { + { + .nres = 1, + .res = { + { + .start = REG_GTH_OFFSET, + .end = REG_GTH_OFFSET + REG_GTH_LENGTH - 1, + .flags = IORESOURCE_MEM, + }, + }, + .name = "gth", + .type = INTEL_TH_SWITCH, + .id = -1, + }, + { + .nres = 2, + .res = { + { + .start = REG_MSU_OFFSET, + .end = REG_MSU_OFFSET + REG_MSU_LENGTH - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = BUF_MSU_OFFSET, + .end = BUF_MSU_OFFSET + BUF_MSU_LENGTH - 1, + .flags = IORESOURCE_MEM, + }, + }, + .name = "msc", + .id = 0, + .type = INTEL_TH_OUTPUT, + .otype = GTH_MSU, + }, + { + .nres = 2, + .res = { + { + .start = REG_MSU_OFFSET, + .end = REG_MSU_OFFSET + REG_MSU_LENGTH - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = BUF_MSU_OFFSET, + .end = BUF_MSU_OFFSET + BUF_MSU_LENGTH - 1, + .flags = IORESOURCE_MEM, + }, + }, + .name = "msc", + .id = 1, + .type = INTEL_TH_OUTPUT, + .otype = GTH_MSU, + }, + { + .nres = 2, + .res = { + { + .start = REG_STH_OFFSET, + .end = REG_STH_OFFSET + REG_STH_LENGTH - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = TH_MMIO_SW, + .end = 0, + .flags = IORESOURCE_MEM, + }, + }, + .id = -1, + .name = "sth", + .type = INTEL_TH_SOURCE, + }, + { + .nres = 1, + .res = { + { + .start = REG_PTI_OFFSET, + .end = REG_PTI_OFFSET + REG_PTI_LENGTH - 1, + .flags = IORESOURCE_MEM, + }, + }, + .id = -1, + .name = "pti", + .type = INTEL_TH_OUTPUT, + .otype = GTH_PTI, + }, + { + .nres = 1, + .res = { + { + .start = REG_DCIH_OFFSET, + .end = REG_DCIH_OFFSET + REG_DCIH_LENGTH - 1, + .flags = IORESOURCE_MEM, + }, + }, + .id = -1, + .name = "dcih", + .type = INTEL_TH_OUTPUT, + }, +}; + +static int intel_th_populate(struct intel_th *th, struct resource *devres, + unsigned int ndevres, int irq) +{ + struct resource res[3]; + unsigned int req = 0; + int i, err; + + /* create devices for each intel_th_subdevice */ + for (i = 0; i < ARRAY_SIZE(intel_th_subdevices); i++) { + struct intel_th_subdevice *subdev = &intel_th_subdevices[i]; + struct intel_th_device *thdev; + int r; + + thdev = intel_th_device_alloc(th, subdev->type, subdev->name, + subdev->id); + if (!thdev) { + err = -ENOMEM; + goto kill_subdevs; + } + + memcpy(res, subdev->res, + sizeof(struct resource) * subdev->nres); + + for (r = 0; r < subdev->nres; r++) { + int bar = TH_MMIO_CONFIG; + + /* + * Take .end == 0 to mean 'take the whole bar', + * .start then tells us which bar it is. Default to + * TH_MMIO_CONFIG. + */ + if (!res[r].end && res[r].flags == IORESOURCE_MEM) { + bar = res[r].start; + res[r].start = 0; + res[r].end = resource_size(&devres[bar]) - 1; + } + + if (res[r].flags & IORESOURCE_MEM) { + res[r].start += devres[bar].start; + res[r].end += devres[bar].start; + + dev_dbg(th->dev, "%s:%d @ %pR\n", + subdev->name, r, &res[r]); + } else if (res[r].flags & IORESOURCE_IRQ) { + res[r].start = irq; + } + } + + err = intel_th_device_add_resources(thdev, res, subdev->nres); + if (err) { + put_device(&thdev->dev); + goto kill_subdevs; + } + + if (subdev->type == INTEL_TH_OUTPUT) { + thdev->dev.devt = MKDEV(th->major, i); + thdev->output.type = subdev->otype; + thdev->output.port = -1; + } + + err = device_add(&thdev->dev); + if (err) { + put_device(&thdev->dev); + goto kill_subdevs; + } + + /* need switch driver to be loaded to enumerate the rest */ + if (subdev->type == INTEL_TH_SWITCH && !req) { + th->hub = thdev; + err = request_module("intel_th_%s", subdev->name); + if (!err) + req++; + } + + th->thdev[i] = thdev; + } + + return 0; + +kill_subdevs: + for (i-- ; i >= 0; i--) + intel_th_device_remove(th->thdev[i]); + + return err; +} + +static int match_devt(struct device *dev, void *data) +{ + dev_t devt = (dev_t)(unsigned long)data; + + return dev->devt == devt; +} + +static int intel_th_output_open(struct inode *inode, struct file *file) +{ + const struct file_operations *fops; + struct intel_th_driver *thdrv; + struct device *dev; + int err; + + dev = bus_find_device(&intel_th_bus, NULL, + (void *)(unsigned long)inode->i_rdev, + match_devt); + if (!dev || !dev->driver) + return -ENODEV; + + thdrv = to_intel_th_driver(dev->driver); + fops = fops_get(thdrv->fops); + if (!fops) + return -ENODEV; + + replace_fops(file, fops); + + file->private_data = to_intel_th_device(dev); + + if (file->f_op->open) { + err = file->f_op->open(inode, file); + return err; + } + + return 0; +} + +static const struct file_operations intel_th_output_fops = { + .open = intel_th_output_open, + .llseek = noop_llseek, +}; + +/** + * intel_th_alloc() - allocate a new Intel TH device and its subdevices + * @dev: parent device + * @devres: parent's resources + * @ndevres: number of resources + * @irq: irq number + */ +struct intel_th * +intel_th_alloc(struct device *dev, struct resource *devres, + unsigned int ndevres, int irq) +{ + struct intel_th *th; + int err; + + th = kzalloc(sizeof(*th), GFP_KERNEL); + if (!th) + return ERR_PTR(-ENOMEM); + + th->id = ida_simple_get(&intel_th_ida, 0, 0, GFP_KERNEL); + if (th->id < 0) { + err = th->id; + goto err_alloc; + } + + th->major = __register_chrdev(0, 0, TH_POSSIBLE_OUTPUTS, + "intel_th/output", &intel_th_output_fops); + if (th->major < 0) { + err = th->major; + goto err_ida; + } + th->dev = dev; + + err = intel_th_populate(th, devres, ndevres, irq); + if (err) + goto err_chrdev; + + return th; + +err_chrdev: + __unregister_chrdev(th->major, 0, TH_POSSIBLE_OUTPUTS, + "intel_th/output"); + +err_ida: + ida_simple_remove(&intel_th_ida, th->id); + +err_alloc: + kfree(th); + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(intel_th_alloc); + +void intel_th_free(struct intel_th *th) +{ + int i; + + for (i = 0; i < TH_SUBDEVICE_MAX; i++) + if (th->thdev[i] != th->hub) + intel_th_device_remove(th->thdev[i]); + + intel_th_device_remove(th->hub); + + __unregister_chrdev(th->major, 0, TH_POSSIBLE_OUTPUTS, + "intel_th/output"); + + ida_simple_remove(&intel_th_ida, th->id); + + kfree(th); +} +EXPORT_SYMBOL_GPL(intel_th_free); + +/** + * intel_th_trace_enable() - enable tracing for an output device + * @thdev: output device that requests tracing be enabled + */ +int intel_th_trace_enable(struct intel_th_device *thdev) +{ + struct intel_th_device *hub = to_intel_th_device(thdev->dev.parent); + struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver); + + if (WARN_ON_ONCE(hub->type != INTEL_TH_SWITCH)) + return -EINVAL; + + if (WARN_ON_ONCE(thdev->type != INTEL_TH_OUTPUT)) + return -EINVAL; + + hubdrv->enable(hub, &thdev->output); + + return 0; +} +EXPORT_SYMBOL_GPL(intel_th_trace_enable); + +/** + * intel_th_trace_disable() - disable tracing for an output device + * @thdev: output device that requests tracing be disabled + */ +int intel_th_trace_disable(struct intel_th_device *thdev) +{ + struct intel_th_device *hub = to_intel_th_device(thdev->dev.parent); + struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver); + + WARN_ON_ONCE(hub->type != INTEL_TH_SWITCH); + if (WARN_ON_ONCE(thdev->type != INTEL_TH_OUTPUT)) + return -EINVAL; + + hubdrv->disable(hub, &thdev->output); + + return 0; +} +EXPORT_SYMBOL_GPL(intel_th_trace_disable); + +int intel_th_set_output(struct intel_th_device *thdev, + unsigned int master) +{ + struct intel_th_device *hub = to_intel_th_device(thdev->dev.parent); + struct intel_th_driver *hubdrv = to_intel_th_driver(hub->dev.driver); + + if (!hubdrv->set_output) + return -ENOTSUPP; + + return hubdrv->set_output(hub, master); +} +EXPORT_SYMBOL_GPL(intel_th_set_output); + +static int __init intel_th_init(void) +{ + intel_th_debug_init(); + + return bus_register(&intel_th_bus); +} +subsys_initcall(intel_th_init); + +static void __exit intel_th_exit(void) +{ + intel_th_debug_done(); + + bus_unregister(&intel_th_bus); +} +module_exit(intel_th_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Intel(R) Trace Hub controller driver"); +MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>"); diff --git a/drivers/hwtracing/intel_th/debug.c b/drivers/hwtracing/intel_th/debug.c new file mode 100644 index 000000000000..788a1f0a97ad --- /dev/null +++ b/drivers/hwtracing/intel_th/debug.c @@ -0,0 +1,36 @@ +/* + * Intel(R) Trace Hub driver debugging + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/types.h> +#include <linux/device.h> +#include <linux/debugfs.h> + +#include "intel_th.h" +#include "debug.h" + +struct dentry *intel_th_dbg; + +void intel_th_debug_init(void) +{ + intel_th_dbg = debugfs_create_dir("intel_th", NULL); + if (IS_ERR(intel_th_dbg)) + intel_th_dbg = NULL; +} + +void intel_th_debug_done(void) +{ + debugfs_remove(intel_th_dbg); + intel_th_dbg = NULL; +} diff --git a/drivers/hwtracing/intel_th/debug.h b/drivers/hwtracing/intel_th/debug.h new file mode 100644 index 000000000000..88311bad3ba4 --- /dev/null +++ b/drivers/hwtracing/intel_th/debug.h @@ -0,0 +1,34 @@ +/* + * Intel(R) Trace Hub driver debugging + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_TH_DEBUG_H__ +#define __INTEL_TH_DEBUG_H__ + +#ifdef CONFIG_INTEL_TH_DEBUG +extern struct dentry *intel_th_dbg; + +void intel_th_debug_init(void); +void intel_th_debug_done(void); +#else +static inline void intel_th_debug_init(void) +{ +} + +static inline void intel_th_debug_done(void) +{ +} +#endif + +#endif /* __INTEL_TH_DEBUG_H__ */ diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c new file mode 100644 index 000000000000..673d272ee400 --- /dev/null +++ b/drivers/hwtracing/intel_th/gth.c @@ -0,0 +1,706 @@ +/* + * Intel(R) Trace Hub Global Trace Hub + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/bitmap.h> + +#include "intel_th.h" +#include "gth.h" + +struct gth_device; + +/** + * struct gth_output - GTH view on an output port + * @gth: backlink to the GTH device + * @output: link to output device's output descriptor + * @index: output port number + * @port_type: one of GTH_* port type values + * @master: bitmap of masters configured for this output + */ +struct gth_output { + struct gth_device *gth; + struct intel_th_output *output; + unsigned int index; + unsigned int port_type; + DECLARE_BITMAP(master, TH_CONFIGURABLE_MASTERS + 1); +}; + +/** + * struct gth_device - GTH device + * @dev: driver core's device + * @base: register window base address + * @output_group: attributes describing output ports + * @master_group: attributes describing master assignments + * @output: output ports + * @master: master/output port assignments + * @gth_lock: serializes accesses to GTH bits + */ +struct gth_device { + struct device *dev; + void __iomem *base; + + struct attribute_group output_group; + struct attribute_group master_group; + struct gth_output output[TH_POSSIBLE_OUTPUTS]; + signed char master[TH_CONFIGURABLE_MASTERS + 1]; + spinlock_t gth_lock; +}; + +static void gth_output_set(struct gth_device *gth, int port, + unsigned int config) +{ + unsigned long reg = port & 4 ? REG_GTH_GTHOPT1 : REG_GTH_GTHOPT0; + u32 val; + int shift = (port & 3) * 8; + + val = ioread32(gth->base + reg); + val &= ~(0xff << shift); + val |= config << shift; + iowrite32(val, gth->base + reg); +} + +static unsigned int gth_output_get(struct gth_device *gth, int port) +{ + unsigned long reg = port & 4 ? REG_GTH_GTHOPT1 : REG_GTH_GTHOPT0; + u32 val; + int shift = (port & 3) * 8; + + val = ioread32(gth->base + reg); + val &= 0xff << shift; + val >>= shift; + + return val; +} + +static void gth_smcfreq_set(struct gth_device *gth, int port, + unsigned int freq) +{ + unsigned long reg = REG_GTH_SMCR0 + ((port / 2) * 4); + int shift = (port & 1) * 16; + u32 val; + + val = ioread32(gth->base + reg); + val &= ~(0xffff << shift); + val |= freq << shift; + iowrite32(val, gth->base + reg); +} + +static unsigned int gth_smcfreq_get(struct gth_device *gth, int port) +{ + unsigned long reg = REG_GTH_SMCR0 + ((port / 2) * 4); + int shift = (port & 1) * 16; + u32 val; + + val = ioread32(gth->base + reg); + val &= 0xffff << shift; + val >>= shift; + + return val; +} + +/* + * "masters" attribute group + */ + +struct master_attribute { + struct device_attribute attr; + struct gth_device *gth; + unsigned int master; +}; + +static void +gth_master_set(struct gth_device *gth, unsigned int master, int port) +{ + unsigned int reg = REG_GTH_SWDEST0 + ((master >> 1) & ~3u); + unsigned int shift = (master & 0x7) * 4; + u32 val; + + if (master >= 256) { + reg = REG_GTH_GSWTDEST; + shift = 0; + } + + val = ioread32(gth->base + reg); + val &= ~(0xf << shift); + if (port >= 0) + val |= (0x8 | port) << shift; + iowrite32(val, gth->base + reg); +} + +/*static int gth_master_get(struct gth_device *gth, unsigned int master) +{ + unsigned int reg = REG_GTH_SWDEST0 + ((master >> 1) & ~3u); + unsigned int shift = (master & 0x7) * 4; + u32 val; + + if (master >= 256) { + reg = REG_GTH_GSWTDEST; + shift = 0; + } + + val = ioread32(gth->base + reg); + val &= (0xf << shift); + val >>= shift; + + return val ? val & 0x7 : -1; + }*/ + +static ssize_t master_attr_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct master_attribute *ma = + container_of(attr, struct master_attribute, attr); + struct gth_device *gth = ma->gth; + size_t count; + int port; + + spin_lock(>h->gth_lock); + port = gth->master[ma->master]; + spin_unlock(>h->gth_lock); + + if (port >= 0) + count = snprintf(buf, PAGE_SIZE, "%x\n", port); + else + count = snprintf(buf, PAGE_SIZE, "disabled\n"); + + return count; +} + +static ssize_t master_attr_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct master_attribute *ma = + container_of(attr, struct master_attribute, attr); + struct gth_device *gth = ma->gth; + int old_port, port; + + if (kstrtoint(buf, 10, &port) < 0) + return -EINVAL; + + if (port >= TH_POSSIBLE_OUTPUTS || port < -1) + return -EINVAL; + + spin_lock(>h->gth_lock); + + /* disconnect from the previous output port, if any */ + old_port = gth->master[ma->master]; + if (old_port >= 0) { + gth->master[ma->master] = -1; + clear_bit(ma->master, gth->output[old_port].master); + if (gth->output[old_port].output->active) + gth_master_set(gth, ma->master, -1); + } + + /* connect to the new output port, if any */ + if (port >= 0) { + /* check if there's a driver for this port */ + if (!gth->output[port].output) { + count = -ENODEV; + goto unlock; + } + + set_bit(ma->master, gth->output[port].master); + + /* if the port is active, program this setting */ + if (gth->output[port].output->active) + gth_master_set(gth, ma->master, port); + } + + gth->master[ma->master] = port; + +unlock: + spin_unlock(>h->gth_lock); + + return count; +} + +struct output_attribute { + struct device_attribute attr; + struct gth_device *gth; + unsigned int port; + unsigned int parm; +}; + +#define OUTPUT_PARM(_name, _mask, _r, _w, _what) \ + [TH_OUTPUT_PARM(_name)] = { .name = __stringify(_name), \ + .get = gth_ ## _what ## _get, \ + .set = gth_ ## _what ## _set, \ + .mask = (_mask), \ + .readable = (_r), \ + .writable = (_w) } + +static const struct output_parm { + const char *name; + unsigned int (*get)(struct gth_device *gth, int port); + void (*set)(struct gth_device *gth, int port, + unsigned int val); + unsigned int mask; + unsigned int readable : 1, + writable : 1; +} output_parms[] = { + OUTPUT_PARM(port, 0x7, 1, 0, output), + OUTPUT_PARM(null, BIT(3), 1, 1, output), + OUTPUT_PARM(drop, BIT(4), 1, 1, output), + OUTPUT_PARM(reset, BIT(5), 1, 0, output), + OUTPUT_PARM(flush, BIT(7), 0, 1, output), + OUTPUT_PARM(smcfreq, 0xffff, 1, 1, smcfreq), +}; + +static void +gth_output_parm_set(struct gth_device *gth, int port, unsigned int parm, + unsigned int val) +{ + unsigned int config = output_parms[parm].get(gth, port); + unsigned int mask = output_parms[parm].mask; + unsigned int shift = __ffs(mask); + + config &= ~mask; + config |= (val << shift) & mask; + output_parms[parm].set(gth, port, config); +} + +static unsigned int +gth_output_parm_get(struct gth_device *gth, int port, unsigned int parm) +{ + unsigned int config = output_parms[parm].get(gth, port); + unsigned int mask = output_parms[parm].mask; + unsigned int shift = __ffs(mask); + + config &= mask; + config >>= shift; + return config; +} + +/* + * Reset outputs and sources + */ +static int intel_th_gth_reset(struct gth_device *gth) +{ + u32 scratchpad; + int port, i; + + scratchpad = ioread32(gth->base + REG_GTH_SCRPD0); + if (scratchpad & SCRPD_DEBUGGER_IN_USE) + return -EBUSY; + + /* output ports */ + for (port = 0; port < 8; port++) { + if (gth_output_parm_get(gth, port, TH_OUTPUT_PARM(port)) == + GTH_NONE) + continue; + + gth_output_set(gth, port, 0); + gth_smcfreq_set(gth, port, 16); + } + /* disable overrides */ + iowrite32(0, gth->base + REG_GTH_DESTOVR); + + /* masters swdest_0~31 and gswdest */ + for (i = 0; i < 33; i++) + iowrite32(0, gth->base + REG_GTH_SWDEST0 + i * 4); + + /* sources */ + iowrite32(0, gth->base + REG_GTH_SCR); + iowrite32(0xfc, gth->base + REG_GTH_SCR2); + + return 0; +} + +/* + * "outputs" attribute group + */ + +static ssize_t output_attr_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct output_attribute *oa = + container_of(attr, struct output_attribute, attr); + struct gth_device *gth = oa->gth; + size_t count; + + spin_lock(>h->gth_lock); + count = snprintf(buf, PAGE_SIZE, "%x\n", + gth_output_parm_get(gth, oa->port, oa->parm)); + spin_unlock(>h->gth_lock); + + return count; +} + +static ssize_t output_attr_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct output_attribute *oa = + container_of(attr, struct output_attribute, attr); + struct gth_device *gth = oa->gth; + unsigned int config; + + if (kstrtouint(buf, 16, &config) < 0) + return -EINVAL; + + spin_lock(>h->gth_lock); + gth_output_parm_set(gth, oa->port, oa->parm, config); + spin_unlock(>h->gth_lock); + + return count; +} + +static int intel_th_master_attributes(struct gth_device *gth) +{ + struct master_attribute *master_attrs; + struct attribute **attrs; + int i, nattrs = TH_CONFIGURABLE_MASTERS + 2; + + attrs = devm_kcalloc(gth->dev, nattrs, sizeof(void *), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + master_attrs = devm_kcalloc(gth->dev, nattrs, + sizeof(struct master_attribute), + GFP_KERNEL); + if (!master_attrs) + return -ENOMEM; + + for (i = 0; i < TH_CONFIGURABLE_MASTERS + 1; i++) { + char *name; + + name = devm_kasprintf(gth->dev, GFP_KERNEL, "%d%s", i, + i == TH_CONFIGURABLE_MASTERS ? "+" : ""); + if (!name) + return -ENOMEM; + + master_attrs[i].attr.attr.name = name; + master_attrs[i].attr.attr.mode = S_IRUGO | S_IWUSR; + master_attrs[i].attr.show = master_attr_show; + master_attrs[i].attr.store = master_attr_store; + + sysfs_attr_init(&master_attrs[i].attr.attr); + attrs[i] = &master_attrs[i].attr.attr; + + master_attrs[i].gth = gth; + master_attrs[i].master = i; + } + + gth->master_group.name = "masters"; + gth->master_group.attrs = attrs; + + return sysfs_create_group(>h->dev->kobj, >h->master_group); +} + +static int intel_th_output_attributes(struct gth_device *gth) +{ + struct output_attribute *out_attrs; + struct attribute **attrs; + int i, j, nouts = TH_POSSIBLE_OUTPUTS; + int nparms = ARRAY_SIZE(output_parms); + int nattrs = nouts * nparms + 1; + + attrs = devm_kcalloc(gth->dev, nattrs, sizeof(void *), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + + out_attrs = devm_kcalloc(gth->dev, nattrs, + sizeof(struct output_attribute), + GFP_KERNEL); + if (!out_attrs) + return -ENOMEM; + + for (i = 0; i < nouts; i++) { + for (j = 0; j < nparms; j++) { + unsigned int idx = i * nparms + j; + char *name; + + name = devm_kasprintf(gth->dev, GFP_KERNEL, "%d_%s", i, + output_parms[j].name); + if (!name) + return -ENOMEM; + + out_attrs[idx].attr.attr.name = name; + + if (output_parms[j].readable) { + out_attrs[idx].attr.attr.mode |= S_IRUGO; + out_attrs[idx].attr.show = output_attr_show; + } + + if (output_parms[j].writable) { + out_attrs[idx].attr.attr.mode |= S_IWUSR; + out_attrs[idx].attr.store = output_attr_store; + } + + sysfs_attr_init(&out_attrs[idx].attr.attr); + attrs[idx] = &out_attrs[idx].attr.attr; + + out_attrs[idx].gth = gth; + out_attrs[idx].port = i; + out_attrs[idx].parm = j; + } + } + + gth->output_group.name = "outputs"; + gth->output_group.attrs = attrs; + + return sysfs_create_group(>h->dev->kobj, >h->output_group); +} + +/** + * intel_th_gth_disable() - enable tracing to an output device + * @thdev: GTH device + * @output: output device's descriptor + * + * This will deconfigure all masters set to output to this device, + * disable tracing using force storeEn off signal and wait for the + * "pipeline empty" bit for corresponding output port. + */ +static void intel_th_gth_disable(struct intel_th_device *thdev, + struct intel_th_output *output) +{ + struct gth_device *gth = dev_get_drvdata(&thdev->dev); + unsigned long count; + int master; + u32 reg; + + spin_lock(>h->gth_lock); + output->active = false; + + for_each_set_bit(master, gth->output[output->port].master, + TH_CONFIGURABLE_MASTERS) { + gth_master_set(gth, master, -1); + } + spin_unlock(>h->gth_lock); + + iowrite32(0, gth->base + REG_GTH_SCR); + iowrite32(0xfd, gth->base + REG_GTH_SCR2); + + /* wait on pipeline empty for the given port */ + for (reg = 0, count = GTH_PLE_WAITLOOP_DEPTH; + count && !(reg & BIT(output->port)); count--) { + reg = ioread32(gth->base + REG_GTH_STAT); + cpu_relax(); + } + + /* clear force capture done for next captures */ + iowrite32(0xfc, gth->base + REG_GTH_SCR2); + + if (!count) + dev_dbg(&thdev->dev, "timeout waiting for GTH[%d] PLE\n", + output->port); +} + +/** + * intel_th_gth_enable() - enable tracing to an output device + * @thdev: GTH device + * @output: output device's descriptor + * + * This will configure all masters set to output to this device and + * enable tracing using force storeEn signal. + */ +static void intel_th_gth_enable(struct intel_th_device *thdev, + struct intel_th_output *output) +{ + struct gth_device *gth = dev_get_drvdata(&thdev->dev); + u32 scr = 0xfc0000; + int master; + + spin_lock(>h->gth_lock); + for_each_set_bit(master, gth->output[output->port].master, + TH_CONFIGURABLE_MASTERS + 1) { + gth_master_set(gth, master, output->port); + } + + if (output->multiblock) + scr |= 0xff; + + output->active = true; + spin_unlock(>h->gth_lock); + + iowrite32(scr, gth->base + REG_GTH_SCR); + iowrite32(0, gth->base + REG_GTH_SCR2); +} + +/** + * intel_th_gth_assign() - assign output device to a GTH output port + * @thdev: GTH device + * @othdev: output device + * + * This will match a given output device parameters against present + * output ports on the GTH and fill out relevant bits in output device's + * descriptor. + * + * Return: 0 on success, -errno on error. + */ +static int intel_th_gth_assign(struct intel_th_device *thdev, + struct intel_th_device *othdev) +{ + struct gth_device *gth = dev_get_drvdata(&thdev->dev); + int i, id; + + if (othdev->type != INTEL_TH_OUTPUT) + return -EINVAL; + + for (i = 0, id = 0; i < TH_POSSIBLE_OUTPUTS; i++) { + if (gth->output[i].port_type != othdev->output.type) + continue; + + if (othdev->id == -1 || othdev->id == id) + goto found; + + id++; + } + + return -ENOENT; + +found: + spin_lock(>h->gth_lock); + othdev->output.port = i; + othdev->output.active = false; + gth->output[i].output = &othdev->output; + spin_unlock(>h->gth_lock); + + return 0; +} + +/** + * intel_th_gth_unassign() - deassociate an output device from its output port + * @thdev: GTH device + * @othdev: output device + */ +static void intel_th_gth_unassign(struct intel_th_device *thdev, + struct intel_th_device *othdev) +{ + struct gth_device *gth = dev_get_drvdata(&thdev->dev); + int port = othdev->output.port; + + spin_lock(>h->gth_lock); + othdev->output.port = -1; + othdev->output.active = false; + gth->output[port].output = NULL; + spin_unlock(>h->gth_lock); +} + +static int +intel_th_gth_set_output(struct intel_th_device *thdev, unsigned int master) +{ + struct gth_device *gth = dev_get_drvdata(&thdev->dev); + int port = 0; /* FIXME: make default output configurable */ + + /* + * everything above TH_CONFIGURABLE_MASTERS is controlled by the + * same register + */ + if (master > TH_CONFIGURABLE_MASTERS) + master = TH_CONFIGURABLE_MASTERS; + + spin_lock(>h->gth_lock); + if (gth->master[master] == -1) { + set_bit(master, gth->output[port].master); + gth->master[master] = port; + } + spin_unlock(>h->gth_lock); + + return 0; +} + +static int intel_th_gth_probe(struct intel_th_device *thdev) +{ + struct device *dev = &thdev->dev; + struct gth_device *gth; + struct resource *res; + void __iomem *base; + int i, ret; + + res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + base = devm_ioremap(dev, res->start, resource_size(res)); + if (IS_ERR(base)) + return PTR_ERR(base); + + gth = devm_kzalloc(dev, sizeof(*gth), GFP_KERNEL); + if (!gth) + return -ENOMEM; + + gth->dev = dev; + gth->base = base; + spin_lock_init(>h->gth_lock); + + ret = intel_th_gth_reset(gth); + if (ret) + return ret; + + for (i = 0; i < TH_CONFIGURABLE_MASTERS + 1; i++) + gth->master[i] = -1; + + for (i = 0; i < TH_POSSIBLE_OUTPUTS; i++) { + gth->output[i].gth = gth; + gth->output[i].index = i; + gth->output[i].port_type = + gth_output_parm_get(gth, i, TH_OUTPUT_PARM(port)); + } + + if (intel_th_output_attributes(gth) || + intel_th_master_attributes(gth)) { + pr_warn("Can't initialize sysfs attributes\n"); + + if (gth->output_group.attrs) + sysfs_remove_group(>h->dev->kobj, >h->output_group); + return -ENOMEM; + } + + dev_set_drvdata(dev, gth); + + return 0; +} + +static void intel_th_gth_remove(struct intel_th_device *thdev) +{ + struct gth_device *gth = dev_get_drvdata(&thdev->dev); + + sysfs_remove_group(>h->dev->kobj, >h->output_group); + sysfs_remove_group(>h->dev->kobj, >h->master_group); +} + +static struct intel_th_driver intel_th_gth_driver = { + .probe = intel_th_gth_probe, + .remove = intel_th_gth_remove, + .assign = intel_th_gth_assign, + .unassign = intel_th_gth_unassign, + .set_output = intel_th_gth_set_output, + .enable = intel_th_gth_enable, + .disable = intel_th_gth_disable, + .driver = { + .name = "gth", + .owner = THIS_MODULE, + }, +}; + +module_driver(intel_th_gth_driver, + intel_th_driver_register, + intel_th_driver_unregister); + +MODULE_ALIAS("intel_th_switch"); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Intel(R) Trace Hub Global Trace Hub driver"); +MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>"); diff --git a/drivers/hwtracing/intel_th/gth.h b/drivers/hwtracing/intel_th/gth.h new file mode 100644 index 000000000000..3b714b7a61db --- /dev/null +++ b/drivers/hwtracing/intel_th/gth.h @@ -0,0 +1,66 @@ +/* + * Intel(R) Trace Hub Global Trace Hub (GTH) data structures + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_TH_GTH_H__ +#define __INTEL_TH_GTH_H__ + +/* Map output port parameter bits to symbolic names */ +#define TH_OUTPUT_PARM(name) \ + TH_OUTPUT_ ## name + +enum intel_th_output_parm { + /* output port type */ + TH_OUTPUT_PARM(port), + /* generate NULL packet */ + TH_OUTPUT_PARM(null), + /* packet drop */ + TH_OUTPUT_PARM(drop), + /* port in reset state */ + TH_OUTPUT_PARM(reset), + /* flush out data */ + TH_OUTPUT_PARM(flush), + /* mainenance packet frequency */ + TH_OUTPUT_PARM(smcfreq), +}; + +/* + * Register offsets + */ +enum { + REG_GTH_GTHOPT0 = 0x00, /* Output ports 0..3 config */ + REG_GTH_GTHOPT1 = 0x04, /* Output ports 4..7 config */ + REG_GTH_SWDEST0 = 0x08, /* Switching destination masters 0..7 */ + REG_GTH_GSWTDEST = 0x88, /* Global sw trace destination */ + REG_GTH_SMCR0 = 0x9c, /* STP mainenance for ports 0/1 */ + REG_GTH_SMCR1 = 0xa0, /* STP mainenance for ports 2/3 */ + REG_GTH_SMCR2 = 0xa4, /* STP mainenance for ports 4/5 */ + REG_GTH_SMCR3 = 0xa8, /* STP mainenance for ports 6/7 */ + REG_GTH_SCR = 0xc8, /* Source control (storeEn override) */ + REG_GTH_STAT = 0xd4, /* GTH status */ + REG_GTH_SCR2 = 0xd8, /* Source control (force storeEn off) */ + REG_GTH_DESTOVR = 0xdc, /* Destination override */ + REG_GTH_SCRPD0 = 0xe0, /* ScratchPad[0] */ + REG_GTH_SCRPD1 = 0xe4, /* ScratchPad[1] */ + REG_GTH_SCRPD2 = 0xe8, /* ScratchPad[2] */ + REG_GTH_SCRPD3 = 0xec, /* ScratchPad[3] */ +}; + +/* Externall debugger is using Intel TH */ +#define SCRPD_DEBUGGER_IN_USE BIT(24) + +/* waiting for Pipeline Empty bit(s) to assert for GTH */ +#define GTH_PLE_WAITLOOP_DEPTH 10000 + +#endif /* __INTEL_TH_GTH_H__ */ diff --git a/drivers/hwtracing/intel_th/intel_th.h b/drivers/hwtracing/intel_th/intel_th.h new file mode 100644 index 000000000000..57fd72b20fae --- /dev/null +++ b/drivers/hwtracing/intel_th/intel_th.h @@ -0,0 +1,244 @@ +/* + * Intel(R) Trace Hub data structures + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_TH_H__ +#define __INTEL_TH_H__ + +/* intel_th_device device types */ +enum { + /* Devices that generate trace data */ + INTEL_TH_SOURCE = 0, + /* Output ports (MSC, PTI) */ + INTEL_TH_OUTPUT, + /* Switch, the Global Trace Hub (GTH) */ + INTEL_TH_SWITCH, +}; + +/** + * struct intel_th_output - descriptor INTEL_TH_OUTPUT type devices + * @port: output port number, assigned by the switch + * @type: GTH_{MSU,CTP,PTI} + * @multiblock: true for multiblock output configuration + * @active: true when this output is enabled + * + * Output port descriptor, used by switch driver to tell which output + * port this output device corresponds to. Filled in at output device's + * probe time by switch::assign(). Passed from output device driver to + * switch related code to enable/disable its port. + */ +struct intel_th_output { + int port; + unsigned int type; + bool multiblock; + bool active; +}; + +/** + * struct intel_th_device - device on the intel_th bus + * @dev: device + * @resource: array of resources available to this device + * @num_resources: number of resources in @resource array + * @type: INTEL_TH_{SOURCE,OUTPUT,SWITCH} + * @id: device instance or -1 + * @output: output descriptor for INTEL_TH_OUTPUT devices + * @name: device name to match the driver + */ +struct intel_th_device { + struct device dev; + struct resource *resource; + unsigned int num_resources; + unsigned int type; + int id; + + /* INTEL_TH_OUTPUT specific */ + struct intel_th_output output; + + char name[]; +}; + +#define to_intel_th_device(_d) \ + container_of((_d), struct intel_th_device, dev) + +/** + * intel_th_device_get_resource() - obtain @num'th resource of type @type + * @thdev: the device to search the resource for + * @type: resource type + * @num: number of the resource + */ +static inline struct resource * +intel_th_device_get_resource(struct intel_th_device *thdev, unsigned int type, + unsigned int num) +{ + int i; + + for (i = 0; i < thdev->num_resources; i++) + if (resource_type(&thdev->resource[i]) == type && !num--) + return &thdev->resource[i]; + + return NULL; +} + +/** + * intel_th_output_assigned() - if an output device is assigned to a switch port + * @thdev: the output device + * + * Return: true if the device is INTEL_TH_OUTPUT *and* is assigned a port + */ +static inline bool +intel_th_output_assigned(struct intel_th_device *thdev) +{ + return thdev->type == INTEL_TH_OUTPUT && + thdev->output.port >= 0; +} + +/** + * struct intel_th_driver - driver for an intel_th_device device + * @driver: generic driver + * @probe: probe method + * @remove: remove method + * @assign: match a given output type device against available outputs + * @unassign: deassociate an output type device from an output port + * @enable: enable tracing for a given output device + * @disable: disable tracing for a given output device + * @fops: file operations for device nodes + * + * Callbacks @probe and @remove are required for all device types. + * Switch device driver needs to fill in @assign, @enable and @disable + * callbacks. + */ +struct intel_th_driver { + struct device_driver driver; + int (*probe)(struct intel_th_device *thdev); + void (*remove)(struct intel_th_device *thdev); + /* switch (GTH) ops */ + int (*assign)(struct intel_th_device *thdev, + struct intel_th_device *othdev); + void (*unassign)(struct intel_th_device *thdev, + struct intel_th_device *othdev); + void (*enable)(struct intel_th_device *thdev, + struct intel_th_output *output); + void (*disable)(struct intel_th_device *thdev, + struct intel_th_output *output); + /* output ops */ + void (*irq)(struct intel_th_device *thdev); + int (*activate)(struct intel_th_device *thdev); + void (*deactivate)(struct intel_th_device *thdev); + /* file_operations for those who want a device node */ + const struct file_operations *fops; + + /* source ops */ + int (*set_output)(struct intel_th_device *thdev, + unsigned int master); +}; + +#define to_intel_th_driver(_d) \ + container_of((_d), struct intel_th_driver, driver) + +static inline struct intel_th_device * +to_intel_th_hub(struct intel_th_device *thdev) +{ + struct device *parent = thdev->dev.parent; + + if (!parent) + return NULL; + + return to_intel_th_device(parent); +} + +struct intel_th * +intel_th_alloc(struct device *dev, struct resource *devres, + unsigned int ndevres, int irq); +void intel_th_free(struct intel_th *th); + +int intel_th_driver_register(struct intel_th_driver *thdrv); +void intel_th_driver_unregister(struct intel_th_driver *thdrv); + +int intel_th_trace_enable(struct intel_th_device *thdev); +int intel_th_trace_disable(struct intel_th_device *thdev); +int intel_th_set_output(struct intel_th_device *thdev, + unsigned int master); + +enum { + TH_MMIO_CONFIG = 0, + TH_MMIO_SW = 2, + TH_MMIO_END, +}; + +#define TH_SUBDEVICE_MAX 6 +#define TH_POSSIBLE_OUTPUTS 8 +#define TH_CONFIGURABLE_MASTERS 256 +#define TH_MSC_MAX 2 + +/** + * struct intel_th - Intel TH controller + * @dev: driver core's device + * @thdev: subdevices + * @hub: "switch" subdevice (GTH) + * @id: this Intel TH controller's device ID in the system + * @major: device node major for output devices + */ +struct intel_th { + struct device *dev; + + struct intel_th_device *thdev[TH_SUBDEVICE_MAX]; + struct intel_th_device *hub; + + int id; + int major; +#ifdef CONFIG_INTEL_TH_DEBUG + struct dentry *dbg; +#endif +}; + +/* + * Register windows + */ +enum { + /* Global Trace Hub (GTH) */ + REG_GTH_OFFSET = 0x0000, + REG_GTH_LENGTH = 0x2000, + + /* Software Trace Hub (STH) [0x4000..0x4fff] */ + REG_STH_OFFSET = 0x4000, + REG_STH_LENGTH = 0x2000, + + /* Memory Storage Unit (MSU) [0xa0000..0xa1fff] */ + REG_MSU_OFFSET = 0xa0000, + REG_MSU_LENGTH = 0x02000, + + /* Internal MSU trace buffer [0x80000..0x9ffff] */ + BUF_MSU_OFFSET = 0x80000, + BUF_MSU_LENGTH = 0x20000, + + /* PTI output == same window as GTH */ + REG_PTI_OFFSET = REG_GTH_OFFSET, + REG_PTI_LENGTH = REG_GTH_LENGTH, + + /* DCI Handler (DCIH) == some window as MSU */ + REG_DCIH_OFFSET = REG_MSU_OFFSET, + REG_DCIH_LENGTH = REG_MSU_LENGTH, +}; + +/* + * GTH, output ports configuration + */ +enum { + GTH_NONE = 0, + GTH_MSU, /* memory/usb */ + GTH_CTP, /* Common Trace Port */ + GTH_PTI = 4, /* MIPI-PTI */ +}; + +#endif diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c new file mode 100644 index 000000000000..80a12384ed20 --- /dev/null +++ b/drivers/hwtracing/intel_th/msu.c @@ -0,0 +1,1509 @@ +/* + * Intel(R) Trace Hub Memory Storage Unit + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/uaccess.h> +#include <linux/sizes.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/dma-mapping.h> + +#include <asm/cacheflush.h> + +#include "intel_th.h" +#include "msu.h" + +#define msc_dev(x) (&(x)->thdev->dev) + +/** + * struct msc_block - multiblock mode block descriptor + * @bdesc: pointer to hardware descriptor (beginning of the block) + * @addr: physical address of the block + */ +struct msc_block { + struct msc_block_desc *bdesc; + dma_addr_t addr; +}; + +/** + * struct msc_window - multiblock mode window descriptor + * @entry: window list linkage (msc::win_list) + * @pgoff: page offset into the buffer that this window starts at + * @nr_blocks: number of blocks (pages) in this window + * @block: array of block descriptors + */ +struct msc_window { + struct list_head entry; + unsigned long pgoff; + unsigned int nr_blocks; + struct msc *msc; + struct msc_block block[0]; +}; + +/** + * struct msc_iter - iterator for msc buffer + * @entry: msc::iter_list linkage + * @msc: pointer to the MSC device + * @start_win: oldest window + * @win: current window + * @offset: current logical offset into the buffer + * @start_block: oldest block in the window + * @block: block number in the window + * @block_off: offset into current block + * @wrap_count: block wrapping handling + * @eof: end of buffer reached + */ +struct msc_iter { + struct list_head entry; + struct msc *msc; + struct msc_window *start_win; + struct msc_window *win; + unsigned long offset; + int start_block; + int block; + unsigned int block_off; + unsigned int wrap_count; + unsigned int eof; +}; + +/** + * struct msc - MSC device representation + * @reg_base: register window base address + * @thdev: intel_th_device pointer + * @win_list: list of windows in multiblock mode + * @nr_pages: total number of pages allocated for this buffer + * @single_sz: amount of data in single mode + * @single_wrap: single mode wrap occurred + * @base: buffer's base pointer + * @base_addr: buffer's base address + * @user_count: number of users of the buffer + * @mmap_count: number of mappings + * @buf_mutex: mutex to serialize access to buffer-related bits + + * @enabled: MSC is enabled + * @wrap: wrapping is enabled + * @mode: MSC operating mode + * @burst_len: write burst length + * @index: number of this MSC in the MSU + */ +struct msc { + void __iomem *reg_base; + struct intel_th_device *thdev; + + struct list_head win_list; + unsigned long nr_pages; + unsigned long single_sz; + unsigned int single_wrap : 1; + void *base; + dma_addr_t base_addr; + + /* <0: no buffer, 0: no users, >0: active users */ + atomic_t user_count; + + atomic_t mmap_count; + struct mutex buf_mutex; + + struct mutex iter_mutex; + struct list_head iter_list; + + /* config */ + unsigned int enabled : 1, + wrap : 1; + unsigned int mode; + unsigned int burst_len; + unsigned int index; +}; + +static inline bool msc_block_is_empty(struct msc_block_desc *bdesc) +{ + /* header hasn't been written */ + if (!bdesc->valid_dw) + return true; + + /* valid_dw includes the header */ + if (!msc_data_sz(bdesc)) + return true; + + return false; +} + +/** + * msc_oldest_window() - locate the window with oldest data + * @msc: MSC device + * + * This should only be used in multiblock mode. Caller should hold the + * msc::user_count reference. + * + * Return: the oldest window with valid data + */ +static struct msc_window *msc_oldest_window(struct msc *msc) +{ + struct msc_window *win; + u32 reg = ioread32(msc->reg_base + REG_MSU_MSC0NWSA); + unsigned long win_addr = (unsigned long)reg << PAGE_SHIFT; + unsigned int found = 0; + + if (list_empty(&msc->win_list)) + return NULL; + + /* + * we might need a radix tree for this, depending on how + * many windows a typical user would allocate; ideally it's + * something like 2, in which case we're good + */ + list_for_each_entry(win, &msc->win_list, entry) { + if (win->block[0].addr == win_addr) + found++; + + /* skip the empty ones */ + if (msc_block_is_empty(win->block[0].bdesc)) + continue; + + if (found) + return win; + } + + return list_entry(msc->win_list.next, struct msc_window, entry); +} + +/** + * msc_win_oldest_block() - locate the oldest block in a given window + * @win: window to look at + * + * Return: index of the block with the oldest data + */ +static unsigned int msc_win_oldest_block(struct msc_window *win) +{ + unsigned int blk; + struct msc_block_desc *bdesc = win->block[0].bdesc; + + /* without wrapping, first block is the oldest */ + if (!msc_block_wrapped(bdesc)) + return 0; + + /* + * with wrapping, last written block contains both the newest and the + * oldest data for this window. + */ + for (blk = 0; blk < win->nr_blocks; blk++) { + bdesc = win->block[blk].bdesc; + + if (msc_block_last_written(bdesc)) + return blk; + } + + return 0; +} + +/** + * msc_is_last_win() - check if a window is the last one for a given MSC + * @win: window + * Return: true if @win is the last window in MSC's multiblock buffer + */ +static inline bool msc_is_last_win(struct msc_window *win) +{ + return win->entry.next == &win->msc->win_list; +} + +/** + * msc_next_window() - return next window in the multiblock buffer + * @win: current window + * + * Return: window following the current one + */ +static struct msc_window *msc_next_window(struct msc_window *win) +{ + if (msc_is_last_win(win)) + return list_entry(win->msc->win_list.next, struct msc_window, + entry); + + return list_entry(win->entry.next, struct msc_window, entry); +} + +static struct msc_block_desc *msc_iter_bdesc(struct msc_iter *iter) +{ + return iter->win->block[iter->block].bdesc; +} + +static void msc_iter_init(struct msc_iter *iter) +{ + memset(iter, 0, sizeof(*iter)); + iter->start_block = -1; + iter->block = -1; +} + +static struct msc_iter *msc_iter_install(struct msc *msc) +{ + struct msc_iter *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return NULL; + + msc_iter_init(iter); + iter->msc = msc; + + mutex_lock(&msc->iter_mutex); + list_add_tail(&iter->entry, &msc->iter_list); + mutex_unlock(&msc->iter_mutex); + + return iter; +} + +static void msc_iter_remove(struct msc_iter *iter, struct msc *msc) +{ + mutex_lock(&msc->iter_mutex); + list_del(&iter->entry); + mutex_unlock(&msc->iter_mutex); + + kfree(iter); +} + +static void msc_iter_block_start(struct msc_iter *iter) +{ + if (iter->start_block != -1) + return; + + iter->start_block = msc_win_oldest_block(iter->win); + iter->block = iter->start_block; + iter->wrap_count = 0; + + /* + * start with the block with oldest data; if data has wrapped + * in this window, it should be in this block + */ + if (msc_block_wrapped(msc_iter_bdesc(iter))) + iter->wrap_count = 2; + +} + +static int msc_iter_win_start(struct msc_iter *iter, struct msc *msc) +{ + /* already started, nothing to do */ + if (iter->start_win) + return 0; + + iter->start_win = msc_oldest_window(msc); + if (!iter->start_win) + return -EINVAL; + + iter->win = iter->start_win; + iter->start_block = -1; + + msc_iter_block_start(iter); + + return 0; +} + +static int msc_iter_win_advance(struct msc_iter *iter) +{ + iter->win = msc_next_window(iter->win); + iter->start_block = -1; + + if (iter->win == iter->start_win) { + iter->eof++; + return 1; + } + + msc_iter_block_start(iter); + + return 0; +} + +static int msc_iter_block_advance(struct msc_iter *iter) +{ + iter->block_off = 0; + + /* wrapping */ + if (iter->wrap_count && iter->block == iter->start_block) { + iter->wrap_count--; + if (!iter->wrap_count) + /* copied newest data from the wrapped block */ + return msc_iter_win_advance(iter); + } + + /* no wrapping, check for last written block */ + if (!iter->wrap_count && msc_block_last_written(msc_iter_bdesc(iter))) + /* copied newest data for the window */ + return msc_iter_win_advance(iter); + + /* block advance */ + if (++iter->block == iter->win->nr_blocks) + iter->block = 0; + + /* no wrapping, sanity check in case there is no last written block */ + if (!iter->wrap_count && iter->block == iter->start_block) + return msc_iter_win_advance(iter); + + return 0; +} + +/** + * msc_buffer_iterate() - go through multiblock buffer's data + * @iter: iterator structure + * @size: amount of data to scan + * @data: callback's private data + * @fn: iterator callback + * + * This will start at the window which will be written to next (containing + * the oldest data) and work its way to the current window, calling @fn + * for each chunk of data as it goes. + * + * Caller should have msc::user_count reference to make sure the buffer + * doesn't disappear from under us. + * + * Return: amount of data actually scanned. + */ +static ssize_t +msc_buffer_iterate(struct msc_iter *iter, size_t size, void *data, + unsigned long (*fn)(void *, void *, size_t)) +{ + struct msc *msc = iter->msc; + size_t len = size; + unsigned int advance; + + if (iter->eof) + return 0; + + /* start with the oldest window */ + if (msc_iter_win_start(iter, msc)) + return 0; + + do { + unsigned long data_bytes = msc_data_sz(msc_iter_bdesc(iter)); + void *src = (void *)msc_iter_bdesc(iter) + MSC_BDESC; + size_t tocopy = data_bytes, copied = 0; + size_t remaining = 0; + + advance = 1; + + /* + * If block wrapping happened, we need to visit the last block + * twice, because it contains both the oldest and the newest + * data in this window. + * + * First time (wrap_count==2), in the very beginning, to collect + * the oldest data, which is in the range + * (data_bytes..DATA_IN_PAGE). + * + * Second time (wrap_count==1), it's just like any other block, + * containing data in the range of [MSC_BDESC..data_bytes]. + */ + if (iter->block == iter->start_block && iter->wrap_count) { + tocopy = DATA_IN_PAGE - data_bytes; + src += data_bytes; + } + + if (!tocopy) + goto next_block; + + tocopy -= iter->block_off; + src += iter->block_off; + + if (len < tocopy) { + tocopy = len; + advance = 0; + } + + remaining = fn(data, src, tocopy); + + if (remaining) + advance = 0; + + copied = tocopy - remaining; + len -= copied; + iter->block_off += copied; + iter->offset += copied; + + if (!advance) + break; + +next_block: + if (msc_iter_block_advance(iter)) + break; + + } while (len); + + return size - len; +} + +/** + * msc_buffer_clear_hw_header() - clear hw header for multiblock + * @msc: MSC device + */ +static void msc_buffer_clear_hw_header(struct msc *msc) +{ + struct msc_window *win; + + mutex_lock(&msc->buf_mutex); + list_for_each_entry(win, &msc->win_list, entry) { + unsigned int blk; + size_t hw_sz = sizeof(struct msc_block_desc) - + offsetof(struct msc_block_desc, hw_tag); + + for (blk = 0; blk < win->nr_blocks; blk++) { + struct msc_block_desc *bdesc = win->block[blk].bdesc; + + memset(&bdesc->hw_tag, 0, hw_sz); + } + } + mutex_unlock(&msc->buf_mutex); +} + +/** + * msc_configure() - set up MSC hardware + * @msc: the MSC device to configure + * + * Program storage mode, wrapping, burst length and trace buffer address + * into a given MSC. If msc::enabled is set, enable the trace, too. + */ +static int msc_configure(struct msc *msc) +{ + u32 reg; + + if (msc->mode > MSC_MODE_MULTI) + return -ENOTSUPP; + + if (msc->mode == MSC_MODE_MULTI) + msc_buffer_clear_hw_header(msc); + + reg = msc->base_addr >> PAGE_SHIFT; + iowrite32(reg, msc->reg_base + REG_MSU_MSC0BAR); + + if (msc->mode == MSC_MODE_SINGLE) { + reg = msc->nr_pages; + iowrite32(reg, msc->reg_base + REG_MSU_MSC0SIZE); + } + + reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL); + reg &= ~(MSC_MODE | MSC_WRAPEN | MSC_EN | MSC_RD_HDR_OVRD); + + reg |= msc->mode << __ffs(MSC_MODE); + reg |= msc->burst_len << __ffs(MSC_LEN); + /*if (msc->mode == MSC_MODE_MULTI) + reg |= MSC_RD_HDR_OVRD; */ + if (msc->wrap) + reg |= MSC_WRAPEN; + if (msc->enabled) + reg |= MSC_EN; + + iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL); + + if (msc->enabled) { + msc->thdev->output.multiblock = msc->mode == MSC_MODE_MULTI; + intel_th_trace_enable(msc->thdev); + } + + return 0; +} + +/** + * msc_disable() - disable MSC hardware + * @msc: MSC device to disable + * + * If @msc is enabled, disable tracing on the switch and then disable MSC + * storage. + */ +static void msc_disable(struct msc *msc) +{ + unsigned long count; + u32 reg; + + if (!msc->enabled) + return; + + intel_th_trace_disable(msc->thdev); + + for (reg = 0, count = MSC_PLE_WAITLOOP_DEPTH; + count && !(reg & MSCSTS_PLE); count--) { + reg = ioread32(msc->reg_base + REG_MSU_MSC0STS); + cpu_relax(); + } + + if (!count) + dev_dbg(msc_dev(msc), "timeout waiting for MSC0 PLE\n"); + + if (msc->mode == MSC_MODE_SINGLE) { + msc->single_wrap = !!(reg & MSCSTS_WRAPSTAT); + + reg = ioread32(msc->reg_base + REG_MSU_MSC0MWP); + msc->single_sz = reg & ((msc->nr_pages << PAGE_SHIFT) - 1); + dev_dbg(msc_dev(msc), "MSCnMWP: %08x/%08lx, wrap: %d\n", + reg, msc->single_sz, msc->single_wrap); + } + + reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL); + reg &= ~MSC_EN; + iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL); + msc->enabled = 0; + + iowrite32(0, msc->reg_base + REG_MSU_MSC0BAR); + iowrite32(0, msc->reg_base + REG_MSU_MSC0SIZE); + + dev_dbg(msc_dev(msc), "MSCnNWSA: %08x\n", + ioread32(msc->reg_base + REG_MSU_MSC0NWSA)); + + reg = ioread32(msc->reg_base + REG_MSU_MSC0STS); + dev_dbg(msc_dev(msc), "MSCnSTS: %08x\n", reg); +} + +static int intel_th_msc_activate(struct intel_th_device *thdev) +{ + struct msc *msc = dev_get_drvdata(&thdev->dev); + int ret = 0; + + if (!atomic_inc_unless_negative(&msc->user_count)) + return -ENODEV; + + mutex_lock(&msc->iter_mutex); + if (!list_empty(&msc->iter_list)) + ret = -EBUSY; + mutex_unlock(&msc->iter_mutex); + + if (ret) { + atomic_dec(&msc->user_count); + return ret; + } + + msc->enabled = 1; + + return msc_configure(msc); +} + +static void intel_th_msc_deactivate(struct intel_th_device *thdev) +{ + struct msc *msc = dev_get_drvdata(&thdev->dev); + + msc_disable(msc); + + atomic_dec(&msc->user_count); +} + +/** + * msc_buffer_contig_alloc() - allocate a contiguous buffer for SINGLE mode + * @msc: MSC device + * @size: allocation size in bytes + * + * This modifies msc::base, which requires msc::buf_mutex to serialize, so the + * caller is expected to hold it. + * + * Return: 0 on success, -errno otherwise. + */ +static int msc_buffer_contig_alloc(struct msc *msc, unsigned long size) +{ + unsigned int order = get_order(size); + struct page *page; + + if (!size) + return 0; + + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!page) + return -ENOMEM; + + split_page(page, order); + msc->nr_pages = size >> PAGE_SHIFT; + msc->base = page_address(page); + msc->base_addr = page_to_phys(page); + + return 0; +} + +/** + * msc_buffer_contig_free() - free a contiguous buffer + * @msc: MSC configured in SINGLE mode + */ +static void msc_buffer_contig_free(struct msc *msc) +{ + unsigned long off; + + for (off = 0; off < msc->nr_pages << PAGE_SHIFT; off += PAGE_SIZE) { + struct page *page = virt_to_page(msc->base + off); + + page->mapping = NULL; + __free_page(page); + } + + msc->nr_pages = 0; +} + +/** + * msc_buffer_contig_get_page() - find a page at a given offset + * @msc: MSC configured in SINGLE mode + * @pgoff: page offset + * + * Return: page, if @pgoff is within the range, NULL otherwise. + */ +static struct page *msc_buffer_contig_get_page(struct msc *msc, + unsigned long pgoff) +{ + if (pgoff >= msc->nr_pages) + return NULL; + + return virt_to_page(msc->base + (pgoff << PAGE_SHIFT)); +} + +/** + * msc_buffer_win_alloc() - alloc a window for a multiblock mode + * @msc: MSC device + * @nr_blocks: number of pages in this window + * + * This modifies msc::win_list and msc::base, which requires msc::buf_mutex + * to serialize, so the caller is expected to hold it. + * + * Return: 0 on success, -errno otherwise. + */ +static int msc_buffer_win_alloc(struct msc *msc, unsigned int nr_blocks) +{ + struct msc_window *win; + unsigned long size = PAGE_SIZE; + int i, ret = -ENOMEM; + + if (!nr_blocks) + return 0; + + win = kzalloc(offsetof(struct msc_window, block[nr_blocks]), + GFP_KERNEL); + if (!win) + return -ENOMEM; + + if (!list_empty(&msc->win_list)) { + struct msc_window *prev = list_entry(msc->win_list.prev, + struct msc_window, entry); + + win->pgoff = prev->pgoff + prev->nr_blocks; + } + + for (i = 0; i < nr_blocks; i++) { + win->block[i].bdesc = dma_alloc_coherent(msc_dev(msc), size, + &win->block[i].addr, + GFP_KERNEL); + +#ifdef CONFIG_X86 + /* Set the page as uncached */ + set_memory_uc((unsigned long)win->block[i].bdesc, 1); +#endif + + if (!win->block[i].bdesc) + goto err_nomem; + } + + win->msc = msc; + win->nr_blocks = nr_blocks; + + if (list_empty(&msc->win_list)) { + msc->base = win->block[0].bdesc; + msc->base_addr = win->block[0].addr; + } + + list_add_tail(&win->entry, &msc->win_list); + msc->nr_pages += nr_blocks; + + return 0; + +err_nomem: + for (i--; i >= 0; i--) { +#ifdef CONFIG_X86 + /* Reset the page to write-back before releasing */ + set_memory_wb((unsigned long)win->block[i].bdesc, 1); +#endif + dma_free_coherent(msc_dev(msc), size, win->block[i].bdesc, + win->block[i].addr); + } + kfree(win); + + return ret; +} + +/** + * msc_buffer_win_free() - free a window from MSC's window list + * @msc: MSC device + * @win: window to free + * + * This modifies msc::win_list and msc::base, which requires msc::buf_mutex + * to serialize, so the caller is expected to hold it. + */ +static void msc_buffer_win_free(struct msc *msc, struct msc_window *win) +{ + int i; + + msc->nr_pages -= win->nr_blocks; + + list_del(&win->entry); + if (list_empty(&msc->win_list)) { + msc->base = NULL; + msc->base_addr = 0; + } + + for (i = 0; i < win->nr_blocks; i++) { + struct page *page = virt_to_page(win->block[i].bdesc); + + page->mapping = NULL; +#ifdef CONFIG_X86 + /* Reset the page to write-back before releasing */ + set_memory_wb((unsigned long)win->block[i].bdesc, 1); +#endif + dma_free_coherent(msc_dev(win->msc), PAGE_SIZE, + win->block[i].bdesc, win->block[i].addr); + } + + kfree(win); +} + +/** + * msc_buffer_relink() - set up block descriptors for multiblock mode + * @msc: MSC device + * + * This traverses msc::win_list, which requires msc::buf_mutex to serialize, + * so the caller is expected to hold it. + */ +static void msc_buffer_relink(struct msc *msc) +{ + struct msc_window *win, *next_win; + + /* call with msc::mutex locked */ + list_for_each_entry(win, &msc->win_list, entry) { + unsigned int blk; + u32 sw_tag = 0; + + /* + * Last window's next_win should point to the first window + * and MSC_SW_TAG_LASTWIN should be set. + */ + if (msc_is_last_win(win)) { + sw_tag |= MSC_SW_TAG_LASTWIN; + next_win = list_entry(msc->win_list.next, + struct msc_window, entry); + } else { + next_win = list_entry(win->entry.next, + struct msc_window, entry); + } + + for (blk = 0; blk < win->nr_blocks; blk++) { + struct msc_block_desc *bdesc = win->block[blk].bdesc; + + memset(bdesc, 0, sizeof(*bdesc)); + + bdesc->next_win = next_win->block[0].addr >> PAGE_SHIFT; + + /* + * Similarly to last window, last block should point + * to the first one. + */ + if (blk == win->nr_blocks - 1) { + sw_tag |= MSC_SW_TAG_LASTBLK; + bdesc->next_blk = + win->block[0].addr >> PAGE_SHIFT; + } else { + bdesc->next_blk = + win->block[blk + 1].addr >> PAGE_SHIFT; + } + + bdesc->sw_tag = sw_tag; + bdesc->block_sz = PAGE_SIZE / 64; + } + } + + /* + * Make the above writes globally visible before tracing is + * enabled to make sure hardware sees them coherently. + */ + wmb(); +} + +static void msc_buffer_multi_free(struct msc *msc) +{ + struct msc_window *win, *iter; + + list_for_each_entry_safe(win, iter, &msc->win_list, entry) + msc_buffer_win_free(msc, win); +} + +static int msc_buffer_multi_alloc(struct msc *msc, unsigned long *nr_pages, + unsigned int nr_wins) +{ + int ret, i; + + for (i = 0; i < nr_wins; i++) { + ret = msc_buffer_win_alloc(msc, nr_pages[i]); + if (ret) { + msc_buffer_multi_free(msc); + return ret; + } + } + + msc_buffer_relink(msc); + + return 0; +} + +/** + * msc_buffer_free() - free buffers for MSC + * @msc: MSC device + * + * Free MSC's storage buffers. + * + * This modifies msc::win_list and msc::base, which requires msc::buf_mutex to + * serialize, so the caller is expected to hold it. + */ +static void msc_buffer_free(struct msc *msc) +{ + if (msc->mode == MSC_MODE_SINGLE) + msc_buffer_contig_free(msc); + else if (msc->mode == MSC_MODE_MULTI) + msc_buffer_multi_free(msc); +} + +/** + * msc_buffer_alloc() - allocate a buffer for MSC + * @msc: MSC device + * @size: allocation size in bytes + * + * Allocate a storage buffer for MSC, depending on the msc::mode, it will be + * either done via msc_buffer_contig_alloc() for SINGLE operation mode or + * msc_buffer_win_alloc() for multiblock operation. The latter allocates one + * window per invocation, so in multiblock mode this can be called multiple + * times for the same MSC to allocate multiple windows. + * + * This modifies msc::win_list and msc::base, which requires msc::buf_mutex + * to serialize, so the caller is expected to hold it. + * + * Return: 0 on success, -errno otherwise. + */ +static int msc_buffer_alloc(struct msc *msc, unsigned long *nr_pages, + unsigned int nr_wins) +{ + int ret; + + /* -1: buffer not allocated */ + if (atomic_read(&msc->user_count) != -1) + return -EBUSY; + + if (msc->mode == MSC_MODE_SINGLE) { + if (nr_wins != 1) + return -EINVAL; + + ret = msc_buffer_contig_alloc(msc, nr_pages[0] << PAGE_SHIFT); + } else if (msc->mode == MSC_MODE_MULTI) { + ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins); + } else { + ret = -ENOTSUPP; + } + + if (!ret) { + /* allocation should be visible before the counter goes to 0 */ + smp_mb__before_atomic(); + + if (WARN_ON_ONCE(atomic_cmpxchg(&msc->user_count, -1, 0) != -1)) + return -EINVAL; + } + + return ret; +} + +/** + * msc_buffer_unlocked_free_unless_used() - free a buffer unless it's in use + * @msc: MSC device + * + * This will free MSC buffer unless it is in use or there is no allocated + * buffer. + * Caller needs to hold msc::buf_mutex. + * + * Return: 0 on successful deallocation or if there was no buffer to + * deallocate, -EBUSY if there are active users. + */ +static int msc_buffer_unlocked_free_unless_used(struct msc *msc) +{ + int count, ret = 0; + + count = atomic_cmpxchg(&msc->user_count, 0, -1); + + /* > 0: buffer is allocated and has users */ + if (count > 0) + ret = -EBUSY; + /* 0: buffer is allocated, no users */ + else if (!count) + msc_buffer_free(msc); + /* < 0: no buffer, nothing to do */ + + return ret; +} + +/** + * msc_buffer_free_unless_used() - free a buffer unless it's in use + * @msc: MSC device + * + * This is a locked version of msc_buffer_unlocked_free_unless_used(). + */ +static int msc_buffer_free_unless_used(struct msc *msc) +{ + int ret; + + mutex_lock(&msc->buf_mutex); + ret = msc_buffer_unlocked_free_unless_used(msc); + mutex_unlock(&msc->buf_mutex); + + return ret; +} + +/** + * msc_buffer_get_page() - get MSC buffer page at a given offset + * @msc: MSC device + * @pgoff: page offset into the storage buffer + * + * This traverses msc::win_list, so holding msc::buf_mutex is expected from + * the caller. + * + * Return: page if @pgoff corresponds to a valid buffer page or NULL. + */ +static struct page *msc_buffer_get_page(struct msc *msc, unsigned long pgoff) +{ + struct msc_window *win; + + if (msc->mode == MSC_MODE_SINGLE) + return msc_buffer_contig_get_page(msc, pgoff); + + list_for_each_entry(win, &msc->win_list, entry) + if (pgoff >= win->pgoff && pgoff < win->pgoff + win->nr_blocks) + goto found; + + return NULL; + +found: + pgoff -= win->pgoff; + return virt_to_page(win->block[pgoff].bdesc); +} + +/** + * struct msc_win_to_user_struct - data for copy_to_user() callback + * @buf: userspace buffer to copy data to + * @offset: running offset + */ +struct msc_win_to_user_struct { + char __user *buf; + unsigned long offset; +}; + +/** + * msc_win_to_user() - iterator for msc_buffer_iterate() to copy data to user + * @data: callback's private data + * @src: source buffer + * @len: amount of data to copy from the source buffer + */ +static unsigned long msc_win_to_user(void *data, void *src, size_t len) +{ + struct msc_win_to_user_struct *u = data; + unsigned long ret; + + ret = copy_to_user(u->buf + u->offset, src, len); + u->offset += len - ret; + + return ret; +} + + +/* + * file operations' callbacks + */ + +static int intel_th_msc_open(struct inode *inode, struct file *file) +{ + struct intel_th_device *thdev = file->private_data; + struct msc *msc = dev_get_drvdata(&thdev->dev); + struct msc_iter *iter; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + iter = msc_iter_install(msc); + if (!iter) + return -ENOMEM; + + file->private_data = iter; + + return nonseekable_open(inode, file); +} + +static int intel_th_msc_release(struct inode *inode, struct file *file) +{ + struct msc_iter *iter = file->private_data; + struct msc *msc = iter->msc; + + msc_iter_remove(iter, msc); + + return 0; +} + +static ssize_t +msc_single_to_user(struct msc *msc, char __user *buf, loff_t off, size_t len) +{ + unsigned long size = msc->nr_pages << PAGE_SHIFT, rem = len; + unsigned long start = off, tocopy = 0; + + if (msc->single_wrap) { + start += msc->single_sz; + if (start < size) { + tocopy = min(rem, size - start); + if (copy_to_user(buf, msc->base + start, tocopy)) + return -EFAULT; + + buf += tocopy; + rem -= tocopy; + start += tocopy; + } + + start &= size - 1; + if (rem) { + tocopy = min(rem, msc->single_sz - start); + if (copy_to_user(buf, msc->base + start, tocopy)) + return -EFAULT; + + rem -= tocopy; + } + + return len - rem; + } + + if (copy_to_user(buf, msc->base + start, rem)) + return -EFAULT; + + return len; +} + +static ssize_t intel_th_msc_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + struct msc_iter *iter = file->private_data; + struct msc *msc = iter->msc; + size_t size; + loff_t off = *ppos; + ssize_t ret = 0; + + if (!atomic_inc_unless_negative(&msc->user_count)) + return 0; + + if (msc->enabled) { + ret = -EBUSY; + goto put_count; + } + + if (msc->mode == MSC_MODE_SINGLE && !msc->single_wrap) + size = msc->single_sz; + else + size = msc->nr_pages << PAGE_SHIFT; + + if (!size) + return 0; + + if (off >= size) { + len = 0; + goto put_count; + } + if (off + len >= size) + len = size - off; + + if (msc->mode == MSC_MODE_SINGLE) { + ret = msc_single_to_user(msc, buf, off, len); + if (ret >= 0) + *ppos += ret; + } else if (msc->mode == MSC_MODE_MULTI) { + struct msc_win_to_user_struct u = { + .buf = buf, + .offset = 0, + }; + + ret = msc_buffer_iterate(iter, len, &u, msc_win_to_user); + if (ret >= 0) + *ppos = iter->offset; + } else { + ret = -ENOTSUPP; + } + +put_count: + atomic_dec(&msc->user_count); + + return ret; +} + +/* + * vm operations callbacks (vm_ops) + */ + +static void msc_mmap_open(struct vm_area_struct *vma) +{ + struct msc_iter *iter = vma->vm_file->private_data; + struct msc *msc = iter->msc; + + atomic_inc(&msc->mmap_count); +} + +static void msc_mmap_close(struct vm_area_struct *vma) +{ + struct msc_iter *iter = vma->vm_file->private_data; + struct msc *msc = iter->msc; + unsigned long pg; + + if (!atomic_dec_and_mutex_lock(&msc->mmap_count, &msc->buf_mutex)) + return; + + /* drop page _counts */ + for (pg = 0; pg < msc->nr_pages; pg++) { + struct page *page = msc_buffer_get_page(msc, pg); + + if (WARN_ON_ONCE(!page)) + continue; + + if (page->mapping) + page->mapping = NULL; + } + + /* last mapping -- drop user_count */ + atomic_dec(&msc->user_count); + mutex_unlock(&msc->buf_mutex); +} + +static int msc_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct msc_iter *iter = vma->vm_file->private_data; + struct msc *msc = iter->msc; + + vmf->page = msc_buffer_get_page(msc, vmf->pgoff); + if (!vmf->page) + return VM_FAULT_SIGBUS; + + get_page(vmf->page); + vmf->page->mapping = vma->vm_file->f_mapping; + vmf->page->index = vmf->pgoff; + + return 0; +} + +static const struct vm_operations_struct msc_mmap_ops = { + .open = msc_mmap_open, + .close = msc_mmap_close, + .fault = msc_mmap_fault, +}; + +static int intel_th_msc_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned long size = vma->vm_end - vma->vm_start; + struct msc_iter *iter = vma->vm_file->private_data; + struct msc *msc = iter->msc; + int ret = -EINVAL; + + if (!size || offset_in_page(size)) + return -EINVAL; + + if (vma->vm_pgoff) + return -EINVAL; + + /* grab user_count once per mmap; drop in msc_mmap_close() */ + if (!atomic_inc_unless_negative(&msc->user_count)) + return -EINVAL; + + if (msc->mode != MSC_MODE_SINGLE && + msc->mode != MSC_MODE_MULTI) + goto out; + + if (size >> PAGE_SHIFT != msc->nr_pages) + goto out; + + atomic_set(&msc->mmap_count, 1); + ret = 0; + +out: + if (ret) + atomic_dec(&msc->user_count); + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_DONTEXPAND | VM_DONTCOPY; + vma->vm_ops = &msc_mmap_ops; + return ret; +} + +static const struct file_operations intel_th_msc_fops = { + .open = intel_th_msc_open, + .release = intel_th_msc_release, + .read = intel_th_msc_read, + .mmap = intel_th_msc_mmap, + .llseek = no_llseek, +}; + +static int intel_th_msc_init(struct msc *msc) +{ + atomic_set(&msc->user_count, -1); + + msc->mode = MSC_MODE_MULTI; + mutex_init(&msc->buf_mutex); + INIT_LIST_HEAD(&msc->win_list); + + mutex_init(&msc->iter_mutex); + INIT_LIST_HEAD(&msc->iter_list); + + msc->burst_len = + (ioread32(msc->reg_base + REG_MSU_MSC0CTL) & MSC_LEN) >> + __ffs(MSC_LEN); + + return 0; +} + +static const char * const msc_mode[] = { + [MSC_MODE_SINGLE] = "single", + [MSC_MODE_MULTI] = "multi", + [MSC_MODE_EXI] = "ExI", + [MSC_MODE_DEBUG] = "debug", +}; + +static ssize_t +wrap_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct msc *msc = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", msc->wrap); +} + +static ssize_t +wrap_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t size) +{ + struct msc *msc = dev_get_drvdata(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + msc->wrap = !!val; + + return size; +} + +static DEVICE_ATTR_RW(wrap); + +static ssize_t +mode_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct msc *msc = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%s\n", msc_mode[msc->mode]); +} + +static ssize_t +mode_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t size) +{ + struct msc *msc = dev_get_drvdata(dev); + size_t len = size; + char *cp; + int i, ret; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + cp = memchr(buf, '\n', len); + if (cp) + len = cp - buf; + + for (i = 0; i < ARRAY_SIZE(msc_mode); i++) + if (!strncmp(msc_mode[i], buf, len)) + goto found; + + return -EINVAL; + +found: + mutex_lock(&msc->buf_mutex); + ret = msc_buffer_unlocked_free_unless_used(msc); + if (!ret) + msc->mode = i; + mutex_unlock(&msc->buf_mutex); + + return ret ? ret : size; +} + +static DEVICE_ATTR_RW(mode); + +static ssize_t +nr_pages_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct msc *msc = dev_get_drvdata(dev); + struct msc_window *win; + size_t count = 0; + + mutex_lock(&msc->buf_mutex); + + if (msc->mode == MSC_MODE_SINGLE) + count = scnprintf(buf, PAGE_SIZE, "%ld\n", msc->nr_pages); + else if (msc->mode == MSC_MODE_MULTI) { + list_for_each_entry(win, &msc->win_list, entry) { + count += scnprintf(buf + count, PAGE_SIZE - count, + "%d%c", win->nr_blocks, + msc_is_last_win(win) ? '\n' : ','); + } + } else { + count = scnprintf(buf, PAGE_SIZE, "unsupported\n"); + } + + mutex_unlock(&msc->buf_mutex); + + return count; +} + +static ssize_t +nr_pages_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct msc *msc = dev_get_drvdata(dev); + unsigned long val, *win = NULL, *rewin; + size_t len = size; + const char *p = buf; + char *end, *s; + int ret, nr_wins = 0; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + ret = msc_buffer_free_unless_used(msc); + if (ret) + return ret; + + /* scan the comma-separated list of allocation sizes */ + end = memchr(buf, '\n', len); + if (end) + len = end - buf; + + do { + end = memchr(p, ',', len); + s = kstrndup(p, end ? end - p : len, GFP_KERNEL); + ret = kstrtoul(s, 10, &val); + kfree(s); + + if (ret || !val) + goto free_win; + + if (nr_wins && msc->mode == MSC_MODE_SINGLE) { + ret = -EINVAL; + goto free_win; + } + + nr_wins++; + rewin = krealloc(win, sizeof(*win) * nr_wins, GFP_KERNEL); + if (!rewin) { + kfree(win); + return -ENOMEM; + } + + win = rewin; + win[nr_wins - 1] = val; + + if (!end) + break; + + len -= end - p; + p = end + 1; + } while (len); + + mutex_lock(&msc->buf_mutex); + ret = msc_buffer_alloc(msc, win, nr_wins); + mutex_unlock(&msc->buf_mutex); + +free_win: + kfree(win); + + return ret ? ret : size; +} + +static DEVICE_ATTR_RW(nr_pages); + +static struct attribute *msc_output_attrs[] = { + &dev_attr_wrap.attr, + &dev_attr_mode.attr, + &dev_attr_nr_pages.attr, + NULL, +}; + +static struct attribute_group msc_output_group = { + .attrs = msc_output_attrs, +}; + +static int intel_th_msc_probe(struct intel_th_device *thdev) +{ + struct device *dev = &thdev->dev; + struct resource *res; + struct msc *msc; + void __iomem *base; + int err; + + res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + base = devm_ioremap(dev, res->start, resource_size(res)); + if (IS_ERR(base)) + return PTR_ERR(base); + + msc = devm_kzalloc(dev, sizeof(*msc), GFP_KERNEL); + if (!msc) + return -ENOMEM; + + msc->index = thdev->id; + + msc->thdev = thdev; + msc->reg_base = base + msc->index * 0x100; + + err = intel_th_msc_init(msc); + if (err) + return err; + + err = sysfs_create_group(&dev->kobj, &msc_output_group); + if (err) + return err; + + dev_set_drvdata(dev, msc); + + return 0; +} + +static void intel_th_msc_remove(struct intel_th_device *thdev) +{ + sysfs_remove_group(&thdev->dev.kobj, &msc_output_group); +} + +static struct intel_th_driver intel_th_msc_driver = { + .probe = intel_th_msc_probe, + .remove = intel_th_msc_remove, + .activate = intel_th_msc_activate, + .deactivate = intel_th_msc_deactivate, + .fops = &intel_th_msc_fops, + .driver = { + .name = "msc", + .owner = THIS_MODULE, + }, +}; + +module_driver(intel_th_msc_driver, + intel_th_driver_register, + intel_th_driver_unregister); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Intel(R) Trace Hub Memory Storage Unit driver"); +MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>"); diff --git a/drivers/hwtracing/intel_th/msu.h b/drivers/hwtracing/intel_th/msu.h new file mode 100644 index 000000000000..9b710e4aa98a --- /dev/null +++ b/drivers/hwtracing/intel_th/msu.h @@ -0,0 +1,116 @@ +/* + * Intel(R) Trace Hub Memory Storage Unit (MSU) data structures + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_TH_MSU_H__ +#define __INTEL_TH_MSU_H__ + +enum { + REG_MSU_MSUPARAMS = 0x0000, + REG_MSU_MSUSTS = 0x0008, + REG_MSU_MSC0CTL = 0x0100, /* MSC0 control */ + REG_MSU_MSC0STS = 0x0104, /* MSC0 status */ + REG_MSU_MSC0BAR = 0x0108, /* MSC0 output base address */ + REG_MSU_MSC0SIZE = 0x010c, /* MSC0 output size */ + REG_MSU_MSC0MWP = 0x0110, /* MSC0 write pointer */ + REG_MSU_MSC0NWSA = 0x011c, /* MSC0 next window start address */ + + REG_MSU_MSC1CTL = 0x0200, /* MSC1 control */ + REG_MSU_MSC1STS = 0x0204, /* MSC1 status */ + REG_MSU_MSC1BAR = 0x0208, /* MSC1 output base address */ + REG_MSU_MSC1SIZE = 0x020c, /* MSC1 output size */ + REG_MSU_MSC1MWP = 0x0210, /* MSC1 write pointer */ + REG_MSU_MSC1NWSA = 0x021c, /* MSC1 next window start address */ +}; + +/* MSUSTS bits */ +#define MSUSTS_MSU_INT BIT(0) + +/* MSCnCTL bits */ +#define MSC_EN BIT(0) +#define MSC_WRAPEN BIT(1) +#define MSC_RD_HDR_OVRD BIT(2) +#define MSC_MODE (BIT(4) | BIT(5)) +#define MSC_LEN (BIT(8) | BIT(9) | BIT(10)) + +/* MSC operating modes (MSC_MODE) */ +enum { + MSC_MODE_SINGLE = 0, + MSC_MODE_MULTI, + MSC_MODE_EXI, + MSC_MODE_DEBUG, +}; + +/* MSCnSTS bits */ +#define MSCSTS_WRAPSTAT BIT(1) /* Wrap occurred */ +#define MSCSTS_PLE BIT(2) /* Pipeline Empty */ + +/* + * Multiblock/multiwindow block descriptor + */ +struct msc_block_desc { + u32 sw_tag; + u32 block_sz; + u32 next_blk; + u32 next_win; + u32 res0[4]; + u32 hw_tag; + u32 valid_dw; + u32 ts_low; + u32 ts_high; + u32 res1[4]; +} __packed; + +#define MSC_BDESC sizeof(struct msc_block_desc) +#define DATA_IN_PAGE (PAGE_SIZE - MSC_BDESC) + +/* MSC multiblock sw tag bits */ +#define MSC_SW_TAG_LASTBLK BIT(0) +#define MSC_SW_TAG_LASTWIN BIT(1) + +/* MSC multiblock hw tag bits */ +#define MSC_HW_TAG_TRIGGER BIT(0) +#define MSC_HW_TAG_BLOCKWRAP BIT(1) +#define MSC_HW_TAG_WINWRAP BIT(2) +#define MSC_HW_TAG_ENDBIT BIT(3) + +static inline unsigned long msc_data_sz(struct msc_block_desc *bdesc) +{ + if (!bdesc->valid_dw) + return 0; + + return bdesc->valid_dw * 4 - MSC_BDESC; +} + +static inline bool msc_block_wrapped(struct msc_block_desc *bdesc) +{ + if (bdesc->hw_tag & MSC_HW_TAG_BLOCKWRAP) + return true; + + return false; +} + +static inline bool msc_block_last_written(struct msc_block_desc *bdesc) +{ + if ((bdesc->hw_tag & MSC_HW_TAG_ENDBIT) || + (msc_data_sz(bdesc) != DATA_IN_PAGE)) + return true; + + return false; +} + +/* waiting for Pipeline Empty bit(s) to assert for MSC */ +#define MSC_PLE_WAITLOOP_DEPTH 10000 + +#endif /* __INTEL_TH_MSU_H__ */ diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c new file mode 100644 index 000000000000..641e87936064 --- /dev/null +++ b/drivers/hwtracing/intel_th/pci.c @@ -0,0 +1,86 @@ +/* + * Intel(R) Trace Hub pci driver + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/sysfs.h> +#include <linux/pci.h> + +#include "intel_th.h" + +#define DRIVER_NAME "intel_th_pci" + +#define BAR_MASK (BIT(TH_MMIO_CONFIG) | BIT(TH_MMIO_SW)) + +static int intel_th_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct intel_th *th; + int err; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions_request_all(pdev, BAR_MASK, DRIVER_NAME); + if (err) + return err; + + th = intel_th_alloc(&pdev->dev, pdev->resource, + DEVICE_COUNT_RESOURCE, pdev->irq); + if (IS_ERR(th)) + return PTR_ERR(th); + + pci_set_drvdata(pdev, th); + + return 0; +} + +static void intel_th_pci_remove(struct pci_dev *pdev) +{ + struct intel_th *th = pci_get_drvdata(pdev); + + intel_th_free(th); +} + +static const struct pci_device_id intel_th_pci_id_table[] = { + { + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9d26), + .driver_data = (kernel_ulong_t)0, + }, + { + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa126), + .driver_data = (kernel_ulong_t)0, + }, + { 0 }, +}; + +MODULE_DEVICE_TABLE(pci, intel_th_pci_id_table); + +static struct pci_driver intel_th_pci_driver = { + .name = DRIVER_NAME, + .id_table = intel_th_pci_id_table, + .probe = intel_th_pci_probe, + .remove = intel_th_pci_remove, +}; + +module_pci_driver(intel_th_pci_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Intel(R) Trace Hub PCI controller driver"); +MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@intel.com>"); diff --git a/drivers/hwtracing/intel_th/pti.c b/drivers/hwtracing/intel_th/pti.c new file mode 100644 index 000000000000..1e3bbc89825c --- /dev/null +++ b/drivers/hwtracing/intel_th/pti.c @@ -0,0 +1,252 @@ +/* + * Intel(R) Trace Hub PTI output driver + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/sizes.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/io.h> + +#include "intel_th.h" +#include "pti.h" + +struct pti_device { + void __iomem *base; + struct intel_th_device *thdev; + unsigned int mode; + unsigned int freeclk; + unsigned int clkdiv; + unsigned int patgen; +}; + +/* map PTI widths to MODE settings of PTI_CTL register */ +static const unsigned int pti_mode[] = { + 0, 4, 8, 0, 12, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, +}; + +static int pti_width_mode(unsigned int width) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pti_mode); i++) + if (pti_mode[i] == width) + return i; + + return -EINVAL; +} + +static ssize_t mode_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct pti_device *pti = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", pti_mode[pti->mode]); +} + +static ssize_t mode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct pti_device *pti = dev_get_drvdata(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + ret = pti_width_mode(val); + if (ret < 0) + return ret; + + pti->mode = ret; + + return size; +} + +static DEVICE_ATTR_RW(mode); + +static ssize_t +freerunning_clock_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct pti_device *pti = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", pti->freeclk); +} + +static ssize_t +freerunning_clock_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct pti_device *pti = dev_get_drvdata(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + pti->freeclk = !!val; + + return size; +} + +static DEVICE_ATTR_RW(freerunning_clock); + +static ssize_t +clock_divider_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct pti_device *pti = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", 1u << pti->clkdiv); +} + +static ssize_t +clock_divider_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + struct pti_device *pti = dev_get_drvdata(dev); + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (!is_power_of_2(val) || val > 8 || !val) + return -EINVAL; + + pti->clkdiv = val; + + return size; +} + +static DEVICE_ATTR_RW(clock_divider); + +static struct attribute *pti_output_attrs[] = { + &dev_attr_mode.attr, + &dev_attr_freerunning_clock.attr, + &dev_attr_clock_divider.attr, + NULL, +}; + +static struct attribute_group pti_output_group = { + .attrs = pti_output_attrs, +}; + +static int intel_th_pti_activate(struct intel_th_device *thdev) +{ + struct pti_device *pti = dev_get_drvdata(&thdev->dev); + u32 ctl = PTI_EN; + + if (pti->patgen) + ctl |= pti->patgen << __ffs(PTI_PATGENMODE); + if (pti->freeclk) + ctl |= PTI_FCEN; + ctl |= pti->mode << __ffs(PTI_MODE); + ctl |= pti->clkdiv << __ffs(PTI_CLKDIV); + + iowrite32(ctl, pti->base + REG_PTI_CTL); + + intel_th_trace_enable(thdev); + + return 0; +} + +static void intel_th_pti_deactivate(struct intel_th_device *thdev) +{ + struct pti_device *pti = dev_get_drvdata(&thdev->dev); + + intel_th_trace_disable(thdev); + + iowrite32(0, pti->base + REG_PTI_CTL); +} + +static void read_hw_config(struct pti_device *pti) +{ + u32 ctl = ioread32(pti->base + REG_PTI_CTL); + + pti->mode = (ctl & PTI_MODE) >> __ffs(PTI_MODE); + pti->clkdiv = (ctl & PTI_CLKDIV) >> __ffs(PTI_CLKDIV); + pti->freeclk = !!(ctl & PTI_FCEN); + + if (!pti_mode[pti->mode]) + pti->mode = pti_width_mode(4); + if (!pti->clkdiv) + pti->clkdiv = 1; +} + +static int intel_th_pti_probe(struct intel_th_device *thdev) +{ + struct device *dev = &thdev->dev; + struct resource *res; + struct pti_device *pti; + void __iomem *base; + int ret; + + res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + base = devm_ioremap(dev, res->start, resource_size(res)); + if (IS_ERR(base)) + return PTR_ERR(base); + + pti = devm_kzalloc(dev, sizeof(*pti), GFP_KERNEL); + if (!pti) + return -ENOMEM; + + pti->thdev = thdev; + pti->base = base; + + read_hw_config(pti); + + ret = sysfs_create_group(&dev->kobj, &pti_output_group); + if (ret) + return ret; + + dev_set_drvdata(dev, pti); + + return 0; +} + +static void intel_th_pti_remove(struct intel_th_device *thdev) +{ +} + +static struct intel_th_driver intel_th_pti_driver = { + .probe = intel_th_pti_probe, + .remove = intel_th_pti_remove, + .activate = intel_th_pti_activate, + .deactivate = intel_th_pti_deactivate, + .driver = { + .name = "pti", + .owner = THIS_MODULE, + }, +}; + +module_driver(intel_th_pti_driver, + intel_th_driver_register, + intel_th_driver_unregister); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Intel(R) Trace Hub PTI output driver"); +MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>"); diff --git a/drivers/hwtracing/intel_th/pti.h b/drivers/hwtracing/intel_th/pti.h new file mode 100644 index 000000000000..20883f5628cf --- /dev/null +++ b/drivers/hwtracing/intel_th/pti.h @@ -0,0 +1,29 @@ +/* + * Intel(R) Trace Hub PTI output data structures + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_TH_STH_H__ +#define __INTEL_TH_STH_H__ + +enum { + REG_PTI_CTL = 0x1c00, +}; + +#define PTI_EN BIT(0) +#define PTI_FCEN BIT(1) +#define PTI_MODE 0xf0 +#define PTI_CLKDIV 0x000f0000 +#define PTI_PATGENMODE 0x00f00000 + +#endif /* __INTEL_TH_STH_H__ */ diff --git a/drivers/hwtracing/intel_th/sth.c b/drivers/hwtracing/intel_th/sth.c new file mode 100644 index 000000000000..e488fccbfdec --- /dev/null +++ b/drivers/hwtracing/intel_th/sth.c @@ -0,0 +1,259 @@ +/* + * Intel(R) Trace Hub Software Trace Hub support + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/stm.h> + +#include "intel_th.h" +#include "sth.h" + +struct sth_device { + void __iomem *base; + void __iomem *channels; + phys_addr_t channels_phys; + struct device *dev; + struct stm_data stm; + unsigned int sw_nmasters; +}; + +static struct intel_th_channel __iomem * +sth_channel(struct sth_device *sth, unsigned int master, unsigned int channel) +{ + struct intel_th_channel __iomem *sw_map = sth->channels; + + return &sw_map[(master - sth->stm.sw_start) * sth->stm.sw_nchannels + + channel]; +} + +static void sth_iowrite(void __iomem *dest, const unsigned char *payload, + unsigned int size) +{ + switch (size) { +#ifdef CONFIG_64BIT + case 8: + writeq_relaxed(*(u64 *)payload, dest); + break; +#endif + case 4: + writel_relaxed(*(u32 *)payload, dest); + break; + case 2: + writew_relaxed(*(u16 *)payload, dest); + break; + case 1: + writeb_relaxed(*(u8 *)payload, dest); + break; + default: + break; + } +} + +static ssize_t sth_stm_packet(struct stm_data *stm_data, unsigned int master, + unsigned int channel, unsigned int packet, + unsigned int flags, unsigned int size, + const unsigned char *payload) +{ + struct sth_device *sth = container_of(stm_data, struct sth_device, stm); + struct intel_th_channel __iomem *out = + sth_channel(sth, master, channel); + u64 __iomem *outp = &out->Dn; + unsigned long reg = REG_STH_TRIG; + +#ifndef CONFIG_64BIT + if (size > 4) + size = 4; +#endif + + size = rounddown_pow_of_two(size); + + switch (packet) { + /* Global packets (GERR, XSYNC, TRIG) are sent with register writes */ + case STP_PACKET_GERR: + reg += 4; + case STP_PACKET_XSYNC: + reg += 8; + case STP_PACKET_TRIG: + if (flags & STP_PACKET_TIMESTAMPED) + reg += 4; + iowrite8(*payload, sth->base + reg); + break; + + case STP_PACKET_MERR: + sth_iowrite(&out->MERR, payload, size); + break; + + case STP_PACKET_FLAG: + if (flags & STP_PACKET_TIMESTAMPED) + outp = (u64 __iomem *)&out->FLAG_TS; + else + outp = (u64 __iomem *)&out->FLAG; + + size = 1; + sth_iowrite(outp, payload, size); + break; + + case STP_PACKET_USER: + if (flags & STP_PACKET_TIMESTAMPED) + outp = &out->USER_TS; + else + outp = &out->USER; + sth_iowrite(outp, payload, size); + break; + + case STP_PACKET_DATA: + outp = &out->Dn; + + if (flags & STP_PACKET_TIMESTAMPED) + outp += 2; + if (flags & STP_PACKET_MARKED) + outp++; + + sth_iowrite(outp, payload, size); + break; + } + + return size; +} + +static phys_addr_t +sth_stm_mmio_addr(struct stm_data *stm_data, unsigned int master, + unsigned int channel, unsigned int nr_chans) +{ + struct sth_device *sth = container_of(stm_data, struct sth_device, stm); + phys_addr_t addr; + + master -= sth->stm.sw_start; + addr = sth->channels_phys + (master * sth->stm.sw_nchannels + channel) * + sizeof(struct intel_th_channel); + + if (offset_in_page(addr) || + offset_in_page(nr_chans * sizeof(struct intel_th_channel))) + return 0; + + return addr; +} + +static int sth_stm_link(struct stm_data *stm_data, unsigned int master, + unsigned int channel) +{ + struct sth_device *sth = container_of(stm_data, struct sth_device, stm); + + intel_th_set_output(to_intel_th_device(sth->dev), master); + + return 0; +} + +static int intel_th_sw_init(struct sth_device *sth) +{ + u32 reg; + + reg = ioread32(sth->base + REG_STH_STHCAP1); + sth->stm.sw_nchannels = reg & 0xff; + + reg = ioread32(sth->base + REG_STH_STHCAP0); + sth->stm.sw_start = reg & 0xffff; + sth->stm.sw_end = reg >> 16; + + sth->sw_nmasters = sth->stm.sw_end - sth->stm.sw_start; + dev_dbg(sth->dev, "sw_start: %x sw_end: %x masters: %x nchannels: %x\n", + sth->stm.sw_start, sth->stm.sw_end, sth->sw_nmasters, + sth->stm.sw_nchannels); + + return 0; +} + +static int intel_th_sth_probe(struct intel_th_device *thdev) +{ + struct device *dev = &thdev->dev; + struct sth_device *sth; + struct resource *res; + void __iomem *base, *channels; + int err; + + res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + base = devm_ioremap(dev, res->start, resource_size(res)); + if (IS_ERR(base)) + return PTR_ERR(base); + + res = intel_th_device_get_resource(thdev, IORESOURCE_MEM, 1); + if (!res) + return -ENODEV; + + channels = devm_ioremap(dev, res->start, resource_size(res)); + if (IS_ERR(channels)) + return PTR_ERR(channels); + + sth = devm_kzalloc(dev, sizeof(*sth), GFP_KERNEL); + if (!sth) + return -ENOMEM; + + sth->dev = dev; + sth->base = base; + sth->channels = channels; + sth->channels_phys = res->start; + sth->stm.name = dev_name(dev); + sth->stm.packet = sth_stm_packet; + sth->stm.mmio_addr = sth_stm_mmio_addr; + sth->stm.sw_mmiosz = sizeof(struct intel_th_channel); + sth->stm.link = sth_stm_link; + + err = intel_th_sw_init(sth); + if (err) + return err; + + err = stm_register_device(dev, &sth->stm, THIS_MODULE); + if (err) { + dev_err(dev, "stm_register_device failed\n"); + return err; + } + + dev_set_drvdata(dev, sth); + + return 0; +} + +static void intel_th_sth_remove(struct intel_th_device *thdev) +{ + struct sth_device *sth = dev_get_drvdata(&thdev->dev); + + stm_unregister_device(&sth->stm); +} + +static struct intel_th_driver intel_th_sth_driver = { + .probe = intel_th_sth_probe, + .remove = intel_th_sth_remove, + .driver = { + .name = "sth", + .owner = THIS_MODULE, + }, +}; + +module_driver(intel_th_sth_driver, + intel_th_driver_register, + intel_th_driver_unregister); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Intel(R) Trace Hub Software Trace Hub driver"); +MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@intel.com>"); diff --git a/drivers/hwtracing/intel_th/sth.h b/drivers/hwtracing/intel_th/sth.h new file mode 100644 index 000000000000..f1390cd4f2ed --- /dev/null +++ b/drivers/hwtracing/intel_th/sth.h @@ -0,0 +1,42 @@ +/* + * Intel(R) Trace Hub Software Trace Hub (STH) data structures + * + * Copyright (C) 2014-2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_TH_STH_H__ +#define __INTEL_TH_STH_H__ + +enum { + REG_STH_STHCAP0 = 0x0000, /* capabilities pt1 */ + REG_STH_STHCAP1 = 0x0004, /* capabilities pt2 */ + REG_STH_TRIG = 0x0008, /* TRIG packet payload */ + REG_STH_TRIG_TS = 0x000c, /* TRIG_TS packet payload */ + REG_STH_XSYNC = 0x0010, /* XSYNC packet payload */ + REG_STH_XSYNC_TS = 0x0014, /* XSYNC_TS packet payload */ + REG_STH_GERR = 0x0018, /* GERR packet payload */ +}; + +struct intel_th_channel { + u64 Dn; + u64 DnM; + u64 DnTS; + u64 DnMTS; + u64 USER; + u64 USER_TS; + u32 FLAG; + u32 FLAG_TS; + u32 MERR; + u32 __unused; +} __packed; + +#endif /* __INTEL_TH_STH_H__ */ diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig new file mode 100644 index 000000000000..5a59a28cc3da --- /dev/null +++ b/drivers/hwtracing/stm/Kconfig @@ -0,0 +1,25 @@ +config STM + tristate "System Trace Module devices" + help + A System Trace Module (STM) is a device exporting data in System + Trace Protocol (STP) format as defined by MIPI STP standards. + Examples of such devices are Intel(R) Trace Hub and Coresight STM. + + Say Y here to enable System Trace Module device support. + +config STM_DUMMY + tristate "Dummy STM driver" + help + This is a simple dummy device that pretends to be an stm device + and discards your data. Use for stm class testing. + + If you don't know what this is, say N. + +config STM_SOURCE_CONSOLE + tristate "Kernel console over STM devices" + help + This is a kernel space trace source that sends kernel log + messages to trace hosts over STM devices. + + If you want to send kernel console messages over STM devices, + say Y. diff --git a/drivers/hwtracing/stm/Makefile b/drivers/hwtracing/stm/Makefile new file mode 100644 index 000000000000..f9312c38dd7a --- /dev/null +++ b/drivers/hwtracing/stm/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_STM) += stm_core.o + +stm_core-y := core.o policy.o + +obj-$(CONFIG_STM_DUMMY) += dummy_stm.o + +obj-$(CONFIG_STM_SOURCE_CONSOLE) += stm_console.o + +stm_console-y := console.o diff --git a/drivers/hwtracing/stm/console.c b/drivers/hwtracing/stm/console.c new file mode 100644 index 000000000000..c9d9a8d2ff52 --- /dev/null +++ b/drivers/hwtracing/stm/console.c @@ -0,0 +1,80 @@ +/* + * Simple kernel console driver for STM devices + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * STM console will send kernel messages over STM devices to a trace host. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/console.h> +#include <linux/slab.h> +#include <linux/stm.h> + +static int stm_console_link(struct stm_source_data *data); +static void stm_console_unlink(struct stm_source_data *data); + +static struct stm_console { + struct stm_source_data data; + struct console console; +} stm_console = { + .data = { + .name = "console", + .nr_chans = 1, + .link = stm_console_link, + .unlink = stm_console_unlink, + }, +}; + +static void +stm_console_write(struct console *con, const char *buf, unsigned len) +{ + struct stm_console *sc = container_of(con, struct stm_console, console); + + stm_source_write(&sc->data, 0, buf, len); +} + +static int stm_console_link(struct stm_source_data *data) +{ + struct stm_console *sc = container_of(data, struct stm_console, data); + + strcpy(sc->console.name, "stm_console"); + sc->console.write = stm_console_write; + sc->console.flags = CON_ENABLED | CON_PRINTBUFFER; + register_console(&sc->console); + + return 0; +} + +static void stm_console_unlink(struct stm_source_data *data) +{ + struct stm_console *sc = container_of(data, struct stm_console, data); + + unregister_console(&sc->console); +} + +static int stm_console_init(void) +{ + return stm_source_register_device(NULL, &stm_console.data); +} + +static void stm_console_exit(void) +{ + stm_source_unregister_device(&stm_console.data); +} + +module_init(stm_console_init); +module_exit(stm_console_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("stm_console driver"); +MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>"); diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c new file mode 100644 index 000000000000..b6445d9e5453 --- /dev/null +++ b/drivers/hwtracing/stm/core.c @@ -0,0 +1,1032 @@ +/* + * System Trace Module (STM) infrastructure + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * STM class implements generic infrastructure for System Trace Module devices + * as defined in MIPI STPv2 specification. + */ + +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/compat.h> +#include <linux/kdev_t.h> +#include <linux/srcu.h> +#include <linux/slab.h> +#include <linux/stm.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include "stm.h" + +#include <uapi/linux/stm.h> + +static unsigned int stm_core_up; + +/* + * The SRCU here makes sure that STM device doesn't disappear from under a + * stm_source_write() caller, which may want to have as little overhead as + * possible. + */ +static struct srcu_struct stm_source_srcu; + +static ssize_t masters_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct stm_device *stm = to_stm_device(dev); + int ret; + + ret = sprintf(buf, "%u %u\n", stm->data->sw_start, stm->data->sw_end); + + return ret; +} + +static DEVICE_ATTR_RO(masters); + +static ssize_t channels_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct stm_device *stm = to_stm_device(dev); + int ret; + + ret = sprintf(buf, "%u\n", stm->data->sw_nchannels); + + return ret; +} + +static DEVICE_ATTR_RO(channels); + +static struct attribute *stm_attrs[] = { + &dev_attr_masters.attr, + &dev_attr_channels.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(stm); + +static struct class stm_class = { + .name = "stm", + .dev_groups = stm_groups, +}; + +static int stm_dev_match(struct device *dev, const void *data) +{ + const char *name = data; + + return sysfs_streq(name, dev_name(dev)); +} + +/** + * stm_find_device() - find stm device by name + * @buf: character buffer containing the name + * + * This is called when either policy gets assigned to an stm device or an + * stm_source device gets linked to an stm device. + * + * This grabs device's reference (get_device()) and module reference, both + * of which the calling path needs to make sure to drop with stm_put_device(). + * + * Return: stm device pointer or null if lookup failed. + */ +struct stm_device *stm_find_device(const char *buf) +{ + struct stm_device *stm; + struct device *dev; + + if (!stm_core_up) + return NULL; + + dev = class_find_device(&stm_class, NULL, buf, stm_dev_match); + if (!dev) + return NULL; + + stm = to_stm_device(dev); + if (!try_module_get(stm->owner)) { + put_device(dev); + return NULL; + } + + return stm; +} + +/** + * stm_put_device() - drop references on the stm device + * @stm: stm device, previously acquired by stm_find_device() + * + * This drops the module reference and device reference taken by + * stm_find_device(). + */ +void stm_put_device(struct stm_device *stm) +{ + module_put(stm->owner); + put_device(&stm->dev); +} + +/* + * Internally we only care about software-writable masters here, that is the + * ones in the range [stm_data->sw_start..stm_data..sw_end], however we need + * original master numbers to be visible externally, since they are the ones + * that will appear in the STP stream. Thus, the internal bookkeeping uses + * $master - stm_data->sw_start to reference master descriptors and such. + */ + +#define __stm_master(_s, _m) \ + ((_s)->masters[(_m) - (_s)->data->sw_start]) + +static inline struct stp_master * +stm_master(struct stm_device *stm, unsigned int idx) +{ + if (idx < stm->data->sw_start || idx > stm->data->sw_end) + return NULL; + + return __stm_master(stm, idx); +} + +static int stp_master_alloc(struct stm_device *stm, unsigned int idx) +{ + struct stp_master *master; + size_t size; + + size = ALIGN(stm->data->sw_nchannels, 8) / 8; + size += sizeof(struct stp_master); + master = kzalloc(size, GFP_ATOMIC); + if (!master) + return -ENOMEM; + + master->nr_free = stm->data->sw_nchannels; + __stm_master(stm, idx) = master; + + return 0; +} + +static void stp_master_free(struct stm_device *stm, unsigned int idx) +{ + struct stp_master *master = stm_master(stm, idx); + + if (!master) + return; + + __stm_master(stm, idx) = NULL; + kfree(master); +} + +static void stm_output_claim(struct stm_device *stm, struct stm_output *output) +{ + struct stp_master *master = stm_master(stm, output->master); + + if (WARN_ON_ONCE(master->nr_free < output->nr_chans)) + return; + + bitmap_allocate_region(&master->chan_map[0], output->channel, + ilog2(output->nr_chans)); + + master->nr_free -= output->nr_chans; +} + +static void +stm_output_disclaim(struct stm_device *stm, struct stm_output *output) +{ + struct stp_master *master = stm_master(stm, output->master); + + bitmap_release_region(&master->chan_map[0], output->channel, + ilog2(output->nr_chans)); + + output->nr_chans = 0; + master->nr_free += output->nr_chans; +} + +/* + * This is like bitmap_find_free_region(), except it can ignore @start bits + * at the beginning. + */ +static int find_free_channels(unsigned long *bitmap, unsigned int start, + unsigned int end, unsigned int width) +{ + unsigned int pos; + int i; + + for (pos = start; pos < end + 1; pos = ALIGN(pos, width)) { + pos = find_next_zero_bit(bitmap, end + 1, pos); + if (pos + width > end + 1) + break; + + if (pos & (width - 1)) + continue; + + for (i = 1; i < width && !test_bit(pos + i, bitmap); i++) + ; + if (i == width) + return pos; + } + + return -1; +} + +static unsigned int +stm_find_master_chan(struct stm_device *stm, unsigned int width, + unsigned int *mstart, unsigned int mend, + unsigned int *cstart, unsigned int cend) +{ + struct stp_master *master; + unsigned int midx; + int pos, err; + + for (midx = *mstart; midx <= mend; midx++) { + if (!stm_master(stm, midx)) { + err = stp_master_alloc(stm, midx); + if (err) + return err; + } + + master = stm_master(stm, midx); + + if (!master->nr_free) + continue; + + pos = find_free_channels(master->chan_map, *cstart, cend, + width); + if (pos < 0) + continue; + + *mstart = midx; + *cstart = pos; + return 0; + } + + return -ENOSPC; +} + +static int stm_output_assign(struct stm_device *stm, unsigned int width, + struct stp_policy_node *policy_node, + struct stm_output *output) +{ + unsigned int midx, cidx, mend, cend; + int ret = -EINVAL; + + if (width > stm->data->sw_nchannels) + return -EINVAL; + + if (policy_node) { + stp_policy_node_get_ranges(policy_node, + &midx, &mend, &cidx, &cend); + } else { + midx = stm->data->sw_start; + cidx = 0; + mend = stm->data->sw_end; + cend = stm->data->sw_nchannels - 1; + } + + spin_lock(&stm->mc_lock); + /* output is already assigned -- shouldn't happen */ + if (WARN_ON_ONCE(output->nr_chans)) + goto unlock; + + ret = stm_find_master_chan(stm, width, &midx, mend, &cidx, cend); + if (ret) + goto unlock; + + output->master = midx; + output->channel = cidx; + output->nr_chans = width; + stm_output_claim(stm, output); + dev_dbg(&stm->dev, "assigned %u:%u (+%u)\n", midx, cidx, width); + + ret = 0; +unlock: + spin_unlock(&stm->mc_lock); + + return ret; +} + +static void stm_output_free(struct stm_device *stm, struct stm_output *output) +{ + spin_lock(&stm->mc_lock); + if (output->nr_chans) + stm_output_disclaim(stm, output); + spin_unlock(&stm->mc_lock); +} + +static int major_match(struct device *dev, const void *data) +{ + unsigned int major = *(unsigned int *)data; + + return MAJOR(dev->devt) == major; +} + +static int stm_char_open(struct inode *inode, struct file *file) +{ + struct stm_file *stmf; + struct device *dev; + unsigned int major = imajor(inode); + int err = -ENODEV; + + dev = class_find_device(&stm_class, NULL, &major, major_match); + if (!dev) + return -ENODEV; + + stmf = kzalloc(sizeof(*stmf), GFP_KERNEL); + if (!stmf) + return -ENOMEM; + + stmf->stm = to_stm_device(dev); + + if (!try_module_get(stmf->stm->owner)) + goto err_free; + + file->private_data = stmf; + + return nonseekable_open(inode, file); + +err_free: + kfree(stmf); + + return err; +} + +static int stm_char_release(struct inode *inode, struct file *file) +{ + struct stm_file *stmf = file->private_data; + + stm_output_free(stmf->stm, &stmf->output); + stm_put_device(stmf->stm); + kfree(stmf); + + return 0; +} + +static int stm_file_assign(struct stm_file *stmf, char *id, unsigned int width) +{ + struct stm_device *stm = stmf->stm; + int ret; + + stmf->policy_node = stp_policy_node_lookup(stm, id); + + ret = stm_output_assign(stm, width, stmf->policy_node, &stmf->output); + + if (stmf->policy_node) + stp_policy_node_put(stmf->policy_node); + + return ret; +} + +static void stm_write(struct stm_data *data, unsigned int master, + unsigned int channel, const char *buf, size_t count) +{ + unsigned int flags = STP_PACKET_TIMESTAMPED; + const unsigned char *p = buf, nil = 0; + size_t pos; + ssize_t sz; + + for (pos = 0, p = buf; count > pos; pos += sz, p += sz) { + sz = min_t(unsigned int, count - pos, 8); + sz = data->packet(data, master, channel, STP_PACKET_DATA, flags, + sz, p); + flags = 0; + } + + data->packet(data, master, channel, STP_PACKET_FLAG, 0, 0, &nil); +} + +static ssize_t stm_char_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct stm_file *stmf = file->private_data; + struct stm_device *stm = stmf->stm; + char *kbuf; + int err; + + /* + * if no m/c have been assigned to this writer up to this + * point, use "default" policy entry + */ + if (!stmf->output.nr_chans) { + err = stm_file_assign(stmf, "default", 1); + /* + * EBUSY means that somebody else just assigned this + * output, which is just fine for write() + */ + if (err && err != -EBUSY) + return err; + } + + kbuf = kmalloc(count + 1, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + err = copy_from_user(kbuf, buf, count); + if (err) { + kfree(kbuf); + return -EFAULT; + } + + stm_write(stm->data, stmf->output.master, stmf->output.channel, kbuf, + count); + + kfree(kbuf); + + return count; +} + +static int stm_char_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct stm_file *stmf = file->private_data; + struct stm_device *stm = stmf->stm; + unsigned long size, phys; + + if (!stm->data->mmio_addr) + return -EOPNOTSUPP; + + if (vma->vm_pgoff) + return -EINVAL; + + size = vma->vm_end - vma->vm_start; + + if (stmf->output.nr_chans * stm->data->sw_mmiosz != size) + return -EINVAL; + + phys = stm->data->mmio_addr(stm->data, stmf->output.master, + stmf->output.channel, + stmf->output.nr_chans); + + if (!phys) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; + vm_iomap_memory(vma, phys, size); + + return 0; +} + +static int stm_char_policy_set_ioctl(struct stm_file *stmf, void __user *arg) +{ + struct stm_device *stm = stmf->stm; + struct stp_policy_id *id; + int ret = -EINVAL; + u32 size; + + if (stmf->output.nr_chans) + return -EBUSY; + + if (copy_from_user(&size, arg, sizeof(size))) + return -EFAULT; + + if (size >= PATH_MAX + sizeof(*id)) + return -EINVAL; + + /* + * size + 1 to make sure the .id string at the bottom is terminated, + * which is also why memdup_user() is not useful here + */ + id = kzalloc(size + 1, GFP_KERNEL); + if (!id) + return -ENOMEM; + + if (copy_from_user(id, arg, size)) { + ret = -EFAULT; + goto err_free; + } + + if (id->__reserved_0 || id->__reserved_1) + goto err_free; + + if (id->width < 1 || + id->width > PAGE_SIZE / stm->data->sw_mmiosz) + goto err_free; + + ret = stm_file_assign(stmf, id->id, id->width); + if (ret) + goto err_free; + + ret = 0; + + if (stm->data->link) + ret = stm->data->link(stm->data, stmf->output.master, + stmf->output.channel); + + if (ret) { + stm_output_free(stmf->stm, &stmf->output); + stm_put_device(stmf->stm); + } + +err_free: + kfree(id); + + return ret; +} + +static int stm_char_policy_get_ioctl(struct stm_file *stmf, void __user *arg) +{ + struct stp_policy_id id = { + .size = sizeof(id), + .master = stmf->output.master, + .channel = stmf->output.channel, + .width = stmf->output.nr_chans, + .__reserved_0 = 0, + .__reserved_1 = 0, + }; + + return copy_to_user(arg, &id, id.size) ? -EFAULT : 0; +} + +static long +stm_char_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct stm_file *stmf = file->private_data; + struct stm_data *stm_data = stmf->stm->data; + int err = -ENOTTY; + u64 options; + + switch (cmd) { + case STP_POLICY_ID_SET: + err = stm_char_policy_set_ioctl(stmf, (void __user *)arg); + if (err) + return err; + + return stm_char_policy_get_ioctl(stmf, (void __user *)arg); + + case STP_POLICY_ID_GET: + return stm_char_policy_get_ioctl(stmf, (void __user *)arg); + + case STP_SET_OPTIONS: + if (copy_from_user(&options, (u64 __user *)arg, sizeof(u64))) + return -EFAULT; + + if (stm_data->set_options) + err = stm_data->set_options(stm_data, + stmf->output.master, + stmf->output.channel, + stmf->output.nr_chans, + options); + + break; + default: + break; + } + + return err; +} + +#ifdef CONFIG_COMPAT +static long +stm_char_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return stm_char_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#else +#define stm_char_compat_ioctl NULL +#endif + +static const struct file_operations stm_fops = { + .open = stm_char_open, + .release = stm_char_release, + .write = stm_char_write, + .mmap = stm_char_mmap, + .unlocked_ioctl = stm_char_ioctl, + .compat_ioctl = stm_char_compat_ioctl, + .llseek = no_llseek, +}; + +static void stm_device_release(struct device *dev) +{ + struct stm_device *stm = to_stm_device(dev); + + kfree(stm); +} + +int stm_register_device(struct device *parent, struct stm_data *stm_data, + struct module *owner) +{ + struct stm_device *stm; + unsigned int nmasters; + int err = -ENOMEM; + + if (!stm_core_up) + return -EPROBE_DEFER; + + if (!stm_data->packet || !stm_data->sw_nchannels) + return -EINVAL; + + nmasters = stm_data->sw_end - stm_data->sw_start; + stm = kzalloc(sizeof(*stm) + nmasters * sizeof(void *), GFP_KERNEL); + if (!stm) + return -ENOMEM; + + stm->major = register_chrdev(0, stm_data->name, &stm_fops); + if (stm->major < 0) + goto err_free; + + device_initialize(&stm->dev); + stm->dev.devt = MKDEV(stm->major, 0); + stm->dev.class = &stm_class; + stm->dev.parent = parent; + stm->dev.release = stm_device_release; + + err = kobject_set_name(&stm->dev.kobj, "%s", stm_data->name); + if (err) + goto err_device; + + err = device_add(&stm->dev); + if (err) + goto err_device; + + spin_lock_init(&stm->link_lock); + INIT_LIST_HEAD(&stm->link_list); + + spin_lock_init(&stm->mc_lock); + mutex_init(&stm->policy_mutex); + stm->sw_nmasters = nmasters; + stm->owner = owner; + stm->data = stm_data; + stm_data->stm = stm; + + return 0; + +err_device: + put_device(&stm->dev); +err_free: + kfree(stm); + + return err; +} +EXPORT_SYMBOL_GPL(stm_register_device); + +static void __stm_source_link_drop(struct stm_source_device *src, + struct stm_device *stm); + +void stm_unregister_device(struct stm_data *stm_data) +{ + struct stm_device *stm = stm_data->stm; + struct stm_source_device *src, *iter; + int i; + + spin_lock(&stm->link_lock); + list_for_each_entry_safe(src, iter, &stm->link_list, link_entry) { + __stm_source_link_drop(src, stm); + } + spin_unlock(&stm->link_lock); + + synchronize_srcu(&stm_source_srcu); + + unregister_chrdev(stm->major, stm_data->name); + + mutex_lock(&stm->policy_mutex); + if (stm->policy) + stp_policy_unbind(stm->policy); + mutex_unlock(&stm->policy_mutex); + + for (i = 0; i < stm->sw_nmasters; i++) + stp_master_free(stm, i); + + device_unregister(&stm->dev); + stm_data->stm = NULL; +} +EXPORT_SYMBOL_GPL(stm_unregister_device); + +/** + * stm_source_link_add() - connect an stm_source device to an stm device + * @src: stm_source device + * @stm: stm device + * + * This function establishes a link from stm_source to an stm device so that + * the former can send out trace data to the latter. + * + * Return: 0 on success, -errno otherwise. + */ +static int stm_source_link_add(struct stm_source_device *src, + struct stm_device *stm) +{ + char *id; + int err; + + spin_lock(&stm->link_lock); + spin_lock(&src->link_lock); + + /* src->link is dereferenced under stm_source_srcu but not the list */ + rcu_assign_pointer(src->link, stm); + list_add_tail(&src->link_entry, &stm->link_list); + + spin_unlock(&src->link_lock); + spin_unlock(&stm->link_lock); + + id = kstrdup(src->data->name, GFP_KERNEL); + if (id) { + src->policy_node = + stp_policy_node_lookup(stm, id); + + kfree(id); + } + + err = stm_output_assign(stm, src->data->nr_chans, + src->policy_node, &src->output); + + if (src->policy_node) + stp_policy_node_put(src->policy_node); + + if (err) + goto fail_detach; + + /* this is to notify the STM device that a new link has been made */ + if (stm->data->link) + err = stm->data->link(stm->data, src->output.master, + src->output.channel); + + if (err) + goto fail_free_output; + + /* this is to let the source carry out all necessary preparations */ + if (src->data->link) + src->data->link(src->data); + + return 0; + +fail_free_output: + stm_output_free(stm, &src->output); + stm_put_device(stm); + +fail_detach: + spin_lock(&stm->link_lock); + spin_lock(&src->link_lock); + + rcu_assign_pointer(src->link, NULL); + list_del_init(&src->link_entry); + + spin_unlock(&src->link_lock); + spin_unlock(&stm->link_lock); + + return err; +} + +/** + * __stm_source_link_drop() - detach stm_source from an stm device + * @src: stm_source device + * @stm: stm device + * + * If @stm is @src::link, disconnect them from one another and put the + * reference on the @stm device. + * + * Caller must hold stm::link_lock. + */ +static void __stm_source_link_drop(struct stm_source_device *src, + struct stm_device *stm) +{ + struct stm_device *link; + + spin_lock(&src->link_lock); + link = srcu_dereference_check(src->link, &stm_source_srcu, 1); + if (WARN_ON_ONCE(link != stm)) { + spin_unlock(&src->link_lock); + return; + } + + stm_output_free(link, &src->output); + /* caller must hold stm::link_lock */ + list_del_init(&src->link_entry); + /* matches stm_find_device() from stm_source_link_store() */ + stm_put_device(link); + rcu_assign_pointer(src->link, NULL); + + spin_unlock(&src->link_lock); +} + +/** + * stm_source_link_drop() - detach stm_source from its stm device + * @src: stm_source device + * + * Unlinking means disconnecting from source's STM device; after this + * writes will be unsuccessful until it is linked to a new STM device. + * + * This will happen on "stm_source_link" sysfs attribute write to undo + * the existing link (if any), or on linked STM device's de-registration. + */ +static void stm_source_link_drop(struct stm_source_device *src) +{ + struct stm_device *stm; + int idx; + + idx = srcu_read_lock(&stm_source_srcu); + stm = srcu_dereference(src->link, &stm_source_srcu); + + if (stm) { + if (src->data->unlink) + src->data->unlink(src->data); + + spin_lock(&stm->link_lock); + __stm_source_link_drop(src, stm); + spin_unlock(&stm->link_lock); + } + + srcu_read_unlock(&stm_source_srcu, idx); +} + +static ssize_t stm_source_link_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct stm_source_device *src = to_stm_source_device(dev); + struct stm_device *stm; + int idx, ret; + + idx = srcu_read_lock(&stm_source_srcu); + stm = srcu_dereference(src->link, &stm_source_srcu); + ret = sprintf(buf, "%s\n", + stm ? dev_name(&stm->dev) : "<none>"); + srcu_read_unlock(&stm_source_srcu, idx); + + return ret; +} + +static ssize_t stm_source_link_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct stm_source_device *src = to_stm_source_device(dev); + struct stm_device *link; + int err; + + stm_source_link_drop(src); + + link = stm_find_device(buf); + if (!link) + return -EINVAL; + + err = stm_source_link_add(src, link); + if (err) + stm_put_device(link); + + return err ? : count; +} + +static DEVICE_ATTR_RW(stm_source_link); + +static struct attribute *stm_source_attrs[] = { + &dev_attr_stm_source_link.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(stm_source); + +static struct class stm_source_class = { + .name = "stm_source", + .dev_groups = stm_source_groups, +}; + +static void stm_source_device_release(struct device *dev) +{ + struct stm_source_device *src = to_stm_source_device(dev); + + kfree(src); +} + +/** + * stm_source_register_device() - register an stm_source device + * @parent: parent device + * @data: device description structure + * + * This will create a device of stm_source class that can write + * data to an stm device once linked. + * + * Return: 0 on success, -errno otherwise. + */ +int stm_source_register_device(struct device *parent, + struct stm_source_data *data) +{ + struct stm_source_device *src; + int err; + + if (!stm_core_up) + return -EPROBE_DEFER; + + src = kzalloc(sizeof(*src), GFP_KERNEL); + if (!src) + return -ENOMEM; + + device_initialize(&src->dev); + src->dev.class = &stm_source_class; + src->dev.parent = parent; + src->dev.release = stm_source_device_release; + + err = kobject_set_name(&src->dev.kobj, "%s", data->name); + if (err) + goto err; + + err = device_add(&src->dev); + if (err) + goto err; + + spin_lock_init(&src->link_lock); + INIT_LIST_HEAD(&src->link_entry); + src->data = data; + data->src = src; + + return 0; + +err: + put_device(&src->dev); + kfree(src); + + return err; +} +EXPORT_SYMBOL_GPL(stm_source_register_device); + +/** + * stm_source_unregister_device() - unregister an stm_source device + * @data: device description that was used to register the device + * + * This will remove a previously created stm_source device from the system. + */ +void stm_source_unregister_device(struct stm_source_data *data) +{ + struct stm_source_device *src = data->src; + + stm_source_link_drop(src); + + device_destroy(&stm_source_class, src->dev.devt); +} +EXPORT_SYMBOL_GPL(stm_source_unregister_device); + +int stm_source_write(struct stm_source_data *data, unsigned int chan, + const char *buf, size_t count) +{ + struct stm_source_device *src = data->src; + struct stm_device *stm; + int idx; + + if (!src->output.nr_chans) + return -ENODEV; + + if (chan >= src->output.nr_chans) + return -EINVAL; + + idx = srcu_read_lock(&stm_source_srcu); + + stm = srcu_dereference(src->link, &stm_source_srcu); + if (stm) + stm_write(stm->data, src->output.master, + src->output.channel + chan, + buf, count); + else + count = -ENODEV; + + srcu_read_unlock(&stm_source_srcu, idx); + + return count; +} +EXPORT_SYMBOL_GPL(stm_source_write); + +static int __init stm_core_init(void) +{ + int err; + + err = class_register(&stm_class); + if (err) + return err; + + err = class_register(&stm_source_class); + if (err) + goto err_stm; + + err = stp_configfs_init(); + if (err) + goto err_src; + + init_srcu_struct(&stm_source_srcu); + + stm_core_up++; + + return 0; + +err_src: + class_unregister(&stm_source_class); +err_stm: + class_unregister(&stm_class); + + return err; +} + +module_init(stm_core_init); + +static void __exit stm_core_exit(void) +{ + cleanup_srcu_struct(&stm_source_srcu); + class_unregister(&stm_source_class); + class_unregister(&stm_class); + stp_configfs_exit(); +} + +module_exit(stm_core_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("System Trace Module device class"); +MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>"); diff --git a/drivers/hwtracing/stm/dummy_stm.c b/drivers/hwtracing/stm/dummy_stm.c new file mode 100644 index 000000000000..3709bef0b21f --- /dev/null +++ b/drivers/hwtracing/stm/dummy_stm.c @@ -0,0 +1,66 @@ +/* + * A dummy STM device for stm/stm_source class testing. + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * STM class implements generic infrastructure for System Trace Module devices + * as defined in MIPI STPv2 specification. + */ + +#undef DEBUG +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/stm.h> + +static ssize_t +dummy_stm_packet(struct stm_data *stm_data, unsigned int master, + unsigned int channel, unsigned int packet, unsigned int flags, + unsigned int size, const unsigned char *payload) +{ +#ifdef DEBUG + u64 pl = 0; + + if (payload) + pl = *(u64 *)payload; + + if (size < 8) + pl &= (1ull << (size * 8)) - 1; + trace_printk("[%u:%u] [pkt: %x/%x] (%llx)\n", master, channel, + packet, size, pl); +#endif + return size; +} + +static struct stm_data dummy_stm = { + .name = "dummy_stm", + .sw_start = 0x0000, + .sw_end = 0xffff, + .sw_nchannels = 0xffff, + .packet = dummy_stm_packet, +}; + +static int dummy_stm_init(void) +{ + return stm_register_device(NULL, &dummy_stm, THIS_MODULE); +} + +static void dummy_stm_exit(void) +{ + stm_unregister_device(&dummy_stm); +} + +module_init(dummy_stm_init); +module_exit(dummy_stm_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("dummy_stm device"); +MODULE_AUTHOR("Alexander Shishkin <alexander.shishkin@linux.intel.com>"); diff --git a/drivers/hwtracing/stm/policy.c b/drivers/hwtracing/stm/policy.c new file mode 100644 index 000000000000..6498a9dbb7bd --- /dev/null +++ b/drivers/hwtracing/stm/policy.c @@ -0,0 +1,529 @@ +/* + * System Trace Module (STM) master/channel allocation policy management + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * A master/channel allocation policy allows mapping string identifiers to + * master and channel ranges, where allocation can be done. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/configfs.h> +#include <linux/slab.h> +#include <linux/stm.h> +#include "stm.h" + +/* + * STP Master/Channel allocation policy configfs layout. + */ + +struct stp_policy { + struct config_group group; + struct stm_device *stm; +}; + +struct stp_policy_node { + struct config_group group; + struct stp_policy *policy; + unsigned int first_master; + unsigned int last_master; + unsigned int first_channel; + unsigned int last_channel; +}; + +static struct configfs_subsystem stp_policy_subsys; + +void stp_policy_node_get_ranges(struct stp_policy_node *policy_node, + unsigned int *mstart, unsigned int *mend, + unsigned int *cstart, unsigned int *cend) +{ + *mstart = policy_node->first_master; + *mend = policy_node->last_master; + *cstart = policy_node->first_channel; + *cend = policy_node->last_channel; +} + +static inline char *stp_policy_node_name(struct stp_policy_node *policy_node) +{ + return policy_node->group.cg_item.ci_name ? : "<none>"; +} + +static inline struct stp_policy *to_stp_policy(struct config_item *item) +{ + return item ? + container_of(to_config_group(item), struct stp_policy, group) : + NULL; +} + +static inline struct stp_policy_node * +to_stp_policy_node(struct config_item *item) +{ + return item ? + container_of(to_config_group(item), struct stp_policy_node, + group) : + NULL; +} + +static ssize_t stp_policy_node_masters_show(struct stp_policy_node *policy_node, + char *page) +{ + ssize_t count; + + count = sprintf(page, "%u %u\n", policy_node->first_master, + policy_node->last_master); + + return count; +} + +static ssize_t +stp_policy_node_masters_store(struct stp_policy_node *policy_node, + const char *page, size_t count) +{ + unsigned int first, last; + struct stm_device *stm; + char *p = (char *)page; + ssize_t ret = -ENODEV; + + if (sscanf(p, "%u %u", &first, &last) != 2) + return -EINVAL; + + mutex_lock(&stp_policy_subsys.su_mutex); + stm = policy_node->policy->stm; + if (!stm) + goto unlock; + + /* must be within [sw_start..sw_end], which is an inclusive range */ + if (first > INT_MAX || last > INT_MAX || first > last || + first < stm->data->sw_start || + last > stm->data->sw_end) { + ret = -ERANGE; + goto unlock; + } + + ret = count; + policy_node->first_master = first; + policy_node->last_master = last; + +unlock: + mutex_unlock(&stp_policy_subsys.su_mutex); + + return ret; +} + +static ssize_t +stp_policy_node_channels_show(struct stp_policy_node *policy_node, char *page) +{ + ssize_t count; + + count = sprintf(page, "%u %u\n", policy_node->first_channel, + policy_node->last_channel); + + return count; +} + +static ssize_t +stp_policy_node_channels_store(struct stp_policy_node *policy_node, + const char *page, size_t count) +{ + unsigned int first, last; + struct stm_device *stm; + char *p = (char *)page; + ssize_t ret = -ENODEV; + + if (sscanf(p, "%u %u", &first, &last) != 2) + return -EINVAL; + + mutex_lock(&stp_policy_subsys.su_mutex); + stm = policy_node->policy->stm; + if (!stm) + goto unlock; + + if (first > INT_MAX || last > INT_MAX || first > last || + last >= stm->data->sw_nchannels) { + ret = -ERANGE; + goto unlock; + } + + ret = count; + policy_node->first_channel = first; + policy_node->last_channel = last; + +unlock: + mutex_unlock(&stp_policy_subsys.su_mutex); + + return ret; +} + +static void stp_policy_node_release(struct config_item *item) +{ + kfree(to_stp_policy_node(item)); +} + +struct stp_policy_node_attribute { + struct configfs_attribute attr; + ssize_t (*show)(struct stp_policy_node *, char *); + ssize_t (*store)(struct stp_policy_node *, const char *, size_t); +}; + +static ssize_t stp_policy_node_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + struct stp_policy_node *policy_node = to_stp_policy_node(item); + struct stp_policy_node_attribute *pn_attr = + container_of(attr, struct stp_policy_node_attribute, attr); + ssize_t count = 0; + + if (pn_attr->show) + count = pn_attr->show(policy_node, page); + + return count; +} + +static ssize_t stp_policy_node_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *page, size_t len) +{ + struct stp_policy_node *policy_node = to_stp_policy_node(item); + struct stp_policy_node_attribute *pn_attr = + container_of(attr, struct stp_policy_node_attribute, attr); + ssize_t count = -EINVAL; + + if (pn_attr->store) + count = pn_attr->store(policy_node, page, len); + + return count; +} + +static struct configfs_item_operations stp_policy_node_item_ops = { + .release = stp_policy_node_release, + .show_attribute = stp_policy_node_attr_show, + .store_attribute = stp_policy_node_attr_store, +}; + +static struct stp_policy_node_attribute stp_policy_node_attr_range = { + .attr = { + .ca_owner = THIS_MODULE, + .ca_name = "masters", + .ca_mode = S_IRUGO | S_IWUSR, + }, + .show = stp_policy_node_masters_show, + .store = stp_policy_node_masters_store, +}; + +static struct stp_policy_node_attribute stp_policy_node_attr_channels = { + .attr = { + .ca_owner = THIS_MODULE, + .ca_name = "channels", + .ca_mode = S_IRUGO | S_IWUSR, + }, + .show = stp_policy_node_channels_show, + .store = stp_policy_node_channels_store, +}; + +static struct configfs_attribute *stp_policy_node_attrs[] = { + &stp_policy_node_attr_range.attr, + &stp_policy_node_attr_channels.attr, + NULL, +}; + +static struct config_item_type stp_policy_type; +static struct config_item_type stp_policy_node_type; + +static struct config_group * +stp_policy_node_make(struct config_group *group, const char *name) +{ + struct stp_policy_node *policy_node, *parent_node; + struct stp_policy *policy; + + if (group->cg_item.ci_type == &stp_policy_type) { + policy = container_of(group, struct stp_policy, group); + } else { + parent_node = container_of(group, struct stp_policy_node, + group); + policy = parent_node->policy; + } + + if (!policy->stm) + return ERR_PTR(-ENODEV); + + policy_node = kzalloc(sizeof(struct stp_policy_node), GFP_KERNEL); + if (!policy_node) + return ERR_PTR(-ENOMEM); + + config_group_init_type_name(&policy_node->group, name, + &stp_policy_node_type); + + policy_node->policy = policy; + + /* default values for the attributes */ + policy_node->first_master = policy->stm->data->sw_start; + policy_node->last_master = policy->stm->data->sw_end; + policy_node->first_channel = 0; + policy_node->last_channel = policy->stm->data->sw_nchannels - 1; + + return &policy_node->group; +} + +static void +stp_policy_node_drop(struct config_group *group, struct config_item *item) +{ + config_item_put(item); +} + +static struct configfs_group_operations stp_policy_node_group_ops = { + .make_group = stp_policy_node_make, + .drop_item = stp_policy_node_drop, +}; + +static struct config_item_type stp_policy_node_type = { + .ct_item_ops = &stp_policy_node_item_ops, + .ct_group_ops = &stp_policy_node_group_ops, + .ct_attrs = stp_policy_node_attrs, + .ct_owner = THIS_MODULE, +}; + +/* + * Root group: policies. + */ +static struct configfs_attribute stp_policy_attr_device = { + .ca_owner = THIS_MODULE, + .ca_name = "device", + .ca_mode = S_IRUGO, +}; + +static struct configfs_attribute *stp_policy_attrs[] = { + &stp_policy_attr_device, + NULL, +}; + +static ssize_t stp_policy_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *page) +{ + struct stp_policy *policy = to_stp_policy(item); + ssize_t count; + + count = sprintf(page, "%s\n", + (policy && policy->stm) ? + policy->stm->data->name : + "<none>"); + + return count; +} + +void stp_policy_unbind(struct stp_policy *policy) +{ + struct stm_device *stm = policy->stm; + + if (WARN_ON_ONCE(!policy->stm)) + return; + + mutex_lock(&stm->policy_mutex); + stm->policy = NULL; + mutex_unlock(&stm->policy_mutex); + + policy->stm = NULL; + + stm_put_device(stm); +} + +static void stp_policy_release(struct config_item *item) +{ + struct stp_policy *policy = to_stp_policy(item); + + stp_policy_unbind(policy); + kfree(policy); +} + +static struct configfs_item_operations stp_policy_item_ops = { + .release = stp_policy_release, + .show_attribute = stp_policy_attr_show, +}; + +static struct configfs_group_operations stp_policy_group_ops = { + .make_group = stp_policy_node_make, +}; + +static struct config_item_type stp_policy_type = { + .ct_item_ops = &stp_policy_item_ops, + .ct_group_ops = &stp_policy_group_ops, + .ct_attrs = stp_policy_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct config_group * +stp_policies_make(struct config_group *group, const char *name) +{ + struct config_group *ret; + struct stm_device *stm; + char *devname, *p; + + devname = kasprintf(GFP_KERNEL, "%s", name); + if (!devname) + return ERR_PTR(-ENOMEM); + + /* + * node must look like <device_name>.<policy_name>, where + * <device_name> is the name of an existing stm device and + * <policy_name> is an arbitrary string + */ + p = strchr(devname, '.'); + if (!p) { + kfree(devname); + return ERR_PTR(-EINVAL); + } + + *p++ = '\0'; + + stm = stm_find_device(devname); + kfree(devname); + + if (!stm) + return ERR_PTR(-ENODEV); + + mutex_lock(&stm->policy_mutex); + if (stm->policy) { + ret = ERR_PTR(-EBUSY); + goto unlock_policy; + } + + stm->policy = kzalloc(sizeof(*stm->policy), GFP_KERNEL); + if (!stm->policy) { + ret = ERR_PTR(-ENOMEM); + goto unlock_policy; + } + + config_group_init_type_name(&stm->policy->group, name, + &stp_policy_type); + stm->policy->stm = stm; + + ret = &stm->policy->group; + +unlock_policy: + mutex_unlock(&stm->policy_mutex); + + if (IS_ERR(ret)) + stm_put_device(stm); + + return ret; +} + +static struct configfs_group_operations stp_policies_group_ops = { + .make_group = stp_policies_make, +}; + +static struct config_item_type stp_policies_type = { + .ct_group_ops = &stp_policies_group_ops, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem stp_policy_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "stp-policy", + .ci_type = &stp_policies_type, + }, + }, +}; + +/* + * Lock the policy mutex from the outside + */ +static struct stp_policy_node * +__stp_policy_node_lookup(struct stp_policy *policy, char *s) +{ + struct stp_policy_node *policy_node, *ret; + struct list_head *head = &policy->group.cg_children; + struct config_item *item; + char *start, *end = s; + + if (list_empty(head)) + return NULL; + + /* return the first entry if everything else fails */ + item = list_entry(head->next, struct config_item, ci_entry); + ret = to_stp_policy_node(item); + +next: + for (;;) { + start = strsep(&end, "/"); + if (!start) + break; + + if (!*start) + continue; + + list_for_each_entry(item, head, ci_entry) { + policy_node = to_stp_policy_node(item); + + if (!strcmp(start, + policy_node->group.cg_item.ci_name)) { + ret = policy_node; + + if (!end) + goto out; + + head = &policy_node->group.cg_children; + goto next; + } + } + break; + } + +out: + return ret; +} + + +struct stp_policy_node * +stp_policy_node_lookup(struct stm_device *stm, char *s) +{ + struct stp_policy_node *policy_node = NULL; + + mutex_lock(&stp_policy_subsys.su_mutex); + + mutex_lock(&stm->policy_mutex); + if (stm->policy) + policy_node = __stp_policy_node_lookup(stm->policy, s); + mutex_unlock(&stm->policy_mutex); + + if (policy_node) + config_item_get(&policy_node->group.cg_item); + mutex_unlock(&stp_policy_subsys.su_mutex); + + return policy_node; +} + +void stp_policy_node_put(struct stp_policy_node *policy_node) +{ + config_item_put(&policy_node->group.cg_item); +} + +int __init stp_configfs_init(void) +{ + int err; + + config_group_init(&stp_policy_subsys.su_group); + mutex_init(&stp_policy_subsys.su_mutex); + err = configfs_register_subsystem(&stp_policy_subsys); + + return err; +} + +void __exit stp_configfs_exit(void) +{ + configfs_unregister_subsystem(&stp_policy_subsys); +} diff --git a/drivers/hwtracing/stm/stm.h b/drivers/hwtracing/stm/stm.h new file mode 100644 index 000000000000..95ece0292c99 --- /dev/null +++ b/drivers/hwtracing/stm/stm.h @@ -0,0 +1,87 @@ +/* + * System Trace Module (STM) infrastructure + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * STM class implements generic infrastructure for System Trace Module devices + * as defined in MIPI STPv2 specification. + */ + +#ifndef _STM_STM_H_ +#define _STM_STM_H_ + +struct stp_policy; +struct stp_policy_node; + +struct stp_policy_node * +stp_policy_node_lookup(struct stm_device *stm, char *s); +void stp_policy_node_put(struct stp_policy_node *policy_node); +void stp_policy_unbind(struct stp_policy *policy); + +void stp_policy_node_get_ranges(struct stp_policy_node *policy_node, + unsigned int *mstart, unsigned int *mend, + unsigned int *cstart, unsigned int *cend); +int stp_configfs_init(void); +void stp_configfs_exit(void); + +struct stp_master { + unsigned int nr_free; + unsigned long chan_map[0]; +}; + +struct stm_device { + struct device dev; + struct module *owner; + struct stp_policy *policy; + struct mutex policy_mutex; + int major; + unsigned int sw_nmasters; + struct stm_data *data; + spinlock_t link_lock; + struct list_head link_list; + /* master allocation */ + spinlock_t mc_lock; + struct stp_master *masters[0]; +}; + +#define to_stm_device(_d) \ + container_of((_d), struct stm_device, dev) + +struct stm_output { + unsigned int master; + unsigned int channel; + unsigned int nr_chans; +}; + +struct stm_file { + struct stm_device *stm; + struct stp_policy_node *policy_node; + struct stm_output output; +}; + +struct stm_device *stm_find_device(const char *name); +void stm_put_device(struct stm_device *stm); + +struct stm_source_device { + struct device dev; + struct stm_source_data *data; + spinlock_t link_lock; + struct stm_device __rcu *link; + struct list_head link_entry; + /* one output per stm_source device */ + struct stp_policy_node *policy_node; + struct stm_output output; +}; + +#define to_stm_source_device(_d) \ + container_of((_d), struct stm_source_device, dev) + +#endif /* _STM_STM_H_ */ diff --git a/drivers/memory/fsl-corenet-cf.c b/drivers/memory/fsl-corenet-cf.c index d708ded5457b..662d050243be 100644 --- a/drivers/memory/fsl-corenet-cf.c +++ b/drivers/memory/fsl-corenet-cf.c @@ -61,6 +61,7 @@ static const struct of_device_id ccf_matches[] = { }, {} }; +MODULE_DEVICE_TABLE(of, ccf_matches); struct ccf_err_regs { u32 errdet; /* 0x00 Error Detect Register */ diff --git a/drivers/memory/ti-aemif.c b/drivers/memory/ti-aemif.c index ca7d97a9a9ba..a579a0f25840 100644 --- a/drivers/memory/ti-aemif.c +++ b/drivers/memory/ti-aemif.c @@ -324,6 +324,7 @@ static const struct of_device_id aemif_of_match[] = { { .compatible = "ti,da850-aemif", }, {}, }; +MODULE_DEVICE_TABLE(of, aemif_of_match); static int aemif_probe(struct platform_device *pdev) { diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index ccccc2943f2f..22892c701c63 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -414,7 +414,7 @@ config TI_DAC7512 config VMWARE_BALLOON tristate "VMware Balloon Driver" - depends on X86 && HYPERVISOR_GUEST + depends on VMWARE_VMCI && X86 && HYPERVISOR_GUEST help This is VMware physical memory management driver which acts like a "balloon" that can be inflated to reclaim physical pages diff --git a/drivers/misc/ad525x_dpot-i2c.c b/drivers/misc/ad525x_dpot-i2c.c index d11187d36ddd..4f832002d116 100644 --- a/drivers/misc/ad525x_dpot-i2c.c +++ b/drivers/misc/ad525x_dpot-i2c.c @@ -117,4 +117,3 @@ module_i2c_driver(ad_dpot_i2c_driver); MODULE_AUTHOR("Michael Hennerich <hennerich@blackfin.uclinux.org>"); MODULE_DESCRIPTION("digital potentiometer I2C bus driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("i2c:ad_dpot"); diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h index e7353449874b..cb851c14ca4b 100644 --- a/drivers/misc/genwqe/card_base.h +++ b/drivers/misc/genwqe/card_base.h @@ -514,7 +514,7 @@ int __genwqe_execute_ddcb(struct genwqe_dev *cd, /** * __genwqe_execute_raw_ddcb() - Execute DDCB request without addr translation * - * This version will not do address translation or any modifcation of + * This version will not do address translation or any modification of * the DDCB data. It is used e.g. for the MoveFlash DDCB which is * entirely prepared by the driver itself. That means the appropriate * DMA addresses are already in the DDCB and do not need any diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c index 6d51e5f08664..353ee0cc733d 100644 --- a/drivers/misc/genwqe/card_ddcb.c +++ b/drivers/misc/genwqe/card_ddcb.c @@ -203,7 +203,7 @@ struct genwqe_ddcb_cmd *ddcb_requ_alloc(void) { struct ddcb_requ *req; - req = kzalloc(sizeof(*req), GFP_ATOMIC); + req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return NULL; diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index 70e62d6a3231..7f1b282d7d96 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -449,7 +449,7 @@ static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma) if (get_order(vsize) > MAX_ORDER) return -ENOMEM; - dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC); + dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL); if (dma_map == NULL) return -ENOMEM; @@ -785,7 +785,7 @@ static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) map_addr = (m->addr & PAGE_MASK); map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); - dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC); + dma_map = kzalloc(sizeof(struct dma_mapping), GFP_KERNEL); if (dma_map == NULL) return -ENOMEM; diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index 1ca94e6fa8fb..222367cc8c81 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -220,7 +220,8 @@ void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, if (get_order(size) > MAX_ORDER) return NULL; - return pci_alloc_consistent(cd->pci_dev, size, dma_handle); + return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle, + GFP_KERNEL); } void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, @@ -229,7 +230,7 @@ void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, if (vaddr == NULL) return; - pci_free_consistent(cd->pci_dev, size, vaddr, dma_handle); + dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle); } static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list, diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c index b83e3ca12a41..d6a901cd4222 100644 --- a/drivers/misc/hpilo.c +++ b/drivers/misc/hpilo.c @@ -2,7 +2,7 @@ * Driver for the HP iLO management processor. * * Copyright (C) 2008 Hewlett-Packard Development Company, L.P. - * David Altobelli <david.altobelli@hp.com> + * David Altobelli <david.altobelli@hpe.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -902,11 +902,11 @@ static void __exit ilo_exit(void) MODULE_VERSION("1.4.1"); MODULE_ALIAS(ILO_NAME); MODULE_DESCRIPTION(ILO_NAME); -MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>"); +MODULE_AUTHOR("David Altobelli <david.altobelli@hpe.com>"); MODULE_LICENSE("GPL v2"); module_param(max_ccb, uint, 0444); -MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (16)"); +MODULE_PARM_DESC(max_ccb, "Maximum number of HP iLO channels to attach (8-24)(default=16)"); module_init(ilo_init); module_exit(ilo_exit); diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 9a60bd4d3c49..99635dd9dbac 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -1112,6 +1112,7 @@ static int __init init_kgdbts(void) return configure_kgdbts(); } +device_initcall(init_kgdbts); static int kgdbts_get_char(void) { @@ -1180,10 +1181,9 @@ static struct kgdb_io kgdbts_io_ops = { .post_exception = kgdbts_post_exp_handler, }; -module_init(init_kgdbts); +/* + * not really modular, but the easiest way to keep compat with existing + * bootargs behaviour is to continue using module_param here. + */ module_param_call(kgdbts, param_set_kgdbts_var, param_get_string, &kps, 0644); MODULE_PARM_DESC(kgdbts, "<A|V1|V2>[F#|S#][N#]"); -MODULE_DESCRIPTION("KGDB Test Suite"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Wind River Systems, Inc."); - diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 3e536ca85f7d..020de5919c21 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -285,11 +285,11 @@ static struct mei_fixup { }; /** - * mei_cl_dev_fixup - run fixup handlers + * mei_cldev_fixup - run fixup handlers * * @cldev: me client device */ -void mei_cl_dev_fixup(struct mei_cl_device *cldev) +void mei_cl_bus_dev_fixup(struct mei_cl_device *cldev) { struct mei_fixup *f; const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index eef1c6b46ad8..832085207a7f 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -165,7 +165,7 @@ out: } /** - * mei_cl_send - me device send (write) + * mei_cldev_send - me device send (write) * * @cldev: me client device * @buf: buffer to send @@ -173,7 +173,7 @@ out: * * Return: written size in bytes or < 0 on error */ -ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length) +ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length) { struct mei_cl *cl = cldev->cl; @@ -182,10 +182,10 @@ ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length) return __mei_cl_send(cl, buf, length, 1); } -EXPORT_SYMBOL_GPL(mei_cl_send); +EXPORT_SYMBOL_GPL(mei_cldev_send); /** - * mei_cl_recv - client receive (read) + * mei_cldev_recv - client receive (read) * * @cldev: me client device * @buf: buffer to send @@ -193,7 +193,7 @@ EXPORT_SYMBOL_GPL(mei_cl_send); * * Return: read size in bytes of < 0 on error */ -ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) +ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) { struct mei_cl *cl = cldev->cl; @@ -202,15 +202,15 @@ ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length) return __mei_cl_recv(cl, buf, length); } -EXPORT_SYMBOL_GPL(mei_cl_recv); +EXPORT_SYMBOL_GPL(mei_cldev_recv); /** - * mei_bus_event_work - dispatch rx event for a bus device + * mei_cl_bus_event_work - dispatch rx event for a bus device * and schedule new work * * @work: work */ -static void mei_bus_event_work(struct work_struct *work) +static void mei_cl_bus_event_work(struct work_struct *work) { struct mei_cl_device *cldev; @@ -272,7 +272,7 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) } /** - * mei_cl_register_event_cb - register event callback + * mei_cldev_register_event_cb - register event callback * * @cldev: me client devices * @event_cb: callback function @@ -283,9 +283,9 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) * -EALREADY if an callback is already registered * <0 on other errors */ -int mei_cl_register_event_cb(struct mei_cl_device *cldev, - unsigned long events_mask, - mei_cl_event_cb_t event_cb, void *context) +int mei_cldev_register_event_cb(struct mei_cl_device *cldev, + unsigned long events_mask, + mei_cldev_event_cb_t event_cb, void *context) { int ret; @@ -296,7 +296,7 @@ int mei_cl_register_event_cb(struct mei_cl_device *cldev, cldev->events_mask = events_mask; cldev->event_cb = event_cb; cldev->event_context = context; - INIT_WORK(&cldev->event_work, mei_bus_event_work); + INIT_WORK(&cldev->event_work, mei_cl_bus_event_work); if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) { ret = mei_cl_read_start(cldev->cl, 0, NULL); @@ -314,42 +314,81 @@ int mei_cl_register_event_cb(struct mei_cl_device *cldev, return 0; } -EXPORT_SYMBOL_GPL(mei_cl_register_event_cb); +EXPORT_SYMBOL_GPL(mei_cldev_register_event_cb); /** - * mei_cl_get_drvdata - driver data getter + * mei_cldev_get_drvdata - driver data getter * * @cldev: mei client device * * Return: driver private data */ -void *mei_cl_get_drvdata(const struct mei_cl_device *cldev) +void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev) { return dev_get_drvdata(&cldev->dev); } -EXPORT_SYMBOL_GPL(mei_cl_get_drvdata); +EXPORT_SYMBOL_GPL(mei_cldev_get_drvdata); /** - * mei_cl_set_drvdata - driver data setter + * mei_cldev_set_drvdata - driver data setter * * @cldev: mei client device * @data: data to store */ -void mei_cl_set_drvdata(struct mei_cl_device *cldev, void *data) +void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data) { dev_set_drvdata(&cldev->dev, data); } -EXPORT_SYMBOL_GPL(mei_cl_set_drvdata); +EXPORT_SYMBOL_GPL(mei_cldev_set_drvdata); /** - * mei_cl_enable_device - enable me client device + * mei_cldev_uuid - return uuid of the underlying me client + * + * @cldev: mei client device + * + * Return: me client uuid + */ +const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev) +{ + return mei_me_cl_uuid(cldev->me_cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_uuid); + +/** + * mei_cldev_ver - return protocol version of the underlying me client + * + * @cldev: mei client device + * + * Return: me client protocol version + */ +u8 mei_cldev_ver(const struct mei_cl_device *cldev) +{ + return mei_me_cl_ver(cldev->me_cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_ver); + +/** + * mei_cldev_enabled - check whether the device is enabled + * + * @cldev: mei client device + * + * Return: true if me client is initialized and connected + */ +bool mei_cldev_enabled(struct mei_cl_device *cldev) +{ + return cldev->cl && mei_cl_is_connected(cldev->cl); +} +EXPORT_SYMBOL_GPL(mei_cldev_enabled); + +/** + * mei_cldev_enable_device - enable me client device * create connection with me client * * @cldev: me client device * * Return: 0 on success and < 0 on error */ -int mei_cl_enable_device(struct mei_cl_device *cldev) +int mei_cldev_enable(struct mei_cl_device *cldev) { struct mei_device *bus = cldev->bus; struct mei_cl *cl; @@ -389,17 +428,17 @@ out: return ret; } -EXPORT_SYMBOL_GPL(mei_cl_enable_device); +EXPORT_SYMBOL_GPL(mei_cldev_enable); /** - * mei_cl_disable_device - disable me client device + * mei_cldev_disable - disable me client device * disconnect form the me client * * @cldev: me client device * * Return: 0 on success and < 0 on error */ -int mei_cl_disable_device(struct mei_cl_device *cldev) +int mei_cldev_disable(struct mei_cl_device *cldev) { struct mei_device *bus; struct mei_cl *cl; @@ -437,7 +476,7 @@ out: mutex_unlock(&bus->device_lock); return err; } -EXPORT_SYMBOL_GPL(mei_cl_disable_device); +EXPORT_SYMBOL_GPL(mei_cldev_disable); /** * mei_cl_device_find - find matching entry in the driver id table @@ -453,17 +492,26 @@ struct mei_cl_device_id *mei_cl_device_find(struct mei_cl_device *cldev, { const struct mei_cl_device_id *id; const uuid_le *uuid; + u8 version; + bool match; uuid = mei_me_cl_uuid(cldev->me_cl); + version = mei_me_cl_ver(cldev->me_cl); id = cldrv->id_table; while (uuid_le_cmp(NULL_UUID_LE, id->uuid)) { if (!uuid_le_cmp(*uuid, id->uuid)) { + match = true; - if (!cldev->name[0]) - return id; + if (cldev->name[0]) + if (strncmp(cldev->name, id->name, + sizeof(id->name))) + match = false; - if (!strncmp(cldev->name, id->name, sizeof(id->name))) + if (id->version != MEI_CL_VERSION_ANY) + if (id->version != version) + match = false; + if (match) return id; } @@ -590,6 +638,19 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *a, } static DEVICE_ATTR_RO(uuid); +static ssize_t version_show(struct device *dev, struct device_attribute *a, + char *buf) +{ + struct mei_cl_device *cldev = to_mei_cl_device(dev); + u8 version = mei_me_cl_ver(cldev->me_cl); + size_t len; + + len = snprintf(buf, PAGE_SIZE, "%02X", version); + + return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; +} +static DEVICE_ATTR_RO(version); + static ssize_t modalias_show(struct device *dev, struct device_attribute *a, char *buf) { @@ -597,20 +658,19 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a, const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); size_t len; - len = snprintf(buf, PAGE_SIZE, "mei:%s:" MEI_CL_UUID_FMT ":", - cldev->name, MEI_CL_UUID_ARGS(uuid->b)); - + len = snprintf(buf, PAGE_SIZE, "mei:%s:%pUl:", cldev->name, uuid); return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; } static DEVICE_ATTR_RO(modalias); -static struct attribute *mei_cl_dev_attrs[] = { +static struct attribute *mei_cldev_attrs[] = { &dev_attr_name.attr, &dev_attr_uuid.attr, + &dev_attr_version.attr, &dev_attr_modalias.attr, NULL, }; -ATTRIBUTE_GROUPS(mei_cl_dev); +ATTRIBUTE_GROUPS(mei_cldev); /** * mei_cl_device_uevent - me client bus uevent handler @@ -624,6 +684,10 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env) { struct mei_cl_device *cldev = to_mei_cl_device(dev); const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + u8 version = mei_me_cl_ver(cldev->me_cl); + + if (add_uevent_var(env, "MEI_CL_VERSION=%d", version)) + return -ENOMEM; if (add_uevent_var(env, "MEI_CL_UUID=%pUl", uuid)) return -ENOMEM; @@ -631,8 +695,8 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env) if (add_uevent_var(env, "MEI_CL_NAME=%s", cldev->name)) return -ENOMEM; - if (add_uevent_var(env, "MODALIAS=mei:%s:" MEI_CL_UUID_FMT ":", - cldev->name, MEI_CL_UUID_ARGS(uuid->b))) + if (add_uevent_var(env, "MODALIAS=mei:%s:%pUl:%02X:", + cldev->name, uuid, version)) return -ENOMEM; return 0; @@ -640,7 +704,7 @@ static int mei_cl_device_uevent(struct device *dev, struct kobj_uevent_env *env) static struct bus_type mei_cl_bus_type = { .name = "mei", - .dev_groups = mei_cl_dev_groups, + .dev_groups = mei_cldev_groups, .match = mei_cl_device_match, .probe = mei_cl_device_probe, .remove = mei_cl_device_remove, @@ -661,7 +725,7 @@ static void mei_dev_bus_put(struct mei_device *bus) put_device(bus->dev); } -static void mei_cl_dev_release(struct device *dev) +static void mei_cl_bus_dev_release(struct device *dev) { struct mei_cl_device *cldev = to_mei_cl_device(dev); @@ -674,19 +738,19 @@ static void mei_cl_dev_release(struct device *dev) } static struct device_type mei_cl_device_type = { - .release = mei_cl_dev_release, + .release = mei_cl_bus_dev_release, }; /** - * mei_cl_dev_alloc - initialize and allocate mei client device + * mei_cl_bus_dev_alloc - initialize and allocate mei client device * * @bus: mei device * @me_cl: me client * * Return: allocated device structur or NULL on allocation failure */ -static struct mei_cl_device *mei_cl_dev_alloc(struct mei_device *bus, - struct mei_me_client *me_cl) +static struct mei_cl_device *mei_cl_bus_dev_alloc(struct mei_device *bus, + struct mei_me_client *me_cl) { struct mei_cl_device *cldev; @@ -715,15 +779,17 @@ static struct mei_cl_device *mei_cl_dev_alloc(struct mei_device *bus, * * Return: true if the device is eligible for enumeration */ -static bool mei_cl_dev_setup(struct mei_device *bus, - struct mei_cl_device *cldev) +static bool mei_cl_bus_dev_setup(struct mei_device *bus, + struct mei_cl_device *cldev) { cldev->do_match = 1; - mei_cl_dev_fixup(cldev); + mei_cl_bus_dev_fixup(cldev); if (cldev->do_match) - dev_set_name(&cldev->dev, "mei:%s:%pUl", - cldev->name, mei_me_cl_uuid(cldev->me_cl)); + dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X", + cldev->name, + mei_me_cl_uuid(cldev->me_cl), + mei_me_cl_ver(cldev->me_cl)); return cldev->do_match == 1; } @@ -739,7 +805,9 @@ static int mei_cl_bus_dev_add(struct mei_cl_device *cldev) { int ret; - dev_dbg(cldev->bus->dev, "adding %pUL\n", mei_me_cl_uuid(cldev->me_cl)); + dev_dbg(cldev->bus->dev, "adding %pUL:%02X\n", + mei_me_cl_uuid(cldev->me_cl), + mei_me_cl_ver(cldev->me_cl)); ret = device_add(&cldev->dev); if (!ret) cldev->is_added = 1; @@ -804,13 +872,14 @@ void mei_cl_bus_remove_devices(struct mei_device *bus) /** - * mei_cl_dev_init - allocate and initializes an mei client devices + * mei_cl_bus_dev_init - allocate and initializes an mei client devices * based on me client * * @bus: mei device * @me_cl: me client */ -static void mei_cl_dev_init(struct mei_device *bus, struct mei_me_client *me_cl) +static void mei_cl_bus_dev_init(struct mei_device *bus, + struct mei_me_client *me_cl) { struct mei_cl_device *cldev; @@ -819,7 +888,7 @@ static void mei_cl_dev_init(struct mei_device *bus, struct mei_me_client *me_cl) if (me_cl->bus_added) return; - cldev = mei_cl_dev_alloc(bus, me_cl); + cldev = mei_cl_bus_dev_alloc(bus, me_cl); if (!cldev) return; @@ -843,7 +912,7 @@ void mei_cl_bus_rescan(struct mei_device *bus) down_read(&bus->me_clients_rwsem); list_for_each_entry(me_cl, &bus->me_clients, list) - mei_cl_dev_init(bus, me_cl); + mei_cl_bus_dev_init(bus, me_cl); up_read(&bus->me_clients_rwsem); mutex_lock(&bus->cl_bus_lock); @@ -857,7 +926,7 @@ void mei_cl_bus_rescan(struct mei_device *bus) if (cldev->is_added) continue; - if (mei_cl_dev_setup(bus, cldev)) + if (mei_cl_bus_dev_setup(bus, cldev)) mei_cl_bus_dev_add(cldev); else { list_del_init(&cldev->bus_list); @@ -869,7 +938,8 @@ void mei_cl_bus_rescan(struct mei_device *bus) dev_dbg(bus->dev, "rescan end"); } -int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner) +int __mei_cldev_driver_register(struct mei_cl_driver *cldrv, + struct module *owner) { int err; @@ -885,15 +955,15 @@ int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner) return 0; } -EXPORT_SYMBOL_GPL(__mei_cl_driver_register); +EXPORT_SYMBOL_GPL(__mei_cldev_driver_register); -void mei_cl_driver_unregister(struct mei_cl_driver *cldrv) +void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv) { driver_unregister(&cldrv->driver); pr_debug("mei: driver [%s] unregistered\n", cldrv->driver.name); } -EXPORT_SYMBOL_GPL(mei_cl_driver_unregister); +EXPORT_SYMBOL_GPL(mei_cldev_driver_unregister); int __init mei_cl_bus_init(void) diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index 1c7cad07d731..04e1aa39243f 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -68,6 +68,18 @@ static inline const uuid_le *mei_me_cl_uuid(const struct mei_me_client *me_cl) return &me_cl->props.protocol_name; } +/** + * mei_me_cl_ver - return me client protocol version + * + * @me_cl: me client + * + * Return: me client protocol version + */ +static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl) +{ + return me_cl->props.protocol_version; +} + /* * MEI IO Functions */ diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c index 65511d39d89b..25b1997a62cb 100644 --- a/drivers/misc/mei/hw-me.c +++ b/drivers/misc/mei/hw-me.c @@ -150,7 +150,7 @@ static inline u32 mei_me_d0i3c_read(const struct mei_device *dev) u32 reg; reg = mei_me_reg_read(to_me_hw(dev), H_D0I3C); - trace_mei_reg_read(dev->dev, "H_D0I3C", H_CSR, reg); + trace_mei_reg_read(dev->dev, "H_D0I3C", H_D0I3C, reg); return reg; } @@ -163,7 +163,7 @@ static inline u32 mei_me_d0i3c_read(const struct mei_device *dev) */ static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg) { - trace_mei_reg_write(dev->dev, "H_D0I3C", H_CSR, reg); + trace_mei_reg_write(dev->dev, "H_D0I3C", H_D0I3C, reg); mei_me_reg_write(to_me_hw(dev), H_D0I3C, reg); } diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index e25ee16c658e..0f87dffa6be1 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -340,7 +340,7 @@ struct mei_hw_ops { /* MEI bus API*/ void mei_cl_bus_rescan(struct mei_device *bus); -void mei_cl_dev_fixup(struct mei_cl_device *dev); +void mei_cl_bus_dev_fixup(struct mei_cl_device *dev); ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, bool blocking); ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length); diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index e9f2f56c370d..40677df7f996 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -36,7 +36,7 @@ comment "Intel MIC Host Driver" config INTEL_MIC_HOST tristate "Intel MIC Host Driver" - depends on 64BIT && PCI && X86 && INTEL_MIC_BUS && SCIF_BUS + depends on 64BIT && PCI && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM select VHOST_RING help This enables Host Driver support for the Intel Many Integrated @@ -56,7 +56,7 @@ comment "Intel MIC Card Driver" config INTEL_MIC_CARD tristate "Intel MIC Card Driver" - depends on 64BIT && X86 && INTEL_MIC_BUS && SCIF_BUS + depends on 64BIT && X86 && INTEL_MIC_BUS && SCIF_BUS && MIC_COSM select VIRTIO help This enables card driver support for the Intel Many Integrated @@ -74,7 +74,8 @@ comment "SCIF Driver" config SCIF tristate "SCIF Driver" - depends on 64BIT && PCI && X86 && SCIF_BUS + depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT + select IOMMU_IOVA help This enables SCIF Driver support for the Intel Many Integrated Core (MIC) family of PCIe form factor coprocessor devices that @@ -88,3 +89,21 @@ config SCIF More information about the Intel MIC family as well as the Linux OS and tools for MIC to use with this driver are available from <http://software.intel.com/en-us/mic-developer>. + +comment "Intel MIC Coprocessor State Management (COSM) Drivers" + +config MIC_COSM + tristate "Intel MIC Coprocessor State Management (COSM) Drivers" + depends on 64BIT && PCI && X86 && SCIF + help + This enables COSM driver support for the Intel Many + Integrated Core (MIC) family of PCIe form factor coprocessor + devices. COSM drivers implement functions such as boot, + shutdown, reset and reboot of MIC devices. + + If you are building a host kernel with an Intel MIC device then + say M (recommended) or Y, else say N. If unsure say N. + + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + <http://software.intel.com/en-us/mic-developer>. diff --git a/drivers/misc/mic/Makefile b/drivers/misc/mic/Makefile index a74042c58649..e288a1106738 100644 --- a/drivers/misc/mic/Makefile +++ b/drivers/misc/mic/Makefile @@ -6,3 +6,5 @@ obj-$(CONFIG_INTEL_MIC_HOST) += host/ obj-$(CONFIG_INTEL_MIC_CARD) += card/ obj-y += bus/ obj-$(CONFIG_SCIF) += scif/ +obj-$(CONFIG_MIC_COSM) += cosm/ +obj-$(CONFIG_MIC_COSM) += cosm_client/ diff --git a/drivers/misc/mic/bus/Makefile b/drivers/misc/mic/bus/Makefile index 1ed37e234c96..761842b0d0bb 100644 --- a/drivers/misc/mic/bus/Makefile +++ b/drivers/misc/mic/bus/Makefile @@ -4,3 +4,4 @@ # obj-$(CONFIG_INTEL_MIC_BUS) += mic_bus.o obj-$(CONFIG_SCIF_BUS) += scif_bus.o +obj-$(CONFIG_MIC_COSM) += cosm_bus.o diff --git a/drivers/misc/mic/bus/cosm_bus.c b/drivers/misc/mic/bus/cosm_bus.c new file mode 100644 index 000000000000..1e36830f1a4e --- /dev/null +++ b/drivers/misc/mic/bus/cosm_bus.c @@ -0,0 +1,141 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC COSM Bus Driver + */ +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/idr.h> +#include "cosm_bus.h" + +/* Unique numbering for cosm devices. */ +static DEFINE_IDA(cosm_index_ida); + +static int cosm_dev_probe(struct device *d) +{ + struct cosm_device *dev = dev_to_cosm(d); + struct cosm_driver *drv = drv_to_cosm(dev->dev.driver); + + return drv->probe(dev); +} + +static int cosm_dev_remove(struct device *d) +{ + struct cosm_device *dev = dev_to_cosm(d); + struct cosm_driver *drv = drv_to_cosm(dev->dev.driver); + + drv->remove(dev); + return 0; +} + +static struct bus_type cosm_bus = { + .name = "cosm_bus", + .probe = cosm_dev_probe, + .remove = cosm_dev_remove, +}; + +int cosm_register_driver(struct cosm_driver *driver) +{ + driver->driver.bus = &cosm_bus; + return driver_register(&driver->driver); +} +EXPORT_SYMBOL_GPL(cosm_register_driver); + +void cosm_unregister_driver(struct cosm_driver *driver) +{ + driver_unregister(&driver->driver); +} +EXPORT_SYMBOL_GPL(cosm_unregister_driver); + +static inline void cosm_release_dev(struct device *d) +{ + struct cosm_device *cdev = dev_to_cosm(d); + + kfree(cdev); +} + +struct cosm_device * +cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops) +{ + struct cosm_device *cdev; + int ret; + + cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); + if (!cdev) + return ERR_PTR(-ENOMEM); + + cdev->dev.parent = pdev; + cdev->dev.release = cosm_release_dev; + cdev->hw_ops = hw_ops; + dev_set_drvdata(&cdev->dev, cdev); + cdev->dev.bus = &cosm_bus; + + /* Assign a unique device index and hence name */ + ret = ida_simple_get(&cosm_index_ida, 0, 0, GFP_KERNEL); + if (ret < 0) + goto free_cdev; + + cdev->index = ret; + cdev->dev.id = ret; + dev_set_name(&cdev->dev, "cosm-dev%u", cdev->index); + + ret = device_register(&cdev->dev); + if (ret) + goto ida_remove; + return cdev; +ida_remove: + ida_simple_remove(&cosm_index_ida, cdev->index); +free_cdev: + kfree(cdev); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(cosm_register_device); + +void cosm_unregister_device(struct cosm_device *dev) +{ + int index = dev->index; /* save for after device release */ + + device_unregister(&dev->dev); + ida_simple_remove(&cosm_index_ida, index); +} +EXPORT_SYMBOL_GPL(cosm_unregister_device); + +struct cosm_device *cosm_find_cdev_by_id(int id) +{ + struct device *dev = subsys_find_device_by_id(&cosm_bus, id, NULL); + + return dev ? container_of(dev, struct cosm_device, dev) : NULL; +} +EXPORT_SYMBOL_GPL(cosm_find_cdev_by_id); + +static int __init cosm_init(void) +{ + return bus_register(&cosm_bus); +} + +static void __exit cosm_exit(void) +{ + bus_unregister(&cosm_bus); + ida_destroy(&cosm_index_ida); +} + +core_initcall(cosm_init); +module_exit(cosm_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC card OS state management bus driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/bus/cosm_bus.h b/drivers/misc/mic/bus/cosm_bus.h new file mode 100644 index 000000000000..f7c57f266916 --- /dev/null +++ b/drivers/misc/mic/bus/cosm_bus.h @@ -0,0 +1,134 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC COSM Bus Driver + */ +#ifndef _COSM_BUS_H_ +#define _COSM_BUS_H_ + +#include <linux/scif.h> +#include <linux/mic_common.h> +#include "../common/mic_dev.h" + +/** + * cosm_device - representation of a cosm device + * + * @attr_group: Pointer to list of sysfs attribute groups. + * @sdev: Device for sysfs entries. + * @state: MIC state. + * @shutdown_status: MIC status reported by card for shutdown/crashes. + * @shutdown_status_int: Internal shutdown status maintained by the driver + * @cosm_mutex: Mutex for synchronizing access to data structures. + * @reset_trigger_work: Work for triggering reset requests. + * @scif_work: Work for handling per device SCIF connections + * @cmdline: Kernel command line. + * @firmware: Firmware file name. + * @ramdisk: Ramdisk file name. + * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates. + * @log_buf_addr: Log buffer address for MIC. + * @log_buf_len: Log buffer length address for MIC. + * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes. + * @hw_ops: the hardware bus ops for this device. + * @dev: underlying device. + * @index: unique position on the cosm bus + * @dbg_dir: debug fs directory + * @newepd: new endpoint from scif accept to be assigned to this cdev + * @epd: SCIF endpoint for this cdev + * @heartbeat_watchdog_enable: if heartbeat watchdog is enabled for this cdev + * @sysfs_heartbeat_enable: sysfs setting for disabling heartbeat notification + */ +struct cosm_device { + const struct attribute_group **attr_group; + struct device *sdev; + u8 state; + u8 shutdown_status; + u8 shutdown_status_int; + struct mutex cosm_mutex; + struct work_struct reset_trigger_work; + struct work_struct scif_work; + char *cmdline; + char *firmware; + char *ramdisk; + char *bootmode; + void *log_buf_addr; + int *log_buf_len; + struct kernfs_node *state_sysfs; + struct cosm_hw_ops *hw_ops; + struct device dev; + int index; + struct dentry *dbg_dir; + scif_epd_t newepd; + scif_epd_t epd; + bool heartbeat_watchdog_enable; + bool sysfs_heartbeat_enable; +}; + +/** + * cosm_driver - operations for a cosm driver + * + * @driver: underlying device driver (populate name and owner). + * @probe: the function to call when a device is found. Returns 0 or -errno. + * @remove: the function to call when a device is removed. + */ +struct cosm_driver { + struct device_driver driver; + int (*probe)(struct cosm_device *dev); + void (*remove)(struct cosm_device *dev); +}; + +/** + * cosm_hw_ops - cosm bus ops + * + * @reset: trigger MIC reset + * @force_reset: force MIC reset + * @post_reset: inform MIC reset is complete + * @ready: is MIC ready for OS download + * @start: boot MIC + * @stop: prepare MIC for reset + * @family: return MIC HW family string + * @stepping: return MIC HW stepping string + * @aper: return MIC PCIe aperture + */ +struct cosm_hw_ops { + void (*reset)(struct cosm_device *cdev); + void (*force_reset)(struct cosm_device *cdev); + void (*post_reset)(struct cosm_device *cdev, enum mic_states state); + bool (*ready)(struct cosm_device *cdev); + int (*start)(struct cosm_device *cdev, int id); + void (*stop)(struct cosm_device *cdev, bool force); + ssize_t (*family)(struct cosm_device *cdev, char *buf); + ssize_t (*stepping)(struct cosm_device *cdev, char *buf); + struct mic_mw *(*aper)(struct cosm_device *cdev); +}; + +struct cosm_device * +cosm_register_device(struct device *pdev, struct cosm_hw_ops *hw_ops); +void cosm_unregister_device(struct cosm_device *dev); +int cosm_register_driver(struct cosm_driver *drv); +void cosm_unregister_driver(struct cosm_driver *drv); +struct cosm_device *cosm_find_cdev_by_id(int id); + +static inline struct cosm_device *dev_to_cosm(struct device *dev) +{ + return container_of(dev, struct cosm_device, dev); +} + +static inline struct cosm_driver *drv_to_cosm(struct device_driver *drv) +{ + return container_of(drv, struct cosm_driver, driver); +} +#endif /* _COSM_BUS_H */ diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c index 961ae90aae47..c64955d8cbc1 100644 --- a/drivers/misc/mic/bus/mic_bus.c +++ b/drivers/misc/mic/bus/mic_bus.c @@ -25,9 +25,6 @@ #include <linux/idr.h> #include <linux/mic_bus.h> -/* Unique numbering for mbus devices. */ -static DEFINE_IDA(mbus_index_ida); - static ssize_t device_show(struct device *d, struct device_attribute *attr, char *buf) { @@ -147,7 +144,8 @@ static void mbus_release_dev(struct device *d) struct mbus_device * mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, - struct mbus_hw_ops *hw_ops, void __iomem *mmio_va) + struct mbus_hw_ops *hw_ops, int index, + void __iomem *mmio_va) { int ret; struct mbus_device *mbdev; @@ -166,13 +164,7 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, mbdev->dev.release = mbus_release_dev; mbdev->hw_ops = hw_ops; mbdev->dev.bus = &mic_bus; - - /* Assign a unique device index and hence name. */ - ret = ida_simple_get(&mbus_index_ida, 0, 0, GFP_KERNEL); - if (ret < 0) - goto free_mbdev; - - mbdev->index = ret; + mbdev->index = index; dev_set_name(&mbdev->dev, "mbus-dev%u", mbdev->index); /* * device_register() causes the bus infrastructure to look for a @@ -180,10 +172,8 @@ mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, */ ret = device_register(&mbdev->dev); if (ret) - goto ida_remove; + goto free_mbdev; return mbdev; -ida_remove: - ida_simple_remove(&mbus_index_ida, mbdev->index); free_mbdev: kfree(mbdev); return ERR_PTR(ret); @@ -192,10 +182,7 @@ EXPORT_SYMBOL_GPL(mbus_register_device); void mbus_unregister_device(struct mbus_device *mbdev) { - int index = mbdev->index; /* save for after device release */ - device_unregister(&mbdev->dev); - ida_simple_remove(&mbus_index_ida, index); } EXPORT_SYMBOL_GPL(mbus_unregister_device); @@ -207,7 +194,6 @@ static int __init mbus_init(void) static void __exit mbus_exit(void) { bus_unregister(&mic_bus); - ida_destroy(&mbus_index_ida); } core_initcall(mbus_init); diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c index 2da7ceed015d..fd2702143022 100644 --- a/drivers/misc/mic/bus/scif_bus.c +++ b/drivers/misc/mic/bus/scif_bus.c @@ -28,7 +28,6 @@ static ssize_t device_show(struct device *d, return sprintf(buf, "0x%04x\n", dev->id.device); } - static DEVICE_ATTR_RO(device); static ssize_t vendor_show(struct device *d, @@ -38,7 +37,6 @@ static ssize_t vendor_show(struct device *d, return sprintf(buf, "0x%04x\n", dev->id.vendor); } - static DEVICE_ATTR_RO(vendor); static ssize_t modalias_show(struct device *d, @@ -49,7 +47,6 @@ static ssize_t modalias_show(struct device *d, return sprintf(buf, "scif:d%08Xv%08X\n", dev->id.device, dev->id.vendor); } - static DEVICE_ATTR_RO(modalias); static struct attribute *scif_dev_attrs[] = { @@ -144,7 +141,8 @@ struct scif_hw_dev * scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, struct scif_hw_ops *hw_ops, u8 dnode, u8 snode, struct mic_mw *mmio, struct mic_mw *aper, void *dp, - void __iomem *rdp, struct dma_chan **chan, int num_chan) + void __iomem *rdp, struct dma_chan **chan, int num_chan, + bool card_rel_da) { int ret; struct scif_hw_dev *sdev; @@ -171,6 +169,7 @@ scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, dma_set_mask(&sdev->dev, DMA_BIT_MASK(64)); sdev->dma_ch = chan; sdev->num_dma_ch = num_chan; + sdev->card_rel_da = card_rel_da; dev_set_name(&sdev->dev, "scif-dev%u", sdev->dnode); /* * device_register() causes the bus infrastructure to look for a diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h index 335a228a8236..94f29ac608b6 100644 --- a/drivers/misc/mic/bus/scif_bus.h +++ b/drivers/misc/mic/bus/scif_bus.h @@ -46,6 +46,8 @@ struct scif_hw_dev_id { * @rdp - Remote device page * @dma_ch - Array of DMA channels * @num_dma_ch - Number of DMA channels available + * @card_rel_da - Set to true if DMA addresses programmed in the DMA engine + * are relative to the card point of view */ struct scif_hw_dev { struct scif_hw_ops *hw_ops; @@ -59,6 +61,7 @@ struct scif_hw_dev { void __iomem *rdp; struct dma_chan **dma_ch; int num_dma_ch; + bool card_rel_da; }; /** @@ -114,7 +117,8 @@ scif_register_device(struct device *pdev, int id, struct scif_hw_ops *hw_ops, u8 dnode, u8 snode, struct mic_mw *mmio, struct mic_mw *aper, void *dp, void __iomem *rdp, - struct dma_chan **chan, int num_chan); + struct dma_chan **chan, int num_chan, + bool card_rel_da); void scif_unregister_device(struct scif_hw_dev *sdev); static inline struct scif_hw_dev *dev_to_scif(struct device *dev) diff --git a/drivers/misc/mic/card/mic_device.c b/drivers/misc/mic/card/mic_device.c index 6338908b2252..d0edaf7e0cd5 100644 --- a/drivers/misc/mic/card/mic_device.c +++ b/drivers/misc/mic/card/mic_device.c @@ -37,71 +37,6 @@ #include "mic_virtio.h" static struct mic_driver *g_drv; -static struct mic_irq *shutdown_cookie; - -static void mic_notify_host(u8 state) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - iowrite8(state, &bootparam->shutdown_status); - dev_dbg(mdrv->dev, "%s %d system_state %d\n", - __func__, __LINE__, state); - mic_send_intr(&mdrv->mdev, ioread8(&bootparam->c2h_shutdown_db)); -} - -static int mic_panic_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - iowrite8(-1, &bootparam->h2c_config_db); - iowrite8(-1, &bootparam->h2c_shutdown_db); - mic_notify_host(MIC_CRASHED); - return NOTIFY_DONE; -} - -static struct notifier_block mic_panic = { - .notifier_call = mic_panic_event, -}; - -static irqreturn_t mic_shutdown_isr(int irq, void *data) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - mic_ack_interrupt(&g_drv->mdev); - if (ioread8(&bootparam->shutdown_card)) - orderly_poweroff(true); - return IRQ_HANDLED; -} - -static int mic_shutdown_init(void) -{ - int rc = 0; - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - int shutdown_db; - - shutdown_db = mic_next_card_db(); - shutdown_cookie = mic_request_card_irq(mic_shutdown_isr, NULL, - "Shutdown", mdrv, shutdown_db); - if (IS_ERR(shutdown_cookie)) - rc = PTR_ERR(shutdown_cookie); - else - iowrite8(shutdown_db, &bootparam->h2c_shutdown_db); - return rc; -} - -static void mic_shutdown_uninit(void) -{ - struct mic_driver *mdrv = g_drv; - struct mic_bootparam __iomem *bootparam = mdrv->dp; - - iowrite8(-1, &bootparam->h2c_shutdown_db); - mic_free_card_irq(shutdown_cookie, mdrv); -} static int __init mic_dp_init(void) { @@ -359,11 +294,7 @@ int __init mic_driver_init(struct mic_driver *mdrv) u8 node_id; g_drv = mdrv; - /* - * Unloading the card module is not supported. The MIC card module - * handles fundamental operations like host/card initiated shutdowns - * and informing the host about card crashes and cannot be unloaded. - */ + /* Unloading the card module is not supported. */ if (!try_module_get(mdrv->dev->driver->owner)) { rc = -ENODEV; goto done; @@ -374,12 +305,9 @@ int __init mic_driver_init(struct mic_driver *mdrv) rc = mic_init_irq(); if (rc) goto dp_uninit; - rc = mic_shutdown_init(); - if (rc) - goto irq_uninit; if (!mic_request_dma_chans(mdrv)) { rc = -ENODEV; - goto shutdown_uninit; + goto irq_uninit; } rc = mic_devices_init(mdrv); if (rc) @@ -390,21 +318,18 @@ int __init mic_driver_init(struct mic_driver *mdrv) NULL, &scif_hw_ops, 0, node_id, &mdrv->mdev.mmio, NULL, NULL, mdrv->dp, mdrv->dma_ch, - mdrv->num_dma_ch); + mdrv->num_dma_ch, true); if (IS_ERR(mdrv->scdev)) { rc = PTR_ERR(mdrv->scdev); goto device_uninit; } mic_create_card_debug_dir(mdrv); - atomic_notifier_chain_register(&panic_notifier_list, &mic_panic); done: return rc; device_uninit: mic_devices_uninit(mdrv); dma_free: mic_free_dma_chans(mdrv); -shutdown_uninit: - mic_shutdown_uninit(); irq_uninit: mic_uninit_irq(); dp_uninit: @@ -425,13 +350,6 @@ void mic_driver_uninit(struct mic_driver *mdrv) scif_unregister_device(mdrv->scdev); mic_devices_uninit(mdrv); mic_free_dma_chans(mdrv); - /* - * Inform the host about the shutdown status i.e. poweroff/restart etc. - * The module cannot be unloaded so the only code path to call - * mic_devices_uninit(..) is the shutdown callback. - */ - mic_notify_host(system_state); - mic_shutdown_uninit(); mic_uninit_irq(); mic_dp_uninit(); module_put(mdrv->dev->driver->owner); diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c index 77fd41781c2e..b2958ce2368c 100644 --- a/drivers/misc/mic/card/mic_x100.c +++ b/drivers/misc/mic/card/mic_x100.c @@ -261,7 +261,7 @@ static int __init mic_probe(struct platform_device *pdev) mic_hw_intr_init(mdrv); platform_set_drvdata(pdev, mdrv); mdrv->dma_mbdev = mbus_register_device(mdrv->dev, MBUS_DEV_DMA_MIC, - NULL, &mbus_hw_ops, + NULL, &mbus_hw_ops, 0, mdrv->mdev.mmio.va); if (IS_ERR(mdrv->dma_mbdev)) { rc = PTR_ERR(mdrv->dma_mbdev); diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h index 0b58c46045dc..50776772ebdf 100644 --- a/drivers/misc/mic/common/mic_dev.h +++ b/drivers/misc/mic/common/mic_dev.h @@ -21,6 +21,19 @@ #ifndef __MIC_DEV_H__ #define __MIC_DEV_H__ +/* The maximum number of MIC devices supported in a single host system. */ +#define MIC_MAX_NUM_DEVS 128 + +/** + * enum mic_hw_family - The hardware family to which a device belongs. + */ +enum mic_hw_family { + MIC_FAMILY_X100 = 0, + MIC_FAMILY_X200, + MIC_FAMILY_UNKNOWN, + MIC_FAMILY_LAST +}; + /** * struct mic_mw - MIC memory window * diff --git a/drivers/misc/mic/cosm/Makefile b/drivers/misc/mic/cosm/Makefile new file mode 100644 index 000000000000..b85d4d49df46 --- /dev/null +++ b/drivers/misc/mic/cosm/Makefile @@ -0,0 +1,10 @@ +# +# Makefile - Intel MIC Coprocessor State Management (COSM) Driver +# Copyright(c) 2015, Intel Corporation. +# +obj-$(CONFIG_MIC_COSM) += mic_cosm.o + +mic_cosm-objs := cosm_main.o +mic_cosm-objs += cosm_debugfs.o +mic_cosm-objs += cosm_sysfs.o +mic_cosm-objs += cosm_scif_server.o diff --git a/drivers/misc/mic/cosm/cosm_debugfs.c b/drivers/misc/mic/cosm/cosm_debugfs.c new file mode 100644 index 000000000000..30d953dff4fa --- /dev/null +++ b/drivers/misc/mic/cosm/cosm_debugfs.c @@ -0,0 +1,155 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ + +#include <linux/debugfs.h> +#include <linux/slab.h> +#include "cosm_main.h" + +/* Debugfs parent dir */ +static struct dentry *cosm_dbg; + +/** + * cosm_log_buf_show - Display MIC kernel log buffer + * + * log_buf addr/len is read from System.map by user space + * and populated in sysfs entries. + */ +static int cosm_log_buf_show(struct seq_file *s, void *unused) +{ + void __iomem *log_buf_va; + int __iomem *log_buf_len_va; + struct cosm_device *cdev = s->private; + void *kva; + int size; + u64 aper_offset; + + if (!cdev || !cdev->log_buf_addr || !cdev->log_buf_len) + goto done; + + mutex_lock(&cdev->cosm_mutex); + switch (cdev->state) { + case MIC_BOOTING: + case MIC_ONLINE: + case MIC_SHUTTING_DOWN: + break; + default: + goto unlock; + } + + /* + * Card kernel will never be relocated and any kernel text/data mapping + * can be translated to phys address by subtracting __START_KERNEL_map. + */ + aper_offset = (u64)cdev->log_buf_len - __START_KERNEL_map; + log_buf_len_va = cdev->hw_ops->aper(cdev)->va + aper_offset; + aper_offset = (u64)cdev->log_buf_addr - __START_KERNEL_map; + log_buf_va = cdev->hw_ops->aper(cdev)->va + aper_offset; + + size = ioread32(log_buf_len_va); + kva = kmalloc(size, GFP_KERNEL); + if (!kva) + goto unlock; + + memcpy_fromio(kva, log_buf_va, size); + seq_write(s, kva, size); + kfree(kva); +unlock: + mutex_unlock(&cdev->cosm_mutex); +done: + return 0; +} + +static int cosm_log_buf_open(struct inode *inode, struct file *file) +{ + return single_open(file, cosm_log_buf_show, inode->i_private); +} + +static const struct file_operations log_buf_ops = { + .owner = THIS_MODULE, + .open = cosm_log_buf_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +/** + * cosm_force_reset_show - Force MIC reset + * + * Invokes the force_reset COSM bus op instead of the standard reset + * op in case a force reset of the MIC device is required + */ +static int cosm_force_reset_show(struct seq_file *s, void *pos) +{ + struct cosm_device *cdev = s->private; + + cosm_stop(cdev, true); + return 0; +} + +static int cosm_force_reset_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, cosm_force_reset_show, inode->i_private); +} + +static const struct file_operations force_reset_ops = { + .owner = THIS_MODULE, + .open = cosm_force_reset_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +void cosm_create_debug_dir(struct cosm_device *cdev) +{ + char name[16]; + + if (!cosm_dbg) + return; + + scnprintf(name, sizeof(name), "mic%d", cdev->index); + cdev->dbg_dir = debugfs_create_dir(name, cosm_dbg); + if (!cdev->dbg_dir) + return; + + debugfs_create_file("log_buf", 0444, cdev->dbg_dir, cdev, &log_buf_ops); + debugfs_create_file("force_reset", 0444, cdev->dbg_dir, cdev, + &force_reset_ops); +} + +void cosm_delete_debug_dir(struct cosm_device *cdev) +{ + if (!cdev->dbg_dir) + return; + + debugfs_remove_recursive(cdev->dbg_dir); +} + +void cosm_init_debugfs(void) +{ + cosm_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (!cosm_dbg) + pr_err("can't create debugfs dir\n"); +} + +void cosm_exit_debugfs(void) +{ + debugfs_remove(cosm_dbg); +} diff --git a/drivers/misc/mic/cosm/cosm_main.c b/drivers/misc/mic/cosm/cosm_main.c new file mode 100644 index 000000000000..4b4b356c797d --- /dev/null +++ b/drivers/misc/mic/cosm/cosm_main.c @@ -0,0 +1,388 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ + +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/idr.h> +#include <linux/slab.h> +#include <linux/cred.h> +#include "cosm_main.h" + +static const char cosm_driver_name[] = "mic"; + +/* COSM ID allocator */ +static struct ida g_cosm_ida; +/* Class of MIC devices for sysfs accessibility. */ +static struct class *g_cosm_class; +/* Number of MIC devices */ +static atomic_t g_num_dev; + +/** + * cosm_hw_reset - Issue a HW reset for the MIC device + * @cdev: pointer to cosm_device instance + */ +static void cosm_hw_reset(struct cosm_device *cdev, bool force) +{ + int i; + +#define MIC_RESET_TO (45) + if (force && cdev->hw_ops->force_reset) + cdev->hw_ops->force_reset(cdev); + else + cdev->hw_ops->reset(cdev); + + for (i = 0; i < MIC_RESET_TO; i++) { + if (cdev->hw_ops->ready(cdev)) { + cosm_set_state(cdev, MIC_READY); + return; + } + /* + * Resets typically take 10s of seconds to complete. + * Since an MMIO read is required to check if the + * firmware is ready or not, a 1 second delay works nicely. + */ + msleep(1000); + } + cosm_set_state(cdev, MIC_RESET_FAILED); +} + +/** + * cosm_start - Start the MIC + * @cdev: pointer to cosm_device instance + * + * This function prepares an MIC for boot and initiates boot. + * RETURNS: An appropriate -ERRNO error value on error, or 0 for success. + */ +int cosm_start(struct cosm_device *cdev) +{ + const struct cred *orig_cred; + struct cred *override_cred; + int rc; + + mutex_lock(&cdev->cosm_mutex); + if (!cdev->bootmode) { + dev_err(&cdev->dev, "%s %d bootmode not set\n", + __func__, __LINE__); + rc = -EINVAL; + goto unlock_ret; + } +retry: + if (cdev->state != MIC_READY) { + dev_err(&cdev->dev, "%s %d MIC state not READY\n", + __func__, __LINE__); + rc = -EINVAL; + goto unlock_ret; + } + if (!cdev->hw_ops->ready(cdev)) { + cosm_hw_reset(cdev, false); + /* + * The state will either be MIC_READY if the reset succeeded + * or MIC_RESET_FAILED if the firmware reset failed. + */ + goto retry; + } + + /* + * Set credentials to root to allow non-root user to download initramsfs + * with 600 permissions + */ + override_cred = prepare_creds(); + if (!override_cred) { + dev_err(&cdev->dev, "%s %d prepare_creds failed\n", + __func__, __LINE__); + rc = -ENOMEM; + goto unlock_ret; + } + override_cred->fsuid = GLOBAL_ROOT_UID; + orig_cred = override_creds(override_cred); + + rc = cdev->hw_ops->start(cdev, cdev->index); + + revert_creds(orig_cred); + put_cred(override_cred); + if (rc) + goto unlock_ret; + + /* + * If linux is being booted, card is treated 'online' only + * when the scif interface in the card is up. If anything else + * is booted, we set card to 'online' immediately. + */ + if (!strcmp(cdev->bootmode, "linux")) + cosm_set_state(cdev, MIC_BOOTING); + else + cosm_set_state(cdev, MIC_ONLINE); +unlock_ret: + mutex_unlock(&cdev->cosm_mutex); + if (rc) + dev_err(&cdev->dev, "cosm_start failed rc %d\n", rc); + return rc; +} + +/** + * cosm_stop - Prepare the MIC for reset and trigger reset + * @cdev: pointer to cosm_device instance + * @force: force a MIC to reset even if it is already reset and ready. + * + * RETURNS: None + */ +void cosm_stop(struct cosm_device *cdev, bool force) +{ + mutex_lock(&cdev->cosm_mutex); + if (cdev->state != MIC_READY || force) { + /* + * Don't call hw_ops if they have been called previously. + * stop(..) calls device_unregister and will crash the system if + * called multiple times. + */ + bool call_hw_ops = cdev->state != MIC_RESET_FAILED && + cdev->state != MIC_READY; + + if (cdev->state != MIC_RESETTING) + cosm_set_state(cdev, MIC_RESETTING); + cdev->heartbeat_watchdog_enable = false; + if (call_hw_ops) + cdev->hw_ops->stop(cdev, force); + cosm_hw_reset(cdev, force); + cosm_set_shutdown_status(cdev, MIC_NOP); + if (call_hw_ops && cdev->hw_ops->post_reset) + cdev->hw_ops->post_reset(cdev, cdev->state); + } + mutex_unlock(&cdev->cosm_mutex); + flush_work(&cdev->scif_work); +} + +/** + * cosm_reset_trigger_work - Trigger MIC reset + * @work: The work structure + * + * This work is scheduled whenever the host wants to reset the MIC. + */ +static void cosm_reset_trigger_work(struct work_struct *work) +{ + struct cosm_device *cdev = container_of(work, struct cosm_device, + reset_trigger_work); + cosm_stop(cdev, false); +} + +/** + * cosm_reset - Schedule MIC reset + * @cdev: pointer to cosm_device instance + * + * RETURNS: An -EINVAL if the card is already READY or 0 for success. + */ +int cosm_reset(struct cosm_device *cdev) +{ + int rc = 0; + + mutex_lock(&cdev->cosm_mutex); + if (cdev->state != MIC_READY) { + cosm_set_state(cdev, MIC_RESETTING); + schedule_work(&cdev->reset_trigger_work); + } else { + dev_err(&cdev->dev, "%s %d MIC is READY\n", __func__, __LINE__); + rc = -EINVAL; + } + mutex_unlock(&cdev->cosm_mutex); + return rc; +} + +/** + * cosm_shutdown - Initiate MIC shutdown. + * @cdev: pointer to cosm_device instance + * + * RETURNS: None + */ +int cosm_shutdown(struct cosm_device *cdev) +{ + struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN }; + int rc = 0; + + mutex_lock(&cdev->cosm_mutex); + if (cdev->state != MIC_ONLINE) { + rc = -EINVAL; + dev_err(&cdev->dev, "%s %d skipping shutdown in state: %s\n", + __func__, __LINE__, cosm_state_string[cdev->state]); + goto err; + } + + if (!cdev->epd) { + rc = -ENOTCONN; + dev_err(&cdev->dev, "%s %d scif endpoint not connected rc %d\n", + __func__, __LINE__, rc); + goto err; + } + + rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) { + dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n", + __func__, __LINE__, rc); + goto err; + } + cdev->heartbeat_watchdog_enable = false; + cosm_set_state(cdev, MIC_SHUTTING_DOWN); + rc = 0; +err: + mutex_unlock(&cdev->cosm_mutex); + return rc; +} + +static int cosm_driver_probe(struct cosm_device *cdev) +{ + int rc; + + /* Initialize SCIF server at first probe */ + if (atomic_add_return(1, &g_num_dev) == 1) { + rc = cosm_scif_init(); + if (rc) + goto scif_exit; + } + mutex_init(&cdev->cosm_mutex); + INIT_WORK(&cdev->reset_trigger_work, cosm_reset_trigger_work); + INIT_WORK(&cdev->scif_work, cosm_scif_work); + cdev->sysfs_heartbeat_enable = true; + cosm_sysfs_init(cdev); + cdev->sdev = device_create_with_groups(g_cosm_class, cdev->dev.parent, + MKDEV(0, cdev->index), cdev, cdev->attr_group, + "mic%d", cdev->index); + if (IS_ERR(cdev->sdev)) { + rc = PTR_ERR(cdev->sdev); + dev_err(&cdev->dev, "device_create_with_groups failed rc %d\n", + rc); + goto scif_exit; + } + + cdev->state_sysfs = sysfs_get_dirent(cdev->sdev->kobj.sd, + "state"); + if (!cdev->state_sysfs) { + rc = -ENODEV; + dev_err(&cdev->dev, "sysfs_get_dirent failed rc %d\n", rc); + goto destroy_device; + } + cosm_create_debug_dir(cdev); + return 0; +destroy_device: + device_destroy(g_cosm_class, MKDEV(0, cdev->index)); +scif_exit: + if (atomic_dec_and_test(&g_num_dev)) + cosm_scif_exit(); + return rc; +} + +static void cosm_driver_remove(struct cosm_device *cdev) +{ + cosm_delete_debug_dir(cdev); + sysfs_put(cdev->state_sysfs); + device_destroy(g_cosm_class, MKDEV(0, cdev->index)); + flush_work(&cdev->reset_trigger_work); + cosm_stop(cdev, false); + if (atomic_dec_and_test(&g_num_dev)) + cosm_scif_exit(); + + /* These sysfs entries might have allocated */ + kfree(cdev->cmdline); + kfree(cdev->firmware); + kfree(cdev->ramdisk); + kfree(cdev->bootmode); +} + +static int cosm_suspend(struct device *dev) +{ + struct cosm_device *cdev = dev_to_cosm(dev); + + mutex_lock(&cdev->cosm_mutex); + switch (cdev->state) { + /** + * Suspend/freeze hooks in userspace have already shutdown the card. + * Card should be 'ready' in most cases. It is however possible that + * some userspace application initiated a boot. In those cases, we + * simply reset the card. + */ + case MIC_ONLINE: + case MIC_BOOTING: + case MIC_SHUTTING_DOWN: + mutex_unlock(&cdev->cosm_mutex); + cosm_stop(cdev, false); + break; + default: + mutex_unlock(&cdev->cosm_mutex); + break; + } + return 0; +} + +static const struct dev_pm_ops cosm_pm_ops = { + .suspend = cosm_suspend, + .freeze = cosm_suspend +}; + +static struct cosm_driver cosm_driver = { + .driver = { + .name = KBUILD_MODNAME, + .owner = THIS_MODULE, + .pm = &cosm_pm_ops, + }, + .probe = cosm_driver_probe, + .remove = cosm_driver_remove +}; + +static int __init cosm_init(void) +{ + int ret; + + cosm_init_debugfs(); + + g_cosm_class = class_create(THIS_MODULE, cosm_driver_name); + if (IS_ERR(g_cosm_class)) { + ret = PTR_ERR(g_cosm_class); + pr_err("class_create failed ret %d\n", ret); + goto cleanup_debugfs; + } + + ida_init(&g_cosm_ida); + ret = cosm_register_driver(&cosm_driver); + if (ret) { + pr_err("cosm_register_driver failed ret %d\n", ret); + goto ida_destroy; + } + return 0; +ida_destroy: + ida_destroy(&g_cosm_ida); + class_destroy(g_cosm_class); +cleanup_debugfs: + cosm_exit_debugfs(); + return ret; +} + +static void __exit cosm_exit(void) +{ + cosm_unregister_driver(&cosm_driver); + ida_destroy(&g_cosm_ida); + class_destroy(g_cosm_class); + cosm_exit_debugfs(); +} + +module_init(cosm_init); +module_exit(cosm_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC Coprocessor State Management (COSM) Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/cosm/cosm_main.h b/drivers/misc/mic/cosm/cosm_main.h new file mode 100644 index 000000000000..f01156fca881 --- /dev/null +++ b/drivers/misc/mic/cosm/cosm_main.h @@ -0,0 +1,70 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ +#ifndef _COSM_COSM_H_ +#define _COSM_COSM_H_ + +#include <linux/scif.h> +#include "../bus/cosm_bus.h" + +#define COSM_HEARTBEAT_SEND_SEC 30 +#define SCIF_COSM_LISTEN_PORT 201 + +/** + * enum COSM msg id's + * @COSM_MSG_SHUTDOWN: host->card trigger shutdown + * @COSM_MSG_SYNC_TIME: host->card send host time to card to sync time + * @COSM_MSG_HEARTBEAT: card->host heartbeat + * @COSM_MSG_SHUTDOWN_STATUS: card->host with shutdown status as payload + */ +enum cosm_msg_id { + COSM_MSG_SHUTDOWN, + COSM_MSG_SYNC_TIME, + COSM_MSG_HEARTBEAT, + COSM_MSG_SHUTDOWN_STATUS, +}; + +struct cosm_msg { + u64 id; + union { + u64 shutdown_status; + struct timespec64 timespec; + }; +}; + +extern const char * const cosm_state_string[]; +extern const char * const cosm_shutdown_status_string[]; + +void cosm_sysfs_init(struct cosm_device *cdev); +int cosm_start(struct cosm_device *cdev); +void cosm_stop(struct cosm_device *cdev, bool force); +int cosm_reset(struct cosm_device *cdev); +int cosm_shutdown(struct cosm_device *cdev); +void cosm_set_state(struct cosm_device *cdev, u8 state); +void cosm_set_shutdown_status(struct cosm_device *cdev, u8 status); +void cosm_init_debugfs(void); +void cosm_exit_debugfs(void); +void cosm_create_debug_dir(struct cosm_device *cdev); +void cosm_delete_debug_dir(struct cosm_device *cdev); +int cosm_scif_init(void); +void cosm_scif_exit(void); +void cosm_scif_work(struct work_struct *work); + +#endif diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c new file mode 100644 index 000000000000..5696df4326b5 --- /dev/null +++ b/drivers/misc/mic/cosm/cosm_scif_server.c @@ -0,0 +1,405 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ +#include <linux/kthread.h> +#include "cosm_main.h" + +/* + * The COSM driver uses SCIF to communicate between the management node and the + * MIC cards. SCIF is used to (a) Send a shutdown command to the card (b) + * receive a shutdown status back from the card upon completion of shutdown and + * (c) receive periodic heartbeat messages from the card used to deduce if the + * card has crashed. + * + * A COSM server consisting of a SCIF listening endpoint waits for incoming + * connections from the card. Upon acceptance of the connection, a separate + * work-item is scheduled to handle SCIF message processing for that card. The + * life-time of this work-item is therefore the time from which the connection + * from a card is accepted to the time at which the connection is closed. A new + * work-item starts each time the card boots and is alive till the card (a) + * shuts down (b) is reset (c) crashes (d) cosm_client driver on the card is + * unloaded. + * + * From the point of view of COSM interactions with SCIF during card + * shutdown, reset and crash are as follows: + * + * Card shutdown + * ------------- + * 1. COSM client on the card invokes orderly_poweroff() in response to SHUTDOWN + * message from the host. + * 2. Card driver shutdown callback invokes scif_unregister_device(..) resulting + * in scif_remove(..) getting called on the card + * 3. scif_remove -> scif_stop -> scif_handle_remove_node -> + * scif_peer_unregister_device -> device_unregister for the host peer device + * 4. During device_unregister remove(..) method of cosm_client is invoked which + * closes the COSM SCIF endpoint on the card. This results in a SCIF_DISCNCT + * message being sent to host SCIF. SCIF_DISCNCT message processing on the + * host SCIF sets the host COSM SCIF endpoint state to DISCONNECTED and wakes + * up the host COSM thread blocked in scif_poll(..) resulting in + * scif_poll(..) returning POLLHUP. + * 5. On the card, scif_peer_release_dev is next called which results in an + * SCIF_EXIT message being sent to the host and after receiving the + * SCIF_EXIT_ACK from the host the peer device teardown on the card is + * complete. + * 6. As part of the SCIF_EXIT message processing on the host, host sends a + * SCIF_REMOVE_NODE to itself corresponding to the card being removed. This + * starts a similar SCIF peer device teardown sequence on the host + * corresponding to the card being shut down. + * + * Card reset + * ---------- + * The case of interest here is when the card has not been previously shut down + * since most of the steps below are skipped in that case: + + * 1. cosm_stop(..) invokes hw_ops->stop(..) method of the base PCIe driver + * which unregisters the SCIF HW device resulting in scif_remove(..) being + * called on the host. + * 2. scif_remove(..) calls scif_disconnect_node(..) which results in a + * SCIF_EXIT message being sent to the card. + * 3. The card executes scif_stop() as part of SCIF_EXIT message + * processing. This results in the COSM endpoint on the card being closed and + * the SCIF host peer device on the card getting unregistered similar to + * steps 3, 4 and 5 for the card shutdown case above. scif_poll(..) on the + * host returns POLLHUP as a result. + * 4. On the host, card peer device unregister and SCIF HW remove(..) also + * subsequently complete. + * + * Card crash + * ---------- + * If a reset is issued after the card has crashed, there is no SCIF_DISCNT + * message from the card which would result in scif_poll(..) returning + * POLLHUP. In this case when the host SCIF driver sends a SCIF_REMOVE_NODE + * message to itself resulting in the card SCIF peer device being unregistered, + * this results in a scif_peer_release_dev -> scif_cleanup_scifdev-> + * scif_invalidate_ep call sequence which sets the endpoint state to + * DISCONNECTED and results in scif_poll(..) returning POLLHUP. + */ + +#define COSM_SCIF_BACKLOG 16 +#define COSM_HEARTBEAT_CHECK_DELTA_SEC 10 +#define COSM_HEARTBEAT_TIMEOUT_SEC \ + (COSM_HEARTBEAT_SEND_SEC + COSM_HEARTBEAT_CHECK_DELTA_SEC) +#define COSM_HEARTBEAT_TIMEOUT_MSEC (COSM_HEARTBEAT_TIMEOUT_SEC * MSEC_PER_SEC) + +static struct task_struct *server_thread; +static scif_epd_t listen_epd; + +/* Publish MIC card's shutdown status to user space MIC daemon */ +static void cosm_update_mic_status(struct cosm_device *cdev) +{ + if (cdev->shutdown_status_int != MIC_NOP) { + cosm_set_shutdown_status(cdev, cdev->shutdown_status_int); + cdev->shutdown_status_int = MIC_NOP; + } +} + +/* Store MIC card's shutdown status internally when it is received */ +static void cosm_shutdown_status_int(struct cosm_device *cdev, + enum mic_status shutdown_status) +{ + switch (shutdown_status) { + case MIC_HALTED: + case MIC_POWER_OFF: + case MIC_RESTART: + case MIC_CRASHED: + break; + default: + dev_err(&cdev->dev, "%s %d Unexpected shutdown_status %d\n", + __func__, __LINE__, shutdown_status); + return; + }; + cdev->shutdown_status_int = shutdown_status; + cdev->heartbeat_watchdog_enable = false; + + if (cdev->state != MIC_SHUTTING_DOWN) + cosm_set_state(cdev, MIC_SHUTTING_DOWN); +} + +/* Non-blocking recv. Read and process all available messages */ +static void cosm_scif_recv(struct cosm_device *cdev) +{ + struct cosm_msg msg; + int rc; + + while (1) { + rc = scif_recv(cdev->epd, &msg, sizeof(msg), 0); + if (!rc) { + break; + } else if (rc < 0) { + dev_dbg(&cdev->dev, "%s: %d rc %d\n", + __func__, __LINE__, rc); + break; + } + dev_dbg(&cdev->dev, "%s: %d rc %d id 0x%llx\n", + __func__, __LINE__, rc, msg.id); + + switch (msg.id) { + case COSM_MSG_SHUTDOWN_STATUS: + cosm_shutdown_status_int(cdev, msg.shutdown_status); + break; + case COSM_MSG_HEARTBEAT: + /* Nothing to do, heartbeat only unblocks scif_poll */ + break; + default: + dev_err(&cdev->dev, "%s: %d unknown msg.id %lld\n", + __func__, __LINE__, msg.id); + break; + } + } +} + +/* Publish crashed status for this MIC card */ +static void cosm_set_crashed(struct cosm_device *cdev) +{ + dev_err(&cdev->dev, "node alive timeout\n"); + cosm_shutdown_status_int(cdev, MIC_CRASHED); + cosm_update_mic_status(cdev); +} + +/* Send host time to the MIC card to sync system time between host and MIC */ +static void cosm_send_time(struct cosm_device *cdev) +{ + struct cosm_msg msg = { .id = COSM_MSG_SYNC_TIME }; + int rc; + + getnstimeofday64(&msg.timespec); + rc = scif_send(cdev->epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) + dev_err(&cdev->dev, "%s %d scif_send failed rc %d\n", + __func__, __LINE__, rc); +} + +/* + * Close this cosm_device's endpoint after its peer endpoint on the card has + * been closed. In all cases except MIC card crash POLLHUP on the host is + * triggered by the client's endpoint being closed. + */ +static void cosm_scif_close(struct cosm_device *cdev) +{ + /* + * Because SHUTDOWN_STATUS message is sent by the MIC cards in the + * reboot notifier when shutdown is still not complete, we notify mpssd + * to reset the card when SCIF endpoint is closed. + */ + cosm_update_mic_status(cdev); + scif_close(cdev->epd); + cdev->epd = NULL; + dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__); +} + +/* + * Set card state to ONLINE when a new SCIF connection from a MIC card is + * received. Normally the state is BOOTING when the connection comes in, but can + * be ONLINE if cosm_client driver on the card was unloaded and then reloaded. + */ +static int cosm_set_online(struct cosm_device *cdev) +{ + int rc = 0; + + if (MIC_BOOTING == cdev->state || MIC_ONLINE == cdev->state) { + cdev->heartbeat_watchdog_enable = cdev->sysfs_heartbeat_enable; + cdev->epd = cdev->newepd; + if (cdev->state == MIC_BOOTING) + cosm_set_state(cdev, MIC_ONLINE); + cosm_send_time(cdev); + dev_dbg(&cdev->dev, "%s %d\n", __func__, __LINE__); + } else { + dev_warn(&cdev->dev, "%s %d not going online in state: %s\n", + __func__, __LINE__, cosm_state_string[cdev->state]); + rc = -EINVAL; + } + /* Drop reference acquired by bus_find_device in the server thread */ + put_device(&cdev->dev); + return rc; +} + +/* + * Work function for handling work for a SCIF connection from a particular MIC + * card. It first sets the card state to ONLINE and then calls scif_poll to + * block on activity such as incoming messages on the SCIF endpoint. When the + * endpoint is closed, the work function exits, completing its life cycle, from + * MIC card boot to card shutdown/reset/crash. + */ +void cosm_scif_work(struct work_struct *work) +{ + struct cosm_device *cdev = container_of(work, struct cosm_device, + scif_work); + struct scif_pollepd pollepd; + int rc; + + mutex_lock(&cdev->cosm_mutex); + if (cosm_set_online(cdev)) + goto exit; + + while (1) { + pollepd.epd = cdev->epd; + pollepd.events = POLLIN; + + /* Drop the mutex before blocking in scif_poll(..) */ + mutex_unlock(&cdev->cosm_mutex); + /* poll(..) with timeout on our endpoint */ + rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_TIMEOUT_MSEC); + mutex_lock(&cdev->cosm_mutex); + if (rc < 0) { + dev_err(&cdev->dev, "%s %d scif_poll rc %d\n", + __func__, __LINE__, rc); + continue; + } + + /* There is a message from the card */ + if (pollepd.revents & POLLIN) + cosm_scif_recv(cdev); + + /* The peer endpoint is closed or this endpoint disconnected */ + if (pollepd.revents & POLLHUP) { + cosm_scif_close(cdev); + break; + } + + /* Did we timeout from poll? */ + if (!rc && cdev->heartbeat_watchdog_enable) + cosm_set_crashed(cdev); + } +exit: + dev_dbg(&cdev->dev, "%s %d exiting\n", __func__, __LINE__); + mutex_unlock(&cdev->cosm_mutex); +} + +/* + * COSM SCIF server thread function. Accepts incoming SCIF connections from MIC + * cards, finds the correct cosm_device to associate that connection with and + * schedules individual work items for each MIC card. + */ +static int cosm_scif_server(void *unused) +{ + struct cosm_device *cdev; + scif_epd_t newepd; + struct scif_port_id port_id; + int rc; + + allow_signal(SIGKILL); + + while (!kthread_should_stop()) { + rc = scif_accept(listen_epd, &port_id, &newepd, + SCIF_ACCEPT_SYNC); + if (rc < 0) { + if (-ERESTARTSYS != rc) + pr_err("%s %d rc %d\n", __func__, __LINE__, rc); + continue; + } + + /* + * Associate the incoming connection with a particular + * cosm_device, COSM device ID == SCIF node ID - 1 + */ + cdev = cosm_find_cdev_by_id(port_id.node - 1); + if (!cdev) + continue; + cdev->newepd = newepd; + schedule_work(&cdev->scif_work); + } + + pr_debug("%s %d Server thread stopped\n", __func__, __LINE__); + return 0; +} + +static int cosm_scif_listen(void) +{ + int rc; + + listen_epd = scif_open(); + if (!listen_epd) { + pr_err("%s %d scif_open failed\n", __func__, __LINE__); + return -ENOMEM; + } + + rc = scif_bind(listen_epd, SCIF_COSM_LISTEN_PORT); + if (rc < 0) { + pr_err("%s %d scif_bind failed rc %d\n", + __func__, __LINE__, rc); + goto err; + } + + rc = scif_listen(listen_epd, COSM_SCIF_BACKLOG); + if (rc < 0) { + pr_err("%s %d scif_listen rc %d\n", __func__, __LINE__, rc); + goto err; + } + pr_debug("%s %d listen_epd set up\n", __func__, __LINE__); + return 0; +err: + scif_close(listen_epd); + listen_epd = NULL; + return rc; +} + +static void cosm_scif_listen_exit(void) +{ + pr_debug("%s %d closing listen_epd\n", __func__, __LINE__); + if (listen_epd) { + scif_close(listen_epd); + listen_epd = NULL; + } +} + +/* + * Create a listening SCIF endpoint and a server kthread which accepts incoming + * SCIF connections from MIC cards + */ +int cosm_scif_init(void) +{ + int rc = cosm_scif_listen(); + + if (rc) { + pr_err("%s %d cosm_scif_listen rc %d\n", + __func__, __LINE__, rc); + goto err; + } + + server_thread = kthread_run(cosm_scif_server, NULL, "cosm_server"); + if (IS_ERR(server_thread)) { + rc = PTR_ERR(server_thread); + pr_err("%s %d kthread_run rc %d\n", __func__, __LINE__, rc); + goto listen_exit; + } + return 0; +listen_exit: + cosm_scif_listen_exit(); +err: + return rc; +} + +/* Stop the running server thread and close the listening SCIF endpoint */ +void cosm_scif_exit(void) +{ + int rc; + + if (!IS_ERR_OR_NULL(server_thread)) { + rc = send_sig(SIGKILL, server_thread, 0); + if (rc) { + pr_err("%s %d send_sig rc %d\n", + __func__, __LINE__, rc); + return; + } + kthread_stop(server_thread); + } + + cosm_scif_listen_exit(); +} diff --git a/drivers/misc/mic/cosm/cosm_sysfs.c b/drivers/misc/mic/cosm/cosm_sysfs.c new file mode 100644 index 000000000000..29d6863b6e59 --- /dev/null +++ b/drivers/misc/mic/cosm/cosm_sysfs.c @@ -0,0 +1,461 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC Coprocessor State Management (COSM) Driver + * + */ +#include <linux/slab.h> +#include "cosm_main.h" + +/* + * A state-to-string lookup table, for exposing a human readable state + * via sysfs. Always keep in sync with enum cosm_states + */ +const char * const cosm_state_string[] = { + [MIC_READY] = "ready", + [MIC_BOOTING] = "booting", + [MIC_ONLINE] = "online", + [MIC_SHUTTING_DOWN] = "shutting_down", + [MIC_RESETTING] = "resetting", + [MIC_RESET_FAILED] = "reset_failed", +}; + +/* + * A shutdown-status-to-string lookup table, for exposing a human + * readable state via sysfs. Always keep in sync with enum cosm_shutdown_status + */ +const char * const cosm_shutdown_status_string[] = { + [MIC_NOP] = "nop", + [MIC_CRASHED] = "crashed", + [MIC_HALTED] = "halted", + [MIC_POWER_OFF] = "poweroff", + [MIC_RESTART] = "restart", +}; + +void cosm_set_shutdown_status(struct cosm_device *cdev, u8 shutdown_status) +{ + dev_dbg(&cdev->dev, "Shutdown Status %s -> %s\n", + cosm_shutdown_status_string[cdev->shutdown_status], + cosm_shutdown_status_string[shutdown_status]); + cdev->shutdown_status = shutdown_status; +} + +void cosm_set_state(struct cosm_device *cdev, u8 state) +{ + dev_dbg(&cdev->dev, "State %s -> %s\n", + cosm_state_string[cdev->state], + cosm_state_string[state]); + cdev->state = state; + sysfs_notify_dirent(cdev->state_sysfs); +} + +static ssize_t +family_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return cdev->hw_ops->family(cdev, buf); +} +static DEVICE_ATTR_RO(family); + +static ssize_t +stepping_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return cdev->hw_ops->stepping(cdev, buf); +} +static DEVICE_ATTR_RO(stepping); + +static ssize_t +state_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev || cdev->state >= MIC_LAST) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%s\n", + cosm_state_string[cdev->state]); +} + +static ssize_t +state_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int rc; + + if (!cdev) + return -EINVAL; + + if (sysfs_streq(buf, "boot")) { + rc = cosm_start(cdev); + goto done; + } + if (sysfs_streq(buf, "reset")) { + rc = cosm_reset(cdev); + goto done; + } + + if (sysfs_streq(buf, "shutdown")) { + rc = cosm_shutdown(cdev); + goto done; + } + rc = -EINVAL; +done: + if (rc) + count = rc; + return count; +} +static DEVICE_ATTR_RW(state); + +static ssize_t shutdown_status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev || cdev->shutdown_status >= MIC_STATUS_LAST) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%s\n", + cosm_shutdown_status_string[cdev->shutdown_status]); +} +static DEVICE_ATTR_RO(shutdown_status); + +static ssize_t +heartbeat_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%d\n", cdev->sysfs_heartbeat_enable); +} + +static ssize_t +heartbeat_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int enable; + int ret; + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + ret = kstrtoint(buf, 10, &enable); + if (ret) + goto unlock; + + cdev->sysfs_heartbeat_enable = enable; + /* if state is not online, cdev->heartbeat_watchdog_enable is 0 */ + if (cdev->state == MIC_ONLINE) + cdev->heartbeat_watchdog_enable = enable; + ret = count; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return ret; +} +static DEVICE_ATTR_RW(heartbeat_enable); + +static ssize_t +cmdline_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *cmdline; + + if (!cdev) + return -EINVAL; + + cmdline = cdev->cmdline; + + if (cmdline) + return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline); + return 0; +} + +static ssize_t +cmdline_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->cmdline); + + cdev->cmdline = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->cmdline) { + count = -ENOMEM; + goto unlock; + } + + strncpy(cdev->cmdline, buf, count); + + if (cdev->cmdline[count - 1] == '\n') + cdev->cmdline[count - 1] = '\0'; + else + cdev->cmdline[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(cmdline); + +static ssize_t +firmware_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *firmware; + + if (!cdev) + return -EINVAL; + + firmware = cdev->firmware; + + if (firmware) + return scnprintf(buf, PAGE_SIZE, "%s\n", firmware); + return 0; +} + +static ssize_t +firmware_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->firmware); + + cdev->firmware = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->firmware) { + count = -ENOMEM; + goto unlock; + } + strncpy(cdev->firmware, buf, count); + + if (cdev->firmware[count - 1] == '\n') + cdev->firmware[count - 1] = '\0'; + else + cdev->firmware[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(firmware); + +static ssize_t +ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *ramdisk; + + if (!cdev) + return -EINVAL; + + ramdisk = cdev->ramdisk; + + if (ramdisk) + return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk); + return 0; +} + +static ssize_t +ramdisk_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->ramdisk); + + cdev->ramdisk = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->ramdisk) { + count = -ENOMEM; + goto unlock; + } + + strncpy(cdev->ramdisk, buf, count); + + if (cdev->ramdisk[count - 1] == '\n') + cdev->ramdisk[count - 1] = '\0'; + else + cdev->ramdisk[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(ramdisk); + +static ssize_t +bootmode_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + char *bootmode; + + if (!cdev) + return -EINVAL; + + bootmode = cdev->bootmode; + + if (bootmode) + return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode); + return 0; +} + +static ssize_t +bootmode_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "flash")) + return -EINVAL; + + mutex_lock(&cdev->cosm_mutex); + kfree(cdev->bootmode); + + cdev->bootmode = kmalloc(count + 1, GFP_KERNEL); + if (!cdev->bootmode) { + count = -ENOMEM; + goto unlock; + } + + strncpy(cdev->bootmode, buf, count); + + if (cdev->bootmode[count - 1] == '\n') + cdev->bootmode[count - 1] = '\0'; + else + cdev->bootmode[count] = '\0'; +unlock: + mutex_unlock(&cdev->cosm_mutex); + return count; +} +static DEVICE_ATTR_RW(bootmode); + +static ssize_t +log_buf_addr_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_addr); +} + +static ssize_t +log_buf_addr_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int ret; + unsigned long addr; + + if (!cdev) + return -EINVAL; + + ret = kstrtoul(buf, 16, &addr); + if (ret) + goto exit; + + cdev->log_buf_addr = (void *)addr; + ret = count; +exit: + return ret; +} +static DEVICE_ATTR_RW(log_buf_addr); + +static ssize_t +log_buf_len_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + + if (!cdev) + return -EINVAL; + + return scnprintf(buf, PAGE_SIZE, "%p\n", cdev->log_buf_len); +} + +static ssize_t +log_buf_len_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cosm_device *cdev = dev_get_drvdata(dev); + int ret; + unsigned long addr; + + if (!cdev) + return -EINVAL; + + ret = kstrtoul(buf, 16, &addr); + if (ret) + goto exit; + + cdev->log_buf_len = (int *)addr; + ret = count; +exit: + return ret; +} +static DEVICE_ATTR_RW(log_buf_len); + +static struct attribute *cosm_default_attrs[] = { + &dev_attr_family.attr, + &dev_attr_stepping.attr, + &dev_attr_state.attr, + &dev_attr_shutdown_status.attr, + &dev_attr_heartbeat_enable.attr, + &dev_attr_cmdline.attr, + &dev_attr_firmware.attr, + &dev_attr_ramdisk.attr, + &dev_attr_bootmode.attr, + &dev_attr_log_buf_addr.attr, + &dev_attr_log_buf_len.attr, + + NULL +}; + +ATTRIBUTE_GROUPS(cosm_default); + +void cosm_sysfs_init(struct cosm_device *cdev) +{ + cdev->attr_group = cosm_default_groups; +} diff --git a/drivers/misc/mic/cosm_client/Makefile b/drivers/misc/mic/cosm_client/Makefile new file mode 100644 index 000000000000..6f751a519a09 --- /dev/null +++ b/drivers/misc/mic/cosm_client/Makefile @@ -0,0 +1,7 @@ +# +# Makefile - Intel MIC COSM Client Driver +# Copyright(c) 2015, Intel Corporation. +# +obj-$(CONFIG_MIC_COSM) += cosm_client.o + +cosm_client-objs += cosm_scif_client.o diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c new file mode 100644 index 000000000000..03e98bf1ac15 --- /dev/null +++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c @@ -0,0 +1,275 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Intel MIC COSM Client Driver + * + */ +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/kthread.h> +#include "../cosm/cosm_main.h" + +#define COSM_SCIF_MAX_RETRIES 10 +#define COSM_HEARTBEAT_SEND_MSEC (COSM_HEARTBEAT_SEND_SEC * MSEC_PER_SEC) + +static struct task_struct *client_thread; +static scif_epd_t client_epd; +static struct scif_peer_dev *client_spdev; + +/* + * Reboot notifier: receives shutdown status from the OS and communicates it + * back to the COSM process on the host + */ +static int cosm_reboot_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct cosm_msg msg = { .id = COSM_MSG_SHUTDOWN_STATUS }; + int rc; + + event = (event == SYS_RESTART) ? SYSTEM_RESTART : event; + dev_info(&client_spdev->dev, "%s %d received event %ld\n", + __func__, __LINE__, event); + + msg.shutdown_status = event; + rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) + dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n", + __func__, __LINE__, rc); + + return NOTIFY_DONE; +} + +static struct notifier_block cosm_reboot = { + .notifier_call = cosm_reboot_event, +}; + +/* Set system time from timespec value received from the host */ +static void cosm_set_time(struct cosm_msg *msg) +{ + int rc = do_settimeofday64(&msg->timespec); + + if (rc) + dev_err(&client_spdev->dev, "%s: %d settimeofday rc %d\n", + __func__, __LINE__, rc); +} + +/* COSM client receive message processing */ +static void cosm_client_recv(void) +{ + struct cosm_msg msg; + int rc; + + while (1) { + rc = scif_recv(client_epd, &msg, sizeof(msg), 0); + if (!rc) { + return; + } else if (rc < 0) { + dev_err(&client_spdev->dev, "%s: %d rc %d\n", + __func__, __LINE__, rc); + return; + } + + dev_dbg(&client_spdev->dev, "%s: %d rc %d id 0x%llx\n", + __func__, __LINE__, rc, msg.id); + + switch (msg.id) { + case COSM_MSG_SYNC_TIME: + cosm_set_time(&msg); + break; + case COSM_MSG_SHUTDOWN: + orderly_poweroff(true); + break; + default: + dev_err(&client_spdev->dev, "%s: %d unknown id %lld\n", + __func__, __LINE__, msg.id); + break; + } + } +} + +/* Initiate connection to the COSM server on the host */ +static int cosm_scif_connect(void) +{ + struct scif_port_id port_id; + int i, rc; + + client_epd = scif_open(); + if (!client_epd) { + dev_err(&client_spdev->dev, "%s %d scif_open failed\n", + __func__, __LINE__); + return -ENOMEM; + } + + port_id.node = 0; + port_id.port = SCIF_COSM_LISTEN_PORT; + + for (i = 0; i < COSM_SCIF_MAX_RETRIES; i++) { + rc = scif_connect(client_epd, &port_id); + if (rc < 0) + msleep(1000); + else + break; + } + + if (rc < 0) { + dev_err(&client_spdev->dev, "%s %d scif_connect rc %d\n", + __func__, __LINE__, rc); + scif_close(client_epd); + client_epd = NULL; + } + return rc < 0 ? rc : 0; +} + +/* Close host SCIF connection */ +static void cosm_scif_connect_exit(void) +{ + if (client_epd) { + scif_close(client_epd); + client_epd = NULL; + } +} + +/* + * COSM SCIF client thread function: waits for messages from the host and sends + * a heartbeat to the host + */ +static int cosm_scif_client(void *unused) +{ + struct cosm_msg msg = { .id = COSM_MSG_HEARTBEAT }; + struct scif_pollepd pollepd; + int rc; + + allow_signal(SIGKILL); + + while (!kthread_should_stop()) { + pollepd.epd = client_epd; + pollepd.events = POLLIN; + + rc = scif_poll(&pollepd, 1, COSM_HEARTBEAT_SEND_MSEC); + if (rc < 0) { + if (-EINTR != rc) + dev_err(&client_spdev->dev, + "%s %d scif_poll rc %d\n", + __func__, __LINE__, rc); + continue; + } + + if (pollepd.revents & POLLIN) + cosm_client_recv(); + + msg.id = COSM_MSG_HEARTBEAT; + rc = scif_send(client_epd, &msg, sizeof(msg), SCIF_SEND_BLOCK); + if (rc < 0) + dev_err(&client_spdev->dev, "%s %d scif_send rc %d\n", + __func__, __LINE__, rc); + } + + dev_dbg(&client_spdev->dev, "%s %d Client thread stopped\n", + __func__, __LINE__); + return 0; +} + +static void cosm_scif_probe(struct scif_peer_dev *spdev) +{ + int rc; + + dev_dbg(&spdev->dev, "%s %d: dnode %d\n", + __func__, __LINE__, spdev->dnode); + + /* We are only interested in the host with spdev->dnode == 0 */ + if (spdev->dnode) + return; + + client_spdev = spdev; + rc = cosm_scif_connect(); + if (rc) + goto exit; + + rc = register_reboot_notifier(&cosm_reboot); + if (rc) { + dev_err(&spdev->dev, + "reboot notifier registration failed rc %d\n", rc); + goto connect_exit; + } + + client_thread = kthread_run(cosm_scif_client, NULL, "cosm_client"); + if (IS_ERR(client_thread)) { + rc = PTR_ERR(client_thread); + dev_err(&spdev->dev, "%s %d kthread_run rc %d\n", + __func__, __LINE__, rc); + goto unreg_reboot; + } + return; +unreg_reboot: + unregister_reboot_notifier(&cosm_reboot); +connect_exit: + cosm_scif_connect_exit(); +exit: + client_spdev = NULL; +} + +static void cosm_scif_remove(struct scif_peer_dev *spdev) +{ + int rc; + + dev_dbg(&spdev->dev, "%s %d: dnode %d\n", + __func__, __LINE__, spdev->dnode); + + if (spdev->dnode) + return; + + if (!IS_ERR_OR_NULL(client_thread)) { + rc = send_sig(SIGKILL, client_thread, 0); + if (rc) { + pr_err("%s %d send_sig rc %d\n", + __func__, __LINE__, rc); + return; + } + kthread_stop(client_thread); + } + unregister_reboot_notifier(&cosm_reboot); + cosm_scif_connect_exit(); + client_spdev = NULL; +} + +static struct scif_client scif_client_cosm = { + .name = KBUILD_MODNAME, + .probe = cosm_scif_probe, + .remove = cosm_scif_remove, +}; + +static int __init cosm_client_init(void) +{ + int rc = scif_client_register(&scif_client_cosm); + + if (rc) + pr_err("scif_client_register failed rc %d\n", rc); + return rc; +} + +static void __exit cosm_client_exit(void) +{ + scif_client_unregister(&scif_client_cosm); +} + +module_init(cosm_client_init); +module_exit(cosm_client_exit); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel(R) MIC card OS state management client driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/misc/mic/host/Makefile b/drivers/misc/mic/host/Makefile index c2197f999394..004d3db0f990 100644 --- a/drivers/misc/mic/host/Makefile +++ b/drivers/misc/mic/host/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_INTEL_MIC_HOST) += mic_host.o mic_host-objs := mic_main.o mic_host-objs += mic_x100.o -mic_host-objs += mic_sysfs.o mic_host-objs += mic_smpt.o mic_host-objs += mic_intr.o mic_host-objs += mic_boot.o diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index e5f6a5e7bca1..7845564dff64 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -22,9 +22,9 @@ #include <linux/firmware.h> #include <linux/pci.h> #include <linux/kmod.h> - #include <linux/mic_common.h> #include <linux/mic_bus.h> +#include "../bus/scif_bus.h" #include "../common/mic_dev.h" #include "mic_device.h" #include "mic_smpt.h" @@ -99,7 +99,7 @@ static int __mic_dma_map_sg(struct device *dev, struct scatterlist *sg, int i, j, ret; dma_addr_t da; - ret = dma_map_sg(mdev->sdev->parent, sg, nents, dir); + ret = dma_map_sg(&mdev->pdev->dev, sg, nents, dir); if (ret <= 0) return 0; @@ -115,7 +115,7 @@ err: mic_unmap(mdev, sg_dma_address(s), s->length); sg_dma_address(s) = mic_to_dma_addr(mdev, sg_dma_address(s)); } - dma_unmap_sg(mdev->sdev->parent, sg, nents, dir); + dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir); return 0; } @@ -135,7 +135,7 @@ static void __mic_dma_unmap_sg(struct device *dev, mic_unmap(mdev, sg_dma_address(s), s->length); sg_dma_address(s) = da; } - dma_unmap_sg(mdev->sdev->parent, sg, nents, dir); + dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir); } static struct dma_map_ops __mic_dma_ops = { @@ -270,48 +270,13 @@ static struct mbus_hw_ops mbus_hw_ops = { .ack_interrupt = _mic_ack_interrupt, }; -/** - * mic_reset - Reset the MIC device. - * @mdev: pointer to mic_device instance - */ -static void mic_reset(struct mic_device *mdev) -{ - int i; - -#define MIC_RESET_TO (45) - - reinit_completion(&mdev->reset_wait); - mdev->ops->reset_fw_ready(mdev); - mdev->ops->reset(mdev); - - for (i = 0; i < MIC_RESET_TO; i++) { - if (mdev->ops->is_fw_ready(mdev)) - goto done; - /* - * Resets typically take 10s of seconds to complete. - * Since an MMIO read is required to check if the - * firmware is ready or not, a 1 second delay works nicely. - */ - msleep(1000); - } - mic_set_state(mdev, MIC_RESET_FAILED); -done: - complete_all(&mdev->reset_wait); -} - /* Initialize the MIC bootparams */ void mic_bootparam_init(struct mic_device *mdev) { struct mic_bootparam *bootparam = mdev->dp; bootparam->magic = cpu_to_le32(MIC_MAGIC); - bootparam->c2h_shutdown_db = mdev->shutdown_db; - bootparam->h2c_shutdown_db = -1; bootparam->h2c_config_db = -1; - bootparam->shutdown_status = 0; - bootparam->shutdown_card = 0; - /* Total nodes = number of MICs + 1 for self node */ - bootparam->tot_nodes = atomic_read(&g_num_mics) + 1; bootparam->node_id = mdev->id + 1; bootparam->scif_host_dma_addr = 0x0; bootparam->scif_card_dma_addr = 0x0; @@ -319,6 +284,26 @@ void mic_bootparam_init(struct mic_device *mdev) bootparam->h2c_scif_db = -1; } +static inline struct mic_device *cosmdev_to_mdev(struct cosm_device *cdev) +{ + return dev_get_drvdata(cdev->dev.parent); +} + +static void _mic_reset(struct cosm_device *cdev) +{ + struct mic_device *mdev = cosmdev_to_mdev(cdev); + + mdev->ops->reset_fw_ready(mdev); + mdev->ops->reset(mdev); +} + +static bool _mic_ready(struct cosm_device *cdev) +{ + struct mic_device *mdev = cosmdev_to_mdev(cdev); + + return mdev->ops->is_fw_ready(mdev); +} + /** * mic_request_dma_chans - Request DMA channels * @mdev: pointer to mic_device instance @@ -336,14 +321,14 @@ static int mic_request_dma_chans(struct mic_device *mdev) do { chan = dma_request_channel(mask, mdev->ops->dma_filter, - mdev->sdev->parent); + &mdev->pdev->dev); if (chan) { mdev->dma_ch[mdev->num_dma_ch++] = chan; if (mdev->num_dma_ch >= MIC_MAX_DMA_CHAN) break; } } while (chan); - dev_info(mdev->sdev->parent, "DMA channels # %d\n", mdev->num_dma_ch); + dev_info(&mdev->pdev->dev, "DMA channels # %d\n", mdev->num_dma_ch); return mdev->num_dma_ch; } @@ -365,34 +350,24 @@ static void mic_free_dma_chans(struct mic_device *mdev) } /** - * mic_start - Start the MIC. - * @mdev: pointer to mic_device instance - * @buf: buffer containing boot string including firmware/ramdisk path. + * _mic_start - Start the MIC. + * @cdev: pointer to cosm_device instance + * @id: MIC device id/index provided by COSM used in other drivers like SCIF * * This function prepares an MIC for boot and initiates boot. * RETURNS: An appropriate -ERRNO error value on error, or zero for success. + * + * For all cosm_hw_ops the caller holds a mutex to ensure serialization. */ -int mic_start(struct mic_device *mdev, const char *buf) +static int _mic_start(struct cosm_device *cdev, int id) { + struct mic_device *mdev = cosmdev_to_mdev(cdev); int rc; - mutex_lock(&mdev->mic_mutex); + mic_bootparam_init(mdev); -retry: - if (MIC_OFFLINE != mdev->state) { - rc = -EINVAL; - goto unlock_ret; - } - if (!mdev->ops->is_fw_ready(mdev)) { - mic_reset(mdev); - /* - * The state will either be MIC_OFFLINE if the reset succeeded - * or MIC_RESET_FAILED if the firmware reset failed. - */ - goto retry; - } - mdev->dma_mbdev = mbus_register_device(mdev->sdev->parent, + mdev->dma_mbdev = mbus_register_device(&mdev->pdev->dev, MBUS_DEV_DMA_HOST, &mic_dma_ops, - &mbus_hw_ops, mdev->mmio.va); + &mbus_hw_ops, id, mdev->mmio.va); if (IS_ERR(mdev->dma_mbdev)) { rc = PTR_ERR(mdev->dma_mbdev); goto unlock_ret; @@ -401,16 +376,18 @@ retry: rc = -ENODEV; goto dma_remove; } - mdev->scdev = scif_register_device(mdev->sdev->parent, MIC_SCIF_DEV, + mdev->scdev = scif_register_device(&mdev->pdev->dev, MIC_SCIF_DEV, &__mic_dma_ops, &scif_hw_ops, - mdev->id + 1, 0, &mdev->mmio, + id + 1, 0, &mdev->mmio, &mdev->aper, mdev->dp, NULL, - mdev->dma_ch, mdev->num_dma_ch); + mdev->dma_ch, mdev->num_dma_ch, + true); if (IS_ERR(mdev->scdev)) { rc = PTR_ERR(mdev->scdev); goto dma_free; } - rc = mdev->ops->load_mic_fw(mdev, buf); + + rc = mdev->ops->load_mic_fw(mdev, NULL); if (rc) goto scif_remove; mic_smpt_restore(mdev); @@ -419,7 +396,6 @@ retry: mdev->ops->write_spad(mdev, MIC_DPLO_SPAD, mdev->dp_dma_addr); mdev->ops->write_spad(mdev, MIC_DPHI_SPAD, mdev->dp_dma_addr >> 32); mdev->ops->send_firmware_intr(mdev); - mic_set_state(mdev, MIC_ONLINE); goto unlock_ret; scif_remove: scif_unregister_device(mdev->scdev); @@ -428,198 +404,79 @@ dma_free: dma_remove: mbus_unregister_device(mdev->dma_mbdev); unlock_ret: - mutex_unlock(&mdev->mic_mutex); return rc; } /** - * mic_stop - Prepare the MIC for reset and trigger reset. - * @mdev: pointer to mic_device instance + * _mic_stop - Prepare the MIC for reset and trigger reset. + * @cdev: pointer to cosm_device instance * @force: force a MIC to reset even if it is already offline. * * RETURNS: None. */ -void mic_stop(struct mic_device *mdev, bool force) -{ - mutex_lock(&mdev->mic_mutex); - if (MIC_OFFLINE != mdev->state || force) { - scif_unregister_device(mdev->scdev); - mic_virtio_reset_devices(mdev); - mic_free_dma_chans(mdev); - mbus_unregister_device(mdev->dma_mbdev); - mic_bootparam_init(mdev); - mic_reset(mdev); - if (MIC_RESET_FAILED == mdev->state) - goto unlock; - mic_set_shutdown_status(mdev, MIC_NOP); - if (MIC_SUSPENDED != mdev->state) - mic_set_state(mdev, MIC_OFFLINE); - } -unlock: - mutex_unlock(&mdev->mic_mutex); -} - -/** - * mic_shutdown - Initiate MIC shutdown. - * @mdev: pointer to mic_device instance - * - * RETURNS: None. - */ -void mic_shutdown(struct mic_device *mdev) +static void _mic_stop(struct cosm_device *cdev, bool force) { - struct mic_bootparam *bootparam = mdev->dp; - s8 db = bootparam->h2c_shutdown_db; - - mutex_lock(&mdev->mic_mutex); - if (MIC_ONLINE == mdev->state && db != -1) { - bootparam->shutdown_card = 1; - mdev->ops->send_intr(mdev, db); - mic_set_state(mdev, MIC_SHUTTING_DOWN); - } - mutex_unlock(&mdev->mic_mutex); -} - -/** - * mic_shutdown_work - Handle shutdown interrupt from MIC. - * @work: The work structure. - * - * This work is scheduled whenever the host has received a shutdown - * interrupt from the MIC. - */ -void mic_shutdown_work(struct work_struct *work) -{ - struct mic_device *mdev = container_of(work, struct mic_device, - shutdown_work); - struct mic_bootparam *bootparam = mdev->dp; - - mutex_lock(&mdev->mic_mutex); - mic_set_shutdown_status(mdev, bootparam->shutdown_status); - bootparam->shutdown_status = 0; + struct mic_device *mdev = cosmdev_to_mdev(cdev); /* - * if state is MIC_SUSPENDED, OSPM suspend is in progress. We do not - * change the state here so as to prevent users from booting the card - * during and after the suspend operation. + * Since SCIF handles card shutdown and reset (using COSM), it will + * will be the first to be registered and the last to be + * unregistered. */ - if (MIC_SHUTTING_DOWN != mdev->state && - MIC_SUSPENDED != mdev->state) - mic_set_state(mdev, MIC_SHUTTING_DOWN); - mutex_unlock(&mdev->mic_mutex); + mic_virtio_reset_devices(mdev); + scif_unregister_device(mdev->scdev); + mic_free_dma_chans(mdev); + mbus_unregister_device(mdev->dma_mbdev); + mic_bootparam_init(mdev); } -/** - * mic_reset_trigger_work - Trigger MIC reset. - * @work: The work structure. - * - * This work is scheduled whenever the host wants to reset the MIC. - */ -void mic_reset_trigger_work(struct work_struct *work) +static ssize_t _mic_family(struct cosm_device *cdev, char *buf) { - struct mic_device *mdev = container_of(work, struct mic_device, - reset_trigger_work); + struct mic_device *mdev = cosmdev_to_mdev(cdev); + static const char *family[MIC_FAMILY_LAST] = { "x100", "Unknown" }; - mic_stop(mdev, false); + return scnprintf(buf, PAGE_SIZE, "%s\n", family[mdev->family]); } -/** - * mic_complete_resume - Complete MIC Resume after an OSPM suspend/hibernate - * event. - * @mdev: pointer to mic_device instance - * - * RETURNS: None. - */ -void mic_complete_resume(struct mic_device *mdev) +static ssize_t _mic_stepping(struct cosm_device *cdev, char *buf) { - if (mdev->state != MIC_SUSPENDED) { - dev_warn(mdev->sdev->parent, "state %d should be %d\n", - mdev->state, MIC_SUSPENDED); - return; - } - - /* Make sure firmware is ready */ - if (!mdev->ops->is_fw_ready(mdev)) - mic_stop(mdev, true); + struct mic_device *mdev = cosmdev_to_mdev(cdev); + const char *string = "??"; - mutex_lock(&mdev->mic_mutex); - mic_set_state(mdev, MIC_OFFLINE); - mutex_unlock(&mdev->mic_mutex); -} - -/** - * mic_prepare_suspend - Handle suspend notification for the MIC device. - * @mdev: pointer to mic_device instance - * - * RETURNS: None. - */ -void mic_prepare_suspend(struct mic_device *mdev) -{ - unsigned long timeout; - -#define MIC_SUSPEND_TIMEOUT (60 * HZ) - - mutex_lock(&mdev->mic_mutex); - switch (mdev->state) { - case MIC_OFFLINE: - /* - * Card is already offline. Set state to MIC_SUSPENDED - * to prevent users from booting the card. - */ - mic_set_state(mdev, MIC_SUSPENDED); - mutex_unlock(&mdev->mic_mutex); + switch (mdev->stepping) { + case MIC_A0_STEP: + string = "A0"; break; - case MIC_ONLINE: - /* - * Card is online. Set state to MIC_SUSPENDING and notify - * MIC user space daemon which will issue card - * shutdown and reset. - */ - mic_set_state(mdev, MIC_SUSPENDING); - mutex_unlock(&mdev->mic_mutex); - timeout = wait_for_completion_timeout(&mdev->reset_wait, - MIC_SUSPEND_TIMEOUT); - /* Force reset the card if the shutdown completion timed out */ - if (!timeout) { - mutex_lock(&mdev->mic_mutex); - mic_set_state(mdev, MIC_SUSPENDED); - mutex_unlock(&mdev->mic_mutex); - mic_stop(mdev, true); - } + case MIC_B0_STEP: + string = "B0"; + break; + case MIC_B1_STEP: + string = "B1"; break; - case MIC_SHUTTING_DOWN: - /* - * Card is shutting down. Set state to MIC_SUSPENDED - * to prevent further boot of the card. - */ - mic_set_state(mdev, MIC_SUSPENDED); - mutex_unlock(&mdev->mic_mutex); - timeout = wait_for_completion_timeout(&mdev->reset_wait, - MIC_SUSPEND_TIMEOUT); - /* Force reset the card if the shutdown completion timed out */ - if (!timeout) - mic_stop(mdev, true); + case MIC_C0_STEP: + string = "C0"; break; default: - mutex_unlock(&mdev->mic_mutex); break; } + return scnprintf(buf, PAGE_SIZE, "%s\n", string); } -/** - * mic_suspend - Initiate MIC suspend. Suspend merely issues card shutdown. - * @mdev: pointer to mic_device instance - * - * RETURNS: None. - */ -void mic_suspend(struct mic_device *mdev) +static struct mic_mw *_mic_aper(struct cosm_device *cdev) { - struct mic_bootparam *bootparam = mdev->dp; - s8 db = bootparam->h2c_shutdown_db; + struct mic_device *mdev = cosmdev_to_mdev(cdev); - mutex_lock(&mdev->mic_mutex); - if (MIC_SUSPENDING == mdev->state && db != -1) { - bootparam->shutdown_card = 1; - mdev->ops->send_intr(mdev, db); - mic_set_state(mdev, MIC_SUSPENDED); - } - mutex_unlock(&mdev->mic_mutex); + return &mdev->aper; } + +struct cosm_hw_ops cosm_hw_ops = { + .reset = _mic_reset, + .force_reset = _mic_reset, + .post_reset = NULL, + .ready = _mic_ready, + .start = _mic_start, + .stop = _mic_stop, + .family = _mic_family, + .stepping = _mic_stepping, + .aper = _mic_aper, +}; diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c index 3c9ea4896f3c..10581600777a 100644 --- a/drivers/misc/mic/host/mic_debugfs.c +++ b/drivers/misc/mic/host/mic_debugfs.c @@ -31,71 +31,6 @@ /* Debugfs parent dir */ static struct dentry *mic_dbg; -/** - * mic_log_buf_show - Display MIC kernel log buffer. - * - * log_buf addr/len is read from System.map by user space - * and populated in sysfs entries. - */ -static int mic_log_buf_show(struct seq_file *s, void *unused) -{ - void __iomem *log_buf_va; - int __iomem *log_buf_len_va; - struct mic_device *mdev = s->private; - void *kva; - int size; - unsigned long aper_offset; - - if (!mdev || !mdev->log_buf_addr || !mdev->log_buf_len) - goto done; - /* - * Card kernel will never be relocated and any kernel text/data mapping - * can be translated to phys address by subtracting __START_KERNEL_map. - */ - aper_offset = (unsigned long)mdev->log_buf_len - __START_KERNEL_map; - log_buf_len_va = mdev->aper.va + aper_offset; - aper_offset = (unsigned long)mdev->log_buf_addr - __START_KERNEL_map; - log_buf_va = mdev->aper.va + aper_offset; - size = ioread32(log_buf_len_va); - - kva = kmalloc(size, GFP_KERNEL); - if (!kva) - goto done; - mutex_lock(&mdev->mic_mutex); - memcpy_fromio(kva, log_buf_va, size); - switch (mdev->state) { - case MIC_ONLINE: - /* Fall through */ - case MIC_SHUTTING_DOWN: - seq_write(s, kva, size); - break; - default: - break; - } - mutex_unlock(&mdev->mic_mutex); - kfree(kva); -done: - return 0; -} - -static int mic_log_buf_open(struct inode *inode, struct file *file) -{ - return single_open(file, mic_log_buf_show, inode->i_private); -} - -static int mic_log_buf_release(struct inode *inode, struct file *file) -{ - return single_release(inode, file); -} - -static const struct file_operations log_buf_ops = { - .owner = THIS_MODULE, - .open = mic_log_buf_open, - .read = seq_read, - .llseek = seq_lseek, - .release = mic_log_buf_release -}; - static int mic_smpt_show(struct seq_file *s, void *pos) { int i; @@ -138,32 +73,6 @@ static const struct file_operations smpt_file_ops = { .release = mic_smpt_debug_release }; -static int mic_soft_reset_show(struct seq_file *s, void *pos) -{ - struct mic_device *mdev = s->private; - - mic_stop(mdev, true); - return 0; -} - -static int mic_soft_reset_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, mic_soft_reset_show, inode->i_private); -} - -static int mic_soft_reset_debug_release(struct inode *inode, struct file *file) -{ - return single_release(inode, file); -} - -static const struct file_operations soft_reset_ops = { - .owner = THIS_MODULE, - .open = mic_soft_reset_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = mic_soft_reset_debug_release -}; - static int mic_post_code_show(struct seq_file *s, void *pos) { struct mic_device *mdev = s->private; @@ -204,18 +113,8 @@ static int mic_dp_show(struct seq_file *s, void *pos) seq_printf(s, "Bootparam: magic 0x%x\n", bootparam->magic); - seq_printf(s, "Bootparam: h2c_shutdown_db %d\n", - bootparam->h2c_shutdown_db); seq_printf(s, "Bootparam: h2c_config_db %d\n", bootparam->h2c_config_db); - seq_printf(s, "Bootparam: c2h_shutdown_db %d\n", - bootparam->c2h_shutdown_db); - seq_printf(s, "Bootparam: shutdown_status %d\n", - bootparam->shutdown_status); - seq_printf(s, "Bootparam: shutdown_card %d\n", - bootparam->shutdown_card); - seq_printf(s, "Bootparam: tot_nodes %d\n", - bootparam->tot_nodes); seq_printf(s, "Bootparam: node_id %d\n", bootparam->node_id); seq_printf(s, "Bootparam: c2h_scif_db %d\n", @@ -392,8 +291,7 @@ static int mic_msi_irq_info_show(struct seq_file *s, void *pos) int i, j; u16 entry; u16 vector; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; if (pci_dev_msi_enabled(pdev)) { for (i = 0; i < mdev->irq_info.num_vectors; i++) { @@ -454,20 +352,18 @@ static const struct file_operations msi_irq_info_ops = { */ void mic_create_debug_dir(struct mic_device *mdev) { + char name[16]; + if (!mic_dbg) return; - mdev->dbg_dir = debugfs_create_dir(dev_name(mdev->sdev), mic_dbg); + scnprintf(name, sizeof(name), "mic%d", mdev->id); + mdev->dbg_dir = debugfs_create_dir(name, mic_dbg); if (!mdev->dbg_dir) return; - debugfs_create_file("log_buf", 0444, mdev->dbg_dir, mdev, &log_buf_ops); - debugfs_create_file("smpt", 0444, mdev->dbg_dir, mdev, &smpt_file_ops); - debugfs_create_file("soft_reset", 0444, mdev->dbg_dir, mdev, - &soft_reset_ops); - debugfs_create_file("post_code", 0444, mdev->dbg_dir, mdev, &post_code_ops); diff --git a/drivers/misc/mic/host/mic_device.h b/drivers/misc/mic/host/mic_device.h index 01a7555aa648..461184a12fbb 100644 --- a/drivers/misc/mic/host/mic_device.h +++ b/drivers/misc/mic/host/mic_device.h @@ -26,21 +26,12 @@ #include <linux/notifier.h> #include <linux/irqreturn.h> #include <linux/dmaengine.h> +#include <linux/miscdevice.h> #include <linux/mic_bus.h> #include "../bus/scif_bus.h" +#include "../bus/cosm_bus.h" #include "mic_intr.h" -/* The maximum number of MIC devices supported in a single host system. */ -#define MIC_MAX_NUM_DEVS 256 - -/** - * enum mic_hw_family - The hardware family to which a device belongs. - */ -enum mic_hw_family { - MIC_FAMILY_X100 = 0, - MIC_FAMILY_UNKNOWN -}; - /** * enum mic_stepping - MIC stepping ids. */ @@ -51,6 +42,8 @@ enum mic_stepping { MIC_C0_STEP = 0x20, }; +extern struct cosm_hw_ops cosm_hw_ops; + /** * struct mic_device - MIC device information for each card. * @@ -60,8 +53,7 @@ enum mic_stepping { * @ops: MIC HW specific operations. * @id: The unique device id for this MIC device. * @stepping: Stepping ID. - * @attr_group: Pointer to list of sysfs attribute groups. - * @sdev: Device for sysfs entries. + * @pdev: Underlying PCI device. * @mic_mutex: Mutex for synchronizing access to mic_device. * @intr_ops: HW specific interrupt operations. * @smpt_ops: Hardware specific SMPT operations. @@ -69,30 +61,17 @@ enum mic_stepping { * @intr_info: H/W specific interrupt information. * @irq_info: The OS specific irq information * @dbg_dir: debugfs directory of this MIC device. - * @cmdline: Kernel command line. - * @firmware: Firmware file name. - * @ramdisk: Ramdisk file name. - * @bootmode: Boot mode i.e. "linux" or "elf" for flash updates. * @bootaddr: MIC boot address. - * @reset_trigger_work: Work for triggering reset requests. - * @shutdown_work: Work for handling shutdown interrupts. - * @state: MIC state. - * @shutdown_status: MIC status reported by card for shutdown/crashes. - * @state_sysfs: Sysfs dirent for notifying ring 3 about MIC state changes. - * @reset_wait: Waitqueue for sleeping while reset completes. - * @log_buf_addr: Log buffer address for MIC. - * @log_buf_len: Log buffer length address for MIC. * @dp: virtio device page * @dp_dma_addr: virtio device page DMA address. - * @shutdown_db: shutdown doorbell. - * @shutdown_cookie: shutdown cookie. - * @cdev: Character device for MIC. + * @name: name for the misc char device + * @miscdev: registered misc char device * @vdev_list: list of virtio devices. - * @pm_notifier: Handles PM notifications from the OS. * @dma_mbdev: MIC BUS DMA device. * @dma_ch - Array of DMA channels * @num_dma_ch - Number of DMA channels available * @scdev: SCIF device on the SCIF virtual bus. + * @cosm_dev: COSM device */ struct mic_device { struct mic_mw mmio; @@ -101,8 +80,7 @@ struct mic_device { struct mic_hw_ops *ops; int id; enum mic_stepping stepping; - const struct attribute_group **attr_group; - struct device *sdev; + struct pci_dev *pdev; struct mutex mic_mutex; struct mic_hw_intr_ops *intr_ops; struct mic_smpt_ops *smpt_ops; @@ -110,30 +88,17 @@ struct mic_device { struct mic_intr_info *intr_info; struct mic_irq_info irq_info; struct dentry *dbg_dir; - char *cmdline; - char *firmware; - char *ramdisk; - char *bootmode; u32 bootaddr; - struct work_struct reset_trigger_work; - struct work_struct shutdown_work; - u8 state; - u8 shutdown_status; - struct kernfs_node *state_sysfs; - struct completion reset_wait; - void *log_buf_addr; - int *log_buf_len; void *dp; dma_addr_t dp_dma_addr; - int shutdown_db; - struct mic_irq *shutdown_cookie; - struct cdev cdev; + char name[16]; + struct miscdevice miscdev; struct list_head vdev_list; - struct notifier_block pm_notifier; struct mbus_device *dma_mbdev; struct dma_chan *dma_ch[MIC_MAX_DMA_CHAN]; int num_dma_ch; struct scif_hw_dev *scdev; + struct cosm_device *cosm_dev; }; /** @@ -199,38 +164,9 @@ mic_mmio_write(struct mic_mw *mw, u32 val, u32 offset) iowrite32(val, mw->va + offset); } -static inline struct dma_chan *mic_request_dma_chan(struct mic_device *mdev) -{ - dma_cap_mask_t mask; - struct dma_chan *chan; - - dma_cap_zero(mask); - dma_cap_set(DMA_MEMCPY, mask); - chan = dma_request_channel(mask, mdev->ops->dma_filter, - mdev->sdev->parent); - if (chan) - return chan; - dev_err(mdev->sdev->parent, "%s %d unable to acquire channel\n", - __func__, __LINE__); - return NULL; -} - -void mic_sysfs_init(struct mic_device *mdev); -int mic_start(struct mic_device *mdev, const char *buf); -void mic_stop(struct mic_device *mdev, bool force); -void mic_shutdown(struct mic_device *mdev); -void mic_reset_delayed_work(struct work_struct *work); -void mic_reset_trigger_work(struct work_struct *work); -void mic_shutdown_work(struct work_struct *work); void mic_bootparam_init(struct mic_device *mdev); -void mic_set_state(struct mic_device *mdev, u8 state); -void mic_set_shutdown_status(struct mic_device *mdev, u8 status); void mic_create_debug_dir(struct mic_device *dev); void mic_delete_debug_dir(struct mic_device *dev); void __init mic_init_debugfs(void); void mic_exit_debugfs(void); -void mic_prepare_suspend(struct mic_device *mdev); -void mic_complete_resume(struct mic_device *mdev); -void mic_suspend(struct mic_device *mdev); -extern atomic_t g_num_mics; #endif diff --git a/drivers/misc/mic/host/mic_fops.c b/drivers/misc/mic/host/mic_fops.c index 85776d7327f3..8cc1d90cd949 100644 --- a/drivers/misc/mic/host/mic_fops.c +++ b/drivers/misc/mic/host/mic_fops.c @@ -30,8 +30,8 @@ int mic_open(struct inode *inode, struct file *f) { struct mic_vdev *mvdev; - struct mic_device *mdev = container_of(inode->i_cdev, - struct mic_device, cdev); + struct mic_device *mdev = container_of(f->private_data, + struct mic_device, miscdev); mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL); if (!mvdev) diff --git a/drivers/misc/mic/host/mic_intr.c b/drivers/misc/mic/host/mic_intr.c index b4ca6c884d19..08ca3e372fa4 100644 --- a/drivers/misc/mic/host/mic_intr.c +++ b/drivers/misc/mic/host/mic_intr.c @@ -30,8 +30,7 @@ static irqreturn_t mic_thread_fn(int irq, void *dev) struct mic_intr_info *intr_info = mdev->intr_info; struct mic_irq_info *irq_info = &mdev->irq_info; struct mic_intr_cb *intr_cb; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; int i; spin_lock(&irq_info->mic_thread_lock); @@ -57,8 +56,7 @@ static irqreturn_t mic_interrupt(int irq, void *dev) struct mic_intr_info *intr_info = mdev->intr_info; struct mic_irq_info *irq_info = &mdev->irq_info; struct mic_intr_cb *intr_cb; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; u32 mask; int i; @@ -83,7 +81,7 @@ static irqreturn_t mic_interrupt(int irq, void *dev) /* Return the interrupt offset from the index. Index is 0 based. */ static u16 mic_map_src_to_offset(struct mic_device *mdev, - int intr_src, enum mic_intr_type type) + int intr_src, enum mic_intr_type type) { if (type >= MIC_NUM_INTR_TYPES) return MIC_NUM_OFFSETS; @@ -214,7 +212,7 @@ static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev) mdev->irq_info.msix_entries[i].entry = i; rc = pci_enable_msix_exact(pdev, mdev->irq_info.msix_entries, - MIC_MIN_MSIX); + MIC_MIN_MSIX); if (rc) { dev_dbg(&pdev->dev, "Error enabling MSIx. rc = %d\n", rc); goto err_enable_msix; @@ -229,7 +227,7 @@ static int mic_setup_msix(struct mic_device *mdev, struct pci_dev *pdev) goto err_nomem2; } - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "%d MSIx irqs setup\n", mdev->irq_info.num_vectors); return 0; err_nomem2: @@ -281,7 +279,6 @@ static void mic_release_callbacks(struct mic_device *mdev) spin_lock(&mdev->irq_info.mic_thread_lock); spin_lock_irqsave(&mdev->irq_info.mic_intr_lock, flags); for (i = 0; i < MIC_NUM_OFFSETS; i++) { - if (list_empty(&mdev->irq_info.cb_list[i])) break; @@ -443,12 +440,11 @@ mic_request_threaded_irq(struct mic_device *mdev, unsigned long cookie = 0; u16 entry; struct mic_intr_cb *intr_cb; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; offset = mic_map_src_to_offset(mdev, intr_src, type); if (offset >= MIC_NUM_OFFSETS) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "Error mapping index %d to a valid source id.\n", intr_src); rc = -EINVAL; @@ -458,7 +454,7 @@ mic_request_threaded_irq(struct mic_device *mdev, if (mdev->irq_info.num_vectors > 1) { msix = mic_get_available_vector(mdev); if (!msix) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "No MSIx vectors available for use.\n"); rc = -ENOSPC; goto err; @@ -467,7 +463,7 @@ mic_request_threaded_irq(struct mic_device *mdev, rc = request_threaded_irq(msix->vector, handler, thread_fn, 0, name, data); if (rc) { - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "request irq failed rc = %d\n", rc); goto err; } @@ -476,13 +472,13 @@ mic_request_threaded_irq(struct mic_device *mdev, mdev->intr_ops->program_msi_to_src_map(mdev, entry, offset, true); cookie = MK_COOKIE(entry, offset); - dev_dbg(mdev->sdev->parent, "irq: %d assigned for src: %d\n", + dev_dbg(&mdev->pdev->dev, "irq: %d assigned for src: %d\n", msix->vector, intr_src); } else { intr_cb = mic_register_intr_callback(mdev, offset, handler, thread_fn, data); if (IS_ERR(intr_cb)) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "No available callback entries for use\n"); rc = PTR_ERR(intr_cb); goto err; @@ -495,7 +491,7 @@ mic_request_threaded_irq(struct mic_device *mdev, entry, offset, true); } cookie = MK_COOKIE(entry, intr_cb->cb_id); - dev_dbg(mdev->sdev->parent, "callback %d registered for src: %d\n", + dev_dbg(&mdev->pdev->dev, "callback %d registered for src: %d\n", intr_cb->cb_id, intr_src); } return (struct mic_irq *)cookie; @@ -515,20 +511,19 @@ err: * returns: none. */ void mic_free_irq(struct mic_device *mdev, - struct mic_irq *cookie, void *data) + struct mic_irq *cookie, void *data) { u32 offset; u32 entry; u8 src_id; unsigned int irq; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; entry = GET_ENTRY((unsigned long)cookie); offset = GET_OFFSET((unsigned long)cookie); if (mdev->irq_info.num_vectors > 1) { if (entry >= mdev->irq_info.num_vectors) { - dev_warn(mdev->sdev->parent, + dev_warn(&mdev->pdev->dev, "entry %d should be < num_irq %d\n", entry, mdev->irq_info.num_vectors); return; @@ -539,12 +534,12 @@ void mic_free_irq(struct mic_device *mdev, mdev->intr_ops->program_msi_to_src_map(mdev, entry, offset, false); - dev_dbg(mdev->sdev->parent, "irq: %d freed\n", irq); + dev_dbg(&mdev->pdev->dev, "irq: %d freed\n", irq); } else { irq = pdev->irq; src_id = mic_unregister_intr_callback(mdev, offset); if (src_id >= MIC_NUM_OFFSETS) { - dev_warn(mdev->sdev->parent, "Error unregistering callback\n"); + dev_warn(&mdev->pdev->dev, "Error unregistering callback\n"); return; } if (pci_dev_msi_enabled(pdev)) { @@ -552,7 +547,7 @@ void mic_free_irq(struct mic_device *mdev, mdev->intr_ops->program_msi_to_src_map(mdev, entry, src_id, false); } - dev_dbg(mdev->sdev->parent, "callback %d unregistered for src: %d\n", + dev_dbg(&mdev->pdev->dev, "callback %d unregistered for src: %d\n", offset, src_id); } } @@ -579,7 +574,7 @@ int mic_setup_interrupts(struct mic_device *mdev, struct pci_dev *pdev) rc = mic_setup_intx(mdev, pdev); if (rc) { - dev_err(mdev->sdev->parent, "no usable interrupts\n"); + dev_err(&mdev->pdev->dev, "no usable interrupts\n"); return rc; } done: @@ -635,8 +630,7 @@ void mic_free_interrupts(struct mic_device *mdev, struct pci_dev *pdev) void mic_intr_restore(struct mic_device *mdev) { int entry, offset; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; if (!pci_dev_msi_enabled(pdev)) return; diff --git a/drivers/misc/mic/host/mic_main.c b/drivers/misc/mic/host/mic_main.c index 456462932151..153894e7ed5b 100644 --- a/drivers/misc/mic/host/mic_main.c +++ b/drivers/misc/mic/host/mic_main.c @@ -16,17 +16,11 @@ * the file called "COPYING". * * Intel MIC Host driver. - * - * Global TODO's across the driver to be added after initial base - * patches are accepted upstream: - * 1) Enable DMA support. - * 2) Enable per vring interrupt support. */ #include <linux/fs.h> #include <linux/module.h> #include <linux/pci.h> #include <linux/poll.h> -#include <linux/suspend.h> #include <linux/mic_common.h> #include "../common/mic_dev.h" @@ -63,12 +57,8 @@ MODULE_DEVICE_TABLE(pci, mic_pci_tbl); /* ID allocator for MIC devices */ static struct ida g_mic_ida; -/* Class of MIC devices for sysfs accessibility. */ -static struct class *g_mic_class; /* Base device node number for MIC devices */ static dev_t g_mic_devno; -/* Track the total number of MIC devices */ -atomic_t g_num_mics; static const struct file_operations mic_fops = { .open = mic_open, @@ -83,17 +73,14 @@ static const struct file_operations mic_fops = { static int mic_dp_init(struct mic_device *mdev) { mdev->dp = kzalloc(MIC_DP_SIZE, GFP_KERNEL); - if (!mdev->dp) { - dev_err(mdev->sdev->parent, "%s %d err %d\n", - __func__, __LINE__, -ENOMEM); + if (!mdev->dp) return -ENOMEM; - } mdev->dp_dma_addr = mic_map_single(mdev, mdev->dp, MIC_DP_SIZE); if (mic_map_error(mdev->dp_dma_addr)) { kfree(mdev->dp); - dev_err(mdev->sdev->parent, "%s %d err %d\n", + dev_err(&mdev->pdev->dev, "%s %d err %d\n", __func__, __LINE__, -ENOMEM); return -ENOMEM; } @@ -110,30 +97,6 @@ static void mic_dp_uninit(struct mic_device *mdev) } /** - * mic_shutdown_db - Shutdown doorbell interrupt handler. - */ -static irqreturn_t mic_shutdown_db(int irq, void *data) -{ - struct mic_device *mdev = data; - struct mic_bootparam *bootparam = mdev->dp; - - mdev->ops->intr_workarounds(mdev); - - switch (bootparam->shutdown_status) { - case MIC_HALTED: - case MIC_POWER_OFF: - case MIC_RESTART: - /* Fall through */ - case MIC_CRASHED: - schedule_work(&mdev->shutdown_work); - break; - default: - break; - }; - return IRQ_HANDLED; -} - -/** * mic_ops_init: Initialize HW specific operation tables. * * @mdev: pointer to mic_device instance @@ -190,43 +153,6 @@ static enum mic_hw_family mic_get_family(struct pci_dev *pdev) } /** -* mic_pm_notifier: Notifier callback function that handles -* PM notifications. -* -* @notifier_block: The notifier structure. -* @pm_event: The event for which the driver was notified. -* @unused: Meaningless. Always NULL. -* -* returns NOTIFY_DONE -*/ -static int mic_pm_notifier(struct notifier_block *notifier, - unsigned long pm_event, void *unused) -{ - struct mic_device *mdev = container_of(notifier, - struct mic_device, pm_notifier); - - switch (pm_event) { - case PM_HIBERNATION_PREPARE: - /* Fall through */ - case PM_SUSPEND_PREPARE: - mic_prepare_suspend(mdev); - break; - case PM_POST_HIBERNATION: - /* Fall through */ - case PM_POST_SUSPEND: - /* Fall through */ - case PM_POST_RESTORE: - mic_complete_resume(mdev); - break; - case PM_RESTORE_PREPARE: - break; - default: - break; - } - return NOTIFY_DONE; -} - -/** * mic_device_init - Allocates and initializes the MIC device structure * * @mdev: pointer to mic_device instance @@ -234,52 +160,16 @@ static int mic_pm_notifier(struct notifier_block *notifier, * * returns none. */ -static int +static void mic_device_init(struct mic_device *mdev, struct pci_dev *pdev) { - int rc; - + mdev->pdev = pdev; mdev->family = mic_get_family(pdev); mdev->stepping = pdev->revision; mic_ops_init(mdev); - mic_sysfs_init(mdev); mutex_init(&mdev->mic_mutex); mdev->irq_info.next_avail_src = 0; - INIT_WORK(&mdev->reset_trigger_work, mic_reset_trigger_work); - INIT_WORK(&mdev->shutdown_work, mic_shutdown_work); - init_completion(&mdev->reset_wait); INIT_LIST_HEAD(&mdev->vdev_list); - mdev->pm_notifier.notifier_call = mic_pm_notifier; - rc = register_pm_notifier(&mdev->pm_notifier); - if (rc) { - dev_err(&pdev->dev, "register_pm_notifier failed rc %d\n", - rc); - goto register_pm_notifier_fail; - } - return 0; -register_pm_notifier_fail: - flush_work(&mdev->shutdown_work); - flush_work(&mdev->reset_trigger_work); - return rc; -} - -/** - * mic_device_uninit - Frees resources allocated during mic_device_init(..) - * - * @mdev: pointer to mic_device instance - * - * returns none - */ -static void mic_device_uninit(struct mic_device *mdev) -{ - /* The cmdline sysfs entry might have allocated cmdline */ - kfree(mdev->cmdline); - kfree(mdev->firmware); - kfree(mdev->ramdisk); - kfree(mdev->bootmode); - flush_work(&mdev->reset_trigger_work); - flush_work(&mdev->shutdown_work); - unregister_pm_notifier(&mdev->pm_notifier); } /** @@ -291,7 +181,7 @@ static void mic_device_uninit(struct mic_device *mdev) * returns 0 on success, < 0 on failure. */ static int mic_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) + const struct pci_device_id *ent) { int rc; struct mic_device *mdev; @@ -309,16 +199,12 @@ static int mic_probe(struct pci_dev *pdev, goto ida_fail; } - rc = mic_device_init(mdev, pdev); - if (rc) { - dev_err(&pdev->dev, "mic_device_init failed rc %d\n", rc); - goto device_init_fail; - } + mic_device_init(mdev, pdev); rc = pci_enable_device(pdev); if (rc) { dev_err(&pdev->dev, "failed to enable pci device.\n"); - goto uninit_device; + goto ida_remove; } pci_set_master(pdev); @@ -367,62 +253,39 @@ static int mic_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, mdev); - mdev->sdev = device_create_with_groups(g_mic_class, &pdev->dev, - MKDEV(MAJOR(g_mic_devno), mdev->id), NULL, - mdev->attr_group, "mic%d", mdev->id); - if (IS_ERR(mdev->sdev)) { - rc = PTR_ERR(mdev->sdev); - dev_err(&pdev->dev, - "device_create_with_groups failed rc %d\n", rc); - goto smpt_uninit; - } - mdev->state_sysfs = sysfs_get_dirent(mdev->sdev->kobj.sd, "state"); - if (!mdev->state_sysfs) { - rc = -ENODEV; - dev_err(&pdev->dev, "sysfs_get_dirent failed rc %d\n", rc); - goto destroy_device; - } - rc = mic_dp_init(mdev); if (rc) { dev_err(&pdev->dev, "mic_dp_init failed rc %d\n", rc); - goto sysfs_put; - } - mutex_lock(&mdev->mic_mutex); - - mdev->shutdown_db = mic_next_db(mdev); - mdev->shutdown_cookie = mic_request_threaded_irq(mdev, mic_shutdown_db, - NULL, "shutdown-interrupt", mdev, - mdev->shutdown_db, MIC_INTR_DB); - if (IS_ERR(mdev->shutdown_cookie)) { - rc = PTR_ERR(mdev->shutdown_cookie); - mutex_unlock(&mdev->mic_mutex); - goto dp_uninit; + goto smpt_uninit; } - mutex_unlock(&mdev->mic_mutex); mic_bootparam_init(mdev); mic_create_debug_dir(mdev); - cdev_init(&mdev->cdev, &mic_fops); - mdev->cdev.owner = THIS_MODULE; - rc = cdev_add(&mdev->cdev, MKDEV(MAJOR(g_mic_devno), mdev->id), 1); + + mdev->miscdev.minor = MISC_DYNAMIC_MINOR; + snprintf(mdev->name, sizeof(mdev->name), "mic%d", mdev->id); + mdev->miscdev.name = mdev->name; + mdev->miscdev.fops = &mic_fops; + mdev->miscdev.parent = &mdev->pdev->dev; + rc = misc_register(&mdev->miscdev); if (rc) { - dev_err(&pdev->dev, "cdev_add err id %d rc %d\n", mdev->id, rc); + dev_err(&pdev->dev, "misc_register err id %d rc %d\n", + mdev->id, rc); goto cleanup_debug_dir; } - atomic_inc(&g_num_mics); + + mdev->cosm_dev = cosm_register_device(&mdev->pdev->dev, &cosm_hw_ops); + if (IS_ERR(mdev->cosm_dev)) { + rc = PTR_ERR(mdev->cosm_dev); + dev_err(&pdev->dev, "cosm_add_device failed rc %d\n", rc); + goto misc_dereg; + } return 0; +misc_dereg: + misc_deregister(&mdev->miscdev); cleanup_debug_dir: mic_delete_debug_dir(mdev); - mutex_lock(&mdev->mic_mutex); - mic_free_irq(mdev, mdev->shutdown_cookie, mdev); - mutex_unlock(&mdev->mic_mutex); -dp_uninit: mic_dp_uninit(mdev); -sysfs_put: - sysfs_put(mdev->state_sysfs); -destroy_device: - device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id)); smpt_uninit: mic_smpt_uninit(mdev); free_interrupts: @@ -435,9 +298,7 @@ release_regions: pci_release_regions(pdev); disable_device: pci_disable_device(pdev); -uninit_device: - mic_device_uninit(mdev); -device_init_fail: +ida_remove: ida_simple_remove(&g_mic_ida, mdev->id); ida_fail: kfree(mdev); @@ -461,22 +322,14 @@ static void mic_remove(struct pci_dev *pdev) if (!mdev) return; - mic_stop(mdev, false); - atomic_dec(&g_num_mics); - cdev_del(&mdev->cdev); + cosm_unregister_device(mdev->cosm_dev); + misc_deregister(&mdev->miscdev); mic_delete_debug_dir(mdev); - mutex_lock(&mdev->mic_mutex); - mic_free_irq(mdev, mdev->shutdown_cookie, mdev); - mutex_unlock(&mdev->mic_mutex); - flush_work(&mdev->shutdown_work); mic_dp_uninit(mdev); - sysfs_put(mdev->state_sysfs); - device_destroy(g_mic_class, MKDEV(MAJOR(g_mic_devno), mdev->id)); mic_smpt_uninit(mdev); mic_free_interrupts(mdev, pdev); - iounmap(mdev->mmio.va); iounmap(mdev->aper.va); - mic_device_uninit(mdev); + iounmap(mdev->mmio.va); pci_release_regions(pdev); pci_disable_device(pdev); ida_simple_remove(&g_mic_ida, mdev->id); @@ -495,32 +348,23 @@ static int __init mic_init(void) int ret; ret = alloc_chrdev_region(&g_mic_devno, 0, - MIC_MAX_NUM_DEVS, mic_driver_name); + MIC_MAX_NUM_DEVS, mic_driver_name); if (ret) { pr_err("alloc_chrdev_region failed ret %d\n", ret); goto error; } - g_mic_class = class_create(THIS_MODULE, mic_driver_name); - if (IS_ERR(g_mic_class)) { - ret = PTR_ERR(g_mic_class); - pr_err("class_create failed ret %d\n", ret); - goto cleanup_chrdev; - } - mic_init_debugfs(); ida_init(&g_mic_ida); ret = pci_register_driver(&mic_driver); if (ret) { pr_err("pci_register_driver failed ret %d\n", ret); - goto cleanup_debugfs; + goto cleanup_chrdev; } return ret; -cleanup_debugfs: +cleanup_chrdev: ida_destroy(&g_mic_ida); mic_exit_debugfs(); - class_destroy(g_mic_class); -cleanup_chrdev: unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS); error: return ret; @@ -531,7 +375,6 @@ static void __exit mic_exit(void) pci_unregister_driver(&mic_driver); ida_destroy(&g_mic_ida); mic_exit_debugfs(); - class_destroy(g_mic_class); unregister_chrdev_region(g_mic_devno, MIC_MAX_NUM_DEVS); } diff --git a/drivers/misc/mic/host/mic_smpt.c b/drivers/misc/mic/host/mic_smpt.c index cec82034875f..c3f958580fb0 100644 --- a/drivers/misc/mic/host/mic_smpt.c +++ b/drivers/misc/mic/host/mic_smpt.c @@ -76,7 +76,7 @@ mic_is_system_addr(struct mic_device *mdev, dma_addr_t pa) /* Populate an SMPT entry and update the reference counts. */ static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr, - int entries, struct mic_device *mdev) + int entries, struct mic_device *mdev) { struct mic_smpt_info *smpt_info = mdev->smpt; int i; @@ -97,7 +97,7 @@ static void mic_add_smpt_entry(int spt, s64 *ref, u64 addr, * for a given DMA address and size. */ static dma_addr_t mic_smpt_op(struct mic_device *mdev, u64 dma_addr, - int entries, s64 *ref, size_t size) + int entries, s64 *ref, size_t size) { int spt; int ae = 0; @@ -148,7 +148,7 @@ found: * and the starting smpt address */ static int mic_get_smpt_ref_count(struct mic_device *mdev, dma_addr_t dma_addr, - size_t size, s64 *ref, u64 *smpt_start) + size_t size, s64 *ref, u64 *smpt_start) { u64 start = dma_addr; u64 end = dma_addr + size; @@ -181,7 +181,7 @@ dma_addr_t mic_to_dma_addr(struct mic_device *mdev, dma_addr_t mic_addr) dma_addr_t dma_addr; if (!mic_is_system_addr(mdev, mic_addr)) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "mic_addr is invalid. mic_addr = 0x%llx\n", mic_addr); return -EINVAL; } @@ -218,7 +218,7 @@ dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size) return mic_addr; num_entries = mic_get_smpt_ref_count(mdev, dma_addr, size, - ref, &smpt_start); + ref, &smpt_start); /* Set the smpt table appropriately and get 16G aligned mic address */ mic_addr = mic_smpt_op(mdev, smpt_start, num_entries, ref, size); @@ -231,7 +231,7 @@ dma_addr_t mic_map(struct mic_device *mdev, dma_addr_t dma_addr, size_t size) * else generate mic_addr by adding the 16G offset in dma_addr */ if (!mic_addr && MIC_FAMILY_X100 == mdev->family) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "mic_map failed dma_addr 0x%llx size 0x%lx\n", dma_addr, size); return mic_addr; @@ -264,7 +264,7 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) return; if (!mic_is_system_addr(mdev, mic_addr)) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "invalid address: 0x%llx\n", mic_addr); return; } @@ -284,7 +284,7 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) for (i = spt; i < spt + num_smpt; i++) { smpt_info->entry[i].ref_count -= ref[i - spt]; if (smpt_info->entry[i].ref_count < 0) - dev_warn(mdev->sdev->parent, + dev_warn(&mdev->pdev->dev, "ref count for entry %d is negative\n", i); } spin_unlock_irqrestore(&smpt_info->smpt_lock, flags); @@ -307,15 +307,14 @@ void mic_unmap(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size) { dma_addr_t mic_addr = 0; - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; dma_addr_t dma_addr = pci_map_single(pdev, va, size, PCI_DMA_BIDIRECTIONAL); if (!pci_dma_mapping_error(pdev, dma_addr)) { mic_addr = mic_map(mdev, dma_addr, size); if (!mic_addr) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "mic_map failed dma_addr 0x%llx size 0x%lx\n", dma_addr, size); pci_unmap_single(pdev, dma_addr, @@ -339,8 +338,7 @@ dma_addr_t mic_map_single(struct mic_device *mdev, void *va, size_t size) void mic_unmap_single(struct mic_device *mdev, dma_addr_t mic_addr, size_t size) { - struct pci_dev *pdev = container_of(mdev->sdev->parent, - struct pci_dev, dev); + struct pci_dev *pdev = mdev->pdev; dma_addr_t dma_addr = mic_to_dma_addr(mdev, mic_addr); mic_unmap(mdev, mic_addr, size); pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); @@ -399,18 +397,18 @@ void mic_smpt_uninit(struct mic_device *mdev) struct mic_smpt_info *smpt_info = mdev->smpt; int i; - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "nodeid %d SMPT ref count %lld map %lld unmap %lld\n", mdev->id, smpt_info->ref_count, smpt_info->map_count, smpt_info->unmap_count); for (i = 0; i < smpt_info->info.num_reg; i++) { - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "SMPT entry[%d] dma_addr = 0x%llx ref_count = %lld\n", i, smpt_info->entry[i].dma_addr, smpt_info->entry[i].ref_count); if (smpt_info->entry[i].ref_count) - dev_warn(mdev->sdev->parent, + dev_warn(&mdev->pdev->dev, "ref count for entry %d is not zero\n", i); } kfree(smpt_info->entry); diff --git a/drivers/misc/mic/host/mic_sysfs.c b/drivers/misc/mic/host/mic_sysfs.c deleted file mode 100644 index 6dd864e4a617..000000000000 --- a/drivers/misc/mic/host/mic_sysfs.c +++ /dev/null @@ -1,459 +0,0 @@ -/* - * Intel MIC Platform Software Stack (MPSS) - * - * Copyright(c) 2013 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * Intel MIC Host driver. - * - */ -#include <linux/pci.h> - -#include <linux/mic_common.h> -#include "../common/mic_dev.h" -#include "mic_device.h" - -/* - * A state-to-string lookup table, for exposing a human readable state - * via sysfs. Always keep in sync with enum mic_states - */ -static const char * const mic_state_string[] = { - [MIC_OFFLINE] = "offline", - [MIC_ONLINE] = "online", - [MIC_SHUTTING_DOWN] = "shutting_down", - [MIC_RESET_FAILED] = "reset_failed", - [MIC_SUSPENDING] = "suspending", - [MIC_SUSPENDED] = "suspended", -}; - -/* - * A shutdown-status-to-string lookup table, for exposing a human - * readable state via sysfs. Always keep in sync with enum mic_shutdown_status - */ -static const char * const mic_shutdown_status_string[] = { - [MIC_NOP] = "nop", - [MIC_CRASHED] = "crashed", - [MIC_HALTED] = "halted", - [MIC_POWER_OFF] = "poweroff", - [MIC_RESTART] = "restart", -}; - -void mic_set_shutdown_status(struct mic_device *mdev, u8 shutdown_status) -{ - dev_dbg(mdev->sdev->parent, "Shutdown Status %s -> %s\n", - mic_shutdown_status_string[mdev->shutdown_status], - mic_shutdown_status_string[shutdown_status]); - mdev->shutdown_status = shutdown_status; -} - -void mic_set_state(struct mic_device *mdev, u8 state) -{ - dev_dbg(mdev->sdev->parent, "State %s -> %s\n", - mic_state_string[mdev->state], - mic_state_string[state]); - mdev->state = state; - sysfs_notify_dirent(mdev->state_sysfs); -} - -static ssize_t -family_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - static const char x100[] = "x100"; - static const char unknown[] = "Unknown"; - const char *card = NULL; - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - switch (mdev->family) { - case MIC_FAMILY_X100: - card = x100; - break; - default: - card = unknown; - break; - } - return scnprintf(buf, PAGE_SIZE, "%s\n", card); -} -static DEVICE_ATTR_RO(family); - -static ssize_t -stepping_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - char *string = "??"; - - if (!mdev) - return -EINVAL; - - switch (mdev->stepping) { - case MIC_A0_STEP: - string = "A0"; - break; - case MIC_B0_STEP: - string = "B0"; - break; - case MIC_B1_STEP: - string = "B1"; - break; - case MIC_C0_STEP: - string = "C0"; - break; - default: - break; - } - return scnprintf(buf, PAGE_SIZE, "%s\n", string); -} -static DEVICE_ATTR_RO(stepping); - -static ssize_t -state_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev || mdev->state >= MIC_LAST) - return -EINVAL; - - return scnprintf(buf, PAGE_SIZE, "%s\n", - mic_state_string[mdev->state]); -} - -static ssize_t -state_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - int rc = 0; - struct mic_device *mdev = dev_get_drvdata(dev->parent); - if (!mdev) - return -EINVAL; - if (sysfs_streq(buf, "boot")) { - rc = mic_start(mdev, buf); - if (rc) { - dev_err(mdev->sdev->parent, - "mic_boot failed rc %d\n", rc); - count = rc; - } - goto done; - } - - if (sysfs_streq(buf, "reset")) { - schedule_work(&mdev->reset_trigger_work); - goto done; - } - - if (sysfs_streq(buf, "shutdown")) { - mic_shutdown(mdev); - goto done; - } - - if (sysfs_streq(buf, "suspend")) { - mic_suspend(mdev); - goto done; - } - - count = -EINVAL; -done: - return count; -} -static DEVICE_ATTR_RW(state); - -static ssize_t shutdown_status_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev || mdev->shutdown_status >= MIC_STATUS_LAST) - return -EINVAL; - - return scnprintf(buf, PAGE_SIZE, "%s\n", - mic_shutdown_status_string[mdev->shutdown_status]); -} -static DEVICE_ATTR_RO(shutdown_status); - -static ssize_t -cmdline_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - char *cmdline; - - if (!mdev) - return -EINVAL; - - cmdline = mdev->cmdline; - - if (cmdline) - return scnprintf(buf, PAGE_SIZE, "%s\n", cmdline); - return 0; -} - -static ssize_t -cmdline_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - mutex_lock(&mdev->mic_mutex); - kfree(mdev->cmdline); - - mdev->cmdline = kmalloc(count + 1, GFP_KERNEL); - if (!mdev->cmdline) { - count = -ENOMEM; - goto unlock; - } - - strncpy(mdev->cmdline, buf, count); - - if (mdev->cmdline[count - 1] == '\n') - mdev->cmdline[count - 1] = '\0'; - else - mdev->cmdline[count] = '\0'; -unlock: - mutex_unlock(&mdev->mic_mutex); - return count; -} -static DEVICE_ATTR_RW(cmdline); - -static ssize_t -firmware_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - char *firmware; - - if (!mdev) - return -EINVAL; - - firmware = mdev->firmware; - - if (firmware) - return scnprintf(buf, PAGE_SIZE, "%s\n", firmware); - return 0; -} - -static ssize_t -firmware_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - mutex_lock(&mdev->mic_mutex); - kfree(mdev->firmware); - - mdev->firmware = kmalloc(count + 1, GFP_KERNEL); - if (!mdev->firmware) { - count = -ENOMEM; - goto unlock; - } - strncpy(mdev->firmware, buf, count); - - if (mdev->firmware[count - 1] == '\n') - mdev->firmware[count - 1] = '\0'; - else - mdev->firmware[count] = '\0'; -unlock: - mutex_unlock(&mdev->mic_mutex); - return count; -} -static DEVICE_ATTR_RW(firmware); - -static ssize_t -ramdisk_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - char *ramdisk; - - if (!mdev) - return -EINVAL; - - ramdisk = mdev->ramdisk; - - if (ramdisk) - return scnprintf(buf, PAGE_SIZE, "%s\n", ramdisk); - return 0; -} - -static ssize_t -ramdisk_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - mutex_lock(&mdev->mic_mutex); - kfree(mdev->ramdisk); - - mdev->ramdisk = kmalloc(count + 1, GFP_KERNEL); - if (!mdev->ramdisk) { - count = -ENOMEM; - goto unlock; - } - - strncpy(mdev->ramdisk, buf, count); - - if (mdev->ramdisk[count - 1] == '\n') - mdev->ramdisk[count - 1] = '\0'; - else - mdev->ramdisk[count] = '\0'; -unlock: - mutex_unlock(&mdev->mic_mutex); - return count; -} -static DEVICE_ATTR_RW(ramdisk); - -static ssize_t -bootmode_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - char *bootmode; - - if (!mdev) - return -EINVAL; - - bootmode = mdev->bootmode; - - if (bootmode) - return scnprintf(buf, PAGE_SIZE, "%s\n", bootmode); - return 0; -} - -static ssize_t -bootmode_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - if (!sysfs_streq(buf, "linux") && !sysfs_streq(buf, "elf")) - return -EINVAL; - - mutex_lock(&mdev->mic_mutex); - kfree(mdev->bootmode); - - mdev->bootmode = kmalloc(count + 1, GFP_KERNEL); - if (!mdev->bootmode) { - count = -ENOMEM; - goto unlock; - } - - strncpy(mdev->bootmode, buf, count); - - if (mdev->bootmode[count - 1] == '\n') - mdev->bootmode[count - 1] = '\0'; - else - mdev->bootmode[count] = '\0'; -unlock: - mutex_unlock(&mdev->mic_mutex); - return count; -} -static DEVICE_ATTR_RW(bootmode); - -static ssize_t -log_buf_addr_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_addr); -} - -static ssize_t -log_buf_addr_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - int ret; - unsigned long addr; - - if (!mdev) - return -EINVAL; - - ret = kstrtoul(buf, 16, &addr); - if (ret) - goto exit; - - mdev->log_buf_addr = (void *)addr; - ret = count; -exit: - return ret; -} -static DEVICE_ATTR_RW(log_buf_addr); - -static ssize_t -log_buf_len_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - - if (!mdev) - return -EINVAL; - - return scnprintf(buf, PAGE_SIZE, "%p\n", mdev->log_buf_len); -} - -static ssize_t -log_buf_len_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct mic_device *mdev = dev_get_drvdata(dev->parent); - int ret; - unsigned long addr; - - if (!mdev) - return -EINVAL; - - ret = kstrtoul(buf, 16, &addr); - if (ret) - goto exit; - - mdev->log_buf_len = (int *)addr; - ret = count; -exit: - return ret; -} -static DEVICE_ATTR_RW(log_buf_len); - -static struct attribute *mic_default_attrs[] = { - &dev_attr_family.attr, - &dev_attr_stepping.attr, - &dev_attr_state.attr, - &dev_attr_shutdown_status.attr, - &dev_attr_cmdline.attr, - &dev_attr_firmware.attr, - &dev_attr_ramdisk.attr, - &dev_attr_bootmode.attr, - &dev_attr_log_buf_addr.attr, - &dev_attr_log_buf_len.attr, - - NULL -}; - -ATTRIBUTE_GROUPS(mic_default); - -void mic_sysfs_init(struct mic_device *mdev) -{ - mdev->attr_group = mic_default_groups; -} diff --git a/drivers/misc/mic/host/mic_virtio.c b/drivers/misc/mic/host/mic_virtio.c index cc08e9f733c9..58b107a24a8b 100644 --- a/drivers/misc/mic/host/mic_virtio.c +++ b/drivers/misc/mic/host/mic_virtio.c @@ -23,7 +23,6 @@ #include <linux/uaccess.h> #include <linux/dmaengine.h> #include <linux/mic_common.h> - #include "../common/mic_dev.h" #include "mic_device.h" #include "mic_smpt.h" @@ -62,7 +61,7 @@ static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst, } error: if (err) - dev_err(mdev->sdev->parent, "%s %d err %d\n", + dev_err(&mdev->pdev->dev, "%s %d err %d\n", __func__, __LINE__, err); return err; } @@ -440,7 +439,7 @@ void mic_virtio_reset_devices(struct mic_device *mdev) struct list_head *pos, *tmp; struct mic_vdev *mvdev; - dev_dbg(mdev->sdev->parent, "%s\n", __func__); + dev_dbg(&mdev->pdev->dev, "%s\n", __func__); list_for_each_safe(pos, tmp, &mdev->vdev_list) { mvdev = list_entry(pos, struct mic_vdev, list); @@ -686,7 +685,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev, mvr->head = USHRT_MAX; mvr->mvdev = mvdev; mvr->vrh.notify = mic_notify; - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "%s %d index %d va %p info %p vr_size 0x%x\n", __func__, __LINE__, i, vr->va, vr->info, vr_size); mvr->buf = (void *)__get_free_pages(GFP_KERNEL, @@ -704,7 +703,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev, mvdev->virtio_db, MIC_INTR_DB); if (IS_ERR(mvdev->virtio_cookie)) { ret = PTR_ERR(mvdev->virtio_cookie); - dev_dbg(mdev->sdev->parent, "request irq failed\n"); + dev_dbg(&mdev->pdev->dev, "request irq failed\n"); goto err; } @@ -720,7 +719,7 @@ int mic_virtio_add_device(struct mic_vdev *mvdev, smp_wmb(); dd->type = type; - dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type); + dev_dbg(&mdev->pdev->dev, "Added virtio device id %d\n", dd->type); db = bootparam->h2c_config_db; if (db != -1) @@ -755,7 +754,7 @@ void mic_virtio_del_device(struct mic_vdev *mvdev) db = bootparam->h2c_config_db; if (db == -1) goto skip_hot_remove; - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "Requesting hot remove id %d\n", mvdev->virtio_id); mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE; mdev->ops->send_intr(mdev, db); @@ -765,7 +764,7 @@ void mic_virtio_del_device(struct mic_vdev *mvdev) if (ret) break; } - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "Device id %d config_change %d guest_ack %d retry %d\n", mvdev->virtio_id, mvdev->dc->config_change, mvdev->dc->guest_ack, retry); @@ -794,7 +793,7 @@ skip_hot_remove: tmp_mvdev = list_entry(pos, struct mic_vdev, list); if (tmp_mvdev == mvdev) { list_del(pos); - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "Removing virtio device id %d\n", mvdev->virtio_id); break; diff --git a/drivers/misc/mic/host/mic_virtio.h b/drivers/misc/mic/host/mic_virtio.h index d574efb853d9..a80631f2790d 100644 --- a/drivers/misc/mic/host/mic_virtio.h +++ b/drivers/misc/mic/host/mic_virtio.h @@ -124,7 +124,7 @@ void mic_bh_handler(struct work_struct *work); /* Helper API to obtain the MIC PCIe device */ static inline struct device *mic_dev(struct mic_vdev *mvdev) { - return mvdev->mdev->sdev->parent; + return &mvdev->mdev->pdev->dev; } /* Helper API to check if a virtio device is initialized */ diff --git a/drivers/misc/mic/host/mic_x100.c b/drivers/misc/mic/host/mic_x100.c index 3341e90dede4..8118ac48c764 100644 --- a/drivers/misc/mic/host/mic_x100.c +++ b/drivers/misc/mic/host/mic_x100.c @@ -43,7 +43,7 @@ static void mic_x100_write_spad(struct mic_device *mdev, unsigned int idx, u32 val) { - dev_dbg(mdev->sdev->parent, "Writing 0x%x to scratch pad index %d\n", + dev_dbg(&mdev->pdev->dev, "Writing 0x%x to scratch pad index %d\n", val, idx); mic_mmio_write(&mdev->mmio, val, MIC_X100_SBOX_BASE_ADDRESS + @@ -66,7 +66,7 @@ mic_x100_read_spad(struct mic_device *mdev, unsigned int idx) MIC_X100_SBOX_BASE_ADDRESS + MIC_X100_SBOX_SPAD0 + idx * 4); - dev_dbg(mdev->sdev->parent, + dev_dbg(&mdev->pdev->dev, "Reading 0x%x from scratch pad index %d\n", val, idx); return val; } @@ -126,7 +126,7 @@ static void mic_x100_disable_interrupts(struct mic_device *mdev) * @mdev: pointer to mic_device instance */ static void mic_x100_send_sbox_intr(struct mic_device *mdev, - int doorbell) + int doorbell) { struct mic_mw *mw = &mdev->mmio; u64 apic_icr_offset = MIC_X100_SBOX_APICICR0 + doorbell * 8; @@ -147,7 +147,7 @@ static void mic_x100_send_sbox_intr(struct mic_device *mdev, * @mdev: pointer to mic_device instance */ static void mic_x100_send_rdmasr_intr(struct mic_device *mdev, - int doorbell) + int doorbell) { int rdmasr_offset = MIC_X100_SBOX_RDMASR0 + (doorbell << 2); /* Ensure that the interrupt is ordered w.r.t. previous stores. */ @@ -359,15 +359,14 @@ mic_x100_load_command_line(struct mic_device *mdev, const struct firmware *fw) boot_mem = mdev->aper.len >> 20; buf = kzalloc(CMDLINE_SIZE, GFP_KERNEL); - if (!buf) { - dev_err(mdev->sdev->parent, - "%s %d allocation failed\n", __func__, __LINE__); + if (!buf) return -ENOMEM; - } + len += snprintf(buf, CMDLINE_SIZE - len, " mem=%dM", boot_mem); - if (mdev->cmdline) - snprintf(buf + len, CMDLINE_SIZE - len, " %s", mdev->cmdline); + if (mdev->cosm_dev->cmdline) + snprintf(buf + len, CMDLINE_SIZE - len, " %s", + mdev->cosm_dev->cmdline); memcpy_toio(cmd_line_va, buf, strlen(buf) + 1); kfree(buf); return 0; @@ -386,12 +385,11 @@ mic_x100_load_ramdisk(struct mic_device *mdev) int rc; struct boot_params __iomem *bp = mdev->aper.va + mdev->bootaddr; - rc = request_firmware(&fw, - mdev->ramdisk, mdev->sdev->parent); + rc = request_firmware(&fw, mdev->cosm_dev->ramdisk, &mdev->pdev->dev); if (rc < 0) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "ramdisk request_firmware failed: %d %s\n", - rc, mdev->ramdisk); + rc, mdev->cosm_dev->ramdisk); goto error; } /* @@ -423,10 +421,10 @@ mic_x100_get_boot_addr(struct mic_device *mdev) scratch2 = mdev->ops->read_spad(mdev, MIC_X100_DOWNLOAD_INFO); boot_addr = MIC_X100_SPAD2_DOWNLOAD_ADDR(scratch2); - dev_dbg(mdev->sdev->parent, "%s %d boot_addr 0x%x\n", + dev_dbg(&mdev->pdev->dev, "%s %d boot_addr 0x%x\n", __func__, __LINE__, boot_addr); if (boot_addr > (1 << 31)) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "incorrect bootaddr 0x%x\n", boot_addr); rc = -EINVAL; @@ -454,37 +452,37 @@ mic_x100_load_firmware(struct mic_device *mdev, const char *buf) if (rc) goto error; /* load OS */ - rc = request_firmware(&fw, mdev->firmware, mdev->sdev->parent); + rc = request_firmware(&fw, mdev->cosm_dev->firmware, &mdev->pdev->dev); if (rc < 0) { - dev_err(mdev->sdev->parent, + dev_err(&mdev->pdev->dev, "ramdisk request_firmware failed: %d %s\n", - rc, mdev->firmware); + rc, mdev->cosm_dev->firmware); goto error; } if (mdev->bootaddr > mdev->aper.len - fw->size) { rc = -EINVAL; - dev_err(mdev->sdev->parent, "%s %d rc %d bootaddr 0x%x\n", + dev_err(&mdev->pdev->dev, "%s %d rc %d bootaddr 0x%x\n", __func__, __LINE__, rc, mdev->bootaddr); release_firmware(fw); goto error; } memcpy_toio(mdev->aper.va + mdev->bootaddr, fw->data, fw->size); mdev->ops->write_spad(mdev, MIC_X100_FW_SIZE, fw->size); - if (!strcmp(mdev->bootmode, "elf")) + if (!strcmp(mdev->cosm_dev->bootmode, "flash")) goto done; /* load command line */ rc = mic_x100_load_command_line(mdev, fw); if (rc) { - dev_err(mdev->sdev->parent, "%s %d rc %d\n", + dev_err(&mdev->pdev->dev, "%s %d rc %d\n", __func__, __LINE__, rc); goto error; } release_firmware(fw); /* load ramdisk */ - if (mdev->ramdisk) + if (mdev->cosm_dev->ramdisk) rc = mic_x100_load_ramdisk(mdev); error: - dev_dbg(mdev->sdev->parent, "%s %d rc %d\n", __func__, __LINE__, rc); + dev_dbg(&mdev->pdev->dev, "%s %d rc %d\n", __func__, __LINE__, rc); done: return rc; } diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile index bf10bb7e2b91..29cfc3e51ac9 100644 --- a/drivers/misc/mic/scif/Makefile +++ b/drivers/misc/mic/scif/Makefile @@ -13,3 +13,8 @@ scif-objs += scif_epd.o scif-objs += scif_rb.o scif-objs += scif_nodeqp.o scif-objs += scif_nm.o +scif-objs += scif_dma.o +scif-objs += scif_fence.o +scif-objs += scif_mmap.o +scif-objs += scif_rma.o +scif-objs += scif_rma_list.o diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c index f39d3135a9ef..ddc9e4b08b5c 100644 --- a/drivers/misc/mic/scif/scif_api.c +++ b/drivers/misc/mic/scif/scif_api.c @@ -37,9 +37,21 @@ enum conn_async_state { ASYNC_CONN_FLUSH_WORK /* async work flush in progress */ }; +/* + * File operations for anonymous inode file associated with a SCIF endpoint, + * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the + * poll API in the kernel and these take in a struct file *. Since a struct + * file is not available to kernel mode SCIF, it uses an anonymous file for + * this purpose. + */ +const struct file_operations scif_anon_fops = { + .owner = THIS_MODULE, +}; + scif_epd_t scif_open(void) { struct scif_endpt *ep; + int err; might_sleep(); ep = kzalloc(sizeof(*ep), GFP_KERNEL); @@ -50,15 +62,22 @@ scif_epd_t scif_open(void) if (!ep->qp_info.qp) goto err_qp_alloc; + err = scif_anon_inode_getfile(ep); + if (err) + goto err_anon_inode; + spin_lock_init(&ep->lock); mutex_init(&ep->sendlock); mutex_init(&ep->recvlock); + scif_rma_ep_init(ep); ep->state = SCIFEP_UNBOUND; dev_dbg(scif_info.mdev.this_device, "SCIFAPI open: ep %p success\n", ep); return ep; +err_anon_inode: + kfree(ep->qp_info.qp); err_qp_alloc: kfree(ep); err_ep_alloc: @@ -166,8 +185,11 @@ int scif_close(scif_epd_t epd) switch (oldstate) { case SCIFEP_ZOMBIE: + dev_err(scif_info.mdev.this_device, + "SCIFAPI close: zombie state unexpected\n"); case SCIFEP_DISCONNECTED: spin_unlock(&ep->lock); + scif_unregister_all_windows(epd); /* Remove from the disconnected list */ mutex_lock(&scif_info.connlock); list_for_each_safe(pos, tmpq, &scif_info.disconnected) { @@ -189,6 +211,7 @@ int scif_close(scif_epd_t epd) case SCIFEP_CLOSING: { spin_unlock(&ep->lock); + scif_unregister_all_windows(epd); scif_disconnect_ep(ep); break; } @@ -200,7 +223,7 @@ int scif_close(scif_epd_t epd) struct scif_endpt *aep; spin_unlock(&ep->lock); - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); /* remove from listen list */ list_for_each_safe(pos, tmpq, &scif_info.listen) { @@ -222,7 +245,7 @@ int scif_close(scif_epd_t epd) break; } } - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); mutex_lock(&scif_info.connlock); list_for_each_safe(pos, tmpq, &scif_info.connected) { tmpep = list_entry(pos, @@ -242,13 +265,13 @@ int scif_close(scif_epd_t epd) } mutex_unlock(&scif_info.connlock); scif_teardown_ep(aep); - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD); ep->acceptcnt--; } spin_lock(&ep->lock); - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); /* Remove and reject any pending connection requests. */ while (ep->conreqcnt) { @@ -279,6 +302,7 @@ int scif_close(scif_epd_t epd) } } scif_put_port(ep->port.port); + scif_anon_inode_fput(ep); scif_teardown_ep(ep); scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD); return 0; @@ -409,9 +433,9 @@ int scif_listen(scif_epd_t epd, int backlog) scif_teardown_ep(ep); ep->qp_info.qp = NULL; - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); list_add_tail(&ep->list, &scif_info.listen); - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); return 0; } EXPORT_SYMBOL_GPL(scif_listen); @@ -450,6 +474,13 @@ static int scif_conn_func(struct scif_endpt *ep) struct scifmsg msg; struct device *spdev; + err = scif_reserve_dma_chan(ep); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + ep->state = SCIFEP_BOUND; + goto connect_error_simple; + } /* Initiate the first part of the endpoint QP setup */ err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset, SCIF_ENDPT_QP_SIZE, ep->remote_dev); @@ -558,8 +589,10 @@ void scif_conn_handler(struct work_struct *work) list_del(&ep->conn_list); } spin_unlock(&scif_info.nb_connect_lock); - if (ep) + if (ep) { ep->conn_err = scif_conn_func(ep); + wake_up_interruptible(&ep->conn_pend_wq); + } } while (ep); } @@ -660,6 +693,7 @@ int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block) ep->remote_dev = &scif_dev[dst->node]; ep->qp_info.qp->magic = SCIFEP_MAGIC; if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + init_waitqueue_head(&ep->conn_pend_wq); spin_lock(&scif_info.nb_connect_lock); list_add_tail(&ep->conn_list, &scif_info.nb_connect_list); spin_unlock(&scif_info.nb_connect_lock); @@ -782,12 +816,25 @@ retry_connection: cep->remote_dev = &scif_dev[peer->node]; cep->remote_ep = conreq->msg.payload[0]; + scif_rma_ep_init(cep); + + err = scif_reserve_dma_chan(cep); + if (err) { + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + goto scif_accept_error_qpalloc; + } + cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL); if (!cep->qp_info.qp) { err = -ENOMEM; goto scif_accept_error_qpalloc; } + err = scif_anon_inode_getfile(cep); + if (err) + goto scif_accept_error_anon_inode; + cep->qp_info.qp->magic = SCIFEP_MAGIC; spdev = scif_get_peer_dev(cep->remote_dev); if (IS_ERR(spdev)) { @@ -858,6 +905,8 @@ retry: spin_unlock(&cep->lock); return 0; scif_accept_error_map: + scif_anon_inode_fput(cep); +scif_accept_error_anon_inode: scif_teardown_ep(cep); scif_accept_error_qpalloc: kfree(cep); @@ -1247,6 +1296,134 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags) } EXPORT_SYMBOL_GPL(scif_recv); +static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq, + poll_table *p, struct scif_endpt *ep) +{ + /* + * Because poll_wait makes a GFP_KERNEL allocation, give up the lock + * and regrab it afterwards. Because the endpoint state might have + * changed while the lock was given up, the state must be checked + * again after re-acquiring the lock. The code in __scif_pollfd(..) + * does this. + */ + spin_unlock(&ep->lock); + poll_wait(f, wq, p); + spin_lock(&ep->lock); +} + +unsigned int +__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep) +{ + unsigned int mask = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]); + + spin_lock(&ep->lock); + + /* Endpoint is waiting for a non-blocking connect to complete */ + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep); + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + if (ep->state == SCIFEP_CONNECTED || + ep->state == SCIFEP_DISCONNECTED || + ep->conn_err) + mask |= POLLOUT; + goto exit; + } + } + + /* Endpoint is listening for incoming connection requests */ + if (ep->state == SCIFEP_LISTENING) { + _scif_poll_wait(f, &ep->conwq, wait, ep); + if (ep->state == SCIFEP_LISTENING) { + if (ep->conreqcnt) + mask |= POLLIN; + goto exit; + } + } + + /* Endpoint is connected or disconnected */ + if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) { + if (poll_requested_events(wait) & POLLIN) + _scif_poll_wait(f, &ep->recvwq, wait, ep); + if (poll_requested_events(wait) & POLLOUT) + _scif_poll_wait(f, &ep->sendwq, wait, ep); + if (ep->state == SCIFEP_CONNECTED || + ep->state == SCIFEP_DISCONNECTED) { + /* Data can be read without blocking */ + if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1)) + mask |= POLLIN; + /* Data can be written without blocking */ + if (scif_rb_space(&ep->qp_info.qp->outbound_q)) + mask |= POLLOUT; + /* Return POLLHUP if endpoint is disconnected */ + if (ep->state == SCIFEP_DISCONNECTED) + mask |= POLLHUP; + goto exit; + } + } + + /* Return POLLERR if the endpoint is in none of the above states */ + mask |= POLLERR; +exit: + spin_unlock(&ep->lock); + return mask; +} + +/** + * scif_poll() - Kernel mode SCIF poll + * @ufds: Array of scif_pollepd structures containing the end points + * and events to poll on + * @nfds: Size of the ufds array + * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout + * + * The code flow in this function is based on do_poll(..) in select.c + * + * Returns the number of endpoints which have pending events or 0 in + * the event of a timeout. If a signal is used for wake up, -EINTR is + * returned. + */ +int +scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs) +{ + struct poll_wqueues table; + poll_table *pt; + int i, mask, count = 0, timed_out = timeout_msecs == 0; + u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT + : msecs_to_jiffies(timeout_msecs); + + poll_initwait(&table); + pt = &table.pt; + while (1) { + for (i = 0; i < nfds; i++) { + pt->_key = ufds[i].events | POLLERR | POLLHUP; + mask = __scif_pollfd(ufds[i].epd->anon, + pt, ufds[i].epd); + mask &= ufds[i].events | POLLERR | POLLHUP; + if (mask) { + count++; + pt->_qproc = NULL; + } + ufds[i].revents = mask; + } + pt->_qproc = NULL; + if (!count) { + count = table.error; + if (signal_pending(current)) + count = -EINTR; + } + if (count || timed_out) + break; + + if (!schedule_timeout_interruptible(timeout)) + timed_out = 1; + } + poll_freewait(&table); + return count; +} +EXPORT_SYMBOL_GPL(scif_poll); + int scif_get_node_ids(u16 *nodes, int len, u16 *self) { int online = 0; @@ -1274,3 +1451,46 @@ int scif_get_node_ids(u16 *nodes, int len, u16 *self) return online; } EXPORT_SYMBOL_GPL(scif_get_node_ids); + +static int scif_add_client_dev(struct device *dev, struct subsys_interface *si) +{ + struct scif_client *client = + container_of(si, struct scif_client, si); + struct scif_peer_dev *spdev = + container_of(dev, struct scif_peer_dev, dev); + + if (client->probe) + client->probe(spdev); + return 0; +} + +static void scif_remove_client_dev(struct device *dev, + struct subsys_interface *si) +{ + struct scif_client *client = + container_of(si, struct scif_client, si); + struct scif_peer_dev *spdev = + container_of(dev, struct scif_peer_dev, dev); + + if (client->remove) + client->remove(spdev); +} + +void scif_client_unregister(struct scif_client *client) +{ + subsys_interface_unregister(&client->si); +} +EXPORT_SYMBOL_GPL(scif_client_unregister); + +int scif_client_register(struct scif_client *client) +{ + struct subsys_interface *si = &client->si; + + si->name = client->name; + si->subsys = &scif_peer_bus; + si->add_dev = scif_add_client_dev; + si->remove_dev = scif_remove_client_dev; + + return subsys_interface_register(&client->si); +} +EXPORT_SYMBOL_GPL(scif_client_register); diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c index 51f14e2a1196..6884dad97e17 100644 --- a/drivers/misc/mic/scif/scif_debugfs.c +++ b/drivers/misc/mic/scif/scif_debugfs.c @@ -62,10 +62,87 @@ static const struct file_operations scif_dev_ops = { .release = scif_dev_test_release }; -void __init scif_init_debugfs(void) +static void scif_display_window(struct scif_window *window, struct seq_file *s) +{ + int j; + struct scatterlist *sg; + scif_pinned_pages_t pin = window->pinned_pages; + + seq_printf(s, "window %p type %d temp %d offset 0x%llx ", + window, window->type, window->temp, window->offset); + seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ", + window->nr_pages, window->nr_contig_chunks, window->prot); + seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ", + window->ref_count, window->magic, window->peer_window); + seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n", + window->unreg_state, window->va_for_temp); + + for (j = 0; j < window->nr_contig_chunks; j++) + seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j, + window->dma_addr[j], window->num_pages[j]); + + if (window->type == SCIF_WINDOW_SELF && pin) + for (j = 0; j < window->nr_pages; j++) + seq_printf(s, "page[%d] = pinned_pages %p address %p\n", + j, pin->pages[j], + page_address(pin->pages[j])); + + if (window->st) + for_each_sg(window->st->sgl, sg, window->st->nents, j) + seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n", + j, sg_dma_address(sg), sg_dma_len(sg)); +} + +static void scif_display_all_windows(struct list_head *head, struct seq_file *s) { - struct dentry *d; + struct list_head *item; + struct scif_window *window; + list_for_each(item, head) { + window = list_entry(item, struct scif_window, list); + scif_display_window(window, s); + } +} + +static int scif_rma_test(struct seq_file *s, void *unused) +{ + struct scif_endpt *ep; + struct list_head *pos; + + mutex_lock(&scif_info.connlock); + list_for_each(pos, &scif_info.connected) { + ep = list_entry(pos, struct scif_endpt, list); + seq_printf(s, "ep %p self windows\n", ep); + mutex_lock(&ep->rma_info.rma_lock); + scif_display_all_windows(&ep->rma_info.reg_list, s); + seq_printf(s, "ep %p remote windows\n", ep); + scif_display_all_windows(&ep->rma_info.remote_reg_list, s); + mutex_unlock(&ep->rma_info.rma_lock); + } + mutex_unlock(&scif_info.connlock); + return 0; +} + +static int scif_rma_test_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_rma_test, inode->i_private); +} + +static int scif_rma_test_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations scif_rma_ops = { + .owner = THIS_MODULE, + .open = scif_rma_test_open, + .read = seq_read, + .llseek = seq_lseek, + .release = scif_rma_test_release +}; + +void __init scif_init_debugfs(void) +{ scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); if (!scif_dbg) { dev_err(scif_info.mdev.this_device, @@ -73,8 +150,8 @@ void __init scif_init_debugfs(void) return; } - d = debugfs_create_file("scif_dev", 0444, scif_dbg, - NULL, &scif_dev_ops); + debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops); + debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops); debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log); debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable); } diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c new file mode 100644 index 000000000000..95a13c629a8e --- /dev/null +++ b/drivers/misc/mic/scif/scif_dma.c @@ -0,0 +1,1979 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" +#include "scif_map.h" + +/* + * struct scif_dma_comp_cb - SCIF DMA completion callback + * + * @dma_completion_func: DMA completion callback + * @cb_cookie: DMA completion callback cookie + * @temp_buf: Temporary buffer + * @temp_buf_to_free: Temporary buffer to be freed + * @is_cache: Is a kmem_cache allocated buffer + * @dst_offset: Destination registration offset + * @dst_window: Destination registration window + * @len: Length of the temp buffer + * @temp_phys: DMA address of the temp buffer + * @sdev: The SCIF device + * @header_padding: padding for cache line alignment + */ +struct scif_dma_comp_cb { + void (*dma_completion_func)(void *cookie); + void *cb_cookie; + u8 *temp_buf; + u8 *temp_buf_to_free; + bool is_cache; + s64 dst_offset; + struct scif_window *dst_window; + size_t len; + dma_addr_t temp_phys; + struct scif_dev *sdev; + int header_padding; +}; + +/** + * struct scif_copy_work - Work for DMA copy + * + * @src_offset: Starting source offset + * @dst_offset: Starting destination offset + * @src_window: Starting src registered window + * @dst_window: Starting dst registered window + * @loopback: true if this is a loopback DMA transfer + * @len: Length of the transfer + * @comp_cb: DMA copy completion callback + * @remote_dev: The remote SCIF peer device + * @fence_type: polling or interrupt based + * @ordered: is this a tail byte ordered DMA transfer + */ +struct scif_copy_work { + s64 src_offset; + s64 dst_offset; + struct scif_window *src_window; + struct scif_window *dst_window; + int loopback; + size_t len; + struct scif_dma_comp_cb *comp_cb; + struct scif_dev *remote_dev; + int fence_type; + bool ordered; +}; + +#ifndef list_entry_next +#define list_entry_next(pos, member) \ + list_entry(pos->member.next, typeof(*pos), member) +#endif + +/** + * scif_reserve_dma_chan: + * @ep: Endpoint Descriptor. + * + * This routine reserves a DMA channel for a particular + * endpoint. All DMA transfers for an endpoint are always + * programmed on the same DMA channel. + */ +int scif_reserve_dma_chan(struct scif_endpt *ep) +{ + int err = 0; + struct scif_dev *scifdev; + struct scif_hw_dev *sdev; + struct dma_chan *chan; + + /* Loopback DMAs are not supported on the management node */ + if (!scif_info.nodeid && scifdev_self(ep->remote_dev)) + return 0; + if (scif_info.nodeid) + scifdev = &scif_dev[0]; + else + scifdev = ep->remote_dev; + sdev = scifdev->sdev; + if (!sdev->num_dma_ch) + return -ENODEV; + chan = sdev->dma_ch[scifdev->dma_ch_idx]; + scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch; + mutex_lock(&ep->rma_info.rma_lock); + ep->rma_info.dma_chan = chan; + mutex_unlock(&ep->rma_info.rma_lock); + return err; +} + +#ifdef CONFIG_MMU_NOTIFIER +/** + * scif_rma_destroy_tcw: + * + * This routine destroys temporary cached windows + */ +static +void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, + struct scif_endpt *ep, + u64 start, u64 len) +{ + struct list_head *item, *tmp; + struct scif_window *window; + u64 start_va, end_va; + u64 end = start + len; + + if (end <= start) + return; + + list_for_each_safe(item, tmp, &mmn->tc_reg_list) { + window = list_entry(item, struct scif_window, list); + ep = (struct scif_endpt *)window->ep; + if (!len) + break; + start_va = window->va_for_temp; + end_va = start_va + (window->nr_pages << PAGE_SHIFT); + if (start < start_va && end <= start_va) + break; + if (start >= end_va) + continue; + __scif_rma_destroy_tcw_helper(window); + } +} + +static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len) +{ + struct scif_endpt *ep = mmn->ep; + + spin_lock(&ep->rma_info.tc_lock); + __scif_rma_destroy_tcw(mmn, ep, start, len); + spin_unlock(&ep->rma_info.tc_lock); +} + +static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep) +{ + struct list_head *item, *tmp; + struct scif_mmu_notif *mmn; + + list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + scif_rma_destroy_tcw(mmn, 0, ULONG_MAX); + } +} + +static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep) +{ + struct list_head *item, *tmp; + struct scif_mmu_notif *mmn; + + spin_lock(&ep->rma_info.tc_lock); + list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + __scif_rma_destroy_tcw(mmn, ep, 0, ULONG_MAX); + } + spin_unlock(&ep->rma_info.tc_lock); +} + +static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes) +{ + if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit) + return false; + if ((atomic_read(&ep->rma_info.tcw_total_pages) + + (cur_bytes >> PAGE_SHIFT)) > + scif_info.rma_tc_limit) { + dev_info(scif_info.mdev.this_device, + "%s %d total=%d, current=%zu reached max\n", + __func__, __LINE__, + atomic_read(&ep->rma_info.tcw_total_pages), + (1 + (cur_bytes >> PAGE_SHIFT))); + scif_rma_destroy_tcw_invalid(); + __scif_rma_destroy_tcw_ep(ep); + } + return true; +} + +static void scif_mmu_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct scif_mmu_notif *mmn; + + mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier); + scif_rma_destroy_tcw(mmn, 0, ULONG_MAX); + schedule_work(&scif_info.misc_work); +} + +static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct scif_mmu_notif *mmn; + + mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier); + scif_rma_destroy_tcw(mmn, address, PAGE_SIZE); +} + +static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct scif_mmu_notif *mmn; + + mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier); + scif_rma_destroy_tcw(mmn, start, end - start); +} + +static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + /* + * Nothing to do here, everything needed was done in + * invalidate_range_start. + */ +} + +static const struct mmu_notifier_ops scif_mmu_notifier_ops = { + .release = scif_mmu_notifier_release, + .clear_flush_young = NULL, + .invalidate_page = scif_mmu_notifier_invalidate_page, + .invalidate_range_start = scif_mmu_notifier_invalidate_range_start, + .invalidate_range_end = scif_mmu_notifier_invalidate_range_end}; + +static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep) +{ + struct scif_endpt_rma_info *rma = &ep->rma_info; + struct scif_mmu_notif *mmn = NULL; + struct list_head *item, *tmp; + + mutex_lock(&ep->rma_info.mmn_lock); + list_for_each_safe(item, tmp, &rma->mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm); + list_del(item); + kfree(mmn); + } + mutex_unlock(&ep->rma_info.mmn_lock); +} + +static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn, + struct mm_struct *mm, struct scif_endpt *ep) +{ + mmn->ep = ep; + mmn->mm = mm; + mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops; + INIT_LIST_HEAD(&mmn->list); + INIT_LIST_HEAD(&mmn->tc_reg_list); +} + +static struct scif_mmu_notif * +scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma) +{ + struct scif_mmu_notif *mmn; + struct list_head *item; + + list_for_each(item, &rma->mmn_list) { + mmn = list_entry(item, struct scif_mmu_notif, list); + if (mmn->mm == mm) + return mmn; + } + return NULL; +} + +static struct scif_mmu_notif * +scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep) +{ + struct scif_mmu_notif *mmn + = kzalloc(sizeof(*mmn), GFP_KERNEL); + + if (!mmn) + return ERR_PTR(ENOMEM); + + scif_init_mmu_notifier(mmn, current->mm, ep); + if (mmu_notifier_register(&mmn->ep_mmu_notifier, + current->mm)) { + kfree(mmn); + return ERR_PTR(EBUSY); + } + list_add(&mmn->list, &ep->rma_info.mmn_list); + return mmn; +} + +/* + * Called from the misc thread to destroy temporary cached windows and + * unregister the MMU notifier for the SCIF endpoint. + */ +void scif_mmu_notif_handler(struct work_struct *work) +{ + struct list_head *pos, *tmpq; + struct scif_endpt *ep; +restart: + scif_rma_destroy_tcw_invalid(); + spin_lock(&scif_info.rmalock); + list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) { + ep = list_entry(pos, struct scif_endpt, mmu_list); + list_del(&ep->mmu_list); + spin_unlock(&scif_info.rmalock); + scif_rma_destroy_tcw_ep(ep); + scif_ep_unregister_mmu_notifier(ep); + goto restart; + } + spin_unlock(&scif_info.rmalock); +} + +static bool scif_is_set_reg_cache(int flags) +{ + return !!(flags & SCIF_RMA_USECACHE); +} +#else +static struct scif_mmu_notif * +scif_find_mmu_notifier(struct mm_struct *mm, + struct scif_endpt_rma_info *rma) +{ + return NULL; +} + +static struct scif_mmu_notif * +scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep) +{ + return NULL; +} + +void scif_mmu_notif_handler(struct work_struct *work) +{ +} + +static bool scif_is_set_reg_cache(int flags) +{ + return false; +} + +static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes) +{ + return false; +} +#endif + +/** + * scif_register_temp: + * @epd: End Point Descriptor. + * @addr: virtual address to/from which to copy + * @len: length of range to copy + * @out_offset: computed offset returned by reference. + * @out_window: allocated registered window returned by reference. + * + * Create a temporary registered window. The peer will not know about this + * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's. + */ +static int +scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot, + off_t *out_offset, struct scif_window **out_window) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err; + scif_pinned_pages_t pinned_pages; + size_t aligned_len; + + aligned_len = ALIGN(len, PAGE_SIZE); + + err = __scif_pin_pages((void *)(addr & PAGE_MASK), + aligned_len, &prot, 0, &pinned_pages); + if (err) + return err; + + pinned_pages->prot = prot; + + /* Compute the offset for this registration */ + err = scif_get_window_offset(ep, 0, 0, + aligned_len >> PAGE_SHIFT, + (s64 *)out_offset); + if (err) + goto error_unpin; + + /* Allocate and prepare self registration window */ + *out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT, + *out_offset, true); + if (!*out_window) { + scif_free_window_offset(ep, NULL, *out_offset); + err = -ENOMEM; + goto error_unpin; + } + + (*out_window)->pinned_pages = pinned_pages; + (*out_window)->nr_pages = pinned_pages->nr_pages; + (*out_window)->prot = pinned_pages->prot; + + (*out_window)->va_for_temp = addr & PAGE_MASK; + err = scif_map_window(ep->remote_dev, *out_window); + if (err) { + /* Something went wrong! Rollback */ + scif_destroy_window(ep, *out_window); + *out_window = NULL; + } else { + *out_offset |= (addr - (*out_window)->va_for_temp); + } + return err; +error_unpin: + if (err) + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + scif_unpin_pages(pinned_pages); + return err; +} + +#define SCIF_DMA_TO (3 * HZ) + +/* + * scif_sync_dma - Program a DMA without an interrupt descriptor + * + * @dev - The address of the pointer to the device instance used + * for DMA registration. + * @chan - DMA channel to be used. + * @sync_wait: Wait for DMA to complete? + * + * Return 0 on success and -errno on error. + */ +static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan, + bool sync_wait) +{ + int err = 0; + struct dma_async_tx_descriptor *tx = NULL; + enum dma_ctrl_flags flags = DMA_PREP_FENCE; + dma_cookie_t cookie; + struct dma_device *ddev; + + if (!chan) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + ddev = chan->device; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags); + if (!tx) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + if (!sync_wait) { + dma_async_issue_pending(chan); + } else { + if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) { + err = 0; + } else { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + } + } +release: + return err; +} + +static void scif_dma_callback(void *arg) +{ + struct completion *done = (struct completion *)arg; + + complete(done); +} + +#define SCIF_DMA_SYNC_WAIT true +#define SCIF_DMA_POLL BIT(0) +#define SCIF_DMA_INTR BIT(1) + +/* + * scif_async_dma - Program a DMA with an interrupt descriptor + * + * @dev - The address of the pointer to the device instance used + * for DMA registration. + * @chan - DMA channel to be used. + * Return 0 on success and -errno on error. + */ +static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan) +{ + int err = 0; + struct dma_device *ddev; + struct dma_async_tx_descriptor *tx = NULL; + enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE; + DECLARE_COMPLETION_ONSTACK(done_wait); + dma_cookie_t cookie; + enum dma_status status; + + if (!chan) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + ddev = chan->device; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags); + if (!tx) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + reinit_completion(&done_wait); + tx->callback = scif_dma_callback; + tx->callback_param = &done_wait; + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + err = -ENOMEM; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + dma_async_issue_pending(chan); + + err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO); + if (!err) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } + err = 0; + status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); + if (status != DMA_COMPLETE) { + err = -EIO; + dev_err(&sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto release; + } +release: + return err; +} + +/* + * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular + * DMA channel via polling. + * + * @sdev - The SCIF device + * @chan - DMA channel + * Return 0 on success and -errno on error. + */ +static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan) +{ + if (!chan) + return -EINVAL; + return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT); +} + +/* + * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular + * DMA channel via interrupt based blocking wait. + * + * @sdev - The SCIF device + * @chan - DMA channel + * Return 0 on success and -errno on error. + */ +int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan) +{ + if (!chan) + return -EINVAL; + return scif_async_dma(sdev, chan); +} + +/** + * scif_rma_destroy_windows: + * + * This routine destroys all windows queued for cleanup + */ +void scif_rma_destroy_windows(void) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep; + struct dma_chan *chan; + + might_sleep(); +restart: + spin_lock(&scif_info.rmalock); + list_for_each_safe(item, tmp, &scif_info.rma) { + window = list_entry(item, struct scif_window, + list); + ep = (struct scif_endpt *)window->ep; + chan = ep->rma_info.dma_chan; + + list_del_init(&window->list); + spin_unlock(&scif_info.rmalock); + if (!chan || !scifdev_alive(ep) || + !scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan)) + /* Remove window from global list */ + window->unreg_state = OP_COMPLETED; + else + dev_warn(&ep->remote_dev->sdev->dev, + "DMA engine hung?\n"); + if (window->unreg_state == OP_COMPLETED) { + if (window->type == SCIF_WINDOW_SELF) + scif_destroy_window(ep, window); + else + scif_destroy_remote_window(window); + atomic_dec(&ep->rma_info.tw_refcount); + } + goto restart; + } + spin_unlock(&scif_info.rmalock); +} + +/** + * scif_rma_destroy_tcw: + * + * This routine destroys temporary cached registered windows + * which have been queued for cleanup. + */ +void scif_rma_destroy_tcw_invalid(void) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep; + struct dma_chan *chan; + + might_sleep(); +restart: + spin_lock(&scif_info.rmalock); + list_for_each_safe(item, tmp, &scif_info.rma_tc) { + window = list_entry(item, struct scif_window, list); + ep = (struct scif_endpt *)window->ep; + chan = ep->rma_info.dma_chan; + list_del_init(&window->list); + spin_unlock(&scif_info.rmalock); + mutex_lock(&ep->rma_info.rma_lock); + if (!chan || !scifdev_alive(ep) || + !scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan)) { + atomic_sub(window->nr_pages, + &ep->rma_info.tcw_total_pages); + scif_destroy_window(ep, window); + atomic_dec(&ep->rma_info.tcw_refcount); + } else { + dev_warn(&ep->remote_dev->sdev->dev, + "DMA engine hung?\n"); + } + mutex_unlock(&ep->rma_info.rma_lock); + goto restart; + } + spin_unlock(&scif_info.rmalock); +} + +static inline +void *_get_local_va(off_t off, struct scif_window *window, size_t len) +{ + int page_nr = (off - window->offset) >> PAGE_SHIFT; + off_t page_off = off & ~PAGE_MASK; + void *va = NULL; + + if (window->type == SCIF_WINDOW_SELF) { + struct page **pages = window->pinned_pages->pages; + + va = page_address(pages[page_nr]) + page_off; + } + return va; +} + +static inline +void *ioremap_remote(off_t off, struct scif_window *window, + size_t len, struct scif_dev *dev, + struct scif_window_iter *iter) +{ + dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter); + + /* + * If the DMA address is not card relative then we need the DMA + * addresses to be an offset into the bar. The aperture base was already + * added so subtract it here since scif_ioremap is going to add it again + */ + if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER && + dev->sdev->aper && !dev->sdev->card_rel_da) + phys = phys - dev->sdev->aper->pa; + return scif_ioremap(phys, len, dev); +} + +static inline void +iounmap_remote(void *virt, size_t size, struct scif_copy_work *work) +{ + scif_iounmap(virt, size, work->remote_dev); +} + +/* + * Takes care of ordering issue caused by + * 1. Hardware: Only in the case of cpu copy from mgmt node to card + * because of WC memory. + * 2. Software: If memcpy reorders copy instructions for optimization. + * This could happen at both mgmt node and card. + */ +static inline void +scif_ordered_memcpy_toio(char *dst, const char *src, size_t count) +{ + if (!count) + return; + + memcpy_toio((void __iomem __force *)dst, src, --count); + /* Order the last byte with the previous stores */ + wmb(); + *(dst + count) = *(src + count); +} + +static inline void scif_unaligned_cpy_toio(char *dst, const char *src, + size_t count, bool ordered) +{ + if (ordered) + scif_ordered_memcpy_toio(dst, src, count); + else + memcpy_toio((void __iomem __force *)dst, src, count); +} + +static inline +void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count) +{ + if (!count) + return; + + memcpy_fromio(dst, (void __iomem __force *)src, --count); + /* Order the last byte with the previous loads */ + rmb(); + *(dst + count) = *(src + count); +} + +static inline void scif_unaligned_cpy_fromio(char *dst, const char *src, + size_t count, bool ordered) +{ + if (ordered) + scif_ordered_memcpy_fromio(dst, src, count); + else + memcpy_fromio(dst, (void __iomem __force *)src, count); +} + +#define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0) + +/* + * scif_off_to_dma_addr: + * Obtain the dma_addr given the window and the offset. + * @window: Registered window. + * @off: Window offset. + * @nr_bytes: Return the number of contiguous bytes till next DMA addr index. + * @index: Return the index of the dma_addr array found. + * @start_off: start offset of index of the dma addr array found. + * The nr_bytes provides the callee an estimate of the maximum possible + * DMA xfer possible while the index/start_off provide faster lookups + * for the next iteration. + */ +dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off, + size_t *nr_bytes, struct scif_window_iter *iter) +{ + int i, page_nr; + s64 start, end; + off_t page_off; + + if (window->nr_pages == window->nr_contig_chunks) { + page_nr = (off - window->offset) >> PAGE_SHIFT; + page_off = off & ~PAGE_MASK; + + if (nr_bytes) + *nr_bytes = PAGE_SIZE - page_off; + return window->dma_addr[page_nr] | page_off; + } + if (iter) { + i = iter->index; + start = iter->offset; + } else { + i = 0; + start = window->offset; + } + for (; i < window->nr_contig_chunks; i++) { + end = start + (window->num_pages[i] << PAGE_SHIFT); + if (off >= start && off < end) { + if (iter) { + iter->index = i; + iter->offset = start; + } + if (nr_bytes) + *nr_bytes = end - off; + return (window->dma_addr[i] + (off - start)); + } + start += (window->num_pages[i] << PAGE_SHIFT); + } + dev_err(scif_info.mdev.this_device, + "%s %d BUG. Addr not found? window %p off 0x%llx\n", + __func__, __LINE__, window, off); + return SCIF_RMA_ERROR_CODE; +} + +/* + * Copy between rma window and temporary buffer + */ +static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window, + u8 *temp, size_t rem_len, bool to_temp) +{ + void *window_virt; + size_t loop_len; + int offset_in_page; + s64 end_offset; + + offset_in_page = offset & ~PAGE_MASK; + loop_len = PAGE_SIZE - offset_in_page; + + if (rem_len < loop_len) + loop_len = rem_len; + + window_virt = _get_local_va(offset, window, loop_len); + if (!window_virt) + return; + if (to_temp) + memcpy(temp, window_virt, loop_len); + else + memcpy(window_virt, temp, loop_len); + + offset += loop_len; + temp += loop_len; + rem_len -= loop_len; + + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + while (rem_len) { + if (offset == end_offset) { + window = list_entry_next(window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + loop_len = min(PAGE_SIZE, rem_len); + window_virt = _get_local_va(offset, window, loop_len); + if (!window_virt) + return; + if (to_temp) + memcpy(temp, window_virt, loop_len); + else + memcpy(window_virt, temp, loop_len); + offset += loop_len; + temp += loop_len; + rem_len -= loop_len; + } +} + +/** + * scif_rma_completion_cb: + * @data: RMA cookie + * + * RMA interrupt completion callback. + */ +static void scif_rma_completion_cb(void *data) +{ + struct scif_dma_comp_cb *comp_cb = data; + + /* Free DMA Completion CB. */ + if (comp_cb->dst_window) + scif_rma_local_cpu_copy(comp_cb->dst_offset, + comp_cb->dst_window, + comp_cb->temp_buf + + comp_cb->header_padding, + comp_cb->len, false); + scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev, + SCIF_KMEM_UNALIGNED_BUF_SIZE); + if (comp_cb->is_cache) + kmem_cache_free(unaligned_cache, + comp_cb->temp_buf_to_free); + else + kfree(comp_cb->temp_buf_to_free); +} + +/* Copies between temporary buffer and offsets provided in work */ +static int +scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work, + u8 *temp, struct dma_chan *chan, + bool src_local) +{ + struct scif_dma_comp_cb *comp_cb = work->comp_cb; + dma_addr_t window_dma_addr, temp_dma_addr; + dma_addr_t temp_phys = comp_cb->temp_phys; + size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len; + int offset_in_ca, ret = 0; + s64 end_offset, offset; + struct scif_window *window; + void *window_virt_addr; + size_t tail_len; + struct dma_async_tx_descriptor *tx; + struct dma_device *dev = chan->device; + dma_cookie_t cookie; + + if (src_local) { + offset = work->dst_offset; + window = work->dst_window; + } else { + offset = work->src_offset; + window = work->src_window; + } + + offset_in_ca = offset & (L1_CACHE_BYTES - 1); + if (offset_in_ca) { + loop_len = L1_CACHE_BYTES - offset_in_ca; + loop_len = min(loop_len, remaining_len); + window_virt_addr = ioremap_remote(offset, window, + loop_len, + work->remote_dev, + NULL); + if (!window_virt_addr) + return -ENOMEM; + if (src_local) + scif_unaligned_cpy_toio(window_virt_addr, temp, + loop_len, + work->ordered && + !(remaining_len - loop_len)); + else + scif_unaligned_cpy_fromio(temp, window_virt_addr, + loop_len, work->ordered && + !(remaining_len - loop_len)); + iounmap_remote(window_virt_addr, loop_len, work); + + offset += loop_len; + temp += loop_len; + temp_phys += loop_len; + remaining_len -= loop_len; + } + + offset_in_ca = offset & ~PAGE_MASK; + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + + tail_len = remaining_len & (L1_CACHE_BYTES - 1); + remaining_len -= tail_len; + while (remaining_len) { + if (offset == end_offset) { + window = list_entry_next(window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + if (scif_is_mgmt_node()) + temp_dma_addr = temp_phys; + else + /* Fix if we ever enable IOMMU on the card */ + temp_dma_addr = (dma_addr_t)virt_to_phys(temp); + window_dma_addr = scif_off_to_dma_addr(window, offset, + &nr_contig_bytes, + NULL); + loop_len = min(nr_contig_bytes, remaining_len); + if (src_local) { + if (work->ordered && !tail_len && + !(remaining_len - loop_len) && + loop_len != L1_CACHE_BYTES) { + /* + * Break up the last chunk of the transfer into + * two steps. if there is no tail to guarantee + * DMA ordering. SCIF_DMA_POLLING inserts + * a status update descriptor in step 1 which + * acts as a double sided synchronization fence + * for the DMA engine to ensure that the last + * cache line in step 2 is updated last. + */ + /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */ + tx = + dev->device_prep_dma_memcpy(chan, + window_dma_addr, + temp_dma_addr, + loop_len - + L1_CACHE_BYTES, + DMA_PREP_FENCE); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + offset += (loop_len - L1_CACHE_BYTES); + temp_dma_addr += (loop_len - L1_CACHE_BYTES); + window_dma_addr += (loop_len - L1_CACHE_BYTES); + remaining_len -= (loop_len - L1_CACHE_BYTES); + loop_len = remaining_len; + + /* Step 2) DMA: L1_CACHE_BYTES */ + tx = + dev->device_prep_dma_memcpy(chan, + window_dma_addr, + temp_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } else { + tx = + dev->device_prep_dma_memcpy(chan, + window_dma_addr, + temp_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } + } else { + tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr, + window_dma_addr, loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } + if (ret < 0) + goto err; + offset += loop_len; + temp += loop_len; + temp_phys += loop_len; + remaining_len -= loop_len; + offset_in_ca = 0; + } + if (tail_len) { + if (offset == end_offset) { + window = list_entry_next(window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + window_virt_addr = ioremap_remote(offset, window, tail_len, + work->remote_dev, + NULL); + if (!window_virt_addr) + return -ENOMEM; + /* + * The CPU copy for the tail bytes must be initiated only once + * previous DMA transfers for this endpoint have completed + * to guarantee ordering. + */ + if (work->ordered) { + struct scif_dev *rdev = work->remote_dev; + + ret = scif_drain_dma_intr(rdev->sdev, chan); + if (ret) + return ret; + } + if (src_local) + scif_unaligned_cpy_toio(window_virt_addr, temp, + tail_len, work->ordered); + else + scif_unaligned_cpy_fromio(temp, window_virt_addr, + tail_len, work->ordered); + iounmap_remote(window_virt_addr, tail_len, work); + } + tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT); + if (!tx) { + ret = -ENOMEM; + return ret; + } + tx->callback = &scif_rma_completion_cb; + tx->callback_param = comp_cb; + cookie = tx->tx_submit(tx); + + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + return ret; + } + dma_async_issue_pending(chan); + return 0; +err: + dev_err(scif_info.mdev.this_device, + "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; +} + +/* + * _scif_rma_list_dma_copy_aligned: + * + * Traverse all the windows and perform DMA copy. + */ +static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work, + struct dma_chan *chan) +{ + dma_addr_t src_dma_addr, dst_dma_addr; + size_t loop_len, remaining_len, src_contig_bytes = 0; + size_t dst_contig_bytes = 0; + struct scif_window_iter src_win_iter; + struct scif_window_iter dst_win_iter; + s64 end_src_offset, end_dst_offset; + struct scif_window *src_window = work->src_window; + struct scif_window *dst_window = work->dst_window; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + int ret = 0; + struct dma_async_tx_descriptor *tx; + struct dma_device *dev = chan->device; + dma_cookie_t cookie; + + remaining_len = work->len; + + scif_init_window_iter(src_window, &src_win_iter); + scif_init_window_iter(dst_window, &dst_win_iter); + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + while (remaining_len) { + if (src_offset == end_src_offset) { + src_window = list_entry_next(src_window, list); + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(src_window, &src_win_iter); + } + if (dst_offset == end_dst_offset) { + dst_window = list_entry_next(dst_window, list); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(dst_window, &dst_win_iter); + } + + /* compute dma addresses for transfer */ + src_dma_addr = scif_off_to_dma_addr(src_window, src_offset, + &src_contig_bytes, + &src_win_iter); + dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset, + &dst_contig_bytes, + &dst_win_iter); + loop_len = min(src_contig_bytes, dst_contig_bytes); + loop_len = min(loop_len, remaining_len); + if (work->ordered && !(remaining_len - loop_len)) { + /* + * Break up the last chunk of the transfer into two + * steps to ensure that the last byte in step 2 is + * updated last. + */ + /* Step 1) DMA: Body Length - 1 */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len - 1, + DMA_PREP_FENCE); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + src_offset += (loop_len - 1); + dst_offset += (loop_len - 1); + src_dma_addr += (loop_len - 1); + dst_dma_addr += (loop_len - 1); + remaining_len -= (loop_len - 1); + loop_len = remaining_len; + + /* Step 2) DMA: 1 BYTES */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } else { + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + } + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } + return ret; +err: + dev_err(scif_info.mdev.this_device, + "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; +} + +/* + * scif_rma_list_dma_copy_aligned: + * + * Traverse all the windows and perform DMA copy. + */ +static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work, + struct dma_chan *chan) +{ + dma_addr_t src_dma_addr, dst_dma_addr; + size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0; + size_t dst_contig_bytes = 0; + int src_cache_off; + s64 end_src_offset, end_dst_offset; + struct scif_window_iter src_win_iter; + struct scif_window_iter dst_win_iter; + void *src_virt, *dst_virt; + struct scif_window *src_window = work->src_window; + struct scif_window *dst_window = work->dst_window; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + int ret = 0; + struct dma_async_tx_descriptor *tx; + struct dma_device *dev = chan->device; + dma_cookie_t cookie; + + remaining_len = work->len; + scif_init_window_iter(src_window, &src_win_iter); + scif_init_window_iter(dst_window, &dst_win_iter); + + src_cache_off = src_offset & (L1_CACHE_BYTES - 1); + if (src_cache_off != 0) { + /* Head */ + loop_len = L1_CACHE_BYTES - src_cache_off; + loop_len = min(loop_len, remaining_len); + src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset); + dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset); + if (src_window->type == SCIF_WINDOW_SELF) + src_virt = _get_local_va(src_offset, src_window, + loop_len); + else + src_virt = ioremap_remote(src_offset, src_window, + loop_len, + work->remote_dev, NULL); + if (!src_virt) + return -ENOMEM; + if (dst_window->type == SCIF_WINDOW_SELF) + dst_virt = _get_local_va(dst_offset, dst_window, + loop_len); + else + dst_virt = ioremap_remote(dst_offset, dst_window, + loop_len, + work->remote_dev, NULL); + if (!dst_virt) { + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + return -ENOMEM; + } + if (src_window->type == SCIF_WINDOW_SELF) + scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len, + remaining_len == loop_len ? + work->ordered : false); + else + scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len, + remaining_len == loop_len ? + work->ordered : false); + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + if (dst_window->type != SCIF_WINDOW_SELF) + iounmap_remote(dst_virt, loop_len, work); + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } + + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + tail_len = remaining_len & (L1_CACHE_BYTES - 1); + remaining_len -= tail_len; + while (remaining_len) { + if (src_offset == end_src_offset) { + src_window = list_entry_next(src_window, list); + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(src_window, &src_win_iter); + } + if (dst_offset == end_dst_offset) { + dst_window = list_entry_next(dst_window, list); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + scif_init_window_iter(dst_window, &dst_win_iter); + } + + /* compute dma addresses for transfer */ + src_dma_addr = scif_off_to_dma_addr(src_window, src_offset, + &src_contig_bytes, + &src_win_iter); + dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset, + &dst_contig_bytes, + &dst_win_iter); + loop_len = min(src_contig_bytes, dst_contig_bytes); + loop_len = min(loop_len, remaining_len); + if (work->ordered && !tail_len && + !(remaining_len - loop_len)) { + /* + * Break up the last chunk of the transfer into two + * steps. if there is no tail to gurantee DMA ordering. + * Passing SCIF_DMA_POLLING inserts a status update + * descriptor in step 1 which acts as a double sided + * synchronization fence for the DMA engine to ensure + * that the last cache line in step 2 is updated last. + */ + /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len - + L1_CACHE_BYTES, + DMA_PREP_FENCE); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + src_offset += (loop_len - L1_CACHE_BYTES); + dst_offset += (loop_len - L1_CACHE_BYTES); + src_dma_addr += (loop_len - L1_CACHE_BYTES); + dst_dma_addr += (loop_len - L1_CACHE_BYTES); + remaining_len -= (loop_len - L1_CACHE_BYTES); + loop_len = remaining_len; + + /* Step 2) DMA: L1_CACHE_BYTES */ + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } else { + tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr, + src_dma_addr, + loop_len, 0); + if (!tx) { + ret = -ENOMEM; + goto err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + ret = -ENOMEM; + goto err; + } + dma_async_issue_pending(chan); + } + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } + remaining_len = tail_len; + if (remaining_len) { + loop_len = remaining_len; + if (src_offset == end_src_offset) + src_window = list_entry_next(src_window, list); + if (dst_offset == end_dst_offset) + dst_window = list_entry_next(dst_window, list); + + src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset); + dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset); + /* + * The CPU copy for the tail bytes must be initiated only once + * previous DMA transfers for this endpoint have completed to + * guarantee ordering. + */ + if (work->ordered) { + struct scif_dev *rdev = work->remote_dev; + + ret = scif_drain_dma_poll(rdev->sdev, chan); + if (ret) + return ret; + } + if (src_window->type == SCIF_WINDOW_SELF) + src_virt = _get_local_va(src_offset, src_window, + loop_len); + else + src_virt = ioremap_remote(src_offset, src_window, + loop_len, + work->remote_dev, NULL); + if (!src_virt) + return -ENOMEM; + + if (dst_window->type == SCIF_WINDOW_SELF) + dst_virt = _get_local_va(dst_offset, dst_window, + loop_len); + else + dst_virt = ioremap_remote(dst_offset, dst_window, + loop_len, + work->remote_dev, NULL); + if (!dst_virt) { + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + return -ENOMEM; + } + + if (src_window->type == SCIF_WINDOW_SELF) + scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len, + work->ordered); + else + scif_unaligned_cpy_fromio(dst_virt, src_virt, + loop_len, work->ordered); + if (src_window->type != SCIF_WINDOW_SELF) + iounmap_remote(src_virt, loop_len, work); + + if (dst_window->type != SCIF_WINDOW_SELF) + iounmap_remote(dst_virt, loop_len, work); + remaining_len -= loop_len; + } + return ret; +err: + dev_err(scif_info.mdev.this_device, + "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; +} + +/* + * scif_rma_list_cpu_copy: + * + * Traverse all the windows and perform CPU copy. + */ +static int scif_rma_list_cpu_copy(struct scif_copy_work *work) +{ + void *src_virt, *dst_virt; + size_t loop_len, remaining_len; + int src_page_off, dst_page_off; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + struct scif_window *src_window = work->src_window; + struct scif_window *dst_window = work->dst_window; + s64 end_src_offset, end_dst_offset; + int ret = 0; + struct scif_window_iter src_win_iter; + struct scif_window_iter dst_win_iter; + + remaining_len = work->len; + + scif_init_window_iter(src_window, &src_win_iter); + scif_init_window_iter(dst_window, &dst_win_iter); + while (remaining_len) { + src_page_off = src_offset & ~PAGE_MASK; + dst_page_off = dst_offset & ~PAGE_MASK; + loop_len = min(PAGE_SIZE - + max(src_page_off, dst_page_off), + remaining_len); + + if (src_window->type == SCIF_WINDOW_SELF) + src_virt = _get_local_va(src_offset, src_window, + loop_len); + else + src_virt = ioremap_remote(src_offset, src_window, + loop_len, + work->remote_dev, + &src_win_iter); + if (!src_virt) { + ret = -ENOMEM; + goto error; + } + + if (dst_window->type == SCIF_WINDOW_SELF) + dst_virt = _get_local_va(dst_offset, dst_window, + loop_len); + else + dst_virt = ioremap_remote(dst_offset, dst_window, + loop_len, + work->remote_dev, + &dst_win_iter); + if (!dst_virt) { + if (src_window->type == SCIF_WINDOW_PEER) + iounmap_remote(src_virt, loop_len, work); + ret = -ENOMEM; + goto error; + } + + if (work->loopback) { + memcpy(dst_virt, src_virt, loop_len); + } else { + if (src_window->type == SCIF_WINDOW_SELF) + memcpy_toio((void __iomem __force *)dst_virt, + src_virt, loop_len); + else + memcpy_fromio(dst_virt, + (void __iomem __force *)src_virt, + loop_len); + } + if (src_window->type == SCIF_WINDOW_PEER) + iounmap_remote(src_virt, loop_len, work); + + if (dst_window->type == SCIF_WINDOW_PEER) + iounmap_remote(dst_virt, loop_len, work); + + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + if (remaining_len) { + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + if (src_offset == end_src_offset) { + src_window = list_entry_next(src_window, list); + scif_init_window_iter(src_window, + &src_win_iter); + } + if (dst_offset == end_dst_offset) { + dst_window = list_entry_next(dst_window, list); + scif_init_window_iter(dst_window, + &dst_win_iter); + } + } + } +error: + return ret; +} + +static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd, + struct scif_copy_work *work, + struct dma_chan *chan, off_t loffset) +{ + int src_cache_off, dst_cache_off; + s64 src_offset = work->src_offset, dst_offset = work->dst_offset; + u8 *temp = NULL; + bool src_local = true, dst_local = false; + struct scif_dma_comp_cb *comp_cb; + dma_addr_t src_dma_addr, dst_dma_addr; + int err; + + if (is_dma_copy_aligned(chan->device, 1, 1, 1)) + return _scif_rma_list_dma_copy_aligned(work, chan); + + src_cache_off = src_offset & (L1_CACHE_BYTES - 1); + dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1); + + if (dst_cache_off == src_cache_off) + return scif_rma_list_dma_copy_aligned(work, chan); + + if (work->loopback) + return scif_rma_list_cpu_copy(work); + src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset); + dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset); + src_local = work->src_window->type == SCIF_WINDOW_SELF; + dst_local = work->dst_window->type == SCIF_WINDOW_SELF; + + dst_local = dst_local; + /* Allocate dma_completion cb */ + comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL); + if (!comp_cb) + goto error; + + work->comp_cb = comp_cb; + comp_cb->cb_cookie = comp_cb; + comp_cb->dma_completion_func = &scif_rma_completion_cb; + + if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) { + comp_cb->is_cache = false; + /* Allocate padding bytes to align to a cache line */ + temp = kmalloc(work->len + (L1_CACHE_BYTES << 1), + GFP_KERNEL); + if (!temp) + goto free_comp_cb; + comp_cb->temp_buf_to_free = temp; + /* kmalloc(..) does not guarantee cache line alignment */ + if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES)) + temp = PTR_ALIGN(temp, L1_CACHE_BYTES); + } else { + comp_cb->is_cache = true; + temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL); + if (!temp) + goto free_comp_cb; + comp_cb->temp_buf_to_free = temp; + } + + if (src_local) { + temp += dst_cache_off; + scif_rma_local_cpu_copy(work->src_offset, work->src_window, + temp, work->len, true); + } else { + comp_cb->dst_window = work->dst_window; + comp_cb->dst_offset = work->dst_offset; + work->src_offset = work->src_offset - src_cache_off; + comp_cb->len = work->len; + work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES); + comp_cb->header_padding = src_cache_off; + } + comp_cb->temp_buf = temp; + + err = scif_map_single(&comp_cb->temp_phys, temp, + work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE); + if (err) + goto free_temp_buf; + comp_cb->sdev = work->remote_dev; + if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0) + goto free_temp_buf; + if (!src_local) + work->fence_type = SCIF_DMA_INTR; + return 0; +free_temp_buf: + if (comp_cb->is_cache) + kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free); + else + kfree(comp_cb->temp_buf_to_free); +free_comp_cb: + kfree(comp_cb); +error: + return -ENOMEM; +} + +/** + * scif_rma_copy: + * @epd: end point descriptor. + * @loffset: offset in local registered address space to/from which to copy + * @addr: user virtual address to/from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to/from which to copy + * @flags: flags + * @dir: LOCAL->REMOTE or vice versa. + * @last_chunk: true if this is the last chunk of a larger transfer + * + * Validate parameters, check if src/dst registered ranges requested for copy + * are valid and initiate either CPU or DMA copy. + */ +static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr, + size_t len, off_t roffset, int flags, + enum scif_rma_dir dir, bool last_chunk) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_rma_req remote_req; + struct scif_rma_req req; + struct scif_window *local_window = NULL; + struct scif_window *remote_window = NULL; + struct scif_copy_work copy_work; + bool loopback; + int err = 0; + struct dma_chan *chan; + struct scif_mmu_notif *mmn = NULL; + bool cache = false; + struct device *spdev; + + err = scif_verify_epd(ep); + if (err) + return err; + + if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE | + SCIF_RMA_SYNC | SCIF_RMA_ORDERED))) + return -EINVAL; + + loopback = scifdev_self(ep->remote_dev) ? true : false; + copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ? + SCIF_DMA_POLL : 0; + copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk); + + /* Use CPU for Mgmt node <-> Mgmt node copies */ + if (loopback && scif_is_mgmt_node()) { + flags |= SCIF_RMA_USECPU; + copy_work.fence_type = 0x0; + } + + cache = scif_is_set_reg_cache(flags); + + remote_req.out_window = &remote_window; + remote_req.offset = roffset; + remote_req.nr_bytes = len; + /* + * If transfer is from local to remote then the remote window + * must be writeable and vice versa. + */ + remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ; + remote_req.type = SCIF_WINDOW_PARTIAL; + remote_req.head = &ep->rma_info.remote_reg_list; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + return err; + } + + if (addr && cache) { + mutex_lock(&ep->rma_info.mmn_lock); + mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info); + if (!mmn) + scif_add_mmu_notifier(current->mm, ep); + mutex_unlock(&ep->rma_info.mmn_lock); + if (IS_ERR(mmn)) { + scif_put_peer_dev(spdev); + return PTR_ERR(mmn); + } + cache = cache && !scif_rma_tc_can_cache(ep, len); + } + mutex_lock(&ep->rma_info.rma_lock); + if (addr) { + req.out_window = &local_window; + req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK), + PAGE_SIZE); + req.va_for_temp = addr & PAGE_MASK; + req.prot = (dir == SCIF_LOCAL_TO_REMOTE ? + VM_READ : VM_WRITE | VM_READ); + /* Does a valid local window exist? */ + if (mmn) { + spin_lock(&ep->rma_info.tc_lock); + req.head = &mmn->tc_reg_list; + err = scif_query_tcw(ep, &req); + spin_unlock(&ep->rma_info.tc_lock); + } + if (!mmn || err) { + err = scif_register_temp(epd, req.va_for_temp, + req.nr_bytes, req.prot, + &loffset, &local_window); + if (err) { + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + if (!cache) + goto skip_cache; + atomic_inc(&ep->rma_info.tcw_refcount); + atomic_add_return(local_window->nr_pages, + &ep->rma_info.tcw_total_pages); + if (mmn) { + spin_lock(&ep->rma_info.tc_lock); + scif_insert_tcw(local_window, + &mmn->tc_reg_list); + spin_unlock(&ep->rma_info.tc_lock); + } + } +skip_cache: + loffset = local_window->offset + + (addr - local_window->va_for_temp); + } else { + req.out_window = &local_window; + req.offset = loffset; + /* + * If transfer is from local to remote then the self window + * must be readable and vice versa. + */ + req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE; + req.nr_bytes = len; + req.type = SCIF_WINDOW_PARTIAL; + req.head = &ep->rma_info.reg_list; + /* Does a valid local window exist? */ + err = scif_query_window(&req); + if (err) { + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + } + + /* Does a valid remote window exist? */ + err = scif_query_window(&remote_req); + if (err) { + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + + /* + * Prepare copy_work for submitting work to the DMA kernel thread + * or CPU copy routine. + */ + copy_work.len = len; + copy_work.loopback = loopback; + copy_work.remote_dev = ep->remote_dev; + if (dir == SCIF_LOCAL_TO_REMOTE) { + copy_work.src_offset = loffset; + copy_work.src_window = local_window; + copy_work.dst_offset = roffset; + copy_work.dst_window = remote_window; + } else { + copy_work.src_offset = roffset; + copy_work.src_window = remote_window; + copy_work.dst_offset = loffset; + copy_work.dst_window = local_window; + } + + if (flags & SCIF_RMA_USECPU) { + scif_rma_list_cpu_copy(©_work); + } else { + chan = ep->rma_info.dma_chan; + err = scif_rma_list_dma_copy_wrapper(epd, ©_work, + chan, loffset); + } + if (addr && !cache) + atomic_inc(&ep->rma_info.tw_refcount); + + mutex_unlock(&ep->rma_info.rma_lock); + + if (last_chunk) { + struct scif_dev *rdev = ep->remote_dev; + + if (copy_work.fence_type == SCIF_DMA_POLL) + err = scif_drain_dma_poll(rdev->sdev, + ep->rma_info.dma_chan); + else if (copy_work.fence_type == SCIF_DMA_INTR) + err = scif_drain_dma_intr(rdev->sdev, + ep->rma_info.dma_chan); + } + + if (addr && !cache) + scif_queue_for_cleanup(local_window, &scif_info.rma); + scif_put_peer_dev(spdev); + return err; +error: + if (err) { + if (addr && local_window && !cache) + scif_destroy_window(ep, local_window); + dev_err(scif_info.mdev.this_device, + "%s %d err %d len 0x%lx\n", + __func__, __LINE__, err, len); + } + scif_put_peer_dev(spdev); + return err; +} + +int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n", + epd, loffset, len, roffset, flags); + if (scif_unaligned(loffset, roffset)) { + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, loffset, 0x0, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_REMOTE_TO_LOCAL, false); + if (err) + goto readfrom_err; + loffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, loffset, 0x0, len, + roffset, flags, SCIF_REMOTE_TO_LOCAL, true); +readfrom_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_readfrom); + +int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n", + epd, loffset, len, roffset, flags); + if (scif_unaligned(loffset, roffset)) { + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, loffset, 0x0, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_LOCAL_TO_REMOTE, false); + if (err) + goto writeto_err; + loffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, loffset, 0x0, len, + roffset, flags, SCIF_LOCAL_TO_REMOTE, true); +writeto_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_writeto); + +int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n", + epd, addr, len, roffset, flags); + if (scif_unaligned((off_t __force)addr, roffset)) { + if (len > SCIF_MAX_UNALIGNED_BUF_SIZE) + flags &= ~SCIF_RMA_USECACHE; + + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, 0, (u64)addr, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_REMOTE_TO_LOCAL, false); + if (err) + goto vreadfrom_err; + addr += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, 0, (u64)addr, len, + roffset, flags, SCIF_REMOTE_TO_LOCAL, true); +vreadfrom_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_vreadfrom); + +int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, + off_t roffset, int flags) +{ + int err; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n", + epd, addr, len, roffset, flags); + if (scif_unaligned((off_t __force)addr, roffset)) { + if (len > SCIF_MAX_UNALIGNED_BUF_SIZE) + flags &= ~SCIF_RMA_USECACHE; + + while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) { + err = scif_rma_copy(epd, 0, (u64)addr, + SCIF_MAX_UNALIGNED_BUF_SIZE, + roffset, flags, + SCIF_LOCAL_TO_REMOTE, false); + if (err) + goto vwriteto_err; + addr += SCIF_MAX_UNALIGNED_BUF_SIZE; + roffset += SCIF_MAX_UNALIGNED_BUF_SIZE; + len -= SCIF_MAX_UNALIGNED_BUF_SIZE; + } + } + err = scif_rma_copy(epd, 0, (u64)addr, len, + roffset, flags, SCIF_LOCAL_TO_REMOTE, true); +vwriteto_err: + return err; +} +EXPORT_SYMBOL_GPL(scif_vwriteto); diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c index b4bfbb08a8e3..00e5d6d66e7b 100644 --- a/drivers/misc/mic/scif/scif_epd.c +++ b/drivers/misc/mic/scif/scif_epd.c @@ -65,14 +65,14 @@ void scif_teardown_ep(void *endpt) void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held) { if (!eplock_held) - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); spin_lock(&ep->lock); ep->state = SCIFEP_ZOMBIE; spin_unlock(&ep->lock); list_add_tail(&ep->list, &scif_info.zombie); scif_info.nr_zombies++; if (!eplock_held) - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); schedule_work(&scif_info.misc_work); } @@ -81,16 +81,15 @@ static struct scif_endpt *scif_find_listen_ep(u16 port) struct scif_endpt *ep = NULL; struct list_head *pos, *tmpq; - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); list_for_each_safe(pos, tmpq, &scif_info.listen) { ep = list_entry(pos, struct scif_endpt, list); if (ep->port.port == port) { - spin_lock(&ep->lock); - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); return ep; } } - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); return NULL; } @@ -99,14 +98,17 @@ void scif_cleanup_zombie_epd(void) struct list_head *pos, *tmpq; struct scif_endpt *ep; - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); list_for_each_safe(pos, tmpq, &scif_info.zombie) { ep = list_entry(pos, struct scif_endpt, list); - list_del(pos); - scif_info.nr_zombies--; - kfree(ep); + if (scif_rma_ep_can_uninit(ep)) { + list_del(pos); + scif_info.nr_zombies--; + put_iova_domain(&ep->rma_info.iovad); + kfree(ep); + } } - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); } /** @@ -137,6 +139,8 @@ void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg) if (!ep) /* Send reject due to no listening ports */ goto conreq_sendrej_free; + else + spin_lock(&ep->lock); if (ep->backlog <= ep->conreqcnt) { /* Send reject due to too many pending requests */ diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h index 331322a25213..1771d7a9b8d0 100644 --- a/drivers/misc/mic/scif/scif_epd.h +++ b/drivers/misc/mic/scif/scif_epd.h @@ -96,7 +96,11 @@ struct scif_endpt_qp_info { * @conn_port: Connection port * @conn_err: Errors during connection * @conn_async_state: Async connection + * @conn_pend_wq: Used by poll while waiting for incoming connections * @conn_list: List of async connection requests + * @rma_info: Information for triggering SCIF RMA and DMA operations + * @mmu_list: link to list of MMU notifier cleanup work + * @anon: anonymous file for use in kernel mode scif poll */ struct scif_endpt { enum scif_epd_state state; @@ -125,7 +129,11 @@ struct scif_endpt { struct scif_port_id conn_port; int conn_err; int conn_async_state; + wait_queue_head_t conn_pend_wq; struct list_head conn_list; + struct scif_endpt_rma_info rma_info; + struct list_head mmu_list; + struct file *anon; }; static inline int scifdev_alive(struct scif_endpt *ep) @@ -133,6 +141,43 @@ static inline int scifdev_alive(struct scif_endpt *ep) return _scifdev_alive(ep->remote_dev); } +/* + * scif_verify_epd: + * ep: SCIF endpoint + * + * Checks several generic error conditions and returns the + * appropriate error. + */ +static inline int scif_verify_epd(struct scif_endpt *ep) +{ + if (ep->state == SCIFEP_DISCONNECTED) + return -ECONNRESET; + + if (ep->state != SCIFEP_CONNECTED) + return -ENOTCONN; + + if (!scifdev_alive(ep)) + return -ENODEV; + + return 0; +} + +static inline int scif_anon_inode_getfile(scif_epd_t epd) +{ + epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0); + if (IS_ERR(epd->anon)) + return PTR_ERR(epd->anon); + return 0; +} + +static inline void scif_anon_inode_fput(scif_epd_t epd) +{ + if (epd->anon) { + fput(epd->anon); + epd->anon = NULL; + } +} + void scif_cleanup_zombie_epd(void); void scif_teardown_ep(void *endpt); void scif_cleanup_ep_qp(struct scif_endpt *ep); @@ -157,4 +202,9 @@ void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg); void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg); int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block); int __scif_flush(scif_epd_t epd); +int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd); +unsigned int __scif_pollfd(struct file *f, poll_table *wait, + struct scif_endpt *ep); +int __scif_pin_pages(void *addr, size_t len, int *out_prot, + int map_flags, scif_pinned_pages_t *pages); #endif /* SCIF_EPD_H */ diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c index eccf7e7135f9..f7e826142a72 100644 --- a/drivers/misc/mic/scif/scif_fd.c +++ b/drivers/misc/mic/scif/scif_fd.c @@ -34,6 +34,20 @@ static int scif_fdclose(struct inode *inode, struct file *f) return scif_close(priv); } +static int scif_fdmmap(struct file *f, struct vm_area_struct *vma) +{ + struct scif_endpt *priv = f->private_data; + + return scif_mmap(vma, priv); +} + +static unsigned int scif_fdpoll(struct file *f, poll_table *wait) +{ + struct scif_endpt *priv = f->private_data; + + return __scif_pollfd(f, wait, priv); +} + static int scif_fdflush(struct file *f, fl_owner_t id) { struct scif_endpt *ep = f->private_data; @@ -140,12 +154,12 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) * Add to the list of user mode eps where the second half * of the accept is not yet completed. */ - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept); list_add_tail(&((*ep)->liacceptlist), &priv->li_accept); (*ep)->listenep = priv; priv->acceptcnt++; - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); return 0; } @@ -163,7 +177,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EFAULT; /* Remove form the user accept queue */ - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); list_for_each_safe(pos, tmpq, &scif_info.uaccept) { tmpep = list_entry(pos, struct scif_endpt, miacceptlist); @@ -175,7 +189,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) } if (!fep) { - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); return -ENOENT; } @@ -190,9 +204,10 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) } } - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); /* Free the resources automatically created from the open. */ + scif_anon_inode_fput(priv); scif_teardown_ep(priv); scif_add_epd_to_zombie_list(priv, !SCIF_EPLOCK_HELD); f->private_data = newep; @@ -290,6 +305,157 @@ getnodes_err1: getnodes_err2: return err; } + case SCIF_REG: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_reg reg; + off_t ret; + + if (copy_from_user(®, argp, sizeof(reg))) { + err = -EFAULT; + goto reg_err; + } + if (reg.flags & SCIF_MAP_KERNEL) { + err = -EINVAL; + goto reg_err; + } + ret = scif_register(priv, (void *)reg.addr, reg.len, + reg.offset, reg.prot, reg.flags); + if (ret < 0) { + err = (int)ret; + goto reg_err; + } + + if (copy_to_user(&((struct scifioctl_reg __user *)argp) + ->out_offset, &ret, sizeof(reg.out_offset))) { + err = -EFAULT; + goto reg_err; + } + err = 0; +reg_err: + scif_err_debug(err, "scif_register"); + return err; + } + case SCIF_UNREG: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_unreg unreg; + + if (copy_from_user(&unreg, argp, sizeof(unreg))) { + err = -EFAULT; + goto unreg_err; + } + err = scif_unregister(priv, unreg.offset, unreg.len); +unreg_err: + scif_err_debug(err, "scif_unregister"); + return err; + } + case SCIF_READFROM: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto readfrom_err; + } + err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset, + copy.flags); +readfrom_err: + scif_err_debug(err, "scif_readfrom"); + return err; + } + case SCIF_WRITETO: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto writeto_err; + } + err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset, + copy.flags); +writeto_err: + scif_err_debug(err, "scif_writeto"); + return err; + } + case SCIF_VREADFROM: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto vreadfrom_err; + } + err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len, + copy.roffset, copy.flags); +vreadfrom_err: + scif_err_debug(err, "scif_vreadfrom"); + return err; + } + case SCIF_VWRITETO: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto vwriteto_err; + } + err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len, + copy.roffset, copy.flags); +vwriteto_err: + scif_err_debug(err, "scif_vwriteto"); + return err; + } + case SCIF_FENCE_MARK: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_fence_mark mark; + int tmp_mark = 0; + + if (copy_from_user(&mark, argp, sizeof(mark))) { + err = -EFAULT; + goto fence_mark_err; + } + err = scif_fence_mark(priv, mark.flags, &tmp_mark); + if (err) + goto fence_mark_err; + if (copy_to_user((void __user *)mark.mark, &tmp_mark, + sizeof(tmp_mark))) { + err = -EFAULT; + goto fence_mark_err; + } +fence_mark_err: + scif_err_debug(err, "scif_fence_mark"); + return err; + } + case SCIF_FENCE_WAIT: + { + struct scif_endpt *priv = f->private_data; + + err = scif_fence_wait(priv, arg); + scif_err_debug(err, "scif_fence_wait"); + return err; + } + case SCIF_FENCE_SIGNAL: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_fence_signal signal; + + if (copy_from_user(&signal, argp, sizeof(signal))) { + err = -EFAULT; + goto fence_signal_err; + } + + err = scif_fence_signal(priv, signal.loff, signal.lval, + signal.roff, signal.rval, signal.flags); +fence_signal_err: + scif_err_debug(err, "scif_fence_signal"); + return err; + } } return -EINVAL; } @@ -298,6 +464,8 @@ const struct file_operations scif_fops = { .open = scif_fdopen, .release = scif_fdclose, .unlocked_ioctl = scif_fdioctl, + .mmap = scif_fdmmap, + .poll = scif_fdpoll, .flush = scif_fdflush, .owner = THIS_MODULE, }; diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c new file mode 100644 index 000000000000..7f2c96f57066 --- /dev/null +++ b/drivers/misc/mic/scif/scif_fence.c @@ -0,0 +1,771 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ + +#include "scif_main.h" + +/** + * scif_recv_mark: Handle SCIF_MARK request + * @msg: Interrupt message + * + * The peer has requested a mark. + */ +void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + int mark, err; + + err = _scif_fence_mark(ep, &mark); + if (err) + msg->uop = SCIF_MARK_NACK; + else + msg->uop = SCIF_MARK_ACK; + msg->payload[0] = ep->remote_ep; + msg->payload[2] = mark; + scif_nodeqp_send(ep->remote_dev, msg); +} + +/** + * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a SCIF_MARK message. + */ +void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_fence_info *fence_req = + (struct scif_fence_info *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + if (msg->uop == SCIF_MARK_ACK) { + fence_req->state = OP_COMPLETED; + fence_req->dma_mark = (int)msg->payload[2]; + } else { + fence_req->state = OP_FAILED; + } + mutex_unlock(&ep->rma_info.rma_lock); + complete(&fence_req->comp); +} + +/** + * scif_recv_wait: Handle SCIF_WAIT request + * @msg: Interrupt message + * + * The peer has requested waiting on a fence. + */ +void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_remote_fence_info *fence; + + /* + * Allocate structure for remote fence information and + * send a NACK if the allocation failed. The peer will + * return ENOMEM upon receiving a NACK. + */ + fence = kmalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) { + msg->payload[0] = ep->remote_ep; + msg->uop = SCIF_WAIT_NACK; + scif_nodeqp_send(ep->remote_dev, msg); + return; + } + + /* Prepare the fence request */ + memcpy(&fence->msg, msg, sizeof(struct scifmsg)); + INIT_LIST_HEAD(&fence->list); + + /* Insert to the global remote fence request list */ + mutex_lock(&scif_info.fencelock); + atomic_inc(&ep->rma_info.fence_refcount); + list_add_tail(&fence->list, &scif_info.fence); + mutex_unlock(&scif_info.fencelock); + + schedule_work(&scif_info.misc_work); +} + +/** + * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a SCIF_WAIT message. + */ +void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_fence_info *fence_req = + (struct scif_fence_info *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + if (msg->uop == SCIF_WAIT_ACK) + fence_req->state = OP_COMPLETED; + else + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + complete(&fence_req->comp); +} + +/** + * scif_recv_sig_local: Handle SCIF_SIG_LOCAL request + * @msg: Interrupt message + * + * The peer has requested a signal on a local offset. + */ +void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + int err; + + err = scif_prog_signal(ep, msg->payload[1], msg->payload[2], + SCIF_WINDOW_SELF); + if (err) + msg->uop = SCIF_SIG_NACK; + else + msg->uop = SCIF_SIG_ACK; + msg->payload[0] = ep->remote_ep; + scif_nodeqp_send(ep->remote_dev, msg); +} + +/** + * scif_recv_sig_remote: Handle SCIF_SIGNAL_REMOTE request + * @msg: Interrupt message + * + * The peer has requested a signal on a remote offset. + */ +void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + int err; + + err = scif_prog_signal(ep, msg->payload[1], msg->payload[2], + SCIF_WINDOW_PEER); + if (err) + msg->uop = SCIF_SIG_NACK; + else + msg->uop = SCIF_SIG_ACK; + msg->payload[0] = ep->remote_ep; + scif_nodeqp_send(ep->remote_dev, msg); +} + +/** + * scif_recv_sig_resp: Handle SCIF_SIG_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a signal request. + */ +void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_fence_info *fence_req = + (struct scif_fence_info *)msg->payload[3]; + + mutex_lock(&ep->rma_info.rma_lock); + if (msg->uop == SCIF_SIG_ACK) + fence_req->state = OP_COMPLETED; + else + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + complete(&fence_req->comp); +} + +static inline void *scif_get_local_va(off_t off, struct scif_window *window) +{ + struct page **pages = window->pinned_pages->pages; + int page_nr = (off - window->offset) >> PAGE_SHIFT; + off_t page_off = off & ~PAGE_MASK; + + return page_address(pages[page_nr]) + page_off; +} + +static void scif_prog_signal_cb(void *arg) +{ + struct scif_status *status = arg; + + dma_pool_free(status->ep->remote_dev->signal_pool, status, + status->src_dma_addr); +} + +static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct dma_chan *chan = ep->rma_info.dma_chan; + struct dma_device *ddev = chan->device; + bool x100 = !is_dma_copy_aligned(chan->device, 1, 1, 1); + struct dma_async_tx_descriptor *tx; + struct scif_status *status = NULL; + dma_addr_t src; + dma_cookie_t cookie; + int err; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE); + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto alloc_fail; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = (int)cookie; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto alloc_fail; + } + dma_async_issue_pending(chan); + if (x100) { + /* + * For X100 use the status descriptor to write the value to + * the destination. + */ + tx = ddev->device_prep_dma_imm_data(chan, dst, val, 0); + } else { + status = dma_pool_alloc(ep->remote_dev->signal_pool, GFP_KERNEL, + &src); + if (!status) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto alloc_fail; + } + status->val = val; + status->src_dma_addr = src; + status->ep = ep; + src += offsetof(struct scif_status, val); + tx = ddev->device_prep_dma_memcpy(chan, dst, src, sizeof(val), + DMA_PREP_INTERRUPT); + } + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto dma_fail; + } + if (!x100) { + tx->callback = scif_prog_signal_cb; + tx->callback_param = status; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = -EIO; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + goto dma_fail; + } + dma_async_issue_pending(chan); + return 0; +dma_fail: + if (!x100) + dma_pool_free(ep->remote_dev->signal_pool, status, + status->src_dma_addr); +alloc_fail: + return err; +} + +/* + * scif_prog_signal: + * @epd - Endpoint Descriptor + * @offset - registered address to write @val to + * @val - Value to be written at @offset + * @type - Type of the window. + * + * Arrange to write a value to the registered offset after ensuring that the + * offset provided is indeed valid. + */ +int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val, + enum scif_window_type type) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_window *window = NULL; + struct scif_rma_req req; + dma_addr_t dst_dma_addr; + int err; + + mutex_lock(&ep->rma_info.rma_lock); + req.out_window = &window; + req.offset = offset; + req.nr_bytes = sizeof(u64); + req.prot = SCIF_PROT_WRITE; + req.type = SCIF_WINDOW_SINGLE; + if (type == SCIF_WINDOW_SELF) + req.head = &ep->rma_info.reg_list; + else + req.head = &ep->rma_info.remote_reg_list; + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + goto unlock_ret; + } + + if (scif_is_mgmt_node() && scifdev_self(ep->remote_dev)) { + u64 *dst_virt; + + if (type == SCIF_WINDOW_SELF) + dst_virt = scif_get_local_va(offset, window); + else + dst_virt = + scif_get_local_va(offset, (struct scif_window *) + window->peer_window); + *dst_virt = val; + } else { + dst_dma_addr = __scif_off_to_dma_addr(window, offset); + err = _scif_prog_signal(epd, dst_dma_addr, val); + } +unlock_ret: + mutex_unlock(&ep->rma_info.rma_lock); + return err; +} + +static int _scif_fence_wait(scif_epd_t epd, int mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + dma_cookie_t cookie = mark & ~SCIF_REMOTE_FENCE; + int err; + + /* Wait for DMA callback in scif_fence_mark_cb(..) */ + err = wait_event_interruptible_timeout(ep->rma_info.markwq, + dma_async_is_tx_complete( + ep->rma_info.dma_chan, + cookie, NULL, NULL) == + DMA_COMPLETE, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err) + err = -ETIMEDOUT; + else if (err > 0) + err = 0; + return err; +} + +/** + * scif_rma_handle_remote_fences: + * + * This routine services remote fence requests. + */ +void scif_rma_handle_remote_fences(void) +{ + struct list_head *item, *tmp; + struct scif_remote_fence_info *fence; + struct scif_endpt *ep; + int mark, err; + + might_sleep(); + mutex_lock(&scif_info.fencelock); + list_for_each_safe(item, tmp, &scif_info.fence) { + fence = list_entry(item, struct scif_remote_fence_info, + list); + /* Remove fence from global list */ + list_del(&fence->list); + + /* Initiate the fence operation */ + ep = (struct scif_endpt *)fence->msg.payload[0]; + mark = fence->msg.payload[2]; + err = _scif_fence_wait(ep, mark); + if (err) + fence->msg.uop = SCIF_WAIT_NACK; + else + fence->msg.uop = SCIF_WAIT_ACK; + fence->msg.payload[0] = ep->remote_ep; + scif_nodeqp_send(ep->remote_dev, &fence->msg); + kfree(fence); + if (!atomic_sub_return(1, &ep->rma_info.fence_refcount)) + schedule_work(&scif_info.misc_work); + } + mutex_unlock(&scif_info.fencelock); +} + +static int _scif_send_fence(scif_epd_t epd, int uop, int mark, int *out_mark) +{ + int err; + struct scifmsg msg; + struct scif_fence_info *fence_req; + struct scif_endpt *ep = (struct scif_endpt *)epd; + + fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL); + if (!fence_req) { + err = -ENOMEM; + goto error; + } + + fence_req->state = OP_IN_PROGRESS; + init_completion(&fence_req->comp); + + msg.src = ep->port; + msg.uop = uop; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = (u64)fence_req; + if (uop == SCIF_WAIT) + msg.payload[2] = mark; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + if (err) + goto error_free; +retry: + /* Wait for a SCIF_WAIT_(N)ACK message */ + err = wait_for_completion_timeout(&fence_req->comp, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + mutex_lock(&ep->rma_info.rma_lock); + if (err < 0) { + if (fence_req->state == OP_IN_PROGRESS) + fence_req->state = OP_FAILED; + } + if (fence_req->state == OP_FAILED && !err) + err = -ENOMEM; + if (uop == SCIF_MARK && fence_req->state == OP_COMPLETED) + *out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark; + mutex_unlock(&ep->rma_info.rma_lock); +error_free: + kfree(fence_req); +error: + return err; +} + +/** + * scif_send_fence_mark: + * @epd: end point descriptor. + * @out_mark: Output DMA mark reported by peer. + * + * Send a remote fence mark request. + */ +static int scif_send_fence_mark(scif_epd_t epd, int *out_mark) +{ + return _scif_send_fence(epd, SCIF_MARK, 0, out_mark); +} + +/** + * scif_send_fence_wait: + * @epd: end point descriptor. + * @mark: DMA mark to wait for. + * + * Send a remote fence wait request. + */ +static int scif_send_fence_wait(scif_epd_t epd, int mark) +{ + return _scif_send_fence(epd, SCIF_WAIT, mark, NULL); +} + +static int _scif_send_fence_signal_wait(struct scif_endpt *ep, + struct scif_fence_info *fence_req) +{ + int err; + +retry: + /* Wait for a SCIF_SIG_(N)ACK message */ + err = wait_for_completion_timeout(&fence_req->comp, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + if (err < 0) { + mutex_lock(&ep->rma_info.rma_lock); + if (fence_req->state == OP_IN_PROGRESS) + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + } + if (fence_req->state == OP_FAILED && !err) + err = -ENXIO; + return err; +} + +/** + * scif_send_fence_signal: + * @epd - endpoint descriptor + * @loff - local offset + * @lval - local value to write to loffset + * @roff - remote offset + * @rval - remote value to write to roffset + * @flags - flags + * + * Sends a remote fence signal request + */ +static int scif_send_fence_signal(scif_epd_t epd, off_t roff, u64 rval, + off_t loff, u64 lval, int flags) +{ + int err = 0; + struct scifmsg msg; + struct scif_fence_info *fence_req; + struct scif_endpt *ep = (struct scif_endpt *)epd; + + fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL); + if (!fence_req) { + err = -ENOMEM; + goto error; + } + + fence_req->state = OP_IN_PROGRESS; + init_completion(&fence_req->comp); + msg.src = ep->port; + if (flags & SCIF_SIGNAL_LOCAL) { + msg.uop = SCIF_SIG_LOCAL; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = roff; + msg.payload[2] = rval; + msg.payload[3] = (u64)fence_req; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + if (err) + goto error_free; + err = _scif_send_fence_signal_wait(ep, fence_req); + if (err) + goto error_free; + } + fence_req->state = OP_IN_PROGRESS; + + if (flags & SCIF_SIGNAL_REMOTE) { + msg.uop = SCIF_SIG_REMOTE; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = loff; + msg.payload[2] = lval; + msg.payload[3] = (u64)fence_req; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + if (err) + goto error_free; + err = _scif_send_fence_signal_wait(ep, fence_req); + } +error_free: + kfree(fence_req); +error: + return err; +} + +static void scif_fence_mark_cb(void *arg) +{ + struct scif_endpt *ep = (struct scif_endpt *)arg; + + wake_up_interruptible(&ep->rma_info.markwq); + atomic_dec(&ep->rma_info.fence_refcount); +} + +/* + * _scif_fence_mark: + * + * @epd - endpoint descriptor + * Set up a mark for this endpoint and return the value of the mark. + */ +int _scif_fence_mark(scif_epd_t epd, int *mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct dma_chan *chan = ep->rma_info.dma_chan; + struct dma_device *ddev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_cookie_t cookie; + int err; + + tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_FENCE); + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = (int)cookie; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + dma_async_issue_pending(chan); + tx = ddev->device_prep_dma_interrupt(chan, DMA_PREP_INTERRUPT); + if (!tx) { + err = -ENOMEM; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + tx->callback = scif_fence_mark_cb; + tx->callback_param = ep; + *mark = cookie = tx->tx_submit(tx); + if (dma_submit_error(cookie)) { + err = (int)cookie; + dev_err(&ep->remote_dev->sdev->dev, "%s %d err %d\n", + __func__, __LINE__, err); + return err; + } + atomic_inc(&ep->rma_info.fence_refcount); + dma_async_issue_pending(chan); + return 0; +} + +#define SCIF_LOOPB_MAGIC_MARK 0xdead + +int scif_fence_mark(scif_epd_t epd, int flags, int *mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x\n", + ep, flags, *mark); + err = scif_verify_epd(ep); + if (err) + return err; + + /* Invalid flags? */ + if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* At least one of init self or peer RMA should be set */ + if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))) + return -EINVAL; + + /* Exactly one of init self or peer RMA should be set but not both */ + if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* + * Management node loopback does not need to use DMA. + * Return a valid mark to be symmetric. + */ + if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) { + *mark = SCIF_LOOPB_MAGIC_MARK; + return 0; + } + + if (flags & SCIF_FENCE_INIT_SELF) + err = _scif_fence_mark(epd, mark); + else + err = scif_send_fence_mark(ep, mark); + + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_mark: ep %p flags 0x%x mark 0x%x err %d\n", + ep, flags, *mark, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_fence_mark); + +int scif_fence_wait(scif_epd_t epd, int mark) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_wait: ep %p mark 0x%x\n", + ep, mark); + err = scif_verify_epd(ep); + if (err) + return err; + /* + * Management node loopback does not need to use DMA. + * The only valid mark provided is 0 so simply + * return success if the mark is valid. + */ + if (scifdev_self(ep->remote_dev) && scif_is_mgmt_node()) { + if (mark == SCIF_LOOPB_MAGIC_MARK) + return 0; + else + return -EINVAL; + } + if (mark & SCIF_REMOTE_FENCE) + err = scif_send_fence_wait(epd, mark); + else + err = _scif_fence_wait(epd, mark); + if (err < 0) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_fence_wait); + +int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, + off_t roff, u64 rval, int flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + int err = 0; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI fence_signal: ep %p loff 0x%lx lval 0x%llx roff 0x%lx rval 0x%llx flags 0x%x\n", + ep, loff, lval, roff, rval, flags); + err = scif_verify_epd(ep); + if (err) + return err; + + /* Invalid flags? */ + if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER | + SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)) + return -EINVAL; + + /* At least one of init self or peer RMA should be set */ + if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))) + return -EINVAL; + + /* Exactly one of init self or peer RMA should be set but not both */ + if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */ + if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))) + return -EINVAL; + + /* Only Dword offsets allowed */ + if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(u32) - 1))) + return -EINVAL; + + /* Only Dword aligned offsets allowed */ + if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(u32) - 1))) + return -EINVAL; + + if (flags & SCIF_FENCE_INIT_PEER) { + err = scif_send_fence_signal(epd, roff, rval, loff, + lval, flags); + } else { + /* Local Signal in Local RAS */ + if (flags & SCIF_SIGNAL_LOCAL) { + err = scif_prog_signal(epd, loff, lval, + SCIF_WINDOW_SELF); + if (err) + goto error_ret; + } + + /* Signal in Remote RAS */ + if (flags & SCIF_SIGNAL_REMOTE) + err = scif_prog_signal(epd, roff, + rval, SCIF_WINDOW_PEER); + } +error_ret: + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_fence_signal); diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c index 6ce851f5c7e6..36d847af1209 100644 --- a/drivers/misc/mic/scif/scif_main.c +++ b/drivers/misc/mic/scif/scif_main.c @@ -34,6 +34,7 @@ struct scif_info scif_info = { }; struct scif_dev *scif_dev; +struct kmem_cache *unaligned_cache; static atomic_t g_loopb_cnt; /* Runs in the context of intr_wq */ @@ -80,35 +81,6 @@ irqreturn_t scif_intr_handler(int irq, void *data) return IRQ_HANDLED; } -static int scif_peer_probe(struct scif_peer_dev *spdev) -{ - struct scif_dev *scifdev = &scif_dev[spdev->dnode]; - - mutex_lock(&scif_info.conflock); - scif_info.total++; - scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid); - mutex_unlock(&scif_info.conflock); - rcu_assign_pointer(scifdev->spdev, spdev); - - /* In the future SCIF kernel client devices will be added here */ - return 0; -} - -static void scif_peer_remove(struct scif_peer_dev *spdev) -{ - struct scif_dev *scifdev = &scif_dev[spdev->dnode]; - - /* In the future SCIF kernel client devices will be removed here */ - spdev = rcu_dereference(scifdev->spdev); - if (spdev) - RCU_INIT_POINTER(scifdev->spdev, NULL); - synchronize_rcu(); - - mutex_lock(&scif_info.conflock); - scif_info.total--; - mutex_unlock(&scif_info.conflock); -} - static void scif_qp_setup_handler(struct work_struct *work) { struct scif_dev *scifdev = container_of(work, struct scif_dev, @@ -139,20 +111,13 @@ static void scif_qp_setup_handler(struct work_struct *work) } } -static int scif_setup_scifdev(struct scif_hw_dev *sdev) +static int scif_setup_scifdev(void) { + /* We support a maximum of 129 SCIF nodes including the mgmt node */ +#define MAX_SCIF_NODES 129 int i; - u8 num_nodes; - - if (sdev->snode) { - struct mic_bootparam __iomem *bp = sdev->rdp; - - num_nodes = ioread8(&bp->tot_nodes); - } else { - struct mic_bootparam *bp = sdev->dp; + u8 num_nodes = MAX_SCIF_NODES; - num_nodes = bp->tot_nodes; - } scif_dev = kcalloc(num_nodes, sizeof(*scif_dev), GFP_KERNEL); if (!scif_dev) return -ENOMEM; @@ -163,7 +128,7 @@ static int scif_setup_scifdev(struct scif_hw_dev *sdev) scifdev->exit = OP_IDLE; init_waitqueue_head(&scifdev->disconn_wq); mutex_init(&scifdev->lock); - INIT_WORK(&scifdev->init_msg_work, scif_qp_response_ack); + INIT_WORK(&scifdev->peer_add_work, scif_add_peer_device); INIT_DELAYED_WORK(&scifdev->p2p_dwork, scif_poll_qp_state); INIT_DELAYED_WORK(&scifdev->qp_dwork, @@ -181,27 +146,21 @@ static void scif_destroy_scifdev(void) static int scif_probe(struct scif_hw_dev *sdev) { - struct scif_dev *scifdev; + struct scif_dev *scifdev = &scif_dev[sdev->dnode]; int rc; dev_set_drvdata(&sdev->dev, sdev); + scifdev->sdev = sdev; + if (1 == atomic_add_return(1, &g_loopb_cnt)) { - struct scif_dev *loopb_dev; + struct scif_dev *loopb_dev = &scif_dev[sdev->snode]; - rc = scif_setup_scifdev(sdev); - if (rc) - goto exit; - scifdev = &scif_dev[sdev->dnode]; - scifdev->sdev = sdev; - loopb_dev = &scif_dev[sdev->snode]; loopb_dev->sdev = sdev; rc = scif_setup_loopback_qp(loopb_dev); if (rc) - goto free_sdev; - } else { - scifdev = &scif_dev[sdev->dnode]; - scifdev->sdev = sdev; + goto exit; } + rc = scif_setup_intr_wq(scifdev); if (rc) goto destroy_loopb; @@ -237,8 +196,6 @@ destroy_intr: destroy_loopb: if (atomic_dec_and_test(&g_loopb_cnt)) scif_destroy_loopback_qp(&scif_dev[sdev->snode]); -free_sdev: - scif_destroy_scifdev(); exit: return rc; } @@ -290,13 +247,6 @@ static void scif_remove(struct scif_hw_dev *sdev) scifdev->sdev = NULL; } -static struct scif_peer_driver scif_peer_driver = { - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .probe = scif_peer_probe, - .remove = scif_peer_remove, -}; - static struct scif_hw_dev_id id_table[] = { { MIC_SCIF_DEV, SCIF_DEV_ANY_ID }, { 0 }, @@ -312,29 +262,54 @@ static struct scif_driver scif_driver = { static int _scif_init(void) { - spin_lock_init(&scif_info.eplock); + int rc; + + mutex_init(&scif_info.eplock); + spin_lock_init(&scif_info.rmalock); spin_lock_init(&scif_info.nb_connect_lock); spin_lock_init(&scif_info.port_lock); mutex_init(&scif_info.conflock); mutex_init(&scif_info.connlock); + mutex_init(&scif_info.fencelock); INIT_LIST_HEAD(&scif_info.uaccept); INIT_LIST_HEAD(&scif_info.listen); INIT_LIST_HEAD(&scif_info.zombie); INIT_LIST_HEAD(&scif_info.connected); INIT_LIST_HEAD(&scif_info.disconnected); + INIT_LIST_HEAD(&scif_info.rma); + INIT_LIST_HEAD(&scif_info.rma_tc); + INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup); + INIT_LIST_HEAD(&scif_info.fence); INIT_LIST_HEAD(&scif_info.nb_connect_list); init_waitqueue_head(&scif_info.exitwq); + scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT; scif_info.en_msg_log = 0; scif_info.p2p_enable = 1; + rc = scif_setup_scifdev(); + if (rc) + goto error; + unaligned_cache = kmem_cache_create("Unaligned_DMA", + SCIF_KMEM_UNALIGNED_BUF_SIZE, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!unaligned_cache) { + rc = -ENOMEM; + goto free_sdev; + } INIT_WORK(&scif_info.misc_work, scif_misc_handler); + INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler); INIT_WORK(&scif_info.conn_work, scif_conn_handler); idr_init(&scif_ports); return 0; +free_sdev: + scif_destroy_scifdev(); +error: + return rc; } static void _scif_exit(void) { idr_destroy(&scif_ports); + kmem_cache_destroy(unaligned_cache); scif_destroy_scifdev(); } @@ -344,15 +319,13 @@ static int __init scif_init(void) int rc; _scif_init(); + iova_cache_get(); rc = scif_peer_bus_init(); if (rc) goto exit; - rc = scif_peer_register_driver(&scif_peer_driver); - if (rc) - goto peer_bus_exit; rc = scif_register_driver(&scif_driver); if (rc) - goto unreg_scif_peer; + goto peer_bus_exit; rc = misc_register(mdev); if (rc) goto unreg_scif; @@ -360,8 +333,6 @@ static int __init scif_init(void) return 0; unreg_scif: scif_unregister_driver(&scif_driver); -unreg_scif_peer: - scif_peer_unregister_driver(&scif_peer_driver); peer_bus_exit: scif_peer_bus_exit(); exit: @@ -374,8 +345,8 @@ static void __exit scif_exit(void) scif_exit_debugfs(); misc_deregister(&scif_info.mdev); scif_unregister_driver(&scif_driver); - scif_peer_unregister_driver(&scif_peer_driver); scif_peer_bus_exit(); + iova_cache_put(); _scif_exit(); } diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h index 580bc63e1b23..a08f0b600a9e 100644 --- a/drivers/misc/mic/scif/scif_main.h +++ b/drivers/misc/mic/scif/scif_main.h @@ -22,15 +22,18 @@ #include <linux/pci.h> #include <linux/miscdevice.h> #include <linux/dmaengine.h> +#include <linux/iova.h> +#include <linux/anon_inodes.h> #include <linux/file.h> +#include <linux/vmalloc.h> #include <linux/scif.h> - #include "../common/mic_dev.h" #define SCIF_MGMT_NODE 0 #define SCIF_DEFAULT_WATCHDOG_TO 30 #define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ) #define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ) +#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000 /* * Generic state used for certain node QP message exchanges @@ -73,13 +76,21 @@ enum scif_msg_state { * @loopb_work: Used for submitting work to loopb_wq * @loopb_recv_q: List of messages received on the loopb_wq * @card_initiated_exit: set when the card has initiated the exit + * @rmalock: Synchronize access to RMA operations + * @fencelock: Synchronize access to list of remote fences requested. + * @rma: List of temporary registered windows to be destroyed. + * @rma_tc: List of temporary registered & cached Windows to be destroyed + * @fence: List of remote fence requests + * @mmu_notif_work: Work for registration caching MMU notifier workqueue + * @mmu_notif_cleanup: List of temporary cached windows for reg cache + * @rma_tc_limit: RMA temporary cache limit */ struct scif_info { u8 nodeid; u8 maxid; u8 total; u32 nr_zombies; - spinlock_t eplock; + struct mutex eplock; struct mutex connlock; spinlock_t nb_connect_lock; spinlock_t port_lock; @@ -102,6 +113,14 @@ struct scif_info { struct work_struct loopb_work; struct list_head loopb_recv_q; bool card_initiated_exit; + spinlock_t rmalock; + struct mutex fencelock; + struct list_head rma; + struct list_head rma_tc; + struct list_head fence; + struct work_struct mmu_notif_work; + struct list_head mmu_notif_cleanup; + unsigned long rma_tc_limit; }; /* @@ -139,7 +158,7 @@ struct scif_p2p_info { * @db: doorbell the peer will trigger to generate an interrupt on self * @rdb: Doorbell to trigger on the peer to generate an interrupt on the peer * @cookie: Cookie received while registering the interrupt handler - * init_msg_work: work scheduled for SCIF_INIT message processing + * @peer_add_work: Work for handling device_add for peer devices * @p2p_dwork: Delayed work to enable polling for P2P state * @qp_dwork: Delayed work for enabling polling for remote QP information * @p2p_retry: Number of times to retry polling of P2P state @@ -152,6 +171,8 @@ struct scif_p2p_info { * @disconn_rescnt: Keeps track of number of node remove requests sent * @exit: Status of exit message * @qp_dma_addr: Queue pair DMA address passed to the peer + * @dma_ch_idx: Round robin index for DMA channels + * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's */ struct scif_dev { u8 node; @@ -165,7 +186,7 @@ struct scif_dev { int db; int rdb; struct mic_irq *cookie; - struct work_struct init_msg_work; + struct work_struct peer_add_work; struct delayed_work p2p_dwork; struct delayed_work qp_dwork; int p2p_retry; @@ -178,17 +199,25 @@ struct scif_dev { atomic_t disconn_rescnt; enum scif_msg_state exit; dma_addr_t qp_dma_addr; + int dma_ch_idx; + struct dma_pool *signal_pool; }; +extern bool scif_reg_cache_enable; +extern bool scif_ulimit_check; extern struct scif_info scif_info; extern struct idr scif_ports; +extern struct bus_type scif_peer_bus; extern struct scif_dev *scif_dev; extern const struct file_operations scif_fops; +extern const struct file_operations scif_anon_fops; /* Size of the RB for the Node QP */ #define SCIF_NODE_QP_SIZE 0x10000 #include "scif_nodeqp.h" +#include "scif_rma.h" +#include "scif_rma_list.h" /* * scifdev_self: diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h index 20e50b4e19b2..3e86360ba5a6 100644 --- a/drivers/misc/mic/scif/scif_map.h +++ b/drivers/misc/mic/scif/scif_map.h @@ -80,7 +80,7 @@ scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev, size_t size) { if (!scifdev_self(scifdev)) { - if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr) + if (scifdev_is_p2p(scifdev)) local = local - scifdev->base_addr; dma_unmap_single(&scifdev->sdev->dev, local, size, DMA_BIDIRECTIONAL); @@ -110,4 +110,27 @@ scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev) sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt); } } + +static __always_inline int +scif_map_page(dma_addr_t *dma_handle, struct page *page, + struct scif_dev *scifdev) +{ + int err = 0; + + if (scifdev_self(scifdev)) { + *dma_handle = page_to_phys(page); + } else { + struct scif_hw_dev *sdev = scifdev->sdev; + *dma_handle = dma_map_page(&sdev->dev, + page, 0x0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&sdev->dev, *dma_handle)) + err = -ENOMEM; + else if (scifdev_is_p2p(scifdev)) + *dma_handle = *dma_handle + scifdev->base_addr; + } + if (err) + *dma_handle = 0; + return err; +} #endif /* SCIF_MAP_H */ diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c new file mode 100644 index 000000000000..49cb8f7b4672 --- /dev/null +++ b/drivers/misc/mic/scif/scif_mmap.c @@ -0,0 +1,699 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" + +/* + * struct scif_vma_info - Information about a remote memory mapping + * created via scif_mmap(..) + * @vma: VM area struct + * @list: link to list of active vmas + */ +struct scif_vma_info { + struct vm_area_struct *vma; + struct list_head list; +}; + +void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_rma_req req; + struct scif_window *window = NULL; + struct scif_window *recv_window = + (struct scif_window *)msg->payload[0]; + struct scif_endpt *ep; + + ep = (struct scif_endpt *)recv_window->ep; + req.out_window = &window; + req.offset = recv_window->offset; + req.prot = recv_window->prot; + req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT; + req.type = SCIF_WINDOW_FULL; + req.head = &ep->rma_info.reg_list; + msg->payload[0] = ep->remote_ep; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + if (scif_query_window(&req)) { + dev_err(&scifdev->sdev->dev, + "%s %d -ENXIO\n", __func__, __LINE__); + msg->uop = SCIF_UNREGISTER_ACK; + goto error; + } + + scif_put_window(window, window->nr_pages); + + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + ep->rma_info.async_list_del = 1; + list_del_init(&window->list); + scif_free_window_offset(ep, window, window->offset); + } +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (window && !window->ref_count) + scif_queue_for_cleanup(window, &scif_info.rma); +} + +/* + * Remove valid remote memory mappings created via scif_mmap(..) from the + * process address space since the remote node is lost + */ +static void __scif_zap_mmaps(struct scif_endpt *ep) +{ + struct list_head *item; + struct scif_vma_info *info; + struct vm_area_struct *vma; + unsigned long size; + + spin_lock(&ep->lock); + list_for_each(item, &ep->rma_info.vma_list) { + info = list_entry(item, struct scif_vma_info, list); + vma = info->vma; + size = vma->vm_end - vma->vm_start; + zap_vma_ptes(vma, vma->vm_start, size); + dev_dbg(scif_info.mdev.this_device, + "%s ep %p zap vma %p size 0x%lx\n", + __func__, ep, info->vma, size); + } + spin_unlock(&ep->lock); +} + +/* + * Traverse the list of endpoints for a particular remote node and + * zap valid remote memory mappings since the remote node is lost + */ +static void _scif_zap_mmaps(int node, struct list_head *head) +{ + struct scif_endpt *ep; + struct list_head *item; + + mutex_lock(&scif_info.connlock); + list_for_each(item, head) { + ep = list_entry(item, struct scif_endpt, list); + if (ep->remote_dev->node == node) + __scif_zap_mmaps(ep); + } + mutex_unlock(&scif_info.connlock); +} + +/* + * Wrapper for removing remote memory mappings for a particular node. This API + * is called by peer nodes as part of handling a lost node. + */ +void scif_zap_mmaps(int node) +{ + _scif_zap_mmaps(node, &scif_info.connected); + _scif_zap_mmaps(node, &scif_info.disconnected); +} + +/* + * This API is only called while handling a lost node: + * a) Remote node is dead. + * b) Remote memory mappings have been zapped + * So we can traverse the remote_reg_list without any locks. Since + * the window has not yet been unregistered we can drop the ref count + * and queue it to the cleanup thread. + */ +static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep) +{ + struct list_head *pos, *tmp; + struct scif_window *window; + + list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) { + window = list_entry(pos, struct scif_window, list); + if (window->ref_count) + scif_put_window(window, window->nr_pages); + else + dev_err(scif_info.mdev.this_device, + "%s %d unexpected\n", + __func__, __LINE__); + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + list_del_init(&window->list); + scif_queue_for_cleanup(window, &scif_info.rma); + } + } +} + +/* Cleanup remote registration lists for zombie endpoints */ +void scif_cleanup_rma_for_zombies(int node) +{ + struct scif_endpt *ep; + struct list_head *item; + + mutex_lock(&scif_info.eplock); + list_for_each(item, &scif_info.zombie) { + ep = list_entry(item, struct scif_endpt, list); + if (ep->remote_dev && ep->remote_dev->node == node) + __scif_cleanup_rma_for_zombies(ep); + } + mutex_unlock(&scif_info.eplock); + flush_work(&scif_info.misc_work); +} + +/* Insert the VMA into the per endpoint VMA list */ +static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma) +{ + struct scif_vma_info *info; + int err = 0; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + err = -ENOMEM; + goto done; + } + info->vma = vma; + spin_lock(&ep->lock); + list_add_tail(&info->list, &ep->rma_info.vma_list); + spin_unlock(&ep->lock); +done: + return err; +} + +/* Delete the VMA from the per endpoint VMA list */ +static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma) +{ + struct list_head *item; + struct scif_vma_info *info; + + spin_lock(&ep->lock); + list_for_each(item, &ep->rma_info.vma_list) { + info = list_entry(item, struct scif_vma_info, list); + if (info->vma == vma) { + list_del(&info->list); + kfree(info); + break; + } + } + spin_unlock(&ep->lock); +} + +static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep) +{ + struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev; + struct scif_hw_dev *sdev = scifdev->sdev; + phys_addr_t out_phys, apt_base = 0; + + /* + * If the DMA address is card relative then we need to add the + * aperture base for mmap to work correctly + */ + if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da) + apt_base = sdev->aper->pa; + out_phys = apt_base + phys; + return out_phys; +} + +int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, + struct scif_range **pages) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_rma_req req; + struct scif_window *window = NULL; + int nr_pages, err, i; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI get_pinned_pages: ep %p offset 0x%lx len 0x%lx\n", + ep, offset, len); + err = scif_verify_epd(ep); + if (err) + return err; + + if (!len || (offset < 0) || + (offset + len < offset) || + (ALIGN(offset, PAGE_SIZE) != offset) || + (ALIGN(len, PAGE_SIZE) != len)) + return -EINVAL; + + nr_pages = len >> PAGE_SHIFT; + + req.out_window = &window; + req.offset = offset; + req.prot = 0; + req.nr_bytes = len; + req.type = SCIF_WINDOW_SINGLE; + req.head = &ep->rma_info.remote_reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error; + } + + /* Allocate scif_range */ + *pages = kzalloc(sizeof(**pages), GFP_KERNEL); + if (!*pages) { + err = -ENOMEM; + goto error; + } + + /* Allocate phys addr array */ + (*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t)); + if (!((*pages)->phys_addr)) { + err = -ENOMEM; + goto error; + } + + if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) { + /* Allocate virtual address array */ + ((*pages)->va = scif_zalloc(nr_pages * sizeof(void *))); + if (!(*pages)->va) { + err = -ENOMEM; + goto error; + } + } + /* Populate the values */ + (*pages)->cookie = window; + (*pages)->nr_pages = nr_pages; + (*pages)->prot_flags = window->prot; + + for (i = 0; i < nr_pages; i++) { + (*pages)->phys_addr[i] = + __scif_off_to_dma_addr(window, offset + + (i * PAGE_SIZE)); + (*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i], + ep); + if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) + (*pages)->va[i] = + ep->remote_dev->sdev->aper->va + + (*pages)->phys_addr[i] - + ep->remote_dev->sdev->aper->pa; + } + + scif_get_window(window, nr_pages); +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (err) { + if (*pages) { + scif_free((*pages)->phys_addr, + nr_pages * sizeof(dma_addr_t)); + scif_free((*pages)->va, + nr_pages * sizeof(void *)); + kfree(*pages); + *pages = NULL; + } + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + } + return err; +} +EXPORT_SYMBOL_GPL(scif_get_pages); + +int scif_put_pages(struct scif_range *pages) +{ + struct scif_endpt *ep; + struct scif_window *window; + struct scifmsg msg; + + if (!pages || !pages->cookie) + return -EINVAL; + + window = pages->cookie; + + if (!window || window->magic != SCIFEP_MAGIC) + return -EINVAL; + + ep = (struct scif_endpt *)window->ep; + /* + * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the + * callee should be allowed to release references to the pages, + * else the endpoint was not connected in the first place, + * hence the ENOTCONN. + */ + if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED) + return -ENOTCONN; + + mutex_lock(&ep->rma_info.rma_lock); + + scif_put_window(window, pages->nr_pages); + + /* Initiate window destruction if ref count is zero */ + if (!window->ref_count) { + list_del(&window->list); + mutex_unlock(&ep->rma_info.rma_lock); + scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan); + /* Inform the peer about this window being destroyed. */ + msg.uop = SCIF_MUNMAP; + msg.src = ep->port; + msg.payload[0] = window->peer_window; + /* No error handling for notification messages */ + scif_nodeqp_send(ep->remote_dev, &msg); + /* Destroy this window from the peer's registered AS */ + scif_destroy_remote_window(window); + } else { + mutex_unlock(&ep->rma_info.rma_lock); + } + + scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t)); + scif_free(pages->va, pages->nr_pages * sizeof(void *)); + kfree(pages); + return 0; +} +EXPORT_SYMBOL_GPL(scif_put_pages); + +/* + * scif_rma_list_mmap: + * + * Traverse the remote registration list starting from start_window: + * 1) Create VtoP mappings via remap_pfn_range(..) + * 2) Once step 1) and 2) complete successfully then traverse the range of + * windows again and bump the reference count. + * RMA lock must be held. + */ +static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset, + int nr_pages, struct vm_area_struct *vma) +{ + s64 end_offset, loop_offset = offset; + struct scif_window *window = start_window; + int loop_nr_pages, nr_pages_left = nr_pages; + struct scif_endpt *ep = (struct scif_endpt *)start_window->ep; + struct list_head *head = &ep->rma_info.remote_reg_list; + int i, err = 0; + dma_addr_t phys_addr; + struct scif_window_iter src_win_iter; + size_t contig_bytes = 0; + + might_sleep(); + list_for_each_entry_from(window, head, list) { + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min_t(int, + (end_offset - loop_offset) >> PAGE_SHIFT, + nr_pages_left); + scif_init_window_iter(window, &src_win_iter); + for (i = 0; i < loop_nr_pages; i++) { + phys_addr = scif_off_to_dma_addr(window, loop_offset, + &contig_bytes, + &src_win_iter); + phys_addr = scif_get_phys(phys_addr, ep); + err = remap_pfn_range(vma, + vma->vm_start + + loop_offset - offset, + phys_addr >> PAGE_SHIFT, + PAGE_SIZE, + vma->vm_page_prot); + if (err) + goto error; + loop_offset += PAGE_SIZE; + } + nr_pages_left -= loop_nr_pages; + if (!nr_pages_left) + break; + } + /* + * No more failures expected. Bump up the ref count for all + * the windows. Another traversal from start_window required + * for handling errors encountered across windows during + * remap_pfn_range(..). + */ + loop_offset = offset; + nr_pages_left = nr_pages; + window = start_window; + head = &ep->rma_info.remote_reg_list; + list_for_each_entry_from(window, head, list) { + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min_t(int, + (end_offset - loop_offset) >> PAGE_SHIFT, + nr_pages_left); + scif_get_window(window, loop_nr_pages); + nr_pages_left -= loop_nr_pages; + loop_offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages_left) + break; + } +error: + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} + +/* + * scif_rma_list_munmap: + * + * Traverse the remote registration list starting from window: + * 1) Decrement ref count. + * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer. + * RMA lock must be held. + */ +static void scif_rma_list_munmap(struct scif_window *start_window, + s64 offset, int nr_pages) +{ + struct scifmsg msg; + s64 loop_offset = offset, end_offset; + int loop_nr_pages, nr_pages_left = nr_pages; + struct scif_endpt *ep = (struct scif_endpt *)start_window->ep; + struct list_head *head = &ep->rma_info.remote_reg_list; + struct scif_window *window = start_window, *_window; + + msg.uop = SCIF_MUNMAP; + msg.src = ep->port; + loop_offset = offset; + nr_pages_left = nr_pages; + list_for_each_entry_safe_from(window, _window, head, list) { + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min_t(int, + (end_offset - loop_offset) >> PAGE_SHIFT, + nr_pages_left); + scif_put_window(window, loop_nr_pages); + if (!window->ref_count) { + struct scif_dev *rdev = ep->remote_dev; + + scif_drain_dma_intr(rdev->sdev, + ep->rma_info.dma_chan); + /* Inform the peer about this munmap */ + msg.payload[0] = window->peer_window; + /* No error handling for Notification messages. */ + scif_nodeqp_send(ep->remote_dev, &msg); + list_del(&window->list); + /* Destroy this window from the peer's registered AS */ + scif_destroy_remote_window(window); + } + nr_pages_left -= loop_nr_pages; + loop_offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages_left) + break; + } +} + +/* + * The private data field of each VMA used to mmap a remote window + * points to an instance of struct vma_pvt + */ +struct vma_pvt { + struct scif_endpt *ep; /* End point for remote window */ + s64 offset; /* offset within remote window */ + bool valid_offset; /* offset is valid only if the original + * mmap request was for a single page + * else the offset within the vma is + * the correct offset + */ + struct kref ref; +}; + +static void vma_pvt_release(struct kref *ref) +{ + struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref); + + kfree(vmapvt); +} + +/** + * scif_vma_open - VMA open driver callback + * @vma: VMM memory area. + * The open method is called by the kernel to allow the subsystem implementing + * the VMA to initialize the area. This method is invoked any time a new + * reference to the VMA is made (when a process forks, for example). + * The one exception happens when the VMA is first created by mmap; + * in this case, the driver's mmap method is called instead. + * This function is also invoked when an existing VMA is split by the kernel + * due to a call to munmap on a subset of the VMA resulting in two VMAs. + * The kernel invokes this function only on one of the two VMAs. + */ +static void scif_vma_open(struct vm_area_struct *vma) +{ + struct vma_pvt *vmapvt = vma->vm_private_data; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n", + vma->vm_start, vma->vm_end); + scif_insert_vma(vmapvt->ep, vma); + kref_get(&vmapvt->ref); +} + +/** + * scif_munmap - VMA close driver callback. + * @vma: VMM memory area. + * When an area is destroyed, the kernel calls its close operation. + * Note that there's no usage count associated with VMA's; the area + * is opened and closed exactly once by each process that uses it. + */ +static void scif_munmap(struct vm_area_struct *vma) +{ + struct scif_endpt *ep; + struct vma_pvt *vmapvt = vma->vm_private_data; + int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + s64 offset; + struct scif_rma_req req; + struct scif_window *window = NULL; + int err; + + might_sleep(); + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n", + vma->vm_start, vma->vm_end); + ep = vmapvt->ep; + offset = vmapvt->valid_offset ? vmapvt->offset : + (vma->vm_pgoff) << PAGE_SHIFT; + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n", + ep, nr_pages, offset); + req.out_window = &window; + req.offset = offset; + req.nr_bytes = vma->vm_end - vma->vm_start; + req.prot = vma->vm_flags & (VM_READ | VM_WRITE); + req.type = SCIF_WINDOW_PARTIAL; + req.head = &ep->rma_info.remote_reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + + err = scif_query_window(&req); + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + else + scif_rma_list_munmap(window, offset, nr_pages); + + mutex_unlock(&ep->rma_info.rma_lock); + /* + * The kernel probably zeroes these out but we still want + * to clean up our own mess just in case. + */ + vma->vm_ops = NULL; + vma->vm_private_data = NULL; + kref_put(&vmapvt->ref, vma_pvt_release); + scif_delete_vma(ep, vma); +} + +static const struct vm_operations_struct scif_vm_ops = { + .open = scif_vma_open, + .close = scif_munmap, +}; + +/** + * scif_mmap - Map pages in virtual address space to a remote window. + * @vma: VMM memory area. + * @epd: endpoint descriptor + * + * Return: Upon successful completion, scif_mmap() returns zero + * else an apt error is returned as documented in scif.h + */ +int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd) +{ + struct scif_rma_req req; + struct scif_window *window = NULL; + struct scif_endpt *ep = (struct scif_endpt *)epd; + s64 start_offset = vma->vm_pgoff << PAGE_SHIFT; + int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int err; + struct vma_pvt *vmapvt; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n", + ep, start_offset, nr_pages); + err = scif_verify_epd(ep); + if (err) + return err; + + might_sleep(); + + err = scif_insert_vma(ep, vma); + if (err) + return err; + + vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL); + if (!vmapvt) { + scif_delete_vma(ep, vma); + return -ENOMEM; + } + + vmapvt->ep = ep; + kref_init(&vmapvt->ref); + + req.out_window = &window; + req.offset = start_offset; + req.nr_bytes = vma->vm_end - vma->vm_start; + req.prot = vma->vm_flags & (VM_READ | VM_WRITE); + req.type = SCIF_WINDOW_PARTIAL; + req.head = &ep->rma_info.remote_reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unlock; + } + + /* Default prot for loopback */ + if (!scifdev_self(ep->remote_dev)) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + /* + * VM_DONTCOPY - Do not copy this vma on fork + * VM_DONTEXPAND - Cannot expand with mremap() + * VM_RESERVED - Count as reserved_vm like IO + * VM_PFNMAP - Page-ranges managed without "struct page" + * VM_IO - Memory mapped I/O or similar + * + * We do not want to copy this VMA automatically on a fork(), + * expand this VMA due to mremap() or swap out these pages since + * the VMA is actually backed by physical pages in the remote + * node's physical memory and not via a struct page. + */ + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; + + if (!scifdev_self(ep->remote_dev)) + vma->vm_flags |= VM_IO | VM_PFNMAP; + + /* Map this range of windows */ + err = scif_rma_list_mmap(window, start_offset, nr_pages, vma); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unlock; + } + /* Set up the driver call back */ + vma->vm_ops = &scif_vm_ops; + vma->vm_private_data = vmapvt; +error_unlock: + mutex_unlock(&ep->rma_info.rma_lock); + if (err) { + kfree(vmapvt); + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + scif_delete_vma(ep, vma); + } + return err; +} diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c index 9b4c5382d6a7..79f26a02a1cb 100644 --- a/drivers/misc/mic/scif/scif_nm.c +++ b/drivers/misc/mic/scif/scif_nm.c @@ -34,6 +34,7 @@ static void scif_invalidate_ep(int node) list_for_each_safe(pos, tmpq, &scif_info.disconnected) { ep = list_entry(pos, struct scif_endpt, list); if (ep->remote_dev->node == node) { + scif_unmap_all_windows(ep); spin_lock(&ep->lock); scif_cleanup_ep_qp(ep); spin_unlock(&ep->lock); @@ -50,6 +51,7 @@ static void scif_invalidate_ep(int node) wake_up_interruptible(&ep->sendwq); wake_up_interruptible(&ep->recvwq); spin_unlock(&ep->lock); + scif_unmap_all_windows(ep); } } mutex_unlock(&scif_info.connlock); @@ -61,8 +63,8 @@ void scif_free_qp(struct scif_dev *scifdev) if (!qp) return; - scif_free_coherent((void *)qp->inbound_q.rb_base, - qp->local_buf, scifdev, qp->inbound_q.size); + scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size); + kfree(qp->inbound_q.rb_base); scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp)); kfree(scifdev->qpairs); scifdev->qpairs = NULL; @@ -125,8 +127,12 @@ void scif_cleanup_scifdev(struct scif_dev *dev) } scif_destroy_intr_wq(dev); } + flush_work(&scif_info.misc_work); scif_destroy_p2p(dev); scif_invalidate_ep(dev->node); + scif_zap_mmaps(dev->node); + scif_cleanup_rma_for_zombies(dev->node); + flush_work(&scif_info.misc_work); scif_send_acks(dev); if (!dev->node && scif_info.card_initiated_exit) { /* @@ -147,14 +153,8 @@ void scif_cleanup_scifdev(struct scif_dev *dev) void scif_handle_remove_node(int node) { struct scif_dev *scifdev = &scif_dev[node]; - struct scif_peer_dev *spdev; - - rcu_read_lock(); - spdev = rcu_dereference(scifdev->spdev); - rcu_read_unlock(); - if (spdev) - scif_peer_unregister_device(spdev); - else + + if (scif_peer_unregister_device(scifdev)) scif_send_acks(scifdev); } diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c index 6dfdae3452d6..7180d566c74b 100644 --- a/drivers/misc/mic/scif/scif_nodeqp.c +++ b/drivers/misc/mic/scif/scif_nodeqp.c @@ -105,18 +105,22 @@ int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset, int local_size, struct scif_dev *scifdev) { - void *local_q = NULL; + void *local_q = qp->inbound_q.rb_base; int err = 0; u32 tmp_rd = 0; spin_lock_init(&qp->send_lock); spin_lock_init(&qp->recv_lock); - local_q = kzalloc(local_size, GFP_KERNEL); + /* Allocate rb only if not already allocated */ if (!local_q) { - err = -ENOMEM; - return err; + local_q = kzalloc(local_size, GFP_KERNEL); + if (!local_q) { + err = -ENOMEM; + return err; + } } + err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size); if (err) goto kfree; @@ -260,6 +264,11 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev, r_buf, get_count_order(remote_size)); /* + * Because the node QP may already be processing an INIT message, set + * the read pointer so the cached read offset isn't lost + */ + qp->remote_qp->local_read = qp->inbound_q.current_read_offset; + /* * resetup the inbound_q now that we know where the * inbound_read really is. */ @@ -529,27 +538,6 @@ static void scif_p2p_setup(void) } } -void scif_qp_response_ack(struct work_struct *work) -{ - struct scif_dev *scifdev = container_of(work, struct scif_dev, - init_msg_work); - struct scif_peer_dev *spdev; - - /* Drop the INIT message if it has already been received */ - if (_scifdev_alive(scifdev)) - return; - - spdev = scif_peer_register_device(scifdev); - if (IS_ERR(spdev)) - return; - - if (scif_is_mgmt_node()) { - mutex_lock(&scif_info.conflock); - scif_p2p_setup(); - mutex_unlock(&scif_info.conflock); - } -} - static char *message_types[] = {"BAD", "INIT", "EXIT", @@ -568,7 +556,29 @@ static char *message_types[] = {"BAD", "DISCNT_ACK", "CLIENT_SENT", "CLIENT_RCVD", - "SCIF_GET_NODE_INFO"}; + "SCIF_GET_NODE_INFO", + "REGISTER", + "REGISTER_ACK", + "REGISTER_NACK", + "UNREGISTER", + "UNREGISTER_ACK", + "UNREGISTER_NACK", + "ALLOC_REQ", + "ALLOC_GNT", + "ALLOC_REJ", + "FREE_PHYS", + "FREE_VIRT", + "MUNMAP", + "MARK", + "MARK_ACK", + "MARK_NACK", + "WAIT", + "WAIT_ACK", + "WAIT_NACK", + "SIGNAL_LOCAL", + "SIGNAL_REMOTE", + "SIG_ACK", + "SIG_NACK"}; static void scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg, @@ -662,10 +672,16 @@ int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg) * * Work queue handler for servicing miscellaneous SCIF tasks. * Examples include: - * 1) Cleanup of zombie endpoints. + * 1) Remote fence requests. + * 2) Destruction of temporary registered windows + * created during scif_vreadfrom()/scif_vwriteto(). + * 3) Cleanup of zombie endpoints. */ void scif_misc_handler(struct work_struct *work) { + scif_rma_handle_remote_fences(); + scif_rma_destroy_windows(); + scif_rma_destroy_tcw_invalid(); scif_cleanup_zombie_epd(); } @@ -682,13 +698,14 @@ scif_init(struct scif_dev *scifdev, struct scifmsg *msg) * address to complete initializing the inbound_q. */ flush_delayed_work(&scifdev->qp_dwork); - /* - * Delegate the peer device registration to a workqueue, otherwise if - * SCIF client probe (called during peer device registration) calls - * scif_connect(..), it will block the message processing thread causing - * a deadlock. - */ - schedule_work(&scifdev->init_msg_work); + + scif_peer_register_device(scifdev); + + if (scif_is_mgmt_node()) { + mutex_lock(&scif_info.conflock); + scif_p2p_setup(); + mutex_unlock(&scif_info.conflock); + } } /** @@ -838,13 +855,13 @@ void scif_poll_qp_state(struct work_struct *work) msecs_to_jiffies(SCIF_NODE_QP_TIMEOUT)); return; } - scif_peer_register_device(peerdev); return; timeout: dev_err(&peerdev->sdev->dev, "%s %d remote node %d offline, state = 0x%x\n", __func__, __LINE__, peerdev->node, qp->qp_state); qp->remote_qp->qp_state = SCIF_QP_OFFLINE; + scif_peer_unregister_device(peerdev); scif_cleanup_scifdev(peerdev); } @@ -894,6 +911,9 @@ scif_node_add_ack(struct scif_dev *scifdev, struct scifmsg *msg) goto local_error; peerdev->rdb = msg->payload[2]; qp->remote_qp->qp_state = SCIF_QP_ONLINE; + + scif_peer_register_device(peerdev); + schedule_delayed_work(&peerdev->p2p_dwork, 0); return; local_error: @@ -1007,6 +1027,27 @@ static void (*scif_intr_func[SCIF_MAX_MSG + 1]) scif_clientsend, /* SCIF_CLIENT_SENT */ scif_clientrcvd, /* SCIF_CLIENT_RCVD */ scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */ + scif_recv_reg, /* SCIF_REGISTER */ + scif_recv_reg_ack, /* SCIF_REGISTER_ACK */ + scif_recv_reg_nack, /* SCIF_REGISTER_NACK */ + scif_recv_unreg, /* SCIF_UNREGISTER */ + scif_recv_unreg_ack, /* SCIF_UNREGISTER_ACK */ + scif_recv_unreg_nack, /* SCIF_UNREGISTER_NACK */ + scif_alloc_req, /* SCIF_ALLOC_REQ */ + scif_alloc_gnt_rej, /* SCIF_ALLOC_GNT */ + scif_alloc_gnt_rej, /* SCIF_ALLOC_REJ */ + scif_free_virt, /* SCIF_FREE_VIRT */ + scif_recv_munmap, /* SCIF_MUNMAP */ + scif_recv_mark, /* SCIF_MARK */ + scif_recv_mark_resp, /* SCIF_MARK_ACK */ + scif_recv_mark_resp, /* SCIF_MARK_NACK */ + scif_recv_wait, /* SCIF_WAIT */ + scif_recv_wait_resp, /* SCIF_WAIT_ACK */ + scif_recv_wait_resp, /* SCIF_WAIT_NACK */ + scif_recv_sig_local, /* SCIF_SIG_LOCAL */ + scif_recv_sig_remote, /* SCIF_SIG_REMOTE */ + scif_recv_sig_resp, /* SCIF_SIG_ACK */ + scif_recv_sig_resp, /* SCIF_SIG_NACK */ }; /** @@ -1169,7 +1210,6 @@ int scif_setup_loopback_qp(struct scif_dev *scifdev) int err = 0; void *local_q; struct scif_qp *qp; - struct scif_peer_dev *spdev; err = scif_setup_intr_wq(scifdev); if (err) @@ -1216,15 +1256,11 @@ int scif_setup_loopback_qp(struct scif_dev *scifdev) &qp->local_write, local_q, get_count_order(SCIF_NODE_QP_SIZE)); scif_info.nodeid = scifdev->node; - spdev = scif_peer_register_device(scifdev); - if (IS_ERR(spdev)) { - err = PTR_ERR(spdev); - goto free_local_q; - } + + scif_peer_register_device(scifdev); + scif_info.loopb_dev = scifdev; return err; -free_local_q: - kfree(local_q); free_qpairs: kfree(scifdev->qpairs); destroy_loopb_wq: @@ -1243,13 +1279,7 @@ exit: */ int scif_destroy_loopback_qp(struct scif_dev *scifdev) { - struct scif_peer_dev *spdev; - - rcu_read_lock(); - spdev = rcu_dereference(scifdev->spdev); - rcu_read_unlock(); - if (spdev) - scif_peer_unregister_device(spdev); + scif_peer_unregister_device(scifdev); destroy_workqueue(scif_info.loopb_wq); scif_destroy_intr_wq(scifdev); kfree(scifdev->qpairs->outbound_q.rb_base); diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h index 6c0ed6783479..95896273138e 100644 --- a/drivers/misc/mic/scif/scif_nodeqp.h +++ b/drivers/misc/mic/scif/scif_nodeqp.h @@ -74,7 +74,28 @@ #define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */ #define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */ #define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/ -#define SCIF_MAX_MSG SCIF_GET_NODE_INFO +#define SCIF_REGISTER 19 /* Tell peer about a new registered window */ +#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */ +#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */ +#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */ +#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */ +#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */ +#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */ +#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */ +#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */ +#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */ +#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */ +#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */ +#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */ +#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */ +#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */ +#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */ +#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */ +#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */ +#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */ +#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */ +#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */ +#define SCIF_MAX_MSG SCIF_SIG_NACK /* * struct scifmsg - Node QP message format @@ -92,6 +113,24 @@ struct scifmsg { } __packed; /* + * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request + * the remote note to allocate memory + * + * phys_addr: Physical address of the buffer + * vaddr: Virtual address of the buffer + * size: Size of the buffer + * state: Current state + * allocwq: wait queue for status + */ +struct scif_allocmsg { + dma_addr_t phys_addr; + unsigned long vaddr; + size_t size; + enum scif_msg_state state; + wait_queue_head_t allocwq; +}; + +/* * struct scif_qp - Node Queue Pair * * Interesting structure -- a little difficult because we can only @@ -158,7 +197,6 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev, int scif_setup_loopback_qp(struct scif_dev *scifdev); int scif_destroy_loopback_qp(struct scif_dev *scifdev); void scif_poll_qp_state(struct work_struct *work); -void scif_qp_response_ack(struct work_struct *work); void scif_destroy_p2p(struct scif_dev *scifdev); void scif_send_exit(struct scif_dev *scifdev); static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev) diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c index 589ae9ad2501..6ffa3bdbd45b 100644 --- a/drivers/misc/mic/scif/scif_peer_bus.c +++ b/drivers/misc/mic/scif/scif_peer_bus.c @@ -24,93 +24,152 @@ dev_to_scif_peer(struct device *dev) return container_of(dev, struct scif_peer_dev, dev); } -static inline struct scif_peer_driver * -drv_to_scif_peer(struct device_driver *drv) -{ - return container_of(drv, struct scif_peer_driver, driver); -} +struct bus_type scif_peer_bus = { + .name = "scif_peer_bus", +}; -static int scif_peer_dev_match(struct device *dv, struct device_driver *dr) +static void scif_peer_release_dev(struct device *d) { - return !strncmp(dev_name(dv), dr->name, 4); + struct scif_peer_dev *sdev = dev_to_scif_peer(d); + struct scif_dev *scifdev = &scif_dev[sdev->dnode]; + + scif_cleanup_scifdev(scifdev); + kfree(sdev); } -static int scif_peer_dev_probe(struct device *d) +static int scif_peer_initialize_device(struct scif_dev *scifdev) { - struct scif_peer_dev *dev = dev_to_scif_peer(d); - struct scif_peer_driver *drv = drv_to_scif_peer(dev->dev.driver); + struct scif_peer_dev *spdev; + int ret; - return drv->probe(dev); -} + spdev = kzalloc(sizeof(*spdev), GFP_KERNEL); + if (!spdev) { + ret = -ENOMEM; + goto err; + } -static int scif_peer_dev_remove(struct device *d) -{ - struct scif_peer_dev *dev = dev_to_scif_peer(d); - struct scif_peer_driver *drv = drv_to_scif_peer(dev->dev.driver); + spdev->dev.parent = scifdev->sdev->dev.parent; + spdev->dev.release = scif_peer_release_dev; + spdev->dnode = scifdev->node; + spdev->dev.bus = &scif_peer_bus; + dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode); + + device_initialize(&spdev->dev); + get_device(&spdev->dev); + rcu_assign_pointer(scifdev->spdev, spdev); - drv->remove(dev); + mutex_lock(&scif_info.conflock); + scif_info.total++; + scif_info.maxid = max_t(u32, spdev->dnode, scif_info.maxid); + mutex_unlock(&scif_info.conflock); return 0; +err: + dev_err(&scifdev->sdev->dev, + "dnode %d: initialize_device rc %d\n", scifdev->node, ret); + return ret; } -static struct bus_type scif_peer_bus = { - .name = "scif_peer_bus", - .match = scif_peer_dev_match, - .probe = scif_peer_dev_probe, - .remove = scif_peer_dev_remove, -}; - -int scif_peer_register_driver(struct scif_peer_driver *driver) +static int scif_peer_add_device(struct scif_dev *scifdev) { - driver->driver.bus = &scif_peer_bus; - return driver_register(&driver->driver); + struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev); + char pool_name[16]; + int ret; + + ret = device_add(&spdev->dev); + put_device(&spdev->dev); + if (ret) { + dev_err(&scifdev->sdev->dev, + "dnode %d: peer device_add failed\n", scifdev->node); + goto put_spdev; + } + + scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode); + scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev, + sizeof(struct scif_status), 1, + 0); + if (!scifdev->signal_pool) { + dev_err(&scifdev->sdev->dev, + "dnode %d: dmam_pool_create failed\n", scifdev->node); + ret = -ENOMEM; + goto del_spdev; + } + dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode); + return 0; +del_spdev: + device_del(&spdev->dev); +put_spdev: + RCU_INIT_POINTER(scifdev->spdev, NULL); + synchronize_rcu(); + put_device(&spdev->dev); + + mutex_lock(&scif_info.conflock); + scif_info.total--; + mutex_unlock(&scif_info.conflock); + return ret; } -void scif_peer_unregister_driver(struct scif_peer_driver *driver) +void scif_add_peer_device(struct work_struct *work) { - driver_unregister(&driver->driver); + struct scif_dev *scifdev = container_of(work, struct scif_dev, + peer_add_work); + + scif_peer_add_device(scifdev); } -static void scif_peer_release_dev(struct device *d) +/* + * Peer device registration is split into a device_initialize and a device_add. + * The reason for doing this is as follows: First, peer device registration + * itself cannot be done in the message processing thread and must be delegated + * to another workqueue, otherwise if SCIF client probe, called during peer + * device registration, calls scif_connect(..), it will block the message + * processing thread causing a deadlock. Next, device_initialize is done in the + * "top-half" message processing thread and device_add in the "bottom-half" + * workqueue. If this is not done, SCIF_CNCT_REQ message processing executing + * concurrently with SCIF_INIT message processing is unable to get a reference + * on the peer device, thereby failing the connect request. + */ +void scif_peer_register_device(struct scif_dev *scifdev) { - struct scif_peer_dev *sdev = dev_to_scif_peer(d); - struct scif_dev *scifdev = &scif_dev[sdev->dnode]; + int ret; - scif_cleanup_scifdev(scifdev); - kfree(sdev); + mutex_lock(&scifdev->lock); + ret = scif_peer_initialize_device(scifdev); + if (ret) + goto exit; + schedule_work(&scifdev->peer_add_work); +exit: + mutex_unlock(&scifdev->lock); } -struct scif_peer_dev * -scif_peer_register_device(struct scif_dev *scifdev) +int scif_peer_unregister_device(struct scif_dev *scifdev) { - int ret; struct scif_peer_dev *spdev; - spdev = kzalloc(sizeof(*spdev), GFP_KERNEL); - if (!spdev) - return ERR_PTR(-ENOMEM); - - spdev->dev.parent = scifdev->sdev->dev.parent; - spdev->dev.release = scif_peer_release_dev; - spdev->dnode = scifdev->node; - spdev->dev.bus = &scif_peer_bus; + mutex_lock(&scifdev->lock); + /* Flush work to ensure device register is complete */ + flush_work(&scifdev->peer_add_work); - dev_set_name(&spdev->dev, "scif_peer-dev%u", spdev->dnode); /* - * device_register() causes the bus infrastructure to look for a - * matching driver. + * Continue holding scifdev->lock since theoretically unregister_device + * can be called simultaneously from multiple threads */ - ret = device_register(&spdev->dev); - if (ret) - goto free_spdev; - return spdev; -free_spdev: - kfree(spdev); - return ERR_PTR(ret); -} - -void scif_peer_unregister_device(struct scif_peer_dev *sdev) -{ - device_unregister(&sdev->dev); + spdev = rcu_dereference(scifdev->spdev); + if (!spdev) { + mutex_unlock(&scifdev->lock); + return -ENODEV; + } + + RCU_INIT_POINTER(scifdev->spdev, NULL); + synchronize_rcu(); + mutex_unlock(&scifdev->lock); + + dev_dbg(&spdev->dev, "Removing peer dnode %d\n", spdev->dnode); + device_unregister(&spdev->dev); + + mutex_lock(&scif_info.conflock); + scif_info.total--; + mutex_unlock(&scif_info.conflock); + return 0; } int scif_peer_bus_init(void) diff --git a/drivers/misc/mic/scif/scif_peer_bus.h b/drivers/misc/mic/scif/scif_peer_bus.h index 33f0dbb30152..a3b8dd2edaa5 100644 --- a/drivers/misc/mic/scif/scif_peer_bus.h +++ b/drivers/misc/mic/scif/scif_peer_bus.h @@ -19,47 +19,13 @@ #include <linux/device.h> #include <linux/mic_common.h> - -/* - * Peer devices show up as PCIe devices for the mgmt node but not the cards. - * The mgmt node discovers all the cards on the PCIe bus and informs the other - * cards about their peers. Upon notification of a peer a node adds a peer - * device to the peer bus to maintain symmetry in the way devices are - * discovered across all nodes in the SCIF network. - */ -/** - * scif_peer_dev - representation of a peer SCIF device - * @dev: underlying device - * @dnode - The destination node which this device will communicate with. - */ -struct scif_peer_dev { - struct device dev; - u8 dnode; -}; - -/** - * scif_peer_driver - operations for a scif_peer I/O driver - * @driver: underlying device driver (populate name and owner). - * @id_table: the ids serviced by this driver. - * @probe: the function to call when a device is found. Returns 0 or -errno. - * @remove: the function to call when a device is removed. - */ -struct scif_peer_driver { - struct device_driver driver; - const struct scif_peer_dev_id *id_table; - - int (*probe)(struct scif_peer_dev *dev); - void (*remove)(struct scif_peer_dev *dev); -}; +#include <linux/scif.h> struct scif_dev; -int scif_peer_register_driver(struct scif_peer_driver *driver); -void scif_peer_unregister_driver(struct scif_peer_driver *driver); - -struct scif_peer_dev *scif_peer_register_device(struct scif_dev *sdev); -void scif_peer_unregister_device(struct scif_peer_dev *sdev); - +void scif_add_peer_device(struct work_struct *work); +void scif_peer_register_device(struct scif_dev *sdev); +int scif_peer_unregister_device(struct scif_dev *scifdev); int scif_peer_bus_init(void); void scif_peer_bus_exit(void); #endif /* _SCIF_PEER_BUS_H */ diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c new file mode 100644 index 000000000000..e2889967036a --- /dev/null +++ b/drivers/misc/mic/scif/scif_rma.c @@ -0,0 +1,1770 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include <linux/dma_remapping.h> +#include <linux/pagemap.h> +#include "scif_main.h" +#include "scif_map.h" + +/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */ +#define SCIF_MAP_ULIMIT 0x40 + +bool scif_ulimit_check = 1; + +/** + * scif_rma_ep_init: + * @ep: end point + * + * Initialize RMA per EP data structures. + */ +void scif_rma_ep_init(struct scif_endpt *ep) +{ + struct scif_endpt_rma_info *rma = &ep->rma_info; + + mutex_init(&rma->rma_lock); + init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN, + SCIF_DMA_64BIT_PFN); + spin_lock_init(&rma->tc_lock); + mutex_init(&rma->mmn_lock); + INIT_LIST_HEAD(&rma->reg_list); + INIT_LIST_HEAD(&rma->remote_reg_list); + atomic_set(&rma->tw_refcount, 0); + atomic_set(&rma->tcw_refcount, 0); + atomic_set(&rma->tcw_total_pages, 0); + atomic_set(&rma->fence_refcount, 0); + + rma->async_list_del = 0; + rma->dma_chan = NULL; + INIT_LIST_HEAD(&rma->mmn_list); + INIT_LIST_HEAD(&rma->vma_list); + init_waitqueue_head(&rma->markwq); +} + +/** + * scif_rma_ep_can_uninit: + * @ep: end point + * + * Returns 1 if an endpoint can be uninitialized and 0 otherwise. + */ +int scif_rma_ep_can_uninit(struct scif_endpt *ep) +{ + int ret = 0; + + mutex_lock(&ep->rma_info.rma_lock); + /* Destroy RMA Info only if both lists are empty */ + if (list_empty(&ep->rma_info.reg_list) && + list_empty(&ep->rma_info.remote_reg_list) && + list_empty(&ep->rma_info.mmn_list) && + !atomic_read(&ep->rma_info.tw_refcount) && + !atomic_read(&ep->rma_info.tcw_refcount) && + !atomic_read(&ep->rma_info.fence_refcount)) + ret = 1; + mutex_unlock(&ep->rma_info.rma_lock); + return ret; +} + +/** + * scif_create_pinned_pages: + * @nr_pages: number of pages in window + * @prot: read/write protection + * + * Allocate and prepare a set of pinned pages. + */ +static struct scif_pinned_pages * +scif_create_pinned_pages(int nr_pages, int prot) +{ + struct scif_pinned_pages *pin; + + might_sleep(); + pin = scif_zalloc(sizeof(*pin)); + if (!pin) + goto error; + + pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages)); + if (!pin->pages) + goto error_free_pinned_pages; + + pin->prot = prot; + pin->magic = SCIFEP_MAGIC; + return pin; + +error_free_pinned_pages: + scif_free(pin, sizeof(*pin)); +error: + return NULL; +} + +/** + * scif_destroy_pinned_pages: + * @pin: A set of pinned pages. + * + * Deallocate resources for pinned pages. + */ +static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin) +{ + int j; + int writeable = pin->prot & SCIF_PROT_WRITE; + int kernel = SCIF_MAP_KERNEL & pin->map_flags; + + for (j = 0; j < pin->nr_pages; j++) { + if (pin->pages[j] && !kernel) { + if (writeable) + SetPageDirty(pin->pages[j]); + put_page(pin->pages[j]); + } + } + + scif_free(pin->pages, + pin->nr_pages * sizeof(*pin->pages)); + scif_free(pin, sizeof(*pin)); + return 0; +} + +/* + * scif_create_window: + * @ep: end point + * @nr_pages: number of pages + * @offset: registration offset + * @temp: true if a temporary window is being created + * + * Allocate and prepare a self registration window. + */ +struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages, + s64 offset, bool temp) +{ + struct scif_window *window; + + might_sleep(); + window = scif_zalloc(sizeof(*window)); + if (!window) + goto error; + + window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr)); + if (!window->dma_addr) + goto error_free_window; + + window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages)); + if (!window->num_pages) + goto error_free_window; + + window->offset = offset; + window->ep = (u64)ep; + window->magic = SCIFEP_MAGIC; + window->reg_state = OP_IDLE; + init_waitqueue_head(&window->regwq); + window->unreg_state = OP_IDLE; + init_waitqueue_head(&window->unregwq); + INIT_LIST_HEAD(&window->list); + window->type = SCIF_WINDOW_SELF; + window->temp = temp; + return window; + +error_free_window: + scif_free(window->dma_addr, + nr_pages * sizeof(*window->dma_addr)); + scif_free(window, sizeof(*window)); +error: + return NULL; +} + +/** + * scif_destroy_incomplete_window: + * @ep: end point + * @window: registration window + * + * Deallocate resources for self window. + */ +static void scif_destroy_incomplete_window(struct scif_endpt *ep, + struct scif_window *window) +{ + int err; + int nr_pages = window->nr_pages; + struct scif_allocmsg *alloc = &window->alloc_handle; + struct scifmsg msg; + +retry: + /* Wait for a SCIF_ALLOC_GNT/REJ message */ + err = wait_event_timeout(alloc->allocwq, + alloc->state != OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + + mutex_lock(&ep->rma_info.rma_lock); + if (alloc->state == OP_COMPLETED) { + msg.uop = SCIF_FREE_VIRT; + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = window->alloc_handle.vaddr; + msg.payload[2] = (u64)window; + msg.payload[3] = SCIF_REGISTER; + _scif_nodeqp_send(ep->remote_dev, &msg); + } + mutex_unlock(&ep->rma_info.rma_lock); + + scif_free_window_offset(ep, window, window->offset); + scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr)); + scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages)); + scif_free(window, sizeof(*window)); +} + +/** + * scif_unmap_window: + * @remote_dev: SCIF remote device + * @window: registration window + * + * Delete any DMA mappings created for a registered self window + */ +void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window) +{ + int j; + + if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) { + if (window->st) { + dma_unmap_sg(&remote_dev->sdev->dev, + window->st->sgl, window->st->nents, + DMA_BIDIRECTIONAL); + sg_free_table(window->st); + kfree(window->st); + window->st = NULL; + } + } else { + for (j = 0; j < window->nr_contig_chunks; j++) { + if (window->dma_addr[j]) { + scif_unmap_single(window->dma_addr[j], + remote_dev, + window->num_pages[j] << + PAGE_SHIFT); + window->dma_addr[j] = 0x0; + } + } + } +} + +static inline struct mm_struct *__scif_acquire_mm(void) +{ + if (scif_ulimit_check) + return get_task_mm(current); + return NULL; +} + +static inline void __scif_release_mm(struct mm_struct *mm) +{ + if (mm) + mmput(mm); +} + +static inline int +__scif_dec_pinned_vm_lock(struct mm_struct *mm, + int nr_pages, bool try_lock) +{ + if (!mm || !nr_pages || !scif_ulimit_check) + return 0; + if (try_lock) { + if (!down_write_trylock(&mm->mmap_sem)) { + dev_err(scif_info.mdev.this_device, + "%s %d err\n", __func__, __LINE__); + return -1; + } + } else { + down_write(&mm->mmap_sem); + } + mm->pinned_vm -= nr_pages; + up_write(&mm->mmap_sem); + return 0; +} + +static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm, + int nr_pages) +{ + unsigned long locked, lock_limit; + + if (!mm || !nr_pages || !scif_ulimit_check) + return 0; + + locked = nr_pages; + locked += mm->pinned_vm; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + dev_err(scif_info.mdev.this_device, + "locked(%lu) > lock_limit(%lu)\n", + locked, lock_limit); + return -ENOMEM; + } + mm->pinned_vm = locked; + return 0; +} + +/** + * scif_destroy_window: + * @ep: end point + * @window: registration window + * + * Deallocate resources for self window. + */ +int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window) +{ + int j; + struct scif_pinned_pages *pinned_pages = window->pinned_pages; + int nr_pages = window->nr_pages; + + might_sleep(); + if (!window->temp && window->mm) { + __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0); + __scif_release_mm(window->mm); + window->mm = NULL; + } + + scif_free_window_offset(ep, window, window->offset); + scif_unmap_window(ep->remote_dev, window); + /* + * Decrement references for this set of pinned pages from + * this window. + */ + j = atomic_sub_return(1, &pinned_pages->ref_count); + if (j < 0) + dev_err(scif_info.mdev.this_device, + "%s %d incorrect ref count %d\n", + __func__, __LINE__, j); + /* + * If the ref count for pinned_pages is zero then someone + * has already called scif_unpin_pages() for it and we should + * destroy the page cache. + */ + if (!j) + scif_destroy_pinned_pages(window->pinned_pages); + scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr)); + scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages)); + window->magic = 0; + scif_free(window, sizeof(*window)); + return 0; +} + +/** + * scif_create_remote_lookup: + * @remote_dev: SCIF remote device + * @window: remote window + * + * Allocate and prepare lookup entries for the remote + * end to copy over the physical addresses. + * Returns 0 on success and appropriate errno on failure. + */ +static int scif_create_remote_lookup(struct scif_dev *remote_dev, + struct scif_window *window) +{ + int i, j, err = 0; + int nr_pages = window->nr_pages; + bool vmalloc_dma_phys, vmalloc_num_pages; + + might_sleep(); + /* Map window */ + err = scif_map_single(&window->mapped_offset, + window, remote_dev, sizeof(*window)); + if (err) + goto error_window; + + /* Compute the number of lookup entries. 21 == 2MB Shift */ + window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE, + ((2) * 1024 * 1024)) >> 21; + + window->dma_addr_lookup.lookup = + scif_alloc_coherent(&window->dma_addr_lookup.offset, + remote_dev, window->nr_lookup * + sizeof(*window->dma_addr_lookup.lookup), + GFP_KERNEL | __GFP_ZERO); + if (!window->dma_addr_lookup.lookup) + goto error_window; + + window->num_pages_lookup.lookup = + scif_alloc_coherent(&window->num_pages_lookup.offset, + remote_dev, window->nr_lookup * + sizeof(*window->num_pages_lookup.lookup), + GFP_KERNEL | __GFP_ZERO); + if (!window->num_pages_lookup.lookup) + goto error_window; + + vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]); + vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]); + + /* Now map each of the pages containing physical addresses */ + for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) { + err = scif_map_page(&window->dma_addr_lookup.lookup[j], + vmalloc_dma_phys ? + vmalloc_to_page(&window->dma_addr[i]) : + virt_to_page(&window->dma_addr[i]), + remote_dev); + if (err) + goto error_window; + err = scif_map_page(&window->num_pages_lookup.lookup[j], + vmalloc_dma_phys ? + vmalloc_to_page(&window->num_pages[i]) : + virt_to_page(&window->num_pages[i]), + remote_dev); + if (err) + goto error_window; + } + return 0; +error_window: + return err; +} + +/** + * scif_destroy_remote_lookup: + * @remote_dev: SCIF remote device + * @window: remote window + * + * Destroy lookup entries used for the remote + * end to copy over the physical addresses. + */ +static void scif_destroy_remote_lookup(struct scif_dev *remote_dev, + struct scif_window *window) +{ + int i, j; + + if (window->nr_lookup) { + struct scif_rma_lookup *lup = &window->dma_addr_lookup; + struct scif_rma_lookup *npup = &window->num_pages_lookup; + + for (i = 0, j = 0; i < window->nr_pages; + i += SCIF_NR_ADDR_IN_PAGE, j++) { + if (lup->lookup && lup->lookup[j]) + scif_unmap_single(lup->lookup[j], + remote_dev, + PAGE_SIZE); + if (npup->lookup && npup->lookup[j]) + scif_unmap_single(npup->lookup[j], + remote_dev, + PAGE_SIZE); + } + if (lup->lookup) + scif_free_coherent(lup->lookup, lup->offset, + remote_dev, window->nr_lookup * + sizeof(*lup->lookup)); + if (npup->lookup) + scif_free_coherent(npup->lookup, npup->offset, + remote_dev, window->nr_lookup * + sizeof(*npup->lookup)); + if (window->mapped_offset) + scif_unmap_single(window->mapped_offset, + remote_dev, sizeof(*window)); + window->nr_lookup = 0; + } +} + +/** + * scif_create_remote_window: + * @ep: end point + * @nr_pages: number of pages in window + * + * Allocate and prepare a remote registration window. + */ +static struct scif_window * +scif_create_remote_window(struct scif_dev *scifdev, int nr_pages) +{ + struct scif_window *window; + + might_sleep(); + window = scif_zalloc(sizeof(*window)); + if (!window) + goto error_ret; + + window->magic = SCIFEP_MAGIC; + window->nr_pages = nr_pages; + + window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr)); + if (!window->dma_addr) + goto error_window; + + window->num_pages = scif_zalloc(nr_pages * + sizeof(*window->num_pages)); + if (!window->num_pages) + goto error_window; + + if (scif_create_remote_lookup(scifdev, window)) + goto error_window; + + window->type = SCIF_WINDOW_PEER; + window->unreg_state = OP_IDLE; + INIT_LIST_HEAD(&window->list); + return window; +error_window: + scif_destroy_remote_window(window); +error_ret: + return NULL; +} + +/** + * scif_destroy_remote_window: + * @ep: end point + * @window: remote registration window + * + * Deallocate resources for remote window. + */ +void +scif_destroy_remote_window(struct scif_window *window) +{ + scif_free(window->dma_addr, window->nr_pages * + sizeof(*window->dma_addr)); + scif_free(window->num_pages, window->nr_pages * + sizeof(*window->num_pages)); + window->magic = 0; + scif_free(window, sizeof(*window)); +} + +/** + * scif_iommu_map: create DMA mappings if the IOMMU is enabled + * @remote_dev: SCIF remote device + * @window: remote registration window + * + * Map the physical pages using dma_map_sg(..) and then detect the number + * of contiguous DMA mappings allocated + */ +static int scif_iommu_map(struct scif_dev *remote_dev, + struct scif_window *window) +{ + struct scatterlist *sg; + int i, err; + scif_pinned_pages_t pin = window->pinned_pages; + + window->st = kzalloc(sizeof(*window->st), GFP_KERNEL); + if (!window->st) + return -ENOMEM; + + err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL); + if (err) + return err; + + for_each_sg(window->st->sgl, sg, window->st->nents, i) + sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0); + + err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl, + window->st->nents, DMA_BIDIRECTIONAL); + if (!err) + return -ENOMEM; + /* Detect contiguous ranges of DMA mappings */ + sg = window->st->sgl; + for (i = 0; sg; i++) { + dma_addr_t last_da; + + window->dma_addr[i] = sg_dma_address(sg); + window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT; + last_da = sg_dma_address(sg) + sg_dma_len(sg); + while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) { + window->num_pages[i] += + (sg_dma_len(sg) >> PAGE_SHIFT); + last_da = window->dma_addr[i] + + sg_dma_len(sg); + } + window->nr_contig_chunks++; + } + return 0; +} + +/** + * scif_map_window: + * @remote_dev: SCIF remote device + * @window: self registration window + * + * Map pages of a window into the aperture/PCI. + * Also determine addresses required for DMA. + */ +int +scif_map_window(struct scif_dev *remote_dev, struct scif_window *window) +{ + int i, j, k, err = 0, nr_contig_pages; + scif_pinned_pages_t pin; + phys_addr_t phys_prev, phys_curr; + + might_sleep(); + + pin = window->pinned_pages; + + if (intel_iommu_enabled && !scifdev_self(remote_dev)) + return scif_iommu_map(remote_dev, window); + + for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) { + phys_prev = page_to_phys(pin->pages[i]); + nr_contig_pages = 1; + + /* Detect physically contiguous chunks */ + for (k = i + 1; k < window->nr_pages; k++) { + phys_curr = page_to_phys(pin->pages[k]); + if (phys_curr != (phys_prev + PAGE_SIZE)) + break; + phys_prev = phys_curr; + nr_contig_pages++; + } + window->num_pages[j] = nr_contig_pages; + window->nr_contig_chunks++; + if (scif_is_mgmt_node()) { + /* + * Management node has to deal with SMPT on X100 and + * hence the DMA mapping is required + */ + err = scif_map_single(&window->dma_addr[j], + phys_to_virt(page_to_phys( + pin->pages[i])), + remote_dev, + nr_contig_pages << PAGE_SHIFT); + if (err) + return err; + } else { + window->dma_addr[j] = page_to_phys(pin->pages[i]); + } + } + return err; +} + +/** + * scif_send_scif_unregister: + * @ep: end point + * @window: self registration window + * + * Send a SCIF_UNREGISTER message. + */ +static int scif_send_scif_unregister(struct scif_endpt *ep, + struct scif_window *window) +{ + struct scifmsg msg; + + msg.uop = SCIF_UNREGISTER; + msg.src = ep->port; + msg.payload[0] = window->alloc_handle.vaddr; + msg.payload[1] = (u64)window; + return scif_nodeqp_send(ep->remote_dev, &msg); +} + +/** + * scif_unregister_window: + * @window: self registration window + * + * Send an unregistration request and wait for a response. + */ +int scif_unregister_window(struct scif_window *window) +{ + int err = 0; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + bool send_msg = false; + + might_sleep(); + switch (window->unreg_state) { + case OP_IDLE: + { + window->unreg_state = OP_IN_PROGRESS; + send_msg = true; + /* fall through */ + } + case OP_IN_PROGRESS: + { + scif_get_window(window, 1); + mutex_unlock(&ep->rma_info.rma_lock); + if (send_msg) { + err = scif_send_scif_unregister(ep, window); + if (err) { + window->unreg_state = OP_COMPLETED; + goto done; + } + } else { + /* Return ENXIO since unregistration is in progress */ + return -ENXIO; + } +retry: + /* Wait for a SCIF_UNREGISTER_(N)ACK message */ + err = wait_event_timeout(window->unregwq, + window->unreg_state != OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) { + err = -ENODEV; + window->unreg_state = OP_COMPLETED; + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + } + if (err > 0) + err = 0; +done: + mutex_lock(&ep->rma_info.rma_lock); + scif_put_window(window, 1); + break; + } + case OP_FAILED: + { + if (!scifdev_alive(ep)) { + err = -ENODEV; + window->unreg_state = OP_COMPLETED; + } + break; + } + case OP_COMPLETED: + break; + default: + err = -ENODEV; + } + + if (window->unreg_state == OP_COMPLETED && window->ref_count) + scif_put_window(window, window->nr_pages); + + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + list_del_init(&window->list); + scif_free_window_offset(ep, window, window->offset); + mutex_unlock(&ep->rma_info.rma_lock); + if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) && + scifdev_alive(ep)) { + scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan); + } else { + if (!__scif_dec_pinned_vm_lock(window->mm, + window->nr_pages, 1)) { + __scif_release_mm(window->mm); + window->mm = NULL; + } + } + scif_queue_for_cleanup(window, &scif_info.rma); + mutex_lock(&ep->rma_info.rma_lock); + } + return err; +} + +/** + * scif_send_alloc_request: + * @ep: end point + * @window: self registration window + * + * Send a remote window allocation request + */ +static int scif_send_alloc_request(struct scif_endpt *ep, + struct scif_window *window) +{ + struct scifmsg msg; + struct scif_allocmsg *alloc = &window->alloc_handle; + + /* Set up the Alloc Handle */ + alloc->state = OP_IN_PROGRESS; + init_waitqueue_head(&alloc->allocwq); + + /* Send out an allocation request */ + msg.uop = SCIF_ALLOC_REQ; + msg.payload[1] = window->nr_pages; + msg.payload[2] = (u64)&window->alloc_handle; + return _scif_nodeqp_send(ep->remote_dev, &msg); +} + +/** + * scif_prep_remote_window: + * @ep: end point + * @window: self registration window + * + * Send a remote window allocation request, wait for an allocation response, + * and prepares the remote window by copying over the page lists + */ +static int scif_prep_remote_window(struct scif_endpt *ep, + struct scif_window *window) +{ + struct scifmsg msg; + struct scif_window *remote_window; + struct scif_allocmsg *alloc = &window->alloc_handle; + dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1; + int i = 0, j = 0; + int nr_contig_chunks, loop_nr_contig_chunks; + int remaining_nr_contig_chunks, nr_lookup; + int err, map_err; + + map_err = scif_map_window(ep->remote_dev, window); + if (map_err) + dev_err(&ep->remote_dev->sdev->dev, + "%s %d map_err %d\n", __func__, __LINE__, map_err); + remaining_nr_contig_chunks = window->nr_contig_chunks; + nr_contig_chunks = window->nr_contig_chunks; +retry: + /* Wait for a SCIF_ALLOC_GNT/REJ message */ + err = wait_event_timeout(alloc->allocwq, + alloc->state != OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + mutex_lock(&ep->rma_info.rma_lock); + /* Synchronize with the thread waking up allocwq */ + mutex_unlock(&ep->rma_info.rma_lock); + if (!err && scifdev_alive(ep)) + goto retry; + + if (!err) + err = -ENODEV; + + if (err > 0) + err = 0; + else + return err; + + /* Bail out. The remote end rejected this request */ + if (alloc->state == OP_FAILED) + return -ENOMEM; + + if (map_err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, map_err); + msg.uop = SCIF_FREE_VIRT; + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = window->alloc_handle.vaddr; + msg.payload[2] = (u64)window; + msg.payload[3] = SCIF_REGISTER; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) + err = _scif_nodeqp_send(ep->remote_dev, &msg); + else + err = -ENOTCONN; + spin_unlock(&ep->lock); + return err; + } + + remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window), + ep->remote_dev); + + /* Compute the number of lookup entries. 21 == 2MB Shift */ + nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE) + >> ilog2(SCIF_NR_ADDR_IN_PAGE); + + dma_phys_lookup = + scif_ioremap(remote_window->dma_addr_lookup.offset, + nr_lookup * + sizeof(*remote_window->dma_addr_lookup.lookup), + ep->remote_dev); + num_pages_lookup = + scif_ioremap(remote_window->num_pages_lookup.offset, + nr_lookup * + sizeof(*remote_window->num_pages_lookup.lookup), + ep->remote_dev); + + while (remaining_nr_contig_chunks) { + loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks, + (int)SCIF_NR_ADDR_IN_PAGE); + /* #1/2 - Copy physical addresses over to the remote side */ + + /* #2/2 - Copy DMA addresses (addresses that are fed into the + * DMA engine) We transfer bus addresses which are then + * converted into a MIC physical address on the remote + * side if it is a MIC, if the remote node is a mgmt node we + * transfer the MIC physical address + */ + tmp = scif_ioremap(dma_phys_lookup[j], + loop_nr_contig_chunks * + sizeof(*window->dma_addr), + ep->remote_dev); + tmp1 = scif_ioremap(num_pages_lookup[j], + loop_nr_contig_chunks * + sizeof(*window->num_pages), + ep->remote_dev); + if (scif_is_mgmt_node()) { + memcpy_toio((void __force __iomem *)tmp, + &window->dma_addr[i], loop_nr_contig_chunks + * sizeof(*window->dma_addr)); + memcpy_toio((void __force __iomem *)tmp1, + &window->num_pages[i], loop_nr_contig_chunks + * sizeof(*window->num_pages)); + } else { + if (scifdev_is_p2p(ep->remote_dev)) { + /* + * add remote node's base address for this node + * to convert it into a MIC address + */ + int m; + dma_addr_t dma_addr; + + for (m = 0; m < loop_nr_contig_chunks; m++) { + dma_addr = window->dma_addr[i + m] + + ep->remote_dev->base_addr; + writeq(dma_addr, + (void __force __iomem *)&tmp[m]); + } + memcpy_toio((void __force __iomem *)tmp1, + &window->num_pages[i], + loop_nr_contig_chunks + * sizeof(*window->num_pages)); + } else { + /* Mgmt node or loopback - transfer DMA + * addresses as is, this is the same as a + * MIC physical address (we use the dma_addr + * and not the phys_addr array since the + * phys_addr is only setup if there is a mmap() + * request from the mgmt node) + */ + memcpy_toio((void __force __iomem *)tmp, + &window->dma_addr[i], + loop_nr_contig_chunks * + sizeof(*window->dma_addr)); + memcpy_toio((void __force __iomem *)tmp1, + &window->num_pages[i], + loop_nr_contig_chunks * + sizeof(*window->num_pages)); + } + } + remaining_nr_contig_chunks -= loop_nr_contig_chunks; + i += loop_nr_contig_chunks; + j++; + scif_iounmap(tmp, loop_nr_contig_chunks * + sizeof(*window->dma_addr), ep->remote_dev); + scif_iounmap(tmp1, loop_nr_contig_chunks * + sizeof(*window->num_pages), ep->remote_dev); + } + + /* Prepare the remote window for the peer */ + remote_window->peer_window = (u64)window; + remote_window->offset = window->offset; + remote_window->prot = window->prot; + remote_window->nr_contig_chunks = nr_contig_chunks; + remote_window->ep = ep->remote_ep; + scif_iounmap(num_pages_lookup, + nr_lookup * + sizeof(*remote_window->num_pages_lookup.lookup), + ep->remote_dev); + scif_iounmap(dma_phys_lookup, + nr_lookup * + sizeof(*remote_window->dma_addr_lookup.lookup), + ep->remote_dev); + scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev); + window->peer_window = alloc->vaddr; + return err; +} + +/** + * scif_send_scif_register: + * @ep: end point + * @window: self registration window + * + * Send a SCIF_REGISTER message if EP is connected and wait for a + * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT + * message so that the peer can free its remote window allocated earlier. + */ +static int scif_send_scif_register(struct scif_endpt *ep, + struct scif_window *window) +{ + int err = 0; + struct scifmsg msg; + + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = window->alloc_handle.vaddr; + msg.payload[2] = (u64)window; + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) { + msg.uop = SCIF_REGISTER; + window->reg_state = OP_IN_PROGRESS; + err = _scif_nodeqp_send(ep->remote_dev, &msg); + spin_unlock(&ep->lock); + if (!err) { +retry: + /* Wait for a SCIF_REGISTER_(N)ACK message */ + err = wait_event_timeout(window->regwq, + window->reg_state != + OP_IN_PROGRESS, + SCIF_NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + err = !err ? -ENODEV : 0; + if (window->reg_state == OP_FAILED) + err = -ENOTCONN; + } + } else { + msg.uop = SCIF_FREE_VIRT; + msg.payload[3] = SCIF_REGISTER; + err = _scif_nodeqp_send(ep->remote_dev, &msg); + spin_unlock(&ep->lock); + if (!err) + err = -ENOTCONN; + } + return err; +} + +/** + * scif_get_window_offset: + * @ep: end point descriptor + * @flags: flags + * @offset: offset hint + * @num_pages: number of pages + * @out_offset: computed offset returned by reference. + * + * Compute/Claim a new offset for this EP. + */ +int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset, + int num_pages, s64 *out_offset) +{ + s64 page_index; + struct iova *iova_ptr; + int err = 0; + + if (flags & SCIF_MAP_FIXED) { + page_index = SCIF_IOVA_PFN(offset); + iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index, + page_index + num_pages - 1); + if (!iova_ptr) + err = -EADDRINUSE; + } else { + iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages, + SCIF_DMA_63BIT_PFN - 1, 0); + if (!iova_ptr) + err = -ENOMEM; + } + if (!err) + *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT; + return err; +} + +/** + * scif_free_window_offset: + * @ep: end point descriptor + * @window: registration window + * @offset: Offset to be freed + * + * Free offset for this EP. The callee is supposed to grab + * the RMA mutex before calling this API. + */ +void scif_free_window_offset(struct scif_endpt *ep, + struct scif_window *window, s64 offset) +{ + if ((window && !window->offset_freed) || !window) { + free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT); + if (window) + window->offset_freed = true; + } +} + +/** + * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message + * @msg: Interrupt message + * + * Remote side is requesting a memory allocation. + */ +void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg) +{ + int err; + struct scif_window *window = NULL; + int nr_pages = msg->payload[1]; + + window = scif_create_remote_window(scifdev, nr_pages); + if (!window) { + err = -ENOMEM; + goto error; + } + + /* The peer's allocation request is granted */ + msg->uop = SCIF_ALLOC_GNT; + msg->payload[0] = (u64)window; + msg->payload[1] = window->mapped_offset; + err = scif_nodeqp_send(scifdev, msg); + if (err) + scif_destroy_remote_window(window); + return; +error: + /* The peer's allocation request is rejected */ + dev_err(&scifdev->sdev->dev, + "%s %d error %d alloc_ptr %p nr_pages 0x%x\n", + __func__, __LINE__, err, window, nr_pages); + msg->uop = SCIF_ALLOC_REJ; + scif_nodeqp_send(scifdev, msg); +} + +/** + * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message + * @msg: Interrupt message + * + * Remote side responded to a memory allocation. + */ +void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2]; + struct scif_window *window = container_of(handle, struct scif_window, + alloc_handle); + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + handle->vaddr = msg->payload[0]; + handle->phys_addr = msg->payload[1]; + if (msg->uop == SCIF_ALLOC_GNT) + handle->state = OP_COMPLETED; + else + handle->state = OP_FAILED; + wake_up(&handle->allocwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message + * @msg: Interrupt message + * + * Free up memory kmalloc'd earlier. + */ +void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = (struct scif_window *)msg->payload[1]; + + scif_destroy_remote_window(window); +} + +static void +scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window) +{ + int j; + struct scif_hw_dev *sdev = dev->sdev; + phys_addr_t apt_base = 0; + + /* + * Add the aperture base if the DMA address is not card relative + * since the DMA addresses need to be an offset into the bar + */ + if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER && + sdev->aper && !sdev->card_rel_da) + apt_base = sdev->aper->pa; + else + return; + + for (j = 0; j < window->nr_contig_chunks; j++) { + if (window->num_pages[j]) + window->dma_addr[j] += apt_base; + else + break; + } +} + +/** + * scif_recv_reg: Respond to SCIF_REGISTER interrupt message + * @msg: Interrupt message + * + * Update remote window list with a new registered window. + */ +void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0]; + struct scif_window *window = + (struct scif_window *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + spin_lock(&ep->lock); + if (ep->state == SCIFEP_CONNECTED) { + msg->uop = SCIF_REGISTER_ACK; + scif_nodeqp_send(ep->remote_dev, msg); + scif_fixup_aper_base(ep->remote_dev, window); + /* No further failures expected. Insert new window */ + scif_insert_window(window, &ep->rma_info.remote_reg_list); + } else { + msg->uop = SCIF_REGISTER_NACK; + scif_nodeqp_send(ep->remote_dev, msg); + } + spin_unlock(&ep->lock); + mutex_unlock(&ep->rma_info.rma_lock); + /* free up any lookup resources now that page lists are transferred */ + scif_destroy_remote_lookup(ep->remote_dev, window); + /* + * We could not insert the window but we need to + * destroy the window. + */ + if (msg->uop == SCIF_REGISTER_NACK) + scif_destroy_remote_window(window); +} + +/** + * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message + * @msg: Interrupt message + * + * Remove window from remote registration list; + */ +void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_rma_req req; + struct scif_window *window = NULL; + struct scif_window *recv_window = + (struct scif_window *)msg->payload[0]; + struct scif_endpt *ep; + int del_window = 0; + + ep = (struct scif_endpt *)recv_window->ep; + req.out_window = &window; + req.offset = recv_window->offset; + req.prot = 0; + req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT; + req.type = SCIF_WINDOW_FULL; + req.head = &ep->rma_info.remote_reg_list; + msg->payload[0] = ep->remote_ep; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + if (scif_query_window(&req)) { + dev_err(&scifdev->sdev->dev, + "%s %d -ENXIO\n", __func__, __LINE__); + msg->uop = SCIF_UNREGISTER_ACK; + goto error; + } + if (window) { + if (window->ref_count) + scif_put_window(window, window->nr_pages); + else + dev_err(&scifdev->sdev->dev, + "%s %d ref count should be +ve\n", + __func__, __LINE__); + window->unreg_state = OP_COMPLETED; + if (!window->ref_count) { + msg->uop = SCIF_UNREGISTER_ACK; + atomic_inc(&ep->rma_info.tw_refcount); + ep->rma_info.async_list_del = 1; + list_del_init(&window->list); + del_window = 1; + } else { + /* NACK! There are valid references to this window */ + msg->uop = SCIF_UNREGISTER_NACK; + } + } else { + /* The window did not make its way to the list at all. ACK */ + msg->uop = SCIF_UNREGISTER_ACK; + scif_destroy_remote_window(recv_window); + } +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (del_window) + scif_drain_dma_intr(ep->remote_dev->sdev, + ep->rma_info.dma_chan); + scif_nodeqp_send(ep->remote_dev, msg); + if (del_window) + scif_queue_for_cleanup(window, &scif_info.rma); +} + +/** + * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to complete registration. + */ +void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[2]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->reg_state = OP_COMPLETED; + wake_up(&window->regwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to inform it that registration + * cannot be completed. + */ +void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[2]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->reg_state = OP_FAILED; + wake_up(&window->regwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to complete unregistration. + */ +void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[1]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->unreg_state = OP_COMPLETED; + wake_up(&window->unregwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to inform it that unregistration + * cannot be completed immediately. + */ +void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg) +{ + struct scif_window *window = + (struct scif_window *)msg->payload[1]; + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + + mutex_lock(&ep->rma_info.rma_lock); + window->unreg_state = OP_FAILED; + wake_up(&window->unregwq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +int __scif_pin_pages(void *addr, size_t len, int *out_prot, + int map_flags, scif_pinned_pages_t *pages) +{ + struct scif_pinned_pages *pinned_pages; + int nr_pages, err = 0, i; + bool vmalloc_addr = false; + bool try_upgrade = false; + int prot = *out_prot; + int ulimit = 0; + struct mm_struct *mm = NULL; + + /* Unsupported flags */ + if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT)) + return -EINVAL; + ulimit = !!(map_flags & SCIF_MAP_ULIMIT); + + /* Unsupported protection requested */ + if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE)) + return -EINVAL; + + /* addr/len must be page aligned. len should be non zero */ + if (!len || + (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) || + (ALIGN((u64)len, PAGE_SIZE) != (u64)len)) + return -EINVAL; + + might_sleep(); + + nr_pages = len >> PAGE_SHIFT; + + /* Allocate a set of pinned pages */ + pinned_pages = scif_create_pinned_pages(nr_pages, prot); + if (!pinned_pages) + return -ENOMEM; + + if (map_flags & SCIF_MAP_KERNEL) { + if (is_vmalloc_addr(addr)) + vmalloc_addr = true; + + for (i = 0; i < nr_pages; i++) { + if (vmalloc_addr) + pinned_pages->pages[i] = + vmalloc_to_page(addr + (i * PAGE_SIZE)); + else + pinned_pages->pages[i] = + virt_to_page(addr + (i * PAGE_SIZE)); + } + pinned_pages->nr_pages = nr_pages; + pinned_pages->map_flags = SCIF_MAP_KERNEL; + } else { + /* + * SCIF supports registration caching. If a registration has + * been requested with read only permissions, then we try + * to pin the pages with RW permissions so that a subsequent + * transfer with RW permission can hit the cache instead of + * invalidating it. If the upgrade fails with RW then we + * revert back to R permission and retry + */ + if (prot == SCIF_PROT_READ) + try_upgrade = true; + prot |= SCIF_PROT_WRITE; +retry: + mm = current->mm; + down_write(&mm->mmap_sem); + if (ulimit) { + err = __scif_check_inc_pinned_vm(mm, nr_pages); + if (err) { + up_write(&mm->mmap_sem); + pinned_pages->nr_pages = 0; + goto error_unmap; + } + } + + pinned_pages->nr_pages = get_user_pages( + current, + mm, + (u64)addr, + nr_pages, + !!(prot & SCIF_PROT_WRITE), + 0, + pinned_pages->pages, + NULL); + up_write(&mm->mmap_sem); + if (nr_pages != pinned_pages->nr_pages) { + if (try_upgrade) { + if (ulimit) + __scif_dec_pinned_vm_lock(mm, + nr_pages, 0); + /* Roll back any pinned pages */ + for (i = 0; i < pinned_pages->nr_pages; i++) { + if (pinned_pages->pages[i]) + put_page( + pinned_pages->pages[i]); + } + prot &= ~SCIF_PROT_WRITE; + try_upgrade = false; + goto retry; + } + } + pinned_pages->map_flags = 0; + } + + if (pinned_pages->nr_pages < nr_pages) { + err = -EFAULT; + pinned_pages->nr_pages = nr_pages; + goto dec_pinned; + } + + *out_prot = prot; + atomic_set(&pinned_pages->ref_count, 1); + *pages = pinned_pages; + return err; +dec_pinned: + if (ulimit) + __scif_dec_pinned_vm_lock(mm, nr_pages, 0); + /* Something went wrong! Rollback */ +error_unmap: + pinned_pages->nr_pages = nr_pages; + scif_destroy_pinned_pages(pinned_pages); + *pages = NULL; + dev_dbg(scif_info.mdev.this_device, + "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len); + return err; +} + +int scif_pin_pages(void *addr, size_t len, int prot, + int map_flags, scif_pinned_pages_t *pages) +{ + return __scif_pin_pages(addr, len, &prot, map_flags, pages); +} +EXPORT_SYMBOL_GPL(scif_pin_pages); + +int scif_unpin_pages(scif_pinned_pages_t pinned_pages) +{ + int err = 0, ret; + + if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic) + return -EINVAL; + + ret = atomic_sub_return(1, &pinned_pages->ref_count); + if (ret < 0) { + dev_err(scif_info.mdev.this_device, + "%s %d scif_unpin_pages called without pinning? rc %d\n", + __func__, __LINE__, ret); + return -EINVAL; + } + /* + * Destroy the window if the ref count for this set of pinned + * pages has dropped to zero. If it is positive then there is + * a valid registered window which is backed by these pages and + * it will be destroyed once all such windows are unregistered. + */ + if (!ret) + err = scif_destroy_pinned_pages(pinned_pages); + + return err; +} +EXPORT_SYMBOL_GPL(scif_unpin_pages); + +static inline void +scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep) +{ + mutex_lock(&ep->rma_info.rma_lock); + scif_insert_window(window, &ep->rma_info.reg_list); + mutex_unlock(&ep->rma_info.rma_lock); +} + +off_t scif_register_pinned_pages(scif_epd_t epd, + scif_pinned_pages_t pinned_pages, + off_t offset, int map_flags) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + s64 computed_offset; + struct scif_window *window; + int err; + size_t len; + struct device *spdev; + + /* Unsupported flags */ + if (map_flags & ~SCIF_MAP_FIXED) + return -EINVAL; + + len = pinned_pages->nr_pages << PAGE_SHIFT; + + /* + * Offset is not page aligned/negative or offset+len + * wraps around with SCIF_MAP_FIXED. + */ + if ((map_flags & SCIF_MAP_FIXED) && + ((ALIGN(offset, PAGE_SIZE) != offset) || + (offset < 0) || + (offset + (off_t)len < offset))) + return -EINVAL; + + might_sleep(); + + err = scif_verify_epd(ep); + if (err) + return err; + /* + * It is an error to pass pinned_pages to scif_register_pinned_pages() + * after calling scif_unpin_pages(). + */ + if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0)) + return -EINVAL; + + /* Compute the offset for this registration */ + err = scif_get_window_offset(ep, map_flags, offset, + len, &computed_offset); + if (err) { + atomic_sub(1, &pinned_pages->ref_count); + return err; + } + + /* Allocate and prepare self registration window */ + window = scif_create_window(ep, pinned_pages->nr_pages, + computed_offset, false); + if (!window) { + atomic_sub(1, &pinned_pages->ref_count); + scif_free_window_offset(ep, NULL, computed_offset); + return -ENOMEM; + } + + window->pinned_pages = pinned_pages; + window->nr_pages = pinned_pages->nr_pages; + window->prot = pinned_pages->prot; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + scif_destroy_window(ep, window); + return err; + } + err = scif_send_alloc_request(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Prepare the remote registration window */ + err = scif_prep_remote_window(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Tell the peer about the new window */ + err = scif_send_scif_register(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + scif_put_peer_dev(spdev); + /* No further failures expected. Insert new window */ + scif_insert_local_window(window, ep); + return computed_offset; +error_unmap: + scif_destroy_window(ep, window); + scif_put_peer_dev(spdev); + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_register_pinned_pages); + +off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, + int prot, int map_flags) +{ + scif_pinned_pages_t pinned_pages; + off_t err; + struct scif_endpt *ep = (struct scif_endpt *)epd; + s64 computed_offset; + struct scif_window *window; + struct mm_struct *mm = NULL; + struct device *spdev; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n", + epd, addr, len, offset, prot, map_flags); + /* Unsupported flags */ + if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL)) + return -EINVAL; + + /* + * Offset is not page aligned/negative or offset+len + * wraps around with SCIF_MAP_FIXED. + */ + if ((map_flags & SCIF_MAP_FIXED) && + ((ALIGN(offset, PAGE_SIZE) != offset) || + (offset < 0) || + (offset + (off_t)len < offset))) + return -EINVAL; + + /* Unsupported protection requested */ + if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE)) + return -EINVAL; + + /* addr/len must be page aligned. len should be non zero */ + if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) || + (ALIGN(len, PAGE_SIZE) != len)) + return -EINVAL; + + might_sleep(); + + err = scif_verify_epd(ep); + if (err) + return err; + + /* Compute the offset for this registration */ + err = scif_get_window_offset(ep, map_flags, offset, + len >> PAGE_SHIFT, &computed_offset); + if (err) + return err; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + scif_free_window_offset(ep, NULL, computed_offset); + return err; + } + /* Allocate and prepare self registration window */ + window = scif_create_window(ep, len >> PAGE_SHIFT, + computed_offset, false); + if (!window) { + scif_free_window_offset(ep, NULL, computed_offset); + scif_put_peer_dev(spdev); + return -ENOMEM; + } + + window->nr_pages = len >> PAGE_SHIFT; + + err = scif_send_alloc_request(ep, window); + if (err) { + scif_destroy_incomplete_window(ep, window); + scif_put_peer_dev(spdev); + return err; + } + + if (!(map_flags & SCIF_MAP_KERNEL)) { + mm = __scif_acquire_mm(); + map_flags |= SCIF_MAP_ULIMIT; + } + /* Pin down the pages */ + err = __scif_pin_pages(addr, len, &prot, + map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT), + &pinned_pages); + if (err) { + scif_destroy_incomplete_window(ep, window); + __scif_release_mm(mm); + goto error; + } + + window->pinned_pages = pinned_pages; + window->prot = pinned_pages->prot; + window->mm = mm; + + /* Prepare the remote registration window */ + err = scif_prep_remote_window(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %ld\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Tell the peer about the new window */ + err = scif_send_scif_register(ep, window); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %ld\n", __func__, __LINE__, err); + goto error_unmap; + } + + scif_put_peer_dev(spdev); + /* No further failures expected. Insert new window */ + scif_insert_local_window(window, ep); + dev_dbg(&ep->remote_dev->sdev->dev, + "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n", + epd, addr, len, computed_offset); + return computed_offset; +error_unmap: + scif_destroy_window(ep, window); +error: + scif_put_peer_dev(spdev); + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %ld\n", __func__, __LINE__, err); + return err; +} +EXPORT_SYMBOL_GPL(scif_register); + +int +scif_unregister(scif_epd_t epd, off_t offset, size_t len) +{ + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct scif_window *window = NULL; + struct scif_rma_req req; + int nr_pages, err; + struct device *spdev; + + dev_dbg(scif_info.mdev.this_device, + "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n", + ep, offset, len); + /* len must be page aligned. len should be non zero */ + if (!len || + (ALIGN((u64)len, PAGE_SIZE) != (u64)len)) + return -EINVAL; + + /* Offset is not page aligned or offset+len wraps around */ + if ((ALIGN(offset, PAGE_SIZE) != offset) || + (offset + (off_t)len < offset)) + return -EINVAL; + + err = scif_verify_epd(ep); + if (err) + return err; + + might_sleep(); + nr_pages = len >> PAGE_SHIFT; + + req.out_window = &window; + req.offset = offset; + req.prot = 0; + req.nr_bytes = len; + req.type = SCIF_WINDOW_FULL; + req.head = &ep->rma_info.reg_list; + + spdev = scif_get_peer_dev(ep->remote_dev); + if (IS_ERR(spdev)) { + err = PTR_ERR(spdev); + return err; + } + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + err = scif_query_window(&req); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + goto error; + } + /* Unregister all the windows in this range */ + err = scif_rma_list_unregister(window, offset, nr_pages); + if (err) + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); +error: + mutex_unlock(&ep->rma_info.rma_lock); + scif_put_peer_dev(spdev); + return err; +} +EXPORT_SYMBOL_GPL(scif_unregister); diff --git a/drivers/misc/mic/scif/scif_rma.h b/drivers/misc/mic/scif/scif_rma.h new file mode 100644 index 000000000000..fa6722279196 --- /dev/null +++ b/drivers/misc/mic/scif/scif_rma.h @@ -0,0 +1,464 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_RMA_H +#define SCIF_RMA_H + +#include <linux/dma_remapping.h> +#include <linux/mmu_notifier.h> + +#include "../bus/scif_bus.h" + +/* If this bit is set then the mark is a remote fence mark */ +#define SCIF_REMOTE_FENCE_BIT 31 +/* Magic value used to indicate a remote fence request */ +#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT) + +#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL) +#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \ + (L1_CACHE_BYTES << 1)) + +#define SCIF_IOVA_START_PFN (1) +#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) +#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64)) +#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63)) + +/* + * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information + * + * @reg_list: List of registration windows for self + * @remote_reg_list: List of registration windows for peer + * @iovad: Offset generator + * @rma_lock: Synchronizes access to self/remote list and also protects the + * window from being destroyed while RMAs are in progress. + * @tc_lock: Synchronizes access to temporary cached windows list + * for SCIF Registration Caching. + * @mmn_lock: Synchronizes access to the list of MMU notifiers registered + * @tw_refcount: Keeps track of number of outstanding temporary registered + * windows created by scif_vreadfrom/scif_vwriteto which have + * not been destroyed. + * @tcw_refcount: Same as tw_refcount but for temporary cached windows + * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned + * @mmn_list: MMU notifier so that we can destroy the windows when required + * @fence_refcount: Keeps track of number of outstanding remote fence + * requests which have been received by the peer. + * @dma_chan: DMA channel used for all DMA transfers for this endpoint. + * @async_list_del: Detect asynchronous list entry deletion + * @vma_list: List of vmas with remote memory mappings + * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait +*/ +struct scif_endpt_rma_info { + struct list_head reg_list; + struct list_head remote_reg_list; + struct iova_domain iovad; + struct mutex rma_lock; + spinlock_t tc_lock; + struct mutex mmn_lock; + atomic_t tw_refcount; + atomic_t tcw_refcount; + atomic_t tcw_total_pages; + struct list_head mmn_list; + atomic_t fence_refcount; + struct dma_chan *dma_chan; + int async_list_del; + struct list_head vma_list; + wait_queue_head_t markwq; +}; + +/* + * struct scif_fence_info - used for tracking fence requests + * + * @state: State of this transfer + * @wq: Fences wait on this queue + * @dma_mark: Used for storing the DMA mark + */ +struct scif_fence_info { + enum scif_msg_state state; + struct completion comp; + int dma_mark; +}; + +/* + * struct scif_remote_fence_info - used for tracking remote fence requests + * + * @msg: List of SCIF node QP fence messages + * @list: Link to list of remote fence requests + */ +struct scif_remote_fence_info { + struct scifmsg msg; + struct list_head list; +}; + +/* + * Specifies whether an RMA operation can span across partial windows, a single + * window or multiple contiguous windows. Mmaps can span across partial windows. + * Unregistration can span across complete windows. scif_get_pages() can span a + * single window. A window can also be of type self or peer. + */ +enum scif_window_type { + SCIF_WINDOW_PARTIAL, + SCIF_WINDOW_SINGLE, + SCIF_WINDOW_FULL, + SCIF_WINDOW_SELF, + SCIF_WINDOW_PEER +}; + +/* The number of physical addresses that can be stored in a PAGE. */ +#define SCIF_NR_ADDR_IN_PAGE (0x1000 >> 3) + +/* + * struct scif_rma_lookup - RMA lookup data structure for page list transfers + * + * Store an array of lookup offsets. Each offset in this array maps + * one 4K page containing 512 physical addresses i.e. 2MB. 512 such + * offsets in a 4K page will correspond to 1GB of registered address space. + + * @lookup: Array of offsets + * @offset: DMA offset of lookup array + */ +struct scif_rma_lookup { + dma_addr_t *lookup; + dma_addr_t offset; +}; + +/* + * struct scif_pinned_pages - A set of pinned pages obtained with + * scif_pin_pages() which could be part of multiple registered + * windows across different end points. + * + * @nr_pages: Number of pages which is defined as a s64 instead of an int + * to avoid sign extension with buffers >= 2GB + * @prot: read/write protections + * @map_flags: Flags specified during the pin operation + * @ref_count: Reference count bumped in terms of number of pages + * @magic: A magic value + * @pages: Array of pointers to struct pages populated with get_user_pages(..) + */ +struct scif_pinned_pages { + s64 nr_pages; + int prot; + int map_flags; + atomic_t ref_count; + u64 magic; + struct page **pages; +}; + +/* + * struct scif_status - Stores DMA status update information + * + * @src_dma_addr: Source buffer DMA address + * @val: src location for value to be written to the destination + * @ep: SCIF endpoint + */ +struct scif_status { + dma_addr_t src_dma_addr; + u64 val; + struct scif_endpt *ep; +}; + +/* + * struct scif_window - Registration Window for Self and Remote + * + * @nr_pages: Number of pages which is defined as a s64 instead of an int + * to avoid sign extension with buffers >= 2GB + * @nr_contig_chunks: Number of contiguous physical chunks + * @prot: read/write protections + * @ref_count: reference count in terms of number of pages + * @magic: Cookie to detect corruption + * @offset: registered offset + * @va_for_temp: va address that this window represents + * @dma_mark: Used to determine if all DMAs against the window are done + * @ep: Pointer to EP. Useful for passing EP around with messages to + avoid expensive list traversals. + * @list: link to list of windows for the endpoint + * @type: self or peer window + * @peer_window: Pointer to peer window. Useful for sending messages to peer + * without requiring an extra list traversal + * @unreg_state: unregistration state + * @offset_freed: True if the offset has been freed + * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto + * @mm: memory descriptor for the task_struct which initiated the RMA + * @st: scatter gather table for DMA mappings with IOMMU enabled + * @pinned_pages: The set of pinned_pages backing this window + * @alloc_handle: Handle for sending ALLOC_REQ + * @regwq: Wait Queue for an registration (N)ACK + * @reg_state: Registration state + * @unregwq: Wait Queue for an unregistration (N)ACK + * @dma_addr_lookup: Lookup for physical addresses used for DMA + * @nr_lookup: Number of entries in lookup + * @mapped_offset: Offset used to map the window by the peer + * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA + * @num_pages: Array specifying number of pages for each physical address + */ +struct scif_window { + s64 nr_pages; + int nr_contig_chunks; + int prot; + int ref_count; + u64 magic; + s64 offset; + unsigned long va_for_temp; + int dma_mark; + u64 ep; + struct list_head list; + enum scif_window_type type; + u64 peer_window; + enum scif_msg_state unreg_state; + bool offset_freed; + bool temp; + struct mm_struct *mm; + struct sg_table *st; + union { + struct { + struct scif_pinned_pages *pinned_pages; + struct scif_allocmsg alloc_handle; + wait_queue_head_t regwq; + enum scif_msg_state reg_state; + wait_queue_head_t unregwq; + }; + struct { + struct scif_rma_lookup dma_addr_lookup; + struct scif_rma_lookup num_pages_lookup; + int nr_lookup; + dma_addr_t mapped_offset; + }; + }; + dma_addr_t *dma_addr; + u64 *num_pages; +} __packed; + +/* + * scif_mmu_notif - SCIF mmu notifier information + * + * @mmu_notifier ep_mmu_notifier: MMU notifier operations + * @tc_reg_list: List of temp registration windows for self + * @mm: memory descriptor for the task_struct which initiated the RMA + * @ep: SCIF endpoint + * @list: link to list of MMU notifier information + */ +struct scif_mmu_notif { +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier ep_mmu_notifier; +#endif + struct list_head tc_reg_list; + struct mm_struct *mm; + struct scif_endpt *ep; + struct list_head list; +}; + +enum scif_rma_dir { + SCIF_LOCAL_TO_REMOTE, + SCIF_REMOTE_TO_LOCAL +}; + +extern struct kmem_cache *unaligned_cache; +/* Initialize RMA for this EP */ +void scif_rma_ep_init(struct scif_endpt *ep); +/* Check if epd can be uninitialized */ +int scif_rma_ep_can_uninit(struct scif_endpt *ep); +/* Obtain a new offset. Callee must grab RMA lock */ +int scif_get_window_offset(struct scif_endpt *ep, int flags, + s64 offset, int nr_pages, s64 *out_offset); +/* Free offset. Callee must grab RMA lock */ +void scif_free_window_offset(struct scif_endpt *ep, + struct scif_window *window, s64 offset); +/* Create self registration window */ +struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages, + s64 offset, bool temp); +/* Destroy self registration window.*/ +int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window); +void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window); +/* Map pages of self window to Aperture/PCI */ +int scif_map_window(struct scif_dev *remote_dev, + struct scif_window *window); +/* Unregister a self window */ +int scif_unregister_window(struct scif_window *window); +/* Destroy remote registration window */ +void +scif_destroy_remote_window(struct scif_window *window); +/* remove valid remote memory mappings from process address space */ +void scif_zap_mmaps(int node); +/* Query if any applications have remote memory mappings */ +bool scif_rma_do_apps_have_mmaps(int node); +/* Cleanup remote registration lists for zombie endpoints */ +void scif_cleanup_rma_for_zombies(int node); +/* Reserve a DMA channel for a particular endpoint */ +int scif_reserve_dma_chan(struct scif_endpt *ep); +/* Setup a DMA mark for an endpoint */ +int _scif_fence_mark(scif_epd_t epd, int *mark); +int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val, + enum scif_window_type type); +void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg); +void scif_mmu_notif_handler(struct work_struct *work); +void scif_rma_handle_remote_fences(void); +void scif_rma_destroy_windows(void); +void scif_rma_destroy_tcw_invalid(void); +int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan); + +struct scif_window_iter { + s64 offset; + int index; +}; + +static inline void +scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter) +{ + iter->offset = window->offset; + iter->index = 0; +} + +dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off, + size_t *nr_bytes, + struct scif_window_iter *iter); +static inline +dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off) +{ + return scif_off_to_dma_addr(window, off, NULL, NULL); +} + +static inline bool scif_unaligned(off_t src_offset, off_t dst_offset) +{ + src_offset = src_offset & (L1_CACHE_BYTES - 1); + dst_offset = dst_offset & (L1_CACHE_BYTES - 1); + return !(src_offset == dst_offset); +} + +/* + * scif_zalloc: + * @size: Size of the allocation request. + * + * Helper API which attempts to allocate zeroed pages via + * __get_free_pages(..) first and then falls back on + * vzalloc(..) if that fails. + */ +static inline void *scif_zalloc(size_t size) +{ + void *ret = NULL; + size_t align = ALIGN(size, PAGE_SIZE); + + if (align && get_order(align) < MAX_ORDER) + ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(align)); + return ret ? ret : vzalloc(align); +} + +/* + * scif_free: + * @addr: Address to be freed. + * @size: Size of the allocation. + * Helper API which frees memory allocated via scif_zalloc(). + */ +static inline void scif_free(void *addr, size_t size) +{ + size_t align = ALIGN(size, PAGE_SIZE); + + if (is_vmalloc_addr(addr)) + vfree(addr); + else + free_pages((unsigned long)addr, get_order(align)); +} + +static inline void scif_get_window(struct scif_window *window, int nr_pages) +{ + window->ref_count += nr_pages; +} + +static inline void scif_put_window(struct scif_window *window, int nr_pages) +{ + window->ref_count -= nr_pages; +} + +static inline void scif_set_window_ref(struct scif_window *window, int nr_pages) +{ + window->ref_count = nr_pages; +} + +static inline void +scif_queue_for_cleanup(struct scif_window *window, struct list_head *list) +{ + spin_lock(&scif_info.rmalock); + list_add_tail(&window->list, list); + spin_unlock(&scif_info.rmalock); + schedule_work(&scif_info.misc_work); +} + +static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window) +{ + list_del_init(&window->list); + scif_queue_for_cleanup(window, &scif_info.rma_tc); +} + +static inline bool scif_is_iommu_enabled(void) +{ +#ifdef CONFIG_INTEL_IOMMU + return intel_iommu_enabled; +#else + return false; +#endif +} +#endif /* SCIF_RMA_H */ diff --git a/drivers/misc/mic/scif/scif_rma_list.c b/drivers/misc/mic/scif/scif_rma_list.c new file mode 100644 index 000000000000..e1ef8daedd5a --- /dev/null +++ b/drivers/misc/mic/scif/scif_rma_list.c @@ -0,0 +1,291 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#include "scif_main.h" +#include <linux/mmu_notifier.h> +#include <linux/highmem.h> + +/* + * scif_insert_tcw: + * + * Insert a temp window to the temp registration list sorted by va_for_temp. + * RMA lock must be held. + */ +void scif_insert_tcw(struct scif_window *window, struct list_head *head) +{ + struct scif_window *curr = NULL; + struct scif_window *prev = list_entry(head, struct scif_window, list); + struct list_head *item; + + INIT_LIST_HEAD(&window->list); + /* Compare with tail and if the entry is new tail add it to the end */ + if (!list_empty(head)) { + curr = list_entry(head->prev, struct scif_window, list); + if (curr->va_for_temp < window->va_for_temp) { + list_add_tail(&window->list, head); + return; + } + } + list_for_each(item, head) { + curr = list_entry(item, struct scif_window, list); + if (curr->va_for_temp > window->va_for_temp) + break; + prev = curr; + } + list_add(&window->list, &prev->list); +} + +/* + * scif_insert_window: + * + * Insert a window to the self registration list sorted by offset. + * RMA lock must be held. + */ +void scif_insert_window(struct scif_window *window, struct list_head *head) +{ + struct scif_window *curr = NULL, *prev = NULL; + struct list_head *item; + + INIT_LIST_HEAD(&window->list); + list_for_each(item, head) { + curr = list_entry(item, struct scif_window, list); + if (curr->offset > window->offset) + break; + prev = curr; + } + if (!prev) + list_add(&window->list, head); + else + list_add(&window->list, &prev->list); + scif_set_window_ref(window, window->nr_pages); +} + +/* + * scif_query_tcw: + * + * Query the temp cached registration list of ep for an overlapping window + * in case of permission mismatch, destroy the previous window. if permissions + * match and overlap is partial, destroy the window but return the new range + * RMA lock must be held. + */ +int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *req) +{ + struct list_head *item, *temp, *head = req->head; + struct scif_window *window; + u64 start_va_window, start_va_req = req->va_for_temp; + u64 end_va_window, end_va_req = start_va_req + req->nr_bytes; + + if (!req->nr_bytes) + return -EINVAL; + /* + * Avoid traversing the entire list to find out that there + * is no entry that matches + */ + if (!list_empty(head)) { + window = list_last_entry(head, struct scif_window, list); + end_va_window = window->va_for_temp + + (window->nr_pages << PAGE_SHIFT); + if (start_va_req > end_va_window) + return -ENXIO; + } + list_for_each_safe(item, temp, head) { + window = list_entry(item, struct scif_window, list); + start_va_window = window->va_for_temp; + end_va_window = window->va_for_temp + + (window->nr_pages << PAGE_SHIFT); + if (start_va_req < start_va_window && + end_va_req < start_va_window) + break; + if (start_va_req >= end_va_window) + continue; + if ((window->prot & req->prot) == req->prot) { + if (start_va_req >= start_va_window && + end_va_req <= end_va_window) { + *req->out_window = window; + return 0; + } + /* expand window */ + if (start_va_req < start_va_window) { + req->nr_bytes += + start_va_window - start_va_req; + req->va_for_temp = start_va_window; + } + if (end_va_req >= end_va_window) + req->nr_bytes += end_va_window - end_va_req; + } + /* Destroy the old window to create a new one */ + __scif_rma_destroy_tcw_helper(window); + break; + } + return -ENXIO; +} + +/* + * scif_query_window: + * + * Query the registration list and check if a valid contiguous + * range of windows exist. + * RMA lock must be held. + */ +int scif_query_window(struct scif_rma_req *req) +{ + struct list_head *item; + struct scif_window *window; + s64 end_offset, offset = req->offset; + u64 tmp_min, nr_bytes_left = req->nr_bytes; + + if (!req->nr_bytes) + return -EINVAL; + + list_for_each(item, req->head) { + window = list_entry(item, struct scif_window, list); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + if (offset < window->offset) + /* Offset not found! */ + return -ENXIO; + if (offset >= end_offset) + continue; + /* Check read/write protections. */ + if ((window->prot & req->prot) != req->prot) + return -EPERM; + if (nr_bytes_left == req->nr_bytes) + /* Store the first window */ + *req->out_window = window; + tmp_min = min((u64)end_offset - offset, nr_bytes_left); + nr_bytes_left -= tmp_min; + offset += tmp_min; + /* + * Range requested encompasses + * multiple windows contiguously. + */ + if (!nr_bytes_left) { + /* Done for partial window */ + if (req->type == SCIF_WINDOW_PARTIAL || + req->type == SCIF_WINDOW_SINGLE) + return 0; + /* Extra logic for full windows */ + if (offset == end_offset) + /* Spanning multiple whole windows */ + return 0; + /* Not spanning multiple whole windows */ + return -ENXIO; + } + if (req->type == SCIF_WINDOW_SINGLE) + break; + } + dev_err(scif_info.mdev.this_device, + "%s %d ENXIO\n", __func__, __LINE__); + return -ENXIO; +} + +/* + * scif_rma_list_unregister: + * + * Traverse the self registration list starting from window: + * 1) Call scif_unregister_window(..) + * RMA lock must be held. + */ +int scif_rma_list_unregister(struct scif_window *window, + s64 offset, int nr_pages) +{ + struct scif_endpt *ep = (struct scif_endpt *)window->ep; + struct list_head *head = &ep->rma_info.reg_list; + s64 end_offset; + int err = 0; + int loop_nr_pages; + struct scif_window *_window; + + list_for_each_entry_safe_from(window, _window, head, list) { + end_offset = window->offset + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT), + nr_pages); + err = scif_unregister_window(window); + if (err) + return err; + nr_pages -= loop_nr_pages; + offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages) + break; + } + return 0; +} + +/* + * scif_unmap_all_window: + * + * Traverse all the windows in the self registration list and: + * 1) Delete any DMA mappings created + */ +void scif_unmap_all_windows(scif_epd_t epd) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct list_head *head = &ep->rma_info.reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + list_for_each_safe(item, tmp, head) { + window = list_entry(item, struct scif_window, list); + scif_unmap_window(ep->remote_dev, window); + } + mutex_unlock(&ep->rma_info.rma_lock); +} + +/* + * scif_unregister_all_window: + * + * Traverse all the windows in the self registration list and: + * 1) Call scif_unregister_window(..) + * RMA lock must be held. + */ +int scif_unregister_all_windows(scif_epd_t epd) +{ + struct list_head *item, *tmp; + struct scif_window *window; + struct scif_endpt *ep = (struct scif_endpt *)epd; + struct list_head *head = &ep->rma_info.reg_list; + int err = 0; + + mutex_lock(&ep->rma_info.rma_lock); +retry: + item = NULL; + tmp = NULL; + list_for_each_safe(item, tmp, head) { + window = list_entry(item, struct scif_window, list); + ep->rma_info.async_list_del = 0; + err = scif_unregister_window(window); + if (err) + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", + __func__, __LINE__, err); + /* + * Need to restart list traversal if there has been + * an asynchronous list entry deletion. + */ + if (ACCESS_ONCE(ep->rma_info.async_list_del)) + goto retry; + } + mutex_unlock(&ep->rma_info.rma_lock); + if (!list_empty(&ep->rma_info.mmn_list)) { + spin_lock(&scif_info.rmalock); + list_add_tail(&ep->mmu_list, &scif_info.mmu_notif_cleanup); + spin_unlock(&scif_info.rmalock); + schedule_work(&scif_info.mmu_notif_work); + } + return err; +} diff --git a/drivers/misc/mic/scif/scif_rma_list.h b/drivers/misc/mic/scif/scif_rma_list.h new file mode 100644 index 000000000000..7d58d1d551b0 --- /dev/null +++ b/drivers/misc/mic/scif/scif_rma_list.h @@ -0,0 +1,57 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Intel SCIF driver. + * + */ +#ifndef SCIF_RMA_LIST_H +#define SCIF_RMA_LIST_H + +/* + * struct scif_rma_req - Self Registration list RMA Request query + * + * @out_window - Returns the window if found + * @offset: Starting offset + * @nr_bytes: number of bytes + * @prot: protection requested i.e. read or write or both + * @type: Specify single, partial or multiple windows + * @head: Head of list on which to search + * @va_for_temp: VA for searching temporary cached windows + */ +struct scif_rma_req { + struct scif_window **out_window; + union { + s64 offset; + unsigned long va_for_temp; + }; + size_t nr_bytes; + int prot; + enum scif_window_type type; + struct list_head *head; +}; + +/* Insert */ +void scif_insert_window(struct scif_window *window, struct list_head *head); +void scif_insert_tcw(struct scif_window *window, + struct list_head *head); +/* Query */ +int scif_query_window(struct scif_rma_req *request); +int scif_query_tcw(struct scif_endpt *ep, struct scif_rma_req *request); +/* Called from close to unregister all self windows */ +int scif_unregister_all_windows(scif_epd_t epd); +void scif_unmap_all_windows(scif_epd_t epd); +/* Traverse list and unregister */ +int scif_rma_list_unregister(struct scif_window *window, s64 offset, + int nr_pages); +#endif /* SCIF_RMA_LIST_H */ diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c index 2f30badc6ffd..1ee8e82ba710 100644 --- a/drivers/misc/sgi-gru/gruhandles.c +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -196,12 +196,6 @@ void tfh_write_restart(struct gru_tlb_fault_handle *tfh, start_instruction(tfh); } -void tfh_restart(struct gru_tlb_fault_handle *tfh) -{ - tfh->opc = TFHOP_RESTART; - start_instruction(tfh); -} - void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) { tfh->opc = TFHOP_USER_POLLING_MODE; diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h index 3f998b924d8f..3d7bd36a1c89 100644 --- a/drivers/misc/sgi-gru/gruhandles.h +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -524,7 +524,6 @@ int tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long paddr, int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); -void tfh_restart(struct gru_tlb_fault_handle *tfh); void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh); void tfh_exception(struct gru_tlb_fault_handle *tfh); diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c index a3700a56b8ff..313da3150262 100644 --- a/drivers/misc/sgi-gru/grukdump.c +++ b/drivers/misc/sgi-gru/grukdump.c @@ -78,11 +78,10 @@ static int gru_dump_tfm(struct gru_state *gru, void __user *ubuf, void __user *ubufend) { struct gru_tlb_fault_map *tfm; - int i, ret, bytes; + int i; - bytes = GRU_NUM_TFM * GRU_CACHE_LINE_BYTES; - if (bytes > ubufend - ubuf) - ret = -EFBIG; + if (GRU_NUM_TFM * GRU_CACHE_LINE_BYTES > ubufend - ubuf) + return -EFBIG; for (i = 0; i < GRU_NUM_TFM; i++) { tfm = get_tfm(gru->gs_gru_base_vaddr, i); @@ -99,11 +98,10 @@ static int gru_dump_tgh(struct gru_state *gru, void __user *ubuf, void __user *ubufend) { struct gru_tlb_global_handle *tgh; - int i, ret, bytes; + int i; - bytes = GRU_NUM_TGH * GRU_CACHE_LINE_BYTES; - if (bytes > ubufend - ubuf) - ret = -EFBIG; + if (GRU_NUM_TGH * GRU_CACHE_LINE_BYTES > ubufend - ubuf) + return -EFBIG; for (i = 0; i < GRU_NUM_TGH; i++) { tgh = get_tgh(gru->gs_gru_base_vaddr, i); @@ -196,7 +194,7 @@ int gru_dump_chiplet_request(unsigned long arg) return -EFAULT; /* Currently, only dump by gid is implemented */ - if (req.gid >= gru_max_gids || req.gid < 0) + if (req.gid >= gru_max_gids) return -EINVAL; gru = GID_TO_GRU(req.gid); diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 913de07e577c..967b9dd24fe9 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -160,7 +160,12 @@ static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id) down_write(&bs->bs_kgts_sema); if (!bs->bs_kgts) { - bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); + do { + bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0); + if (!IS_ERR(bs->bs_kgts)) + break; + msleep(1); + } while (true); bs->bs_kgts->ts_user_blade_id = blade_id; } kgts = bs->bs_kgts; @@ -429,8 +434,8 @@ int gru_get_cb_exception_detail(void *cb, return 0; } -char *gru_get_cb_exception_detail_str(int ret, void *cb, - char *buf, int size) +static char *gru_get_cb_exception_detail_str(int ret, void *cb, + char *buf, int size) { struct gru_control_block_status *gen = (void *)cb; struct control_block_extended_exc_detail excdet; @@ -505,7 +510,7 @@ int gru_wait_proc(void *cb) return ret; } -void gru_abort(int ret, void *cb, char *str) +static void gru_abort(int ret, void *cb, char *str) { char buf[GRU_EXC_STR_SIZE]; @@ -997,7 +1002,6 @@ static int quicktest1(unsigned long arg) { struct gru_message_queue_desc mqd; void *p, *mq; - unsigned long *dw; int i, ret = -EIO; char mes[GRU_CACHE_LINE_BYTES], *m; @@ -1007,7 +1011,6 @@ static int quicktest1(unsigned long arg) return -ENOMEM; mq = ALIGNUP(p, 1024); memset(mes, 0xee, sizeof(mes)); - dw = mq; gru_create_message_queue(&mqd, mq, 8 * GRU_CACHE_LINE_BYTES, 0, 0, 0); for (i = 0; i < 6; i++) { diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index ae16c8cb4f3e..1525870f460a 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -930,6 +930,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct gru_thread_state *gts; unsigned long paddr, vaddr; + unsigned long expires; vaddr = (unsigned long)vmf->virtual_address; gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n", @@ -954,7 +955,8 @@ again: mutex_unlock(>s->ts_ctxlock); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ - if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies) + expires = gts->ts_steal_jiffies + GRU_STEAL_DELAY; + if (time_before(expires, jiffies)) gru_steal_context(gts); goto again; } diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index 2129274ef7ab..e936d43895d2 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -306,19 +306,20 @@ struct gru_mm_struct *gru_register_mmu_notifier(void) atomic_inc(&gms->ms_refcnt); } else { gms = kzalloc(sizeof(*gms), GFP_KERNEL); - if (gms) { - STAT(gms_alloc); - spin_lock_init(&gms->ms_asid_lock); - gms->ms_notifier.ops = &gru_mmuops; - atomic_set(&gms->ms_refcnt, 1); - init_waitqueue_head(&gms->ms_wait_queue); - err = __mmu_notifier_register(&gms->ms_notifier, current->mm); - if (err) - goto error; - } + if (!gms) + return ERR_PTR(-ENOMEM); + STAT(gms_alloc); + spin_lock_init(&gms->ms_asid_lock); + gms->ms_notifier.ops = &gru_mmuops; + atomic_set(&gms->ms_refcnt, 1); + init_waitqueue_head(&gms->ms_wait_queue); + err = __mmu_notifier_register(&gms->ms_notifier, current->mm); + if (err) + goto error; } - gru_dbg(grudev, "gms %p, refcnt %d\n", gms, - atomic_read(&gms->ms_refcnt)); + if (gms) + gru_dbg(grudev, "gms %p, refcnt %d\n", gms, + atomic_read(&gms->ms_refcnt)); return gms; error: kfree(gms); diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index c8c6a363069c..6e3af8b42cdd 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -460,6 +460,13 @@ static void st_int_enqueue(struct st_data_s *st_gdata, struct sk_buff *skb) * - TTY layer when write's finished * - st_write (in context of the protocol stack) */ +static void work_fn_write_wakeup(struct work_struct *work) +{ + struct st_data_s *st_gdata = container_of(work, struct st_data_s, + work_write_wakeup); + + st_tx_wakeup((void *)st_gdata); +} void st_tx_wakeup(struct st_data_s *st_data) { struct sk_buff *skb; @@ -812,8 +819,12 @@ static void st_tty_wakeup(struct tty_struct *tty) /* don't do an wakeup for now */ clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - /* call our internal wakeup */ - st_tx_wakeup((void *)st_gdata); + /* + * schedule the internal wakeup instead of calling directly to + * avoid lockup (port->lock needed in tty->ops->write is + * already taken here + */ + schedule_work(&st_gdata->work_write_wakeup); } static void st_tty_flush_buffer(struct tty_struct *tty) @@ -881,6 +892,9 @@ int st_core_init(struct st_data_s **core_data) pr_err("unable to un-register ldisc"); return err; } + + INIT_WORK(&st_gdata->work_write_wakeup, work_fn_write_wakeup); + *core_data = st_gdata; return 0; } diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index ffb56340d0c7..89300870fefb 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1,7 +1,7 @@ /* * VMware Balloon driver. * - * Copyright (C) 2000-2010, VMware, Inc. All Rights Reserved. + * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -37,16 +37,19 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/vmalloc.h> #include <linux/sched.h> #include <linux/module.h> #include <linux/workqueue.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <linux/vmw_vmci_defs.h> +#include <linux/vmw_vmci_api.h> #include <asm/hypervisor.h> MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); -MODULE_VERSION("1.3.0.0-k"); +MODULE_VERSION("1.5.0.0-k"); MODULE_ALIAS("dmi:*:svnVMware*:*"); MODULE_ALIAS("vmware_vmmemctl"); MODULE_LICENSE("GPL"); @@ -57,12 +60,6 @@ MODULE_LICENSE("GPL"); */ /* - * Rate of allocating memory when there is no memory pressure - * (driver performs non-sleeping allocations). - */ -#define VMW_BALLOON_NOSLEEP_ALLOC_MAX 16384U - -/* * Rates of memory allocaton when guest experiences memory pressure * (driver performs sleeping allocations). */ @@ -71,13 +68,6 @@ MODULE_LICENSE("GPL"); #define VMW_BALLOON_RATE_ALLOC_INC 16U /* - * Rates for releasing pages while deflating balloon. - */ -#define VMW_BALLOON_RATE_FREE_MIN 512U -#define VMW_BALLOON_RATE_FREE_MAX 16384U -#define VMW_BALLOON_RATE_FREE_INC 16U - -/* * When guest is under memory pressure, use a reduced page allocation * rate for next several cycles. */ @@ -99,9 +89,6 @@ MODULE_LICENSE("GPL"); */ #define VMW_PAGE_ALLOC_CANSLEEP (GFP_HIGHUSER) -/* Maximum number of page allocations without yielding processor */ -#define VMW_BALLOON_YIELD_THRESHOLD 1024 - /* Maximum number of refused pages we accumulate during inflation cycle */ #define VMW_BALLOON_MAX_REFUSED 16 @@ -116,17 +103,45 @@ enum vmwballoon_capabilities { /* * Bit 0 is reserved and not associated to any capability. */ - VMW_BALLOON_BASIC_CMDS = (1 << 1), - VMW_BALLOON_BATCHED_CMDS = (1 << 2) + VMW_BALLOON_BASIC_CMDS = (1 << 1), + VMW_BALLOON_BATCHED_CMDS = (1 << 2), + VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3), + VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4), }; -#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS) +#define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \ + | VMW_BALLOON_BATCHED_CMDS \ + | VMW_BALLOON_BATCHED_2M_CMDS \ + | VMW_BALLOON_SIGNALLED_WAKEUP_CMD) + +#define VMW_BALLOON_2M_SHIFT (9) +#define VMW_BALLOON_NUM_PAGE_SIZES (2) + +/* + * Backdoor commands availability: + * + * START, GET_TARGET and GUEST_ID are always available, + * + * VMW_BALLOON_BASIC_CMDS: + * LOCK and UNLOCK commands, + * VMW_BALLOON_BATCHED_CMDS: + * BATCHED_LOCK and BATCHED_UNLOCK commands. + * VMW BALLOON_BATCHED_2M_CMDS: + * BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands, + * VMW VMW_BALLOON_SIGNALLED_WAKEUP_CMD: + * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command. + */ +#define VMW_BALLOON_CMD_START 0 +#define VMW_BALLOON_CMD_GET_TARGET 1 +#define VMW_BALLOON_CMD_LOCK 2 +#define VMW_BALLOON_CMD_UNLOCK 3 +#define VMW_BALLOON_CMD_GUEST_ID 4 +#define VMW_BALLOON_CMD_BATCHED_LOCK 6 +#define VMW_BALLOON_CMD_BATCHED_UNLOCK 7 +#define VMW_BALLOON_CMD_BATCHED_2M_LOCK 8 +#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 9 +#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET 10 -#define VMW_BALLOON_CMD_START 0 -#define VMW_BALLOON_CMD_GET_TARGET 1 -#define VMW_BALLOON_CMD_LOCK 2 -#define VMW_BALLOON_CMD_UNLOCK 3 -#define VMW_BALLOON_CMD_GUEST_ID 4 /* error codes */ #define VMW_BALLOON_SUCCESS 0 @@ -142,18 +157,60 @@ enum vmwballoon_capabilities { #define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000) -#define VMWARE_BALLOON_CMD(cmd, data, result) \ +/* Batch page description */ + +/* + * Layout of a page in the batch page: + * + * +-------------+----------+--------+ + * | | | | + * | Page number | Reserved | Status | + * | | | | + * +-------------+----------+--------+ + * 64 PAGE_SHIFT 6 0 + * + * The reserved field should be set to 0. + */ +#define VMW_BALLOON_BATCH_MAX_PAGES (PAGE_SIZE / sizeof(u64)) +#define VMW_BALLOON_BATCH_STATUS_MASK ((1UL << 5) - 1) +#define VMW_BALLOON_BATCH_PAGE_MASK (~((1UL << PAGE_SHIFT) - 1)) + +struct vmballoon_batch_page { + u64 pages[VMW_BALLOON_BATCH_MAX_PAGES]; +}; + +static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx) +{ + return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK; +} + +static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch, + int idx) +{ + return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK); +} + +static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx, + u64 pa) +{ + batch->pages[idx] = pa; +} + + +#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result) \ ({ \ - unsigned long __status, __dummy1, __dummy2; \ + unsigned long __status, __dummy1, __dummy2, __dummy3; \ __asm__ __volatile__ ("inl %%dx" : \ "=a"(__status), \ "=c"(__dummy1), \ "=d"(__dummy2), \ - "=b"(result) : \ + "=b"(result), \ + "=S" (__dummy3) : \ "0"(VMW_BALLOON_HV_MAGIC), \ "1"(VMW_BALLOON_CMD_##cmd), \ "2"(VMW_BALLOON_HV_PORT), \ - "3"(data) : \ + "3"(arg1), \ + "4" (arg2) : \ "memory"); \ if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START) \ result = __dummy1; \ @@ -164,27 +221,30 @@ enum vmwballoon_capabilities { #ifdef CONFIG_DEBUG_FS struct vmballoon_stats { unsigned int timer; + unsigned int doorbell; /* allocation statistics */ - unsigned int alloc; - unsigned int alloc_fail; + unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES]; unsigned int sleep_alloc; unsigned int sleep_alloc_fail; - unsigned int refused_alloc; - unsigned int refused_free; - unsigned int free; + unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES]; /* monitor operations */ - unsigned int lock; - unsigned int lock_fail; - unsigned int unlock; - unsigned int unlock_fail; + unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES]; + unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES]; unsigned int target; unsigned int target_fail; unsigned int start; unsigned int start_fail; unsigned int guest_type; unsigned int guest_type_fail; + unsigned int doorbell_set; + unsigned int doorbell_unset; }; #define STATS_INC(stat) (stat)++ @@ -192,14 +252,30 @@ struct vmballoon_stats { #define STATS_INC(stat) #endif -struct vmballoon { +struct vmballoon; +struct vmballoon_ops { + void (*add_page)(struct vmballoon *b, int idx, struct page *p); + int (*lock)(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target); + int (*unlock)(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target); +}; + +struct vmballoon_page_size { /* list of reserved physical pages */ struct list_head pages; /* transient list of non-balloonable pages */ struct list_head refused_pages; unsigned int n_refused_pages; +}; + +struct vmballoon { + struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES]; + + /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */ + unsigned supported_page_sizes; /* balloon size in pages */ unsigned int size; @@ -210,11 +286,18 @@ struct vmballoon { /* adjustment rates (pages per second) */ unsigned int rate_alloc; - unsigned int rate_free; /* slowdown page allocations for next few cycles */ unsigned int slow_allocation_cycles; + unsigned long capabilities; + + struct vmballoon_batch_page *batch_page; + unsigned int batch_max_pages; + struct page *page; + + const struct vmballoon_ops *ops; + #ifdef CONFIG_DEBUG_FS /* statistics */ struct vmballoon_stats stats; @@ -226,6 +309,8 @@ struct vmballoon { struct sysinfo sysinfo; struct delayed_work dwork; + + struct vmci_handle vmci_doorbell; }; static struct vmballoon balloon; @@ -234,20 +319,38 @@ static struct vmballoon balloon; * Send "start" command to the host, communicating supported version * of the protocol. */ -static bool vmballoon_send_start(struct vmballoon *b) +static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) { - unsigned long status, capabilities; + unsigned long status, capabilities, dummy = 0; + bool success; STATS_INC(b->stats.start); - status = VMWARE_BALLOON_CMD(START, VMW_BALLOON_CAPABILITIES, - capabilities); - if (status == VMW_BALLOON_SUCCESS) - return true; + status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities); - pr_debug("%s - failed, hv returns %ld\n", __func__, status); - STATS_INC(b->stats.start_fail); - return false; + switch (status) { + case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES: + b->capabilities = capabilities; + success = true; + break; + case VMW_BALLOON_SUCCESS: + b->capabilities = VMW_BALLOON_BASIC_CMDS; + success = true; + break; + default: + success = false; + } + + if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) + b->supported_page_sizes = 2; + else + b->supported_page_sizes = 1; + + if (!success) { + pr_debug("%s - failed, hv returns %ld\n", __func__, status); + STATS_INC(b->stats.start_fail); + } + return success; } static bool vmballoon_check_status(struct vmballoon *b, unsigned long status) @@ -273,9 +376,10 @@ static bool vmballoon_check_status(struct vmballoon *b, unsigned long status) */ static bool vmballoon_send_guest_id(struct vmballoon *b) { - unsigned long status, dummy; + unsigned long status, dummy = 0; - status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy); + status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy, + dummy); STATS_INC(b->stats.guest_type); @@ -287,6 +391,14 @@ static bool vmballoon_send_guest_id(struct vmballoon *b) return false; } +static u16 vmballoon_page_size(bool is_2m_page) +{ + if (is_2m_page) + return 1 << VMW_BALLOON_2M_SHIFT; + + return 1; +} + /* * Retrieve desired balloon size from the host. */ @@ -295,6 +407,7 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target) unsigned long status; unsigned long target; unsigned long limit; + unsigned long dummy = 0; u32 limit32; /* @@ -313,7 +426,7 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target) /* update stats */ STATS_INC(b->stats.target); - status = VMWARE_BALLOON_CMD(GET_TARGET, limit, target); + status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target); if (vmballoon_check_status(b, status)) { *new_target = target; return true; @@ -330,23 +443,46 @@ static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target) * check the return value and maybe submit a different page. */ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, - unsigned int *hv_status) + unsigned int *hv_status, unsigned int *target) { - unsigned long status, dummy; + unsigned long status, dummy = 0; u32 pfn32; pfn32 = (u32)pfn; if (pfn32 != pfn) return -1; - STATS_INC(b->stats.lock); + STATS_INC(b->stats.lock[false]); - *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy); + *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target); if (vmballoon_check_status(b, status)) return 0; pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); - STATS_INC(b->stats.lock_fail); + STATS_INC(b->stats.lock_fail[false]); + return 1; +} + +static int vmballoon_send_batched_lock(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, unsigned int *target) +{ + unsigned long status; + unsigned long pfn = page_to_pfn(b->page); + + STATS_INC(b->stats.lock[is_2m_pages]); + + if (is_2m_pages) + status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages, + *target); + else + status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages, + *target); + + if (vmballoon_check_status(b, status)) + return 0; + + pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.lock_fail[is_2m_pages]); return 1; } @@ -354,26 +490,66 @@ static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn, * Notify the host that guest intends to release given page back into * the pool of available (to the guest) pages. */ -static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn) +static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn, + unsigned int *target) { - unsigned long status, dummy; + unsigned long status, dummy = 0; u32 pfn32; pfn32 = (u32)pfn; if (pfn32 != pfn) return false; - STATS_INC(b->stats.unlock); + STATS_INC(b->stats.unlock[false]); - status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy); + status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target); if (vmballoon_check_status(b, status)) return true; pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status); - STATS_INC(b->stats.unlock_fail); + STATS_INC(b->stats.unlock_fail[false]); + return false; +} + +static bool vmballoon_send_batched_unlock(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, unsigned int *target) +{ + unsigned long status; + unsigned long pfn = page_to_pfn(b->page); + + STATS_INC(b->stats.unlock[is_2m_pages]); + + if (is_2m_pages) + status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages, + *target); + else + status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages, + *target); + + if (vmballoon_check_status(b, status)) + return true; + + pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status); + STATS_INC(b->stats.unlock_fail[is_2m_pages]); return false; } +static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page) +{ + if (is_2m_page) + return alloc_pages(flags, VMW_BALLOON_2M_SHIFT); + + return alloc_page(flags); +} + +static void vmballoon_free_page(struct page *page, bool is_2m_page) +{ + if (is_2m_page) + __free_pages(page, VMW_BALLOON_2M_SHIFT); + else + __free_page(page); +} + /* * Quickly release all pages allocated for the balloon. This function is * called when host decides to "reset" balloon for one reason or another. @@ -383,35 +559,31 @@ static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn) static void vmballoon_pop(struct vmballoon *b) { struct page *page, *next; - unsigned int count = 0; - - list_for_each_entry_safe(page, next, &b->pages, lru) { - list_del(&page->lru); - __free_page(page); - STATS_INC(b->stats.free); - b->size--; - - if (++count >= b->rate_free) { - count = 0; + unsigned is_2m_pages; + + for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; + is_2m_pages++) { + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + u16 size_per_page = vmballoon_page_size(is_2m_pages); + + list_for_each_entry_safe(page, next, &page_size->pages, lru) { + list_del(&page->lru); + vmballoon_free_page(page, is_2m_pages); + STATS_INC(b->stats.free[is_2m_pages]); + b->size -= size_per_page; cond_resched(); } } -} -/* - * Perform standard reset sequence by popping the balloon (in case it - * is not empty) and then restarting protocol. This operation normally - * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. - */ -static void vmballoon_reset(struct vmballoon *b) -{ - /* free all pages, skipping monitor unlock */ - vmballoon_pop(b); + if (b->batch_page) { + vunmap(b->batch_page); + b->batch_page = NULL; + } - if (vmballoon_send_start(b)) { - b->reset_required = false; - if (!vmballoon_send_guest_id(b)) - pr_err("failed to send guest ID to the host\n"); + if (b->page) { + __free_page(b->page); + b->page = NULL; } } @@ -420,17 +592,23 @@ static void vmballoon_reset(struct vmballoon *b) * refuse list, those refused page are then released at the end of the * inflation cycle. */ -static int vmballoon_lock_page(struct vmballoon *b, struct page *page) +static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target) { int locked, hv_status; + struct page *page = b->page; + struct vmballoon_page_size *page_size = &b->page_sizes[false]; + + /* is_2m_pages can never happen as 2m pages support implies batching */ - locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status); + locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status, + target); if (locked > 0) { - STATS_INC(b->stats.refused_alloc); + STATS_INC(b->stats.refused_alloc[false]); if (hv_status == VMW_BALLOON_ERROR_RESET || hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) { - __free_page(page); + vmballoon_free_page(page, false); return -EIO; } @@ -439,17 +617,17 @@ static int vmballoon_lock_page(struct vmballoon *b, struct page *page) * and retry allocation, unless we already accumulated * too many of them, in which case take a breather. */ - if (b->n_refused_pages < VMW_BALLOON_MAX_REFUSED) { - b->n_refused_pages++; - list_add(&page->lru, &b->refused_pages); + if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) { + page_size->n_refused_pages++; + list_add(&page->lru, &page_size->refused_pages); } else { - __free_page(page); + vmballoon_free_page(page, false); } return -EIO; } /* track allocated page */ - list_add(&page->lru, &b->pages); + list_add(&page->lru, &page_size->pages); /* update balloon size */ b->size++; @@ -457,21 +635,81 @@ static int vmballoon_lock_page(struct vmballoon *b, struct page *page) return 0; } +static int vmballoon_lock_batched_page(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, unsigned int *target) +{ + int locked, i; + u16 size_per_page = vmballoon_page_size(is_2m_pages); + + locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages, + target); + if (locked > 0) { + for (i = 0; i < num_pages; i++) { + u64 pa = vmballoon_batch_get_pa(b->batch_page, i); + struct page *p = pfn_to_page(pa >> PAGE_SHIFT); + + vmballoon_free_page(p, is_2m_pages); + } + + return -EIO; + } + + for (i = 0; i < num_pages; i++) { + u64 pa = vmballoon_batch_get_pa(b->batch_page, i); + struct page *p = pfn_to_page(pa >> PAGE_SHIFT); + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + + locked = vmballoon_batch_get_status(b->batch_page, i); + + switch (locked) { + case VMW_BALLOON_SUCCESS: + list_add(&p->lru, &page_size->pages); + b->size += size_per_page; + break; + case VMW_BALLOON_ERROR_PPN_PINNED: + case VMW_BALLOON_ERROR_PPN_INVALID: + if (page_size->n_refused_pages + < VMW_BALLOON_MAX_REFUSED) { + list_add(&p->lru, &page_size->refused_pages); + page_size->n_refused_pages++; + break; + } + /* Fallthrough */ + case VMW_BALLOON_ERROR_RESET: + case VMW_BALLOON_ERROR_PPN_NOTNEEDED: + vmballoon_free_page(p, is_2m_pages); + break; + default: + /* This should never happen */ + WARN_ON_ONCE(true); + } + } + + return 0; +} + /* * Release the page allocated for the balloon. Note that we first notify * the host so it can make sure the page will be available for the guest * to use, if needed. */ -static int vmballoon_release_page(struct vmballoon *b, struct page *page) +static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages, + bool is_2m_pages, unsigned int *target) { - if (!vmballoon_send_unlock_page(b, page_to_pfn(page))) - return -EIO; + struct page *page = b->page; + struct vmballoon_page_size *page_size = &b->page_sizes[false]; + + /* is_2m_pages can never happen as 2m pages support implies batching */ - list_del(&page->lru); + if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) { + list_add(&page->lru, &page_size->pages); + return -EIO; + } /* deallocate page */ - __free_page(page); - STATS_INC(b->stats.free); + vmballoon_free_page(page, false); + STATS_INC(b->stats.free[false]); /* update balloon size */ b->size--; @@ -479,21 +717,76 @@ static int vmballoon_release_page(struct vmballoon *b, struct page *page) return 0; } +static int vmballoon_unlock_batched_page(struct vmballoon *b, + unsigned int num_pages, bool is_2m_pages, + unsigned int *target) +{ + int locked, i, ret = 0; + bool hv_success; + u16 size_per_page = vmballoon_page_size(is_2m_pages); + + hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages, + target); + if (!hv_success) + ret = -EIO; + + for (i = 0; i < num_pages; i++) { + u64 pa = vmballoon_batch_get_pa(b->batch_page, i); + struct page *p = pfn_to_page(pa >> PAGE_SHIFT); + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + + locked = vmballoon_batch_get_status(b->batch_page, i); + if (!hv_success || locked != VMW_BALLOON_SUCCESS) { + /* + * That page wasn't successfully unlocked by the + * hypervisor, re-add it to the list of pages owned by + * the balloon driver. + */ + list_add(&p->lru, &page_size->pages); + } else { + /* deallocate page */ + vmballoon_free_page(p, is_2m_pages); + STATS_INC(b->stats.free[is_2m_pages]); + + /* update balloon size */ + b->size -= size_per_page; + } + } + + return ret; +} + /* * Release pages that were allocated while attempting to inflate the * balloon but were refused by the host for one reason or another. */ -static void vmballoon_release_refused_pages(struct vmballoon *b) +static void vmballoon_release_refused_pages(struct vmballoon *b, + bool is_2m_pages) { struct page *page, *next; + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; - list_for_each_entry_safe(page, next, &b->refused_pages, lru) { + list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) { list_del(&page->lru); - __free_page(page); - STATS_INC(b->stats.refused_free); + vmballoon_free_page(page, is_2m_pages); + STATS_INC(b->stats.refused_free[is_2m_pages]); } - b->n_refused_pages = 0; + page_size->n_refused_pages = 0; +} + +static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p) +{ + b->page = p; +} + +static void vmballoon_add_batched_page(struct vmballoon *b, int idx, + struct page *p) +{ + vmballoon_batch_set_pa(b->batch_page, idx, + (u64)page_to_pfn(p) << PAGE_SHIFT); } /* @@ -503,12 +796,12 @@ static void vmballoon_release_refused_pages(struct vmballoon *b) */ static void vmballoon_inflate(struct vmballoon *b) { - unsigned int goal; - unsigned int rate; - unsigned int i; + unsigned rate; unsigned int allocations = 0; + unsigned int num_pages = 0; int error = 0; gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP; + bool is_2m_pages; pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); @@ -527,27 +820,50 @@ static void vmballoon_inflate(struct vmballoon *b) * slowdown page allocations considerably. */ - goal = b->target - b->size; /* * Start with no sleep allocation rate which may be higher * than sleeping allocation rate. */ - rate = b->slow_allocation_cycles ? - b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX; + if (b->slow_allocation_cycles) { + rate = b->rate_alloc; + is_2m_pages = false; + } else { + rate = UINT_MAX; + is_2m_pages = + b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES; + } - pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n", - __func__, goal, rate, b->rate_alloc); + pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n", + __func__, b->target - b->size, rate, b->rate_alloc); - for (i = 0; i < goal; i++) { + while (!b->reset_required && + b->size + num_pages * vmballoon_page_size(is_2m_pages) + < b->target) { struct page *page; if (flags == VMW_PAGE_ALLOC_NOSLEEP) - STATS_INC(b->stats.alloc); + STATS_INC(b->stats.alloc[is_2m_pages]); else STATS_INC(b->stats.sleep_alloc); - page = alloc_page(flags); + page = vmballoon_alloc_page(flags, is_2m_pages); if (!page) { + STATS_INC(b->stats.alloc_fail[is_2m_pages]); + + if (is_2m_pages) { + b->ops->lock(b, num_pages, true, &b->target); + + /* + * ignore errors from locking as we now switch + * to 4k pages and we might get different + * errors. + */ + + num_pages = 0; + is_2m_pages = false; + continue; + } + if (flags == VMW_PAGE_ALLOC_CANSLEEP) { /* * CANSLEEP page allocation failed, so guest @@ -559,7 +875,6 @@ static void vmballoon_inflate(struct vmballoon *b) STATS_INC(b->stats.sleep_alloc_fail); break; } - STATS_INC(b->stats.alloc_fail); /* * NOSLEEP page allocation failed, so the guest is @@ -571,7 +886,7 @@ static void vmballoon_inflate(struct vmballoon *b) */ b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES; - if (i >= b->rate_alloc) + if (allocations >= b->rate_alloc) break; flags = VMW_PAGE_ALLOC_CANSLEEP; @@ -580,34 +895,40 @@ static void vmballoon_inflate(struct vmballoon *b) continue; } - error = vmballoon_lock_page(b, page); - if (error) - break; - - if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) { - cond_resched(); - allocations = 0; + b->ops->add_page(b, num_pages++, page); + if (num_pages == b->batch_max_pages) { + error = b->ops->lock(b, num_pages, is_2m_pages, + &b->target); + num_pages = 0; + if (error) + break; } - if (i >= rate) { + cond_resched(); + + if (allocations >= rate) { /* We allocated enough pages, let's take a break. */ break; } } + if (num_pages > 0) + b->ops->lock(b, num_pages, is_2m_pages, &b->target); + /* * We reached our goal without failures so try increasing * allocation rate. */ - if (error == 0 && i >= b->rate_alloc) { - unsigned int mult = i / b->rate_alloc; + if (error == 0 && allocations >= b->rate_alloc) { + unsigned int mult = allocations / b->rate_alloc; b->rate_alloc = min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC, VMW_BALLOON_RATE_ALLOC_MAX); } - vmballoon_release_refused_pages(b); + vmballoon_release_refused_pages(b, true); + vmballoon_release_refused_pages(b, false); } /* @@ -615,35 +936,176 @@ static void vmballoon_inflate(struct vmballoon *b) */ static void vmballoon_deflate(struct vmballoon *b) { - struct page *page, *next; - unsigned int i = 0; - unsigned int goal; - int error; + unsigned is_2m_pages; pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target); - /* limit deallocation rate */ - goal = min(b->size - b->target, b->rate_free); + /* free pages to reach target */ + for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes; + is_2m_pages++) { + struct page *page, *next; + unsigned int num_pages = 0; + struct vmballoon_page_size *page_size = + &b->page_sizes[is_2m_pages]; + + list_for_each_entry_safe(page, next, &page_size->pages, lru) { + if (b->reset_required || + (b->target > 0 && + b->size - num_pages + * vmballoon_page_size(is_2m_pages) + < b->target + vmballoon_page_size(true))) + break; + + list_del(&page->lru); + b->ops->add_page(b, num_pages++, page); - pr_debug("%s - goal: %d, rate: %d\n", __func__, goal, b->rate_free); + if (num_pages == b->batch_max_pages) { + int error; - /* free pages to reach target */ - list_for_each_entry_safe(page, next, &b->pages, lru) { - error = vmballoon_release_page(b, page); - if (error) { - /* quickly decrease rate in case of error */ - b->rate_free = max(b->rate_free / 2, - VMW_BALLOON_RATE_FREE_MIN); - return; + error = b->ops->unlock(b, num_pages, + is_2m_pages, &b->target); + num_pages = 0; + if (error) + return; + } + + cond_resched(); } - if (++i >= goal) - break; + if (num_pages > 0) + b->ops->unlock(b, num_pages, is_2m_pages, &b->target); + } +} + +static const struct vmballoon_ops vmballoon_basic_ops = { + .add_page = vmballoon_add_page, + .lock = vmballoon_lock_page, + .unlock = vmballoon_unlock_page +}; + +static const struct vmballoon_ops vmballoon_batched_ops = { + .add_page = vmballoon_add_batched_page, + .lock = vmballoon_lock_batched_page, + .unlock = vmballoon_unlock_batched_page +}; + +static bool vmballoon_init_batching(struct vmballoon *b) +{ + b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP); + if (!b->page) + return false; + + b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL); + if (!b->batch_page) { + __free_page(b->page); + return false; + } + + return true; +} + +/* + * Receive notification and resize balloon + */ +static void vmballoon_doorbell(void *client_data) +{ + struct vmballoon *b = client_data; + + STATS_INC(b->stats.doorbell); + + mod_delayed_work(system_freezable_wq, &b->dwork, 0); +} + +/* + * Clean up vmci doorbell + */ +static void vmballoon_vmci_cleanup(struct vmballoon *b) +{ + int error; + + VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID, + VMCI_INVALID_ID, error); + STATS_INC(b->stats.doorbell_unset); + + if (!vmci_handle_is_invalid(b->vmci_doorbell)) { + vmci_doorbell_destroy(b->vmci_doorbell); + b->vmci_doorbell = VMCI_INVALID_HANDLE; + } +} + +/* + * Initialize vmci doorbell, to get notified as soon as balloon changes + */ +static int vmballoon_vmci_init(struct vmballoon *b) +{ + int error = 0; + + if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) { + error = vmci_doorbell_create(&b->vmci_doorbell, + VMCI_FLAG_DELAYED_CB, + VMCI_PRIVILEGE_FLAG_RESTRICTED, + vmballoon_doorbell, b); + + if (error == VMCI_SUCCESS) { + VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, + b->vmci_doorbell.context, + b->vmci_doorbell.resource, error); + STATS_INC(b->stats.doorbell_set); + } + } + + if (error != 0) { + vmballoon_vmci_cleanup(b); + + return -EIO; } - /* slowly increase rate if there were no errors */ - b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC, - VMW_BALLOON_RATE_FREE_MAX); + return 0; +} + +/* + * Perform standard reset sequence by popping the balloon (in case it + * is not empty) and then restarting protocol. This operation normally + * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. + */ +static void vmballoon_reset(struct vmballoon *b) +{ + int error; + + vmballoon_vmci_cleanup(b); + + /* free all pages, skipping monitor unlock */ + vmballoon_pop(b); + + if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) + return; + + if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { + b->ops = &vmballoon_batched_ops; + b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES; + if (!vmballoon_init_batching(b)) { + /* + * We failed to initialize batching, inform the monitor + * about it by sending a null capability. + * + * The guest will retry in one second. + */ + vmballoon_send_start(b, 0); + return; + } + } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) { + b->ops = &vmballoon_basic_ops; + b->batch_max_pages = 1; + } + + b->reset_required = false; + + error = vmballoon_vmci_init(b); + if (error) + pr_err("failed to initialize vmci doorbell\n"); + + if (!vmballoon_send_guest_id(b)) + pr_err("failed to send guest ID to the host\n"); } /* @@ -664,13 +1126,14 @@ static void vmballoon_work(struct work_struct *work) if (b->slow_allocation_cycles > 0) b->slow_allocation_cycles--; - if (vmballoon_send_get_target(b, &target)) { + if (!b->reset_required && vmballoon_send_get_target(b, &target)) { /* update target, adjust size */ b->target = target; if (b->size < target) vmballoon_inflate(b); - else if (b->size > target) + else if (target == 0 || + b->size > target + vmballoon_page_size(true)) vmballoon_deflate(b); } @@ -692,6 +1155,14 @@ static int vmballoon_debug_show(struct seq_file *f, void *offset) struct vmballoon *b = f->private; struct vmballoon_stats *stats = &b->stats; + /* format capabilities info */ + seq_printf(f, + "balloon capabilities: %#4x\n" + "used capabilities: %#4lx\n" + "is resetting: %c\n", + VMW_BALLOON_CAPABILITIES, b->capabilities, + b->reset_required ? 'y' : 'n'); + /* format size info */ seq_printf(f, "target: %8d pages\n" @@ -700,35 +1171,48 @@ static int vmballoon_debug_show(struct seq_file *f, void *offset) /* format rate info */ seq_printf(f, - "rateNoSleepAlloc: %8d pages/sec\n" - "rateSleepAlloc: %8d pages/sec\n" - "rateFree: %8d pages/sec\n", - VMW_BALLOON_NOSLEEP_ALLOC_MAX, - b->rate_alloc, b->rate_free); + "rateSleepAlloc: %8d pages/sec\n", + b->rate_alloc); seq_printf(f, "\n" "timer: %8u\n" + "doorbell: %8u\n" "start: %8u (%4u failed)\n" "guestType: %8u (%4u failed)\n" + "2m-lock: %8u (%4u failed)\n" "lock: %8u (%4u failed)\n" + "2m-unlock: %8u (%4u failed)\n" "unlock: %8u (%4u failed)\n" "target: %8u (%4u failed)\n" + "prim2mAlloc: %8u (%4u failed)\n" "primNoSleepAlloc: %8u (%4u failed)\n" "primCanSleepAlloc: %8u (%4u failed)\n" + "prim2mFree: %8u\n" "primFree: %8u\n" + "err2mAlloc: %8u\n" "errAlloc: %8u\n" - "errFree: %8u\n", + "err2mFree: %8u\n" + "errFree: %8u\n" + "doorbellSet: %8u\n" + "doorbellUnset: %8u\n", stats->timer, + stats->doorbell, stats->start, stats->start_fail, stats->guest_type, stats->guest_type_fail, - stats->lock, stats->lock_fail, - stats->unlock, stats->unlock_fail, + stats->lock[true], stats->lock_fail[true], + stats->lock[false], stats->lock_fail[false], + stats->unlock[true], stats->unlock_fail[true], + stats->unlock[false], stats->unlock_fail[false], stats->target, stats->target_fail, - stats->alloc, stats->alloc_fail, + stats->alloc[true], stats->alloc_fail[true], + stats->alloc[false], stats->alloc_fail[false], stats->sleep_alloc, stats->sleep_alloc_fail, - stats->free, - stats->refused_alloc, stats->refused_free); + stats->free[true], + stats->free[false], + stats->refused_alloc[true], stats->refused_alloc[false], + stats->refused_free[true], stats->refused_free[false], + stats->doorbell_set, stats->doorbell_unset); return 0; } @@ -782,7 +1266,7 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b) static int __init vmballoon_init(void) { int error; - + unsigned is_2m_pages; /* * Check if we are running on VMware's hypervisor and bail out * if we are not. @@ -790,32 +1274,26 @@ static int __init vmballoon_init(void) if (x86_hyper != &x86_hyper_vmware) return -ENODEV; - INIT_LIST_HEAD(&balloon.pages); - INIT_LIST_HEAD(&balloon.refused_pages); + for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; + is_2m_pages++) { + INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages); + INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages); + } /* initialize rates */ balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX; - balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX; INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); - /* - * Start balloon. - */ - if (!vmballoon_send_start(&balloon)) { - pr_err("failed to send start command to the host\n"); - return -EIO; - } - - if (!vmballoon_send_guest_id(&balloon)) { - pr_err("failed to send guest ID to the host\n"); - return -EIO; - } - error = vmballoon_debugfs_init(&balloon); if (error) return error; + balloon.vmci_doorbell = VMCI_INVALID_HANDLE; + balloon.batch_page = NULL; + balloon.page = NULL; + balloon.reset_required = true; + queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); return 0; @@ -824,6 +1302,7 @@ module_init(vmballoon_init); static void __exit vmballoon_exit(void) { + vmballoon_vmci_cleanup(&balloon); cancel_delayed_work_sync(&balloon.dwork); vmballoon_debugfs_exit(&balloon); @@ -833,7 +1312,7 @@ static void __exit vmballoon_exit(void) * Reset connection before deallocating memory to avoid potential for * additional spurious resets from guest touching deallocated pages. */ - vmballoon_send_start(&balloon); + vmballoon_send_start(&balloon, 0); vmballoon_pop(&balloon); } module_exit(vmballoon_exit); diff --git a/drivers/misc/vmw_vmci/vmci_datagram.c b/drivers/misc/vmw_vmci/vmci_datagram.c index 822665245588..8a4b6bbe1bee 100644 --- a/drivers/misc/vmw_vmci/vmci_datagram.c +++ b/drivers/misc/vmw_vmci/vmci_datagram.c @@ -276,11 +276,10 @@ static int dg_dispatch_as_host(u32 context_id, struct vmci_datagram *dg) } /* We make a copy to enqueue. */ - new_dg = kmalloc(dg_size, GFP_KERNEL); + new_dg = kmemdup(dg, dg_size, GFP_KERNEL); if (new_dg == NULL) return VMCI_ERROR_NO_MEM; - memcpy(new_dg, dg, dg_size); retval = vmci_ctx_enqueue_datagram(dg->dst.context, new_dg); if (retval < VMCI_SUCCESS) { kfree(new_dg); diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c index 754a9bb0f58d..83deda4bb4d6 100644 --- a/drivers/nfc/mei_phy.c +++ b/drivers/nfc/mei_phy.c @@ -118,7 +118,7 @@ static int mei_nfc_if_version(struct nfc_mei_phy *phy) cmd.sub_command = MEI_NFC_SUBCMD_IF_VERSION; MEI_DUMP_NFC_HDR("version", &cmd.hdr); - r = mei_cl_send(phy->device, (u8 *)&cmd, sizeof(struct mei_nfc_cmd)); + r = mei_cldev_send(phy->cldev, (u8 *)&cmd, sizeof(struct mei_nfc_cmd)); if (r < 0) { pr_err("Could not send IF version cmd\n"); return r; @@ -132,7 +132,7 @@ static int mei_nfc_if_version(struct nfc_mei_phy *phy) if (!reply) return -ENOMEM; - bytes_recv = mei_cl_recv(phy->device, (u8 *)reply, if_version_length); + bytes_recv = mei_cldev_recv(phy->cldev, (u8 *)reply, if_version_length); if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) { pr_err("Could not read IF version\n"); r = -EIO; @@ -186,13 +186,14 @@ static int mei_nfc_connect(struct nfc_mei_phy *phy) connect->vendor_id = phy->vendor_id; MEI_DUMP_NFC_HDR("connect request", &cmd->hdr); - r = mei_cl_send(phy->device, (u8 *)cmd, connect_length); + r = mei_cldev_send(phy->cldev, (u8 *)cmd, connect_length); if (r < 0) { pr_err("Could not send connect cmd %d\n", r); goto err; } - bytes_recv = mei_cl_recv(phy->device, (u8 *)reply, connect_resp_length); + bytes_recv = mei_cldev_recv(phy->cldev, (u8 *)reply, + connect_resp_length); if (bytes_recv < 0) { r = bytes_recv; pr_err("Could not read connect response %d\n", r); @@ -238,7 +239,7 @@ static int mei_nfc_send(struct nfc_mei_phy *phy, u8 *buf, size_t length) MEI_DUMP_NFC_HDR("send", hdr); memcpy(mei_buf + MEI_NFC_HEADER_SIZE, buf, length); - err = mei_cl_send(phy->device, mei_buf, length + MEI_NFC_HEADER_SIZE); + err = mei_cldev_send(phy->cldev, mei_buf, length + MEI_NFC_HEADER_SIZE); if (err < 0) goto out; @@ -278,7 +279,7 @@ static int mei_nfc_recv(struct nfc_mei_phy *phy, u8 *buf, size_t length) struct mei_nfc_hdr *hdr; int received_length; - received_length = mei_cl_recv(phy->device, buf, length); + received_length = mei_cldev_recv(phy->cldev, buf, length); if (received_length < 0) return received_length; @@ -296,7 +297,7 @@ static int mei_nfc_recv(struct nfc_mei_phy *phy, u8 *buf, size_t length) } -static void nfc_mei_event_cb(struct mei_cl_device *device, u32 events, +static void nfc_mei_event_cb(struct mei_cl_device *cldev, u32 events, void *context) { struct nfc_mei_phy *phy = context; @@ -337,7 +338,7 @@ static int nfc_mei_phy_enable(void *phy_id) if (phy->powered == 1) return 0; - r = mei_cl_enable_device(phy->device); + r = mei_cldev_enable(phy->cldev); if (r < 0) { pr_err("Could not enable device %d\n", r); return r; @@ -355,7 +356,7 @@ static int nfc_mei_phy_enable(void *phy_id) goto err; } - r = mei_cl_register_event_cb(phy->device, BIT(MEI_CL_EVENT_RX), + r = mei_cldev_register_event_cb(phy->cldev, BIT(MEI_CL_EVENT_RX), nfc_mei_event_cb, phy); if (r) { pr_err("Event cb registration failed %d\n", r); @@ -368,7 +369,7 @@ static int nfc_mei_phy_enable(void *phy_id) err: phy->powered = 0; - mei_cl_disable_device(phy->device); + mei_cldev_disable(phy->cldev); return r; } @@ -378,7 +379,7 @@ static void nfc_mei_phy_disable(void *phy_id) pr_info("%s\n", __func__); - mei_cl_disable_device(phy->device); + mei_cldev_disable(phy->cldev); phy->powered = 0; } @@ -390,7 +391,7 @@ struct nfc_phy_ops mei_phy_ops = { }; EXPORT_SYMBOL_GPL(mei_phy_ops); -struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *device) +struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *cldev) { struct nfc_mei_phy *phy; @@ -398,9 +399,9 @@ struct nfc_mei_phy *nfc_mei_phy_alloc(struct mei_cl_device *device) if (!phy) return NULL; - phy->device = device; + phy->cldev = cldev; init_waitqueue_head(&phy->send_wq); - mei_cl_set_drvdata(device, phy); + mei_cldev_set_drvdata(cldev, phy); return phy; } @@ -408,7 +409,7 @@ EXPORT_SYMBOL_GPL(nfc_mei_phy_alloc); void nfc_mei_phy_free(struct nfc_mei_phy *phy) { - mei_cl_disable_device(phy->device); + mei_cldev_disable(phy->cldev); kfree(phy); } EXPORT_SYMBOL_GPL(nfc_mei_phy_free); diff --git a/drivers/nfc/mei_phy.h b/drivers/nfc/mei_phy.h index fbfa3e61738f..acd3a1fc69e6 100644 --- a/drivers/nfc/mei_phy.h +++ b/drivers/nfc/mei_phy.h @@ -13,7 +13,7 @@ /** * struct nfc_mei_phy * - * @device: mei device + * @cldev: mei client device * @hdev: nfc hci device * @send_wq: send completion wait queue @@ -28,7 +28,7 @@ * and prevents normal operation. */ struct nfc_mei_phy { - struct mei_cl_device *device; + struct mei_cl_device *cldev; struct nfc_hci_dev *hdev; wait_queue_head_t send_wq; diff --git a/drivers/nfc/microread/mei.c b/drivers/nfc/microread/mei.c index f9f5fc97cdd7..3092501f26c4 100644 --- a/drivers/nfc/microread/mei.c +++ b/drivers/nfc/microread/mei.c @@ -29,7 +29,7 @@ #define MICROREAD_DRIVER_NAME "microread" -static int microread_mei_probe(struct mei_cl_device *device, +static int microread_mei_probe(struct mei_cl_device *cldev, const struct mei_cl_device_id *id) { struct nfc_mei_phy *phy; @@ -37,7 +37,7 @@ static int microread_mei_probe(struct mei_cl_device *device, pr_info("Probing NFC microread\n"); - phy = nfc_mei_phy_alloc(device); + phy = nfc_mei_phy_alloc(cldev); if (!phy) { pr_err("Cannot allocate memory for microread mei phy.\n"); return -ENOMEM; @@ -55,9 +55,9 @@ static int microread_mei_probe(struct mei_cl_device *device, return 0; } -static int microread_mei_remove(struct mei_cl_device *device) +static int microread_mei_remove(struct mei_cl_device *cldev) { - struct nfc_mei_phy *phy = mei_cl_get_drvdata(device); + struct nfc_mei_phy *phy = mei_cldev_get_drvdata(cldev); microread_remove(phy->hdev); @@ -67,7 +67,7 @@ static int microread_mei_remove(struct mei_cl_device *device) } static struct mei_cl_device_id microread_mei_tbl[] = { - { MICROREAD_DRIVER_NAME, MEI_NFC_UUID}, + { MICROREAD_DRIVER_NAME, MEI_NFC_UUID, MEI_CL_VERSION_ANY}, /* required last entry */ { } @@ -88,7 +88,7 @@ static int microread_mei_init(void) pr_debug(DRIVER_DESC ": %s\n", __func__); - r = mei_cl_driver_register(µread_driver); + r = mei_cldev_driver_register(µread_driver); if (r) { pr_err(MICROREAD_DRIVER_NAME ": driver registration failed\n"); return r; @@ -99,7 +99,7 @@ static int microread_mei_init(void) static void microread_mei_exit(void) { - mei_cl_driver_unregister(µread_driver); + mei_cldev_driver_unregister(µread_driver); } module_init(microread_mei_init); diff --git a/drivers/nfc/pn544/mei.c b/drivers/nfc/pn544/mei.c index 101a37e12efa..46d0eb24eef9 100644 --- a/drivers/nfc/pn544/mei.c +++ b/drivers/nfc/pn544/mei.c @@ -27,7 +27,7 @@ #define PN544_DRIVER_NAME "pn544" -static int pn544_mei_probe(struct mei_cl_device *device, +static int pn544_mei_probe(struct mei_cl_device *cldev, const struct mei_cl_device_id *id) { struct nfc_mei_phy *phy; @@ -35,7 +35,7 @@ static int pn544_mei_probe(struct mei_cl_device *device, pr_info("Probing NFC pn544\n"); - phy = nfc_mei_phy_alloc(device); + phy = nfc_mei_phy_alloc(cldev); if (!phy) { pr_err("Cannot allocate memory for pn544 mei phy.\n"); return -ENOMEM; @@ -53,9 +53,9 @@ static int pn544_mei_probe(struct mei_cl_device *device, return 0; } -static int pn544_mei_remove(struct mei_cl_device *device) +static int pn544_mei_remove(struct mei_cl_device *cldev) { - struct nfc_mei_phy *phy = mei_cl_get_drvdata(device); + struct nfc_mei_phy *phy = mei_cldev_get_drvdata(cldev); pr_info("Removing pn544\n"); @@ -67,7 +67,7 @@ static int pn544_mei_remove(struct mei_cl_device *device) } static struct mei_cl_device_id pn544_mei_tbl[] = { - { PN544_DRIVER_NAME, MEI_NFC_UUID}, + { PN544_DRIVER_NAME, MEI_NFC_UUID, MEI_CL_VERSION_ANY}, /* required last entry */ { } @@ -88,7 +88,7 @@ static int pn544_mei_init(void) pr_debug(DRIVER_DESC ": %s\n", __func__); - r = mei_cl_driver_register(&pn544_driver); + r = mei_cldev_driver_register(&pn544_driver); if (r) { pr_err(PN544_DRIVER_NAME ": driver registration failed\n"); return r; @@ -99,7 +99,7 @@ static int pn544_mei_init(void) static void pn544_mei_exit(void) { - mei_cl_driver_unregister(&pn544_driver); + mei_cldev_driver_unregister(&pn544_driver); } module_init(pn544_mei_init); diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig index 8db297821f78..bc4ea585b42e 100644 --- a/drivers/nvmem/Kconfig +++ b/drivers/nvmem/Kconfig @@ -14,6 +14,28 @@ menuconfig NVMEM if NVMEM +config NVMEM_IMX_OCOTP + tristate "i.MX6 On-Chip OTP Controller support" + depends on SOC_IMX6 + help + This is a driver for the On-Chip OTP Controller (OCOTP) available on + i.MX6 SoCs, providing access to 4 Kbits of one-time programmable + eFuses. + + This driver can also be built as a module. If so, the module + will be called nvmem-imx-ocotp. + +config NVMEM_MXS_OCOTP + tristate "Freescale MXS On-Chip OTP Memory Support" + depends on ARCH_MXS || COMPILE_TEST + help + If you say Y here, you will get readonly access to the + One Time Programmable memory pages that are stored + on the Freescale i.MX23/i.MX28 processor. + + This driver can also be built as a module. If so, the module + will be called nvmem-mxs-ocotp. + config QCOM_QFPROM tristate "QCOM QFPROM Support" depends on ARCH_QCOM || COMPILE_TEST @@ -25,6 +47,16 @@ config QCOM_QFPROM This driver can also be built as a module. If so, the module will be called nvmem_qfprom. +config ROCKCHIP_EFUSE + tristate "Rockchip eFuse Support" + depends on ARCH_ROCKCHIP || COMPILE_TEST + help + This is a simple drive to dump specified values of Rockchip SoC + from eFuse, such as cpu-leakage. + + This driver can also be built as a module. If so, the module + will be called nvmem_rockchip_efuse. + config NVMEM_SUNXI_SID tristate "Allwinner SoCs SID support" depends on ARCH_SUNXI @@ -36,4 +68,14 @@ config NVMEM_SUNXI_SID This driver can also be built as a module. If so, the module will be called nvmem_sunxi_sid. +config NVMEM_VF610_OCOTP + tristate "VF610 SoC OCOTP support" + depends on SOC_VF610 || COMPILE_TEST + help + This is a driver for the 'OCOTP' peripheral available on Vybrid + devices like VF5xx and VF6xx. + + This driver can also be build as a module. If so, the module will + be called nvmem-vf610-ocotp. + endif diff --git a/drivers/nvmem/Makefile b/drivers/nvmem/Makefile index 4328b930ad9a..95dde3f8f085 100644 --- a/drivers/nvmem/Makefile +++ b/drivers/nvmem/Makefile @@ -6,7 +6,15 @@ obj-$(CONFIG_NVMEM) += nvmem_core.o nvmem_core-y := core.o # Devices +obj-$(CONFIG_NVMEM_IMX_OCOTP) += nvmem-imx-ocotp.o +nvmem-imx-ocotp-y := imx-ocotp.o +obj-$(CONFIG_NVMEM_MXS_OCOTP) += nvmem-mxs-ocotp.o +nvmem-mxs-ocotp-y := mxs-ocotp.o obj-$(CONFIG_QCOM_QFPROM) += nvmem_qfprom.o nvmem_qfprom-y := qfprom.o +obj-$(CONFIG_ROCKCHIP_EFUSE) += nvmem_rockchip_efuse.o +nvmem_rockchip_efuse-y := rockchip-efuse.o obj-$(CONFIG_NVMEM_SUNXI_SID) += nvmem_sunxi_sid.o nvmem_sunxi_sid-y := sunxi_sid.o +obj-$(CONFIG_NVMEM_VF610_OCOTP) += nvmem-vf610-ocotp.o +nvmem-vf610-ocotp-y := vf610-ocotp.o diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c new file mode 100644 index 000000000000..b7971d410b60 --- /dev/null +++ b/drivers/nvmem/imx-ocotp.c @@ -0,0 +1,154 @@ +/* + * i.MX6 OCOTP fusebox driver + * + * Copyright (c) 2015 Pengutronix, Philipp Zabel <p.zabel@pengutronix.de> + * + * Based on the barebox ocotp driver, + * Copyright (c) 2010 Baruch Siach <baruch@tkos.co.il>, + * Orex Computed Radiography + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include <linux/device.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/nvmem-provider.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/slab.h> + +struct ocotp_priv { + struct device *dev; + void __iomem *base; + unsigned int nregs; +}; + +static int imx_ocotp_read(void *context, const void *reg, size_t reg_size, + void *val, size_t val_size) +{ + struct ocotp_priv *priv = context; + unsigned int offset = *(u32 *)reg; + unsigned int count; + int i; + u32 index; + + index = offset >> 2; + count = val_size >> 2; + + if (count > (priv->nregs - index)) + count = priv->nregs - index; + + for (i = index; i < (index + count); i++) { + *(u32 *)val = readl(priv->base + 0x400 + i * 0x10); + val += 4; + } + + return (i - index) * 4; +} + +static int imx_ocotp_write(void *context, const void *data, size_t count) +{ + /* Not implemented */ + return 0; +} + +static struct regmap_bus imx_ocotp_bus = { + .read = imx_ocotp_read, + .write = imx_ocotp_write, + .reg_format_endian_default = REGMAP_ENDIAN_NATIVE, + .val_format_endian_default = REGMAP_ENDIAN_NATIVE, +}; + +static bool imx_ocotp_writeable_reg(struct device *dev, unsigned int reg) +{ + return false; +} + +static struct regmap_config imx_ocotp_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .writeable_reg = imx_ocotp_writeable_reg, + .name = "imx-ocotp", +}; + +static struct nvmem_config imx_ocotp_nvmem_config = { + .name = "imx-ocotp", + .read_only = true, + .owner = THIS_MODULE, +}; + +static const struct of_device_id imx_ocotp_dt_ids[] = { + { .compatible = "fsl,imx6q-ocotp", (void *)128 }, + { .compatible = "fsl,imx6sl-ocotp", (void *)32 }, + { .compatible = "fsl,imx6sx-ocotp", (void *)128 }, + { }, +}; +MODULE_DEVICE_TABLE(of, imx_ocotp_dt_ids); + +static int imx_ocotp_probe(struct platform_device *pdev) +{ + const struct of_device_id *of_id; + struct device *dev = &pdev->dev; + struct resource *res; + struct regmap *regmap; + struct ocotp_priv *priv; + struct nvmem_device *nvmem; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->base = devm_ioremap_resource(dev, res); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); + + of_id = of_match_device(imx_ocotp_dt_ids, dev); + priv->nregs = (unsigned int)of_id->data; + imx_ocotp_regmap_config.max_register = 4 * priv->nregs - 4; + + regmap = devm_regmap_init(dev, &imx_ocotp_bus, priv, + &imx_ocotp_regmap_config); + if (IS_ERR(regmap)) { + dev_err(dev, "regmap init failed\n"); + return PTR_ERR(regmap); + } + imx_ocotp_nvmem_config.dev = dev; + nvmem = nvmem_register(&imx_ocotp_nvmem_config); + if (IS_ERR(nvmem)) + return PTR_ERR(nvmem); + + platform_set_drvdata(pdev, nvmem); + + return 0; +} + +static int imx_ocotp_remove(struct platform_device *pdev) +{ + struct nvmem_device *nvmem = platform_get_drvdata(pdev); + + return nvmem_unregister(nvmem); +} + +static struct platform_driver imx_ocotp_driver = { + .probe = imx_ocotp_probe, + .remove = imx_ocotp_remove, + .driver = { + .name = "imx_ocotp", + .of_match_table = imx_ocotp_dt_ids, + }, +}; +module_platform_driver(imx_ocotp_driver); + +MODULE_AUTHOR("Philipp Zabel <p.zabel@pengutronix.de>"); +MODULE_DESCRIPTION("i.MX6 OCOTP fuse box driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvmem/mxs-ocotp.c b/drivers/nvmem/mxs-ocotp.c new file mode 100644 index 000000000000..8ba19bba3156 --- /dev/null +++ b/drivers/nvmem/mxs-ocotp.c @@ -0,0 +1,257 @@ +/* + * Freescale MXS On-Chip OTP driver + * + * Copyright (C) 2015 Stefan Wahren <stefan.wahren@i2se.com> + * + * Based on the driver from Huang Shijie and Christoph G. Baumann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/nvmem-provider.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/slab.h> +#include <linux/stmp_device.h> + +/* OCOTP registers and bits */ + +#define BM_OCOTP_CTRL_RD_BANK_OPEN BIT(12) +#define BM_OCOTP_CTRL_ERROR BIT(9) +#define BM_OCOTP_CTRL_BUSY BIT(8) + +#define OCOTP_TIMEOUT 10000 +#define OCOTP_DATA_OFFSET 0x20 + +struct mxs_ocotp { + struct clk *clk; + void __iomem *base; + struct nvmem_device *nvmem; +}; + +static int mxs_ocotp_wait(struct mxs_ocotp *otp) +{ + int timeout = OCOTP_TIMEOUT; + unsigned int status = 0; + + while (timeout--) { + status = readl(otp->base); + + if (!(status & (BM_OCOTP_CTRL_BUSY | BM_OCOTP_CTRL_ERROR))) + break; + + cpu_relax(); + } + + if (status & BM_OCOTP_CTRL_BUSY) + return -EBUSY; + else if (status & BM_OCOTP_CTRL_ERROR) + return -EIO; + + return 0; +} + +static int mxs_ocotp_read(void *context, const void *reg, size_t reg_size, + void *val, size_t val_size) +{ + struct mxs_ocotp *otp = context; + unsigned int offset = *(u32 *)reg; + u32 *buf = val; + int ret; + + ret = clk_enable(otp->clk); + if (ret) + return ret; + + writel(BM_OCOTP_CTRL_ERROR, otp->base + STMP_OFFSET_REG_CLR); + + ret = mxs_ocotp_wait(otp); + if (ret) + goto disable_clk; + + /* open OCOTP banks for read */ + writel(BM_OCOTP_CTRL_RD_BANK_OPEN, otp->base + STMP_OFFSET_REG_SET); + + /* approximately wait 33 hclk cycles */ + udelay(1); + + ret = mxs_ocotp_wait(otp); + if (ret) + goto close_banks; + + while (val_size) { + if ((offset < OCOTP_DATA_OFFSET) || (offset % 16)) { + /* fill up non-data register */ + *buf = 0; + } else { + *buf = readl(otp->base + offset); + } + + buf++; + val_size--; + offset += reg_size; + } + +close_banks: + /* close banks for power saving */ + writel(BM_OCOTP_CTRL_RD_BANK_OPEN, otp->base + STMP_OFFSET_REG_CLR); + +disable_clk: + clk_disable(otp->clk); + + return ret; +} + +static int mxs_ocotp_write(void *context, const void *data, size_t count) +{ + /* We don't want to support writing */ + return 0; +} + +static bool mxs_ocotp_writeable_reg(struct device *dev, unsigned int reg) +{ + return false; +} + +static struct nvmem_config ocotp_config = { + .name = "mxs-ocotp", + .owner = THIS_MODULE, +}; + +static const struct regmap_range imx23_ranges[] = { + regmap_reg_range(OCOTP_DATA_OFFSET, 0x210), +}; + +static const struct regmap_access_table imx23_access = { + .yes_ranges = imx23_ranges, + .n_yes_ranges = ARRAY_SIZE(imx23_ranges), +}; + +static const struct regmap_range imx28_ranges[] = { + regmap_reg_range(OCOTP_DATA_OFFSET, 0x290), +}; + +static const struct regmap_access_table imx28_access = { + .yes_ranges = imx28_ranges, + .n_yes_ranges = ARRAY_SIZE(imx28_ranges), +}; + +static struct regmap_bus mxs_ocotp_bus = { + .read = mxs_ocotp_read, + .write = mxs_ocotp_write, /* make regmap_init() happy */ + .reg_format_endian_default = REGMAP_ENDIAN_NATIVE, + .val_format_endian_default = REGMAP_ENDIAN_NATIVE, +}; + +static struct regmap_config mxs_ocotp_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 16, + .writeable_reg = mxs_ocotp_writeable_reg, +}; + +static const struct of_device_id mxs_ocotp_match[] = { + { .compatible = "fsl,imx23-ocotp", .data = &imx23_access }, + { .compatible = "fsl,imx28-ocotp", .data = &imx28_access }, + { /* sentinel */}, +}; +MODULE_DEVICE_TABLE(of, mxs_ocotp_match); + +static int mxs_ocotp_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct mxs_ocotp *otp; + struct resource *res; + const struct of_device_id *match; + struct regmap *regmap; + const struct regmap_access_table *access; + int ret; + + match = of_match_device(dev->driver->of_match_table, dev); + if (!match || !match->data) + return -EINVAL; + + otp = devm_kzalloc(dev, sizeof(*otp), GFP_KERNEL); + if (!otp) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + otp->base = devm_ioremap_resource(dev, res); + if (IS_ERR(otp->base)) + return PTR_ERR(otp->base); + + otp->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(otp->clk)) + return PTR_ERR(otp->clk); + + ret = clk_prepare(otp->clk); + if (ret < 0) { + dev_err(dev, "failed to prepare clk: %d\n", ret); + return ret; + } + + access = match->data; + mxs_ocotp_config.rd_table = access; + mxs_ocotp_config.max_register = access->yes_ranges[0].range_max; + + regmap = devm_regmap_init(dev, &mxs_ocotp_bus, otp, &mxs_ocotp_config); + if (IS_ERR(regmap)) { + dev_err(dev, "regmap init failed\n"); + ret = PTR_ERR(regmap); + goto err_clk; + } + + ocotp_config.dev = dev; + otp->nvmem = nvmem_register(&ocotp_config); + if (IS_ERR(otp->nvmem)) { + ret = PTR_ERR(otp->nvmem); + goto err_clk; + } + + platform_set_drvdata(pdev, otp); + + return 0; + +err_clk: + clk_unprepare(otp->clk); + + return ret; +} + +static int mxs_ocotp_remove(struct platform_device *pdev) +{ + struct mxs_ocotp *otp = platform_get_drvdata(pdev); + + clk_unprepare(otp->clk); + + return nvmem_unregister(otp->nvmem); +} + +static struct platform_driver mxs_ocotp_driver = { + .probe = mxs_ocotp_probe, + .remove = mxs_ocotp_remove, + .driver = { + .name = "mxs-ocotp", + .of_match_table = mxs_ocotp_match, + }, +}; + +module_platform_driver(mxs_ocotp_driver); +MODULE_AUTHOR("Stefan Wahren <stefan.wahren@i2se.com>"); +MODULE_DESCRIPTION("driver for OCOTP in i.MX23/i.MX28"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvmem/rockchip-efuse.c b/drivers/nvmem/rockchip-efuse.c new file mode 100644 index 000000000000..f55213424222 --- /dev/null +++ b/drivers/nvmem/rockchip-efuse.c @@ -0,0 +1,186 @@ +/* + * Rockchip eFuse Driver + * + * Copyright (c) 2015 Rockchip Electronics Co. Ltd. + * Author: Caesar Wang <wxt@rock-chips.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/platform_device.h> +#include <linux/nvmem-provider.h> +#include <linux/slab.h> +#include <linux/regmap.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/of.h> +#include <linux/clk.h> + +#define EFUSE_A_SHIFT 6 +#define EFUSE_A_MASK 0x3ff +#define EFUSE_PGENB BIT(3) +#define EFUSE_LOAD BIT(2) +#define EFUSE_STROBE BIT(1) +#define EFUSE_CSB BIT(0) + +#define REG_EFUSE_CTRL 0x0000 +#define REG_EFUSE_DOUT 0x0004 + +struct rockchip_efuse_context { + struct device *dev; + void __iomem *base; + struct clk *efuse_clk; +}; + +static int rockchip_efuse_write(void *context, const void *data, size_t count) +{ + /* Nothing TBD, Read-Only */ + return 0; +} + +static int rockchip_efuse_read(void *context, + const void *reg, size_t reg_size, + void *val, size_t val_size) +{ + unsigned int offset = *(u32 *)reg; + struct rockchip_efuse_context *_context = context; + void __iomem *base = _context->base; + struct clk *clk = _context->efuse_clk; + u8 *buf = val; + int ret; + + ret = clk_prepare_enable(clk); + if (ret < 0) { + dev_err(_context->dev, "failed to prepare/enable efuse clk\n"); + return ret; + } + + writel(EFUSE_LOAD | EFUSE_PGENB, base + REG_EFUSE_CTRL); + udelay(1); + while (val_size) { + writel(readl(base + REG_EFUSE_CTRL) & + (~(EFUSE_A_MASK << EFUSE_A_SHIFT)), + base + REG_EFUSE_CTRL); + writel(readl(base + REG_EFUSE_CTRL) | + ((offset & EFUSE_A_MASK) << EFUSE_A_SHIFT), + base + REG_EFUSE_CTRL); + udelay(1); + writel(readl(base + REG_EFUSE_CTRL) | + EFUSE_STROBE, base + REG_EFUSE_CTRL); + udelay(1); + *buf++ = readb(base + REG_EFUSE_DOUT); + writel(readl(base + REG_EFUSE_CTRL) & + (~EFUSE_STROBE), base + REG_EFUSE_CTRL); + udelay(1); + + val_size -= 1; + offset += 1; + } + + /* Switch to standby mode */ + writel(EFUSE_PGENB | EFUSE_CSB, base + REG_EFUSE_CTRL); + + clk_disable_unprepare(clk); + + return 0; +} + +static struct regmap_bus rockchip_efuse_bus = { + .read = rockchip_efuse_read, + .write = rockchip_efuse_write, + .reg_format_endian_default = REGMAP_ENDIAN_NATIVE, + .val_format_endian_default = REGMAP_ENDIAN_NATIVE, +}; + +static struct regmap_config rockchip_efuse_regmap_config = { + .reg_bits = 32, + .reg_stride = 1, + .val_bits = 8, +}; + +static struct nvmem_config econfig = { + .name = "rockchip-efuse", + .owner = THIS_MODULE, + .read_only = true, +}; + +static const struct of_device_id rockchip_efuse_match[] = { + { .compatible = "rockchip,rockchip-efuse",}, + { /* sentinel */}, +}; +MODULE_DEVICE_TABLE(of, rockchip_efuse_match); + +static int rockchip_efuse_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *res; + struct nvmem_device *nvmem; + struct regmap *regmap; + void __iomem *base; + struct clk *clk; + struct rockchip_efuse_context *context; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + context = devm_kzalloc(dev, sizeof(struct rockchip_efuse_context), + GFP_KERNEL); + if (IS_ERR(context)) + return PTR_ERR(context); + + clk = devm_clk_get(dev, "pclk_efuse"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + context->dev = dev; + context->base = base; + context->efuse_clk = clk; + + rockchip_efuse_regmap_config.max_register = resource_size(res) - 1; + + regmap = devm_regmap_init(dev, &rockchip_efuse_bus, + context, &rockchip_efuse_regmap_config); + if (IS_ERR(regmap)) { + dev_err(dev, "regmap init failed\n"); + return PTR_ERR(regmap); + } + econfig.dev = dev; + nvmem = nvmem_register(&econfig); + if (IS_ERR(nvmem)) + return PTR_ERR(nvmem); + + platform_set_drvdata(pdev, nvmem); + + return 0; +} + +static int rockchip_efuse_remove(struct platform_device *pdev) +{ + struct nvmem_device *nvmem = platform_get_drvdata(pdev); + + return nvmem_unregister(nvmem); +} + +static struct platform_driver rockchip_efuse_driver = { + .probe = rockchip_efuse_probe, + .remove = rockchip_efuse_remove, + .driver = { + .name = "rockchip-efuse", + .of_match_table = rockchip_efuse_match, + }, +}; + +module_platform_driver(rockchip_efuse_driver); +MODULE_DESCRIPTION("rockchip_efuse driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvmem/vf610-ocotp.c b/drivers/nvmem/vf610-ocotp.c new file mode 100644 index 000000000000..8641319efeda --- /dev/null +++ b/drivers/nvmem/vf610-ocotp.c @@ -0,0 +1,302 @@ +/* + * Copyright (C) 2015 Toradex AG. + * + * Author: Sanchayan Maity <sanchayan.maity@toradex.com> + * + * Based on the barebox ocotp driver, + * Copyright (c) 2010 Baruch Siach <baruch@tkos.co.il> + * Orex Computed Radiography + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/nvmem-provider.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/slab.h> + +/* OCOTP Register Offsets */ +#define OCOTP_CTRL_REG 0x00 +#define OCOTP_CTRL_SET 0x04 +#define OCOTP_CTRL_CLR 0x08 +#define OCOTP_TIMING 0x10 +#define OCOTP_DATA 0x20 +#define OCOTP_READ_CTRL_REG 0x30 +#define OCOTP_READ_FUSE_DATA 0x40 + +/* OCOTP Register bits and masks */ +#define OCOTP_CTRL_WR_UNLOCK 16 +#define OCOTP_CTRL_WR_UNLOCK_KEY 0x3E77 +#define OCOTP_CTRL_WR_UNLOCK_MASK GENMASK(31, 16) +#define OCOTP_CTRL_ADDR 0 +#define OCOTP_CTRL_ADDR_MASK GENMASK(6, 0) +#define OCOTP_CTRL_RELOAD_SHADOWS BIT(10) +#define OCOTP_CTRL_ERR BIT(9) +#define OCOTP_CTRL_BUSY BIT(8) + +#define OCOTP_TIMING_STROBE_READ 16 +#define OCOTP_TIMING_STROBE_READ_MASK GENMASK(21, 16) +#define OCOTP_TIMING_RELAX 12 +#define OCOTP_TIMING_RELAX_MASK GENMASK(15, 12) +#define OCOTP_TIMING_STROBE_PROG 0 +#define OCOTP_TIMING_STROBE_PROG_MASK GENMASK(11, 0) + +#define OCOTP_READ_CTRL_READ_FUSE 0x1 + +#define VF610_OCOTP_TIMEOUT 100000 + +#define BF(value, field) (((value) << field) & field##_MASK) + +#define DEF_RELAX 20 + +static const int base_to_fuse_addr_mappings[][2] = { + {0x400, 0x00}, + {0x410, 0x01}, + {0x420, 0x02}, + {0x450, 0x05}, + {0x4F0, 0x0F}, + {0x600, 0x20}, + {0x610, 0x21}, + {0x620, 0x22}, + {0x630, 0x23}, + {0x640, 0x24}, + {0x650, 0x25}, + {0x660, 0x26}, + {0x670, 0x27}, + {0x6F0, 0x2F}, + {0x880, 0x38}, + {0x890, 0x39}, + {0x8A0, 0x3A}, + {0x8B0, 0x3B}, + {0x8C0, 0x3C}, + {0x8D0, 0x3D}, + {0x8E0, 0x3E}, + {0x8F0, 0x3F}, + {0xC80, 0x78}, + {0xC90, 0x79}, + {0xCA0, 0x7A}, + {0xCB0, 0x7B}, + {0xCC0, 0x7C}, + {0xCD0, 0x7D}, + {0xCE0, 0x7E}, + {0xCF0, 0x7F}, +}; + +struct vf610_ocotp { + void __iomem *base; + struct clk *clk; + struct device *dev; + struct nvmem_device *nvmem; + int timing; +}; + +static int vf610_ocotp_wait_busy(void __iomem *base) +{ + int timeout = VF610_OCOTP_TIMEOUT; + + while ((readl(base) & OCOTP_CTRL_BUSY) && --timeout) + udelay(10); + + if (!timeout) { + writel(OCOTP_CTRL_ERR, base + OCOTP_CTRL_CLR); + return -ETIMEDOUT; + } + + udelay(10); + + return 0; +} + +static int vf610_ocotp_calculate_timing(struct vf610_ocotp *ocotp_dev) +{ + u32 clk_rate; + u32 relax, strobe_read, strobe_prog; + u32 timing; + + clk_rate = clk_get_rate(ocotp_dev->clk); + + /* Refer section OTP read/write timing parameters in TRM */ + relax = clk_rate / (1000000000 / DEF_RELAX) - 1; + strobe_prog = clk_rate / (1000000000 / 10000) + 2 * (DEF_RELAX + 1) - 1; + strobe_read = clk_rate / (1000000000 / 40) + 2 * (DEF_RELAX + 1) - 1; + + timing = BF(relax, OCOTP_TIMING_RELAX); + timing |= BF(strobe_read, OCOTP_TIMING_STROBE_READ); + timing |= BF(strobe_prog, OCOTP_TIMING_STROBE_PROG); + + return timing; +} + +static int vf610_get_fuse_address(int base_addr_offset) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(base_to_fuse_addr_mappings); i++) { + if (base_to_fuse_addr_mappings[i][0] == base_addr_offset) + return base_to_fuse_addr_mappings[i][1]; + } + + return -EINVAL; +} + +static int vf610_ocotp_write(void *context, const void *data, size_t count) +{ + return 0; +} + +static int vf610_ocotp_read(void *context, + const void *off, size_t reg_size, + void *val, size_t val_size) +{ + struct vf610_ocotp *ocotp = context; + void __iomem *base = ocotp->base; + unsigned int offset = *(u32 *)off; + u32 reg, *buf = val; + int fuse_addr; + int ret; + + while (val_size > 0) { + fuse_addr = vf610_get_fuse_address(offset); + if (fuse_addr > 0) { + writel(ocotp->timing, base + OCOTP_TIMING); + ret = vf610_ocotp_wait_busy(base + OCOTP_CTRL_REG); + if (ret) + return ret; + + reg = readl(base + OCOTP_CTRL_REG); + reg &= ~OCOTP_CTRL_ADDR_MASK; + reg &= ~OCOTP_CTRL_WR_UNLOCK_MASK; + reg |= BF(fuse_addr, OCOTP_CTRL_ADDR); + writel(reg, base + OCOTP_CTRL_REG); + + writel(OCOTP_READ_CTRL_READ_FUSE, + base + OCOTP_READ_CTRL_REG); + ret = vf610_ocotp_wait_busy(base + OCOTP_CTRL_REG); + if (ret) + return ret; + + if (readl(base) & OCOTP_CTRL_ERR) { + dev_dbg(ocotp->dev, "Error reading from fuse address %x\n", + fuse_addr); + writel(OCOTP_CTRL_ERR, base + OCOTP_CTRL_CLR); + } + + /* + * In case of error, we do not abort and expect to read + * 0xBADABADA as mentioned by the TRM. We just read this + * value and return. + */ + *buf = readl(base + OCOTP_READ_FUSE_DATA); + } else { + *buf = 0; + } + + buf++; + val_size--; + offset += reg_size; + } + + return 0; +} + +static struct regmap_bus vf610_ocotp_bus = { + .read = vf610_ocotp_read, + .write = vf610_ocotp_write, + .reg_format_endian_default = REGMAP_ENDIAN_NATIVE, + .val_format_endian_default = REGMAP_ENDIAN_NATIVE, +}; + +static struct regmap_config ocotp_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, +}; + +static struct nvmem_config ocotp_config = { + .name = "ocotp", + .owner = THIS_MODULE, +}; + +static const struct of_device_id ocotp_of_match[] = { + { .compatible = "fsl,vf610-ocotp", }, + {/* sentinel */}, +}; +MODULE_DEVICE_TABLE(of, ocotp_of_match); + +static int vf610_ocotp_remove(struct platform_device *pdev) +{ + struct vf610_ocotp *ocotp_dev = platform_get_drvdata(pdev); + + return nvmem_unregister(ocotp_dev->nvmem); +} + +static int vf610_ocotp_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *res; + struct regmap *regmap; + struct vf610_ocotp *ocotp_dev; + + ocotp_dev = devm_kzalloc(&pdev->dev, + sizeof(struct vf610_ocotp), GFP_KERNEL); + if (!ocotp_dev) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ocotp_dev->base = devm_ioremap_resource(dev, res); + if (IS_ERR(ocotp_dev->base)) + return PTR_ERR(ocotp_dev->base); + + ocotp_dev->clk = devm_clk_get(dev, NULL); + if (IS_ERR(ocotp_dev->clk)) { + dev_err(dev, "failed getting clock, err = %ld\n", + PTR_ERR(ocotp_dev->clk)); + return PTR_ERR(ocotp_dev->clk); + } + + ocotp_regmap_config.max_register = resource_size(res); + regmap = devm_regmap_init(dev, + &vf610_ocotp_bus, ocotp_dev, &ocotp_regmap_config); + if (IS_ERR(regmap)) { + dev_err(dev, "regmap init failed\n"); + return PTR_ERR(regmap); + } + ocotp_config.dev = dev; + + ocotp_dev->nvmem = nvmem_register(&ocotp_config); + if (IS_ERR(ocotp_dev->nvmem)) + return PTR_ERR(ocotp_dev->nvmem); + + ocotp_dev->dev = dev; + platform_set_drvdata(pdev, ocotp_dev); + + ocotp_dev->timing = vf610_ocotp_calculate_timing(ocotp_dev); + + return 0; +} + +static struct platform_driver vf610_ocotp_driver = { + .probe = vf610_ocotp_probe, + .remove = vf610_ocotp_remove, + .driver = { + .name = "vf610-ocotp", + .of_match_table = ocotp_of_match, + }, +}; +module_platform_driver(vf610_ocotp_driver); +MODULE_AUTHOR("Sanchayan Maity <sanchayan.maity@toradex.com>"); +MODULE_DESCRIPTION("Vybrid OCOTP driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 0decee6c556e..489ea1098c96 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -468,12 +468,10 @@ static int pcmcia_device_query(struct pcmcia_device *p_dev) if ((length < 2) || (length > 255)) continue; - new = kmalloc(sizeof(char) * length, GFP_KERNEL); + new = kstrdup(tmp, GFP_KERNEL); if (!new) continue; - new = strncpy(new, tmp, length); - tmp = p_dev->prod_id[i]; p_dev->prod_id[i] = new; kfree(tmp); diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index 4a3cf9ba152f..96615d807694 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -168,11 +168,6 @@ struct pmic_arb_ver_ops { u32 (*irq_clear)(u8 n); }; -static inline u32 pmic_arb_base_read(struct spmi_pmic_arb_dev *dev, u32 offset) -{ - return readl_relaxed(dev->rd_base + offset); -} - static inline void pmic_arb_base_write(struct spmi_pmic_arb_dev *dev, u32 offset, u32 val) { @@ -193,7 +188,7 @@ static inline void pmic_arb_set_rd_cmd(struct spmi_pmic_arb_dev *dev, */ static void pa_read_data(struct spmi_pmic_arb_dev *dev, u8 *buf, u32 reg, u8 bc) { - u32 data = pmic_arb_base_read(dev, reg); + u32 data = __raw_readl(dev->rd_base + reg); memcpy(buf, &data, (bc & 3) + 1); } @@ -208,7 +203,7 @@ pa_write_data(struct spmi_pmic_arb_dev *dev, const u8 *buf, u32 reg, u8 bc) { u32 data = 0; memcpy(&data, buf, (bc & 3) + 1); - pmic_arb_base_write(dev, reg, data); + __raw_writel(data, dev->wr_base + reg); } static int pmic_arb_wait_for_done(struct spmi_controller *ctrl, @@ -365,7 +360,7 @@ static int pmic_arb_write_cmd(struct spmi_controller *ctrl, u8 opc, u8 sid, opc = PMIC_ARB_OP_EXT_WRITE; else if (opc >= 0x30 && opc <= 0x37) opc = PMIC_ARB_OP_EXT_WRITEL; - else if (opc >= 0x80 && opc <= 0xFF) + else if (opc >= 0x80) opc = PMIC_ARB_OP_ZERO_WRITE; else return -EINVAL; diff --git a/drivers/spmi/spmi.c b/drivers/spmi/spmi.c index 11467e17bdd8..6b3da1bb0d63 100644 --- a/drivers/spmi/spmi.c +++ b/drivers/spmi/spmi.c @@ -560,12 +560,13 @@ EXPORT_SYMBOL_GPL(spmi_controller_remove); * This API will register the client driver with the SPMI framework. * It is typically called from the driver's module-init function. */ -int spmi_driver_register(struct spmi_driver *sdrv) +int __spmi_driver_register(struct spmi_driver *sdrv, struct module *owner) { sdrv->driver.bus = &spmi_bus_type; + sdrv->driver.owner = owner; return driver_register(&sdrv->driver); } -EXPORT_SYMBOL_GPL(spmi_driver_register); +EXPORT_SYMBOL_GPL(__spmi_driver_register); static void __exit spmi_exit(void) { diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 8196581f54c2..bcc1fc027311 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -524,6 +524,7 @@ static ssize_t uio_read(struct file *filep, char __user *buf, event_count = atomic_read(&idev->event); if (event_count != listener->event_count) { + __set_current_state(TASK_RUNNING); if (copy_to_user(buf, &event_count, count)) retval = -EFAULT; else { diff --git a/drivers/uio/uio_fsl_elbc_gpcm.c b/drivers/uio/uio_fsl_elbc_gpcm.c index 2bcf80c159c1..b46323d9dc18 100644 --- a/drivers/uio/uio_fsl_elbc_gpcm.c +++ b/drivers/uio/uio_fsl_elbc_gpcm.c @@ -470,6 +470,7 @@ static const struct of_device_id uio_fsl_elbc_gpcm_match[] = { { .compatible = "fsl,elbc-gpcm-uio", }, {} }; +MODULE_DEVICE_TABLE(of, uio_fsl_elbc_gpcm_match); static struct platform_driver uio_fsl_elbc_gpcm_driver = { .driver = { diff --git a/drivers/w1/masters/omap_hdq.c b/drivers/w1/masters/omap_hdq.c index e7d448963a24..0e2f43bccf1f 100644 --- a/drivers/w1/masters/omap_hdq.c +++ b/drivers/w1/masters/omap_hdq.c @@ -17,6 +17,7 @@ #include <linux/io.h> #include <linux/sched.h> #include <linux/pm_runtime.h> +#include <linux/of.h> #include "../w1.h" #include "../w1_int.h" @@ -27,21 +28,23 @@ #define OMAP_HDQ_TX_DATA 0x04 #define OMAP_HDQ_RX_DATA 0x08 #define OMAP_HDQ_CTRL_STATUS 0x0c -#define OMAP_HDQ_CTRL_STATUS_INTERRUPTMASK (1<<6) -#define OMAP_HDQ_CTRL_STATUS_CLOCKENABLE (1<<5) -#define OMAP_HDQ_CTRL_STATUS_GO (1<<4) -#define OMAP_HDQ_CTRL_STATUS_INITIALIZATION (1<<2) -#define OMAP_HDQ_CTRL_STATUS_DIR (1<<1) -#define OMAP_HDQ_CTRL_STATUS_MODE (1<<0) +#define OMAP_HDQ_CTRL_STATUS_SINGLE BIT(7) +#define OMAP_HDQ_CTRL_STATUS_INTERRUPTMASK BIT(6) +#define OMAP_HDQ_CTRL_STATUS_CLOCKENABLE BIT(5) +#define OMAP_HDQ_CTRL_STATUS_GO BIT(4) +#define OMAP_HDQ_CTRL_STATUS_PRESENCE BIT(3) +#define OMAP_HDQ_CTRL_STATUS_INITIALIZATION BIT(2) +#define OMAP_HDQ_CTRL_STATUS_DIR BIT(1) #define OMAP_HDQ_INT_STATUS 0x10 -#define OMAP_HDQ_INT_STATUS_TXCOMPLETE (1<<2) -#define OMAP_HDQ_INT_STATUS_RXCOMPLETE (1<<1) -#define OMAP_HDQ_INT_STATUS_TIMEOUT (1<<0) +#define OMAP_HDQ_INT_STATUS_TXCOMPLETE BIT(2) +#define OMAP_HDQ_INT_STATUS_RXCOMPLETE BIT(1) +#define OMAP_HDQ_INT_STATUS_TIMEOUT BIT(0) #define OMAP_HDQ_SYSCONFIG 0x14 -#define OMAP_HDQ_SYSCONFIG_SOFTRESET (1<<1) -#define OMAP_HDQ_SYSCONFIG_AUTOIDLE (1<<0) +#define OMAP_HDQ_SYSCONFIG_SOFTRESET BIT(1) +#define OMAP_HDQ_SYSCONFIG_AUTOIDLE BIT(0) +#define OMAP_HDQ_SYSCONFIG_NOIDLE 0x0 #define OMAP_HDQ_SYSSTATUS 0x18 -#define OMAP_HDQ_SYSSTATUS_RESETDONE (1<<0) +#define OMAP_HDQ_SYSSTATUS_RESETDONE BIT(0) #define OMAP_HDQ_FLAG_CLEAR 0 #define OMAP_HDQ_FLAG_SET 1 @@ -67,6 +70,10 @@ struct hdq_data { * the data wrire or read. */ int init_trans; + int rrw; + /* mode: 0-HDQ 1-W1 */ + int mode; + }; static int omap_hdq_probe(struct platform_device *pdev); @@ -74,6 +81,7 @@ static int omap_hdq_remove(struct platform_device *pdev); static const struct of_device_id omap_hdq_dt_ids[] = { { .compatible = "ti,omap3-1w" }, + { .compatible = "ti,am4372-hdq" }, {} }; MODULE_DEVICE_TABLE(of, omap_hdq_dt_ids); @@ -90,15 +98,12 @@ static struct platform_driver omap_hdq_driver = { static u8 omap_w1_read_byte(void *_hdq); static void omap_w1_write_byte(void *_hdq, u8 byte); static u8 omap_w1_reset_bus(void *_hdq); -static void omap_w1_search_bus(void *_hdq, struct w1_master *master_dev, - u8 search_type, w1_slave_found_callback slave_found); static struct w1_bus_master omap_w1_master = { .read_byte = omap_w1_read_byte, .write_byte = omap_w1_write_byte, .reset_bus = omap_w1_reset_bus, - .search = omap_w1_search_bus, }; /* HDQ register I/O routines */ @@ -122,6 +127,15 @@ static inline u8 hdq_reg_merge(struct hdq_data *hdq_data, u32 offset, return new_val; } +static void hdq_disable_interrupt(struct hdq_data *hdq_data, u32 offset, + u32 mask) +{ + u32 ie; + + ie = readl(hdq_data->hdq_base + offset); + writel(ie & mask, hdq_data->hdq_base + offset); +} + /* * Wait for one or more bits in flag change. * HDQ_FLAG_SET: wait until any bit in the flag is set. @@ -229,13 +243,7 @@ static irqreturn_t hdq_isr(int irq, void *_hdq) return IRQ_HANDLED; } -/* HDQ Mode: always return success */ -static u8 omap_w1_reset_bus(void *_hdq) -{ - return 0; -} - -/* W1 search callback function */ +/* W1 search callback function in HDQ mode */ static void omap_w1_search_bus(void *_hdq, struct w1_master *master_dev, u8 search_type, w1_slave_found_callback slave_found) { @@ -262,9 +270,10 @@ static int _omap_hdq_reset(struct hdq_data *hdq_data) int ret; u8 tmp_status; - hdq_reg_out(hdq_data, OMAP_HDQ_SYSCONFIG, OMAP_HDQ_SYSCONFIG_SOFTRESET); + hdq_reg_out(hdq_data, OMAP_HDQ_SYSCONFIG, + OMAP_HDQ_SYSCONFIG_SOFTRESET); /* - * Select HDQ mode & enable clocks. + * Select HDQ/1W mode & enable clocks. * It is observed that INT flags can't be cleared via a read and GO/INIT * won't return to zero if interrupt is disabled. So we always enable * interrupt. @@ -282,7 +291,8 @@ static int _omap_hdq_reset(struct hdq_data *hdq_data) else { hdq_reg_out(hdq_data, OMAP_HDQ_CTRL_STATUS, OMAP_HDQ_CTRL_STATUS_CLOCKENABLE | - OMAP_HDQ_CTRL_STATUS_INTERRUPTMASK); + OMAP_HDQ_CTRL_STATUS_INTERRUPTMASK | + hdq_data->mode); hdq_reg_out(hdq_data, OMAP_HDQ_SYSCONFIG, OMAP_HDQ_SYSCONFIG_AUTOIDLE); } @@ -334,6 +344,18 @@ static int omap_hdq_break(struct hdq_data *hdq_data) ret = -ETIMEDOUT; goto out; } + + /* + * check for the presence detect bit to get + * set to show that the slave is responding + */ + if (!(hdq_reg_in(hdq_data, OMAP_HDQ_CTRL_STATUS) & + OMAP_HDQ_CTRL_STATUS_PRESENCE)) { + dev_dbg(hdq_data->dev, "Presence bit not set\n"); + ret = -ETIMEDOUT; + goto out; + } + /* * wait for both INIT and GO bits rerurn to zero. * zero wait time expected for interrupt mode. @@ -368,6 +390,8 @@ static int hdq_read_byte(struct hdq_data *hdq_data, u8 *val) goto out; } + hdq_data->hdq_irqstatus = 0; + if (!(hdq_data->hdq_irqstatus & OMAP_HDQ_INT_STATUS_RXCOMPLETE)) { hdq_reg_merge(hdq_data, OMAP_HDQ_CTRL_STATUS, OMAP_HDQ_CTRL_STATUS_DIR | OMAP_HDQ_CTRL_STATUS_GO, @@ -400,7 +424,7 @@ rtn: } -/* Enable clocks and set the controller to HDQ mode */ +/* Enable clocks and set the controller to HDQ/1W mode */ static int omap_hdq_get(struct hdq_data *hdq_data) { int ret = 0; @@ -422,7 +446,7 @@ static int omap_hdq_get(struct hdq_data *hdq_data) pm_runtime_get_sync(hdq_data->dev); - /* make sure HDQ is out of reset */ + /* make sure HDQ/1W is out of reset */ if (!(hdq_reg_in(hdq_data, OMAP_HDQ_SYSSTATUS) & OMAP_HDQ_SYSSTATUS_RESETDONE)) { ret = _omap_hdq_reset(hdq_data); @@ -430,12 +454,13 @@ static int omap_hdq_get(struct hdq_data *hdq_data) /* back up the count */ hdq_data->hdq_usecount--; } else { - /* select HDQ mode & enable clocks */ + /* select HDQ/1W mode & enable clocks */ hdq_reg_out(hdq_data, OMAP_HDQ_CTRL_STATUS, OMAP_HDQ_CTRL_STATUS_CLOCKENABLE | - OMAP_HDQ_CTRL_STATUS_INTERRUPTMASK); + OMAP_HDQ_CTRL_STATUS_INTERRUPTMASK | + hdq_data->mode); hdq_reg_out(hdq_data, OMAP_HDQ_SYSCONFIG, - OMAP_HDQ_SYSCONFIG_AUTOIDLE); + OMAP_HDQ_SYSCONFIG_NOIDLE); hdq_reg_in(hdq_data, OMAP_HDQ_INT_STATUS); } } @@ -456,6 +481,8 @@ static int omap_hdq_put(struct hdq_data *hdq_data) if (ret < 0) return -EINTR; + hdq_reg_out(hdq_data, OMAP_HDQ_SYSCONFIG, + OMAP_HDQ_SYSCONFIG_AUTOIDLE); if (0 == hdq_data->hdq_usecount) { dev_dbg(hdq_data->dev, "attempt to decrement use count" " when it is zero"); @@ -471,6 +498,100 @@ static int omap_hdq_put(struct hdq_data *hdq_data) return ret; } +/* + * W1 triplet callback function - used for searching ROM addresses. + * Registered only when controller is in 1-wire mode. + */ +static u8 omap_w1_triplet(void *_hdq, u8 bdir) +{ + u8 id_bit, comp_bit; + int err; + u8 ret = 0x3; /* no slaves responded */ + struct hdq_data *hdq_data = _hdq; + u8 ctrl = OMAP_HDQ_CTRL_STATUS_SINGLE | OMAP_HDQ_CTRL_STATUS_GO | + OMAP_HDQ_CTRL_STATUS_INTERRUPTMASK; + u8 mask = ctrl | OMAP_HDQ_CTRL_STATUS_DIR; + + omap_hdq_get(_hdq); + + err = mutex_lock_interruptible(&hdq_data->hdq_mutex); + if (err < 0) { + dev_dbg(hdq_data->dev, "Could not acquire mutex\n"); + goto rtn; + } + + hdq_data->hdq_irqstatus = 0; + /* read id_bit */ + hdq_reg_merge(_hdq, OMAP_HDQ_CTRL_STATUS, + ctrl | OMAP_HDQ_CTRL_STATUS_DIR, mask); + err = wait_event_timeout(hdq_wait_queue, + (hdq_data->hdq_irqstatus + & OMAP_HDQ_INT_STATUS_RXCOMPLETE), + OMAP_HDQ_TIMEOUT); + if (err == 0) { + dev_dbg(hdq_data->dev, "RX wait elapsed\n"); + goto out; + } + id_bit = (hdq_reg_in(_hdq, OMAP_HDQ_RX_DATA) & 0x01); + + hdq_data->hdq_irqstatus = 0; + /* read comp_bit */ + hdq_reg_merge(_hdq, OMAP_HDQ_CTRL_STATUS, + ctrl | OMAP_HDQ_CTRL_STATUS_DIR, mask); + err = wait_event_timeout(hdq_wait_queue, + (hdq_data->hdq_irqstatus + & OMAP_HDQ_INT_STATUS_RXCOMPLETE), + OMAP_HDQ_TIMEOUT); + if (err == 0) { + dev_dbg(hdq_data->dev, "RX wait elapsed\n"); + goto out; + } + comp_bit = (hdq_reg_in(_hdq, OMAP_HDQ_RX_DATA) & 0x01); + + if (id_bit && comp_bit) { + ret = 0x03; /* no slaves responded */ + goto out; + } + if (!id_bit && !comp_bit) { + /* Both bits are valid, take the direction given */ + ret = bdir ? 0x04 : 0; + } else { + /* Only one bit is valid, take that direction */ + bdir = id_bit; + ret = id_bit ? 0x05 : 0x02; + } + + /* write bdir bit */ + hdq_reg_out(_hdq, OMAP_HDQ_TX_DATA, bdir); + hdq_reg_merge(_hdq, OMAP_HDQ_CTRL_STATUS, ctrl, mask); + err = wait_event_timeout(hdq_wait_queue, + (hdq_data->hdq_irqstatus + & OMAP_HDQ_INT_STATUS_TXCOMPLETE), + OMAP_HDQ_TIMEOUT); + if (err == 0) { + dev_dbg(hdq_data->dev, "TX wait elapsed\n"); + goto out; + } + + hdq_reg_merge(_hdq, OMAP_HDQ_CTRL_STATUS, 0, + OMAP_HDQ_CTRL_STATUS_SINGLE); + +out: + mutex_unlock(&hdq_data->hdq_mutex); +rtn: + omap_hdq_put(_hdq); + return ret; +} + +/* reset callback */ +static u8 omap_w1_reset_bus(void *_hdq) +{ + omap_hdq_get(_hdq); + omap_hdq_break(_hdq); + omap_hdq_put(_hdq); + return 0; +} + /* Read a byte of data from the device */ static u8 omap_w1_read_byte(void *_hdq) { @@ -478,6 +599,10 @@ static u8 omap_w1_read_byte(void *_hdq) u8 val = 0; int ret; + /* First write to initialize the transfer */ + if (hdq_data->init_trans == 0) + omap_hdq_get(hdq_data); + ret = hdq_read_byte(hdq_data, &val); if (ret) { ret = mutex_lock_interruptible(&hdq_data->hdq_mutex); @@ -491,6 +616,10 @@ static u8 omap_w1_read_byte(void *_hdq) return -1; } + hdq_disable_interrupt(hdq_data, OMAP_HDQ_CTRL_STATUS, + ~OMAP_HDQ_CTRL_STATUS_INTERRUPTMASK); + hdq_data->hdq_usecount = 0; + /* Write followed by a read, release the module */ if (hdq_data->init_trans) { ret = mutex_lock_interruptible(&hdq_data->hdq_mutex); @@ -517,6 +646,14 @@ static void omap_w1_write_byte(void *_hdq, u8 byte) if (hdq_data->init_trans == 0) omap_hdq_get(hdq_data); + /* + * We need to reset the slave before + * issuing the SKIP ROM command, else + * the slave will not work. + */ + if (byte == W1_SKIP_ROM) + omap_hdq_break(hdq_data); + ret = mutex_lock_interruptible(&hdq_data->hdq_mutex); if (ret < 0) { dev_dbg(hdq_data->dev, "Could not acquire mutex\n"); @@ -551,6 +688,7 @@ static int omap_hdq_probe(struct platform_device *pdev) struct resource *res; int ret, irq; u8 rev; + const char *mode; hdq_data = devm_kzalloc(dev, sizeof(*hdq_data), GFP_KERNEL); if (!hdq_data) { @@ -567,10 +705,21 @@ static int omap_hdq_probe(struct platform_device *pdev) return PTR_ERR(hdq_data->hdq_base); hdq_data->hdq_usecount = 0; + hdq_data->rrw = 0; mutex_init(&hdq_data->hdq_mutex); pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + dev_dbg(&pdev->dev, "pm_runtime_get_sync failed\n"); + goto err_w1; + } + + ret = _omap_hdq_reset(hdq_data); + if (ret) { + dev_dbg(&pdev->dev, "reset failed\n"); + return -EINVAL; + } rev = hdq_reg_in(hdq_data, OMAP_HDQ_REVISION); dev_info(&pdev->dev, "OMAP HDQ Hardware Rev %c.%c. Driver in %s mode\n", @@ -594,6 +743,15 @@ static int omap_hdq_probe(struct platform_device *pdev) pm_runtime_put_sync(&pdev->dev); + ret = of_property_read_string(pdev->dev.of_node, "ti,mode", &mode); + if (ret < 0 || !strcmp(mode, "hdq")) { + hdq_data->mode = 0; + omap_w1_master.search = omap_w1_search_bus; + } else { + hdq_data->mode = 1; + omap_w1_master.triplet = omap_w1_triplet; + } + omap_w1_master.data = hdq_data; ret = w1_add_master_device(&omap_w1_master); @@ -635,8 +793,8 @@ static int omap_hdq_remove(struct platform_device *pdev) module_platform_driver(omap_hdq_driver); module_param(w1_id, int, S_IRUSR); -MODULE_PARM_DESC(w1_id, "1-wire id for the slave detection"); +MODULE_PARM_DESC(w1_id, "1-wire id for the slave detection in HDQ mode"); MODULE_AUTHOR("Texas Instruments"); -MODULE_DESCRIPTION("HDQ driver Library"); +MODULE_DESCRIPTION("HDQ-1W driver Library"); MODULE_LICENSE("GPL"); diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 47249a30eae3..20f766afa4c7 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -91,8 +91,7 @@ static struct w1_master *w1_alloc_dev(u32 id, int slave_count, int slave_ttl, err = device_register(&dev->dev); if (err) { pr_err("Failed to register master device. err=%d\n", err); - memset(dev, 0, sizeof(struct w1_master)); - kfree(dev); + put_device(&dev->dev); dev = NULL; } |