diff options
-rw-r--r-- | Documentation/driver-api/ntb.rst | 27 | ||||
-rw-r--r-- | drivers/ntb/Kconfig | 11 | ||||
-rw-r--r-- | drivers/ntb/Makefile | 3 | ||||
-rw-r--r-- | drivers/ntb/core.c (renamed from drivers/ntb/ntb.c) | 0 | ||||
-rw-r--r-- | drivers/ntb/hw/amd/ntb_hw_amd.c | 10 | ||||
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen3.c | 6 | ||||
-rw-r--r-- | drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 82 | ||||
-rw-r--r-- | drivers/ntb/msi.c | 415 | ||||
-rw-r--r-- | drivers/ntb/ntb_transport.c | 170 | ||||
-rw-r--r-- | drivers/ntb/test/Kconfig | 9 | ||||
-rw-r--r-- | drivers/ntb/test/Makefile | 1 | ||||
-rw-r--r-- | drivers/ntb/test/ntb_msi_test.c | 433 | ||||
-rw-r--r-- | drivers/ntb/test/ntb_perf.c | 14 | ||||
-rw-r--r-- | drivers/pci/msi.c | 54 | ||||
-rw-r--r-- | drivers/pci/switch/switchtec.c | 12 | ||||
-rw-r--r-- | include/linux/msi.h | 8 | ||||
-rw-r--r-- | include/linux/ntb.h | 200 | ||||
-rw-r--r-- | include/linux/pci.h | 9 | ||||
-rwxr-xr-x | tools/testing/selftests/ntb/ntb_test.sh | 54 |
19 files changed, 1458 insertions, 60 deletions
diff --git a/Documentation/driver-api/ntb.rst b/Documentation/driver-api/ntb.rst index 074a423c853c..87d1372da879 100644 --- a/Documentation/driver-api/ntb.rst +++ b/Documentation/driver-api/ntb.rst @@ -200,6 +200,33 @@ Debugfs Files: This file is used to read and write peer scratchpads. See *spad* for details. +NTB MSI Test Client (ntb\_msi\_test) +------------------------------------ + +The MSI test client serves to test and debug the MSI library which +allows for passing MSI interrupts across NTB memory windows. The +test client is interacted with through the debugfs filesystem: + +* *debugfs*/ntb\_tool/*hw*/ + A directory in debugfs will be created for each + NTB device probed by the tool. This directory is shortened to *hw* + below. +* *hw*/port + This file describes the local port number +* *hw*/irq*_occurrences + One occurrences file exists for each interrupt and, when read, + returns the number of times the interrupt has been triggered. +* *hw*/peer*/port + This file describes the port number for each peer +* *hw*/peer*/count + This file describes the number of interrupts that can be + triggered on each peer +* *hw*/peer*/trigger + Writing an interrupt number (any number less than the value + specified in count) will trigger the interrupt on the + specified peer. That peer's interrupt's occurrence file + should be incremented. + NTB Hardware Drivers ==================== diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig index c99eed87382a..df16c755b4da 100644 --- a/drivers/ntb/Kconfig +++ b/drivers/ntb/Kconfig @@ -13,6 +13,17 @@ menuconfig NTB if NTB +config NTB_MSI + bool "MSI Interrupt Support" + depends on PCI_MSI + help + Support using MSI interrupt forwarding instead of (or in addition to) + hardware doorbells. MSI interrupts typically offer lower latency + than doorbells and more MSI interrupts can be made available to + clients. However this requires an extra memory window and support + in the hardware driver for creating the MSI interrupts. + + If unsure, say N. source "drivers/ntb/hw/Kconfig" source "drivers/ntb/test/Kconfig" diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile index 5c64438d5b3f..3a6fa181ff99 100644 --- a/drivers/ntb/Makefile +++ b/drivers/ntb/Makefile @@ -1,3 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_NTB) += ntb.o hw/ test/ obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o + +ntb-y := core.o +ntb-$(CONFIG_NTB_MSI) += msi.o diff --git a/drivers/ntb/ntb.c b/drivers/ntb/core.c index 2581ab724c34..2581ab724c34 100644 --- a/drivers/ntb/ntb.c +++ b/drivers/ntb/core.c diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index efb214fc545a..2859cc99b73e 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -160,8 +160,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx, } /* set and verify setting the limit */ - write64(limit, mmio + limit_reg); - reg_val = read64(mmio + limit_reg); + write64(limit, peer_mmio + limit_reg); + reg_val = read64(peer_mmio + limit_reg); if (reg_val != limit) { write64(base_addr, mmio + limit_reg); write64(0, peer_mmio + xlat_reg); @@ -183,8 +183,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx, } /* set and verify setting the limit */ - writel(limit, mmio + limit_reg); - reg_val = readl(mmio + limit_reg); + writel(limit, peer_mmio + limit_reg); + reg_val = readl(peer_mmio + limit_reg); if (reg_val != limit) { writel(base_addr, mmio + limit_reg); writel(0, peer_mmio + xlat_reg); @@ -333,7 +333,7 @@ static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector) if (db_vector < 0 || db_vector > ndev->db_count) return 0; - return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector); + return ntb_ndev(ntb)->db_valid_mask & (1ULL << db_vector); } static u64 amd_ntb_db_read(struct ntb_dev *ntb) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c index f475b56a3f49..c3397160db7f 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen3.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c @@ -532,9 +532,9 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx, return 0; } -int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr, - resource_size_t *db_size, - u64 *db_data, int db_bit) +static int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr, + resource_size_t *db_size, + u64 *db_data, int db_bit) { phys_addr_t db_addr_base; struct intel_ntb_dev *ndev = ntb_ndev(ntb); diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index db4967748e4d..f4959458d909 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -86,7 +86,8 @@ struct switchtec_ntb { bool link_is_up; enum ntb_speed link_speed; enum ntb_width link_width; - struct work_struct link_reinit_work; + struct work_struct check_link_status_work; + bool link_force_down; }; static struct switchtec_ntb *ntb_sndev(struct ntb_dev *ntb) @@ -485,33 +486,11 @@ enum switchtec_msg { static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev); -static void link_reinit_work(struct work_struct *work) -{ - struct switchtec_ntb *sndev; - - sndev = container_of(work, struct switchtec_ntb, link_reinit_work); - - switchtec_ntb_reinit_peer(sndev); -} - -static void switchtec_ntb_check_link(struct switchtec_ntb *sndev, - enum switchtec_msg msg) +static void switchtec_ntb_link_status_update(struct switchtec_ntb *sndev) { int link_sta; int old = sndev->link_is_up; - if (msg == MSG_LINK_FORCE_DOWN) { - schedule_work(&sndev->link_reinit_work); - - if (sndev->link_is_up) { - sndev->link_is_up = 0; - ntb_link_event(&sndev->ntb); - dev_info(&sndev->stdev->dev, "ntb link forced down\n"); - } - - return; - } - link_sta = sndev->self_shared->link_sta; if (link_sta) { u64 peer = ioread64(&sndev->peer_shared->magic); @@ -536,6 +515,38 @@ static void switchtec_ntb_check_link(struct switchtec_ntb *sndev, } } +static void check_link_status_work(struct work_struct *work) +{ + struct switchtec_ntb *sndev; + + sndev = container_of(work, struct switchtec_ntb, + check_link_status_work); + + if (sndev->link_force_down) { + sndev->link_force_down = false; + switchtec_ntb_reinit_peer(sndev); + + if (sndev->link_is_up) { + sndev->link_is_up = 0; + ntb_link_event(&sndev->ntb); + dev_info(&sndev->stdev->dev, "ntb link forced down\n"); + } + + return; + } + + switchtec_ntb_link_status_update(sndev); +} + +static void switchtec_ntb_check_link(struct switchtec_ntb *sndev, + enum switchtec_msg msg) +{ + if (msg == MSG_LINK_FORCE_DOWN) + sndev->link_force_down = true; + + schedule_work(&sndev->check_link_status_work); +} + static void switchtec_ntb_link_notification(struct switchtec_dev *stdev) { struct switchtec_ntb *sndev = stdev->sndev; @@ -568,7 +579,7 @@ static int switchtec_ntb_link_enable(struct ntb_dev *ntb, sndev->self_shared->link_sta = 1; switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP); - switchtec_ntb_check_link(sndev, MSG_CHECK_LINK); + switchtec_ntb_link_status_update(sndev); return 0; } @@ -582,7 +593,7 @@ static int switchtec_ntb_link_disable(struct ntb_dev *ntb) sndev->self_shared->link_sta = 0; switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_DOWN); - switchtec_ntb_check_link(sndev, MSG_CHECK_LINK); + switchtec_ntb_link_status_update(sndev); return 0; } @@ -835,7 +846,8 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) sndev->ntb.topo = NTB_TOPO_SWITCH; sndev->ntb.ops = &switchtec_ntb_ops; - INIT_WORK(&sndev->link_reinit_work, link_reinit_work); + INIT_WORK(&sndev->check_link_status_work, check_link_status_work); + sndev->link_force_down = false; sndev->self_partition = sndev->stdev->partition; @@ -872,7 +884,7 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev) } sndev->peer_partition = ffs(tpart_vec) - 1; - if (!(part_map & (1 << sndev->peer_partition))) { + if (!(part_map & (1ULL << sndev->peer_partition))) { dev_err(&sndev->stdev->dev, "ntb target partition is not NT partition\n"); return -ENODEV; @@ -1448,10 +1460,16 @@ static void switchtec_ntb_deinit_db_msg_irq(struct switchtec_ntb *sndev) static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev) { - dev_info(&sndev->stdev->dev, "peer reinitialized\n"); - switchtec_ntb_deinit_shared_mw(sndev); - switchtec_ntb_init_mw(sndev); - return switchtec_ntb_init_shared_mw(sndev); + int rc; + + if (crosslink_is_enabled(sndev)) + return 0; + + dev_info(&sndev->stdev->dev, "reinitialize shared memory window\n"); + rc = config_rsvd_lut_win(sndev, sndev->mmio_peer_ctrl, 0, + sndev->self_partition, + sndev->self_shared_dma); + return rc; } static int switchtec_ntb_add(struct device *dev, diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c new file mode 100644 index 000000000000..9dddf133658f --- /dev/null +++ b/drivers/ntb/msi.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) + +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/ntb.h> +#include <linux/msi.h> +#include <linux/pci.h> + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION("0.1"); +MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>"); +MODULE_DESCRIPTION("NTB MSI Interrupt Library"); + +struct ntb_msi { + u64 base_addr; + u64 end_addr; + + void (*desc_changed)(void *ctx); + + u32 __iomem *peer_mws[]; +}; + +/** + * ntb_msi_init() - Initialize the MSI context + * @ntb: NTB device context + * + * This function must be called before any other ntb_msi function. + * It initializes the context for MSI operations and maps + * the peer memory windows. + * + * This function reserves the last N outbound memory windows (where N + * is the number of peers). + * + * Return: Zero on success, otherwise a negative error number. + */ +int ntb_msi_init(struct ntb_dev *ntb, + void (*desc_changed)(void *ctx)) +{ + phys_addr_t mw_phys_addr; + resource_size_t mw_size; + size_t struct_size; + int peer_widx; + int peers; + int ret; + int i; + + peers = ntb_peer_port_count(ntb); + if (peers <= 0) + return -EINVAL; + + struct_size = sizeof(*ntb->msi) + sizeof(*ntb->msi->peer_mws) * peers; + + ntb->msi = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL); + if (!ntb->msi) + return -ENOMEM; + + ntb->msi->desc_changed = desc_changed; + + for (i = 0; i < peers; i++) { + peer_widx = ntb_peer_mw_count(ntb) - 1 - i; + + ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, + &mw_size); + if (ret) + goto unroll; + + ntb->msi->peer_mws[i] = devm_ioremap(&ntb->dev, mw_phys_addr, + mw_size); + if (!ntb->msi->peer_mws[i]) { + ret = -EFAULT; + goto unroll; + } + } + + return 0; + +unroll: + for (i = 0; i < peers; i++) + if (ntb->msi->peer_mws[i]) + devm_iounmap(&ntb->dev, ntb->msi->peer_mws[i]); + + devm_kfree(&ntb->dev, ntb->msi); + ntb->msi = NULL; + return ret; +} +EXPORT_SYMBOL(ntb_msi_init); + +/** + * ntb_msi_setup_mws() - Initialize the MSI inbound memory windows + * @ntb: NTB device context + * + * This function sets up the required inbound memory windows. It should be + * called from a work function after a link up event. + * + * Over the entire network, this function will reserves the last N + * inbound memory windows for each peer (where N is the number of peers). + * + * ntb_msi_init() must be called before this function. + * + * Return: Zero on success, otherwise a negative error number. + */ +int ntb_msi_setup_mws(struct ntb_dev *ntb) +{ + struct msi_desc *desc; + u64 addr; + int peer, peer_widx; + resource_size_t addr_align, size_align, size_max; + resource_size_t mw_size = SZ_32K; + resource_size_t mw_min_size = mw_size; + int i; + int ret; + + if (!ntb->msi) + return -EINVAL; + + desc = first_msi_entry(&ntb->pdev->dev); + addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32); + + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) + return peer_widx; + + ret = ntb_mw_get_align(ntb, peer, peer_widx, &addr_align, + NULL, NULL); + if (ret) + return ret; + + addr &= ~(addr_align - 1); + } + + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) { + ret = peer_widx; + goto error_out; + } + + ret = ntb_mw_get_align(ntb, peer, peer_widx, NULL, + &size_align, &size_max); + if (ret) + goto error_out; + + mw_size = round_up(mw_size, size_align); + mw_size = max(mw_size, size_max); + if (mw_size < mw_min_size) + mw_min_size = mw_size; + + ret = ntb_mw_set_trans(ntb, peer, peer_widx, + addr, mw_size); + if (ret) + goto error_out; + } + + ntb->msi->base_addr = addr; + ntb->msi->end_addr = addr + mw_min_size; + + return 0; + +error_out: + for (i = 0; i < peer; i++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) + continue; + + ntb_mw_clear_trans(ntb, i, peer_widx); + } + + return ret; +} +EXPORT_SYMBOL(ntb_msi_setup_mws); + +/** + * ntb_msi_clear_mws() - Clear all inbound memory windows + * @ntb: NTB device context + * + * This function tears down the resources used by ntb_msi_setup_mws(). + */ +void ntb_msi_clear_mws(struct ntb_dev *ntb) +{ + int peer; + int peer_widx; + + for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) { + peer_widx = ntb_peer_highest_mw_idx(ntb, peer); + if (peer_widx < 0) + continue; + + ntb_mw_clear_trans(ntb, peer, peer_widx); + } +} +EXPORT_SYMBOL(ntb_msi_clear_mws); + +struct ntb_msi_devres { + struct ntb_dev *ntb; + struct msi_desc *entry; + struct ntb_msi_desc *msi_desc; +}; + +static int ntb_msi_set_desc(struct ntb_dev *ntb, struct msi_desc *entry, + struct ntb_msi_desc *msi_desc) +{ + u64 addr; + + addr = entry->msg.address_lo + + ((uint64_t)entry->msg.address_hi << 32); + + if (addr < ntb->msi->base_addr || addr >= ntb->msi->end_addr) { + dev_warn_once(&ntb->dev, + "IRQ %d: MSI Address not within the memory window (%llx, [%llx %llx])\n", + entry->irq, addr, ntb->msi->base_addr, + ntb->msi->end_addr); + return -EFAULT; + } + + msi_desc->addr_offset = addr - ntb->msi->base_addr; + msi_desc->data = entry->msg.data; + + return 0; +} + +static void ntb_msi_write_msg(struct msi_desc *entry, void *data) +{ + struct ntb_msi_devres *dr = data; + + WARN_ON(ntb_msi_set_desc(dr->ntb, entry, dr->msi_desc)); + + if (dr->ntb->msi->desc_changed) + dr->ntb->msi->desc_changed(dr->ntb->ctx); +} + +static void ntbm_msi_callback_release(struct device *dev, void *res) +{ + struct ntb_msi_devres *dr = res; + + dr->entry->write_msi_msg = NULL; + dr->entry->write_msi_msg_data = NULL; +} + +static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry, + struct ntb_msi_desc *msi_desc) +{ + struct ntb_msi_devres *dr; + + dr = devres_alloc(ntbm_msi_callback_release, + sizeof(struct ntb_msi_devres), GFP_KERNEL); + if (!dr) + return -ENOMEM; + + dr->ntb = ntb; + dr->entry = entry; + dr->msi_desc = msi_desc; + + devres_add(&ntb->dev, dr); + + dr->entry->write_msi_msg = ntb_msi_write_msg; + dr->entry->write_msi_msg_data = dr; + + return 0; +} + +/** + * ntbm_msi_request_threaded_irq() - allocate an MSI interrupt + * @ntb: NTB device context + * @handler: Function to be called when the IRQ occurs + * @thread_fn: Function to be called in a threaded interrupt context. NULL + * for clients which handle everything in @handler + * @devname: An ascii name for the claiming device, dev_name(dev) if NULL + * @dev_id: A cookie passed back to the handler function + * + * This function assigns an interrupt handler to an unused + * MSI interrupt and returns the descriptor used to trigger + * it. The descriptor can then be sent to a peer to trigger + * the interrupt. + * + * The interrupt resource is managed with devres so it will + * be automatically freed when the NTB device is torn down. + * + * If an IRQ allocated with this function needs to be freed + * separately, ntbm_free_irq() must be used. + * + * Return: IRQ number assigned on success, otherwise a negative error number. + */ +int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, + irq_handler_t thread_fn, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc) +{ + struct msi_desc *entry; + struct irq_desc *desc; + int ret; + + if (!ntb->msi) + return -EINVAL; + + for_each_pci_msi_entry(entry, ntb->pdev) { + desc = irq_to_desc(entry->irq); + if (desc->action) + continue; + + ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler, + thread_fn, 0, name, dev_id); + if (ret) + continue; + + if (ntb_msi_set_desc(ntb, entry, msi_desc)) { + devm_free_irq(&ntb->dev, entry->irq, dev_id); + continue; + } + + ret = ntbm_msi_setup_callback(ntb, entry, msi_desc); + if (ret) { + devm_free_irq(&ntb->dev, entry->irq, dev_id); + return ret; + } + + + return entry->irq; + } + + return -ENODEV; +} +EXPORT_SYMBOL(ntbm_msi_request_threaded_irq); + +static int ntbm_msi_callback_match(struct device *dev, void *res, void *data) +{ + struct ntb_dev *ntb = dev_ntb(dev); + struct ntb_msi_devres *dr = res; + + return dr->ntb == ntb && dr->entry == data; +} + +/** + * ntbm_msi_free_irq() - free an interrupt + * @ntb: NTB device context + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * This function should be used to manually free IRQs allocated with + * ntbm_request_[threaded_]irq(). + */ +void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id) +{ + struct msi_desc *entry = irq_get_msi_desc(irq); + + entry->write_msi_msg = NULL; + entry->write_msi_msg_data = NULL; + + WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release, + ntbm_msi_callback_match, entry)); + + devm_free_irq(&ntb->dev, irq, dev_id); +} +EXPORT_SYMBOL(ntbm_msi_free_irq); + +/** + * ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer + * @ntb: NTB device context + * @peer: Peer index + * @desc: MSI descriptor data which triggers the interrupt + * + * This function triggers an interrupt on a peer. It requires + * the descriptor structure to have been passed from that peer + * by some other means. + * + * Return: Zero on success, otherwise a negative error number. + */ +int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc) +{ + int idx; + + if (!ntb->msi) + return -EINVAL; + + idx = desc->addr_offset / sizeof(*ntb->msi->peer_mws[peer]); + + iowrite32(desc->data, &ntb->msi->peer_mws[peer][idx]); + + return 0; +} +EXPORT_SYMBOL(ntb_msi_peer_trigger); + +/** + * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt + * @ntb: NTB device context + * @peer: Peer index + * @desc: MSI descriptor data which triggers the interrupt + * @msi_addr: Physical address to trigger the interrupt + * + * This function allows using DMA engines to trigger an interrupt + * (for example, trigger an interrupt to process the data after + * sending it). To trigger the interrupt, write @desc.data to the address + * returned in @msi_addr + * + * Return: Zero on success, otherwise a negative error number. + */ +int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc, + phys_addr_t *msi_addr) +{ + int peer_widx = ntb_peer_mw_count(ntb) - 1 - peer; + phys_addr_t mw_phys_addr; + int ret; + + ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, NULL); + if (ret) + return ret; + + if (msi_addr) + *msi_addr = mw_phys_addr + desc->addr_offset; + + return 0; +} +EXPORT_SYMBOL(ntb_msi_peer_addr); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index d4f39ba1d976..40c90ca10729 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -93,6 +93,12 @@ static bool use_dma; module_param(use_dma, bool, 0644); MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy"); +static bool use_msi; +#ifdef CONFIG_NTB_MSI +module_param(use_msi, bool, 0644); +MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells"); +#endif + static struct dentry *nt_debugfs_dir; /* Only two-ports NTB devices are supported */ @@ -188,6 +194,11 @@ struct ntb_transport_qp { u64 tx_err_no_buf; u64 tx_memcpy; u64 tx_async; + + bool use_msi; + int msi_irq; + struct ntb_msi_desc msi_desc; + struct ntb_msi_desc peer_msi_desc; }; struct ntb_transport_mw { @@ -221,6 +232,10 @@ struct ntb_transport_ctx { u64 qp_bitmap; u64 qp_bitmap_free; + bool use_msi; + unsigned int msi_spad_offset; + u64 msi_db_mask; + bool link_is_up; struct delayed_work link_work; struct work_struct link_cleanup; @@ -667,6 +682,114 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, return 0; } +static irqreturn_t ntb_transport_isr(int irq, void *dev) +{ + struct ntb_transport_qp *qp = dev; + + tasklet_schedule(&qp->rxc_db_work); + + return IRQ_HANDLED; +} + +static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt, + unsigned int qp_num) +{ + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; + int spad = qp_num * 2 + nt->msi_spad_offset; + + if (!nt->use_msi) + return; + + if (spad >= ntb_spad_count(nt->ndev)) + return; + + qp->peer_msi_desc.addr_offset = + ntb_peer_spad_read(qp->ndev, PIDX, spad); + qp->peer_msi_desc.data = + ntb_peer_spad_read(qp->ndev, PIDX, spad + 1); + + dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n", + qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data); + + if (qp->peer_msi_desc.addr_offset) { + qp->use_msi = true; + dev_info(&qp->ndev->pdev->dev, + "Using MSI interrupts for QP%d\n", qp_num); + } +} + +static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt, + unsigned int qp_num) +{ + struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; + int spad = qp_num * 2 + nt->msi_spad_offset; + int rc; + + if (!nt->use_msi) + return; + + if (spad >= ntb_spad_count(nt->ndev)) { + dev_warn_once(&qp->ndev->pdev->dev, + "Not enough SPADS to use MSI interrupts\n"); + return; + } + + ntb_spad_write(qp->ndev, spad, 0); + ntb_spad_write(qp->ndev, spad + 1, 0); + + if (!qp->msi_irq) { + qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr, + KBUILD_MODNAME, qp, + &qp->msi_desc); + if (qp->msi_irq < 0) { + dev_warn(&qp->ndev->pdev->dev, + "Unable to allocate MSI interrupt for qp%d\n", + qp_num); + return; + } + } + + rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset); + if (rc) + goto err_free_interrupt; + + rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data); + if (rc) + goto err_free_interrupt; + + dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n", + qp_num, qp->msi_irq, qp->msi_desc.addr_offset, + qp->msi_desc.data); + + return; + +err_free_interrupt: + devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp); +} + +static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt) +{ + int i; + + dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed"); + + for (i = 0; i < nt->qp_count; i++) + ntb_transport_setup_qp_peer_msi(nt, i); +} + +static void ntb_transport_msi_desc_changed(void *data) +{ + struct ntb_transport_ctx *nt = data; + int i; + + dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed"); + + for (i = 0; i < nt->qp_count; i++) + ntb_transport_setup_qp_msi(nt, i); + + ntb_peer_db_set(nt->ndev, nt->msi_db_mask); +} + static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) { struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; @@ -905,6 +1028,20 @@ static void ntb_transport_link_work(struct work_struct *work) int rc = 0, i, spad; /* send the local info, in the opposite order of the way we read it */ + + if (nt->use_msi) { + rc = ntb_msi_setup_mws(ndev); + if (rc) { + dev_warn(&pdev->dev, + "Failed to register MSI memory window: %d\n", + rc); + nt->use_msi = false; + } + } + + for (i = 0; i < nt->qp_count; i++) + ntb_transport_setup_qp_msi(nt, i); + for (i = 0; i < nt->mw_count; i++) { size = nt->mw_vec[i].phys_size; @@ -962,6 +1099,7 @@ static void ntb_transport_link_work(struct work_struct *work) struct ntb_transport_qp *qp = &nt->qp_vec[i]; ntb_transport_setup_qp_mw(nt, i); + ntb_transport_setup_qp_peer_msi(nt, i); if (qp->client_ready) schedule_delayed_work(&qp->link_work, 0); @@ -1135,6 +1273,19 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) return -ENOMEM; nt->ndev = ndev; + + /* + * If we are using MSI, and have at least one extra memory window, + * we will reserve the last MW for the MSI window. + */ + if (use_msi && mw_count > 1) { + rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed); + if (!rc) { + mw_count -= 1; + nt->use_msi = true; + } + } + spad_count = ntb_spad_count(ndev); /* Limit the MW's based on the availability of scratchpads */ @@ -1148,6 +1299,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2; nt->mw_count = min(mw_count, max_mw_count_for_spads); + nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH; + nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec), GFP_KERNEL, node); if (!nt->mw_vec) { @@ -1178,6 +1331,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) qp_bitmap = ntb_db_valid_mask(ndev); qp_count = ilog2(qp_bitmap); + if (nt->use_msi) { + qp_count -= 1; + nt->msi_db_mask = 1 << qp_count; + ntb_db_clear_mask(ndev, nt->msi_db_mask); + } + if (max_num_clients && max_num_clients < qp_count) qp_count = max_num_clients; else if (nt->mw_count < qp_count) @@ -1601,7 +1760,10 @@ static void ntb_tx_copy_callback(void *data, iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags); - ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); + if (qp->use_msi) + ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc); + else + ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num)); /* The entry length can only be zero if the packet is intended to be a * "link down" or similar. Since no payload is being sent in these @@ -1869,6 +2031,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev, qp->rx_dma_chan = NULL; } + qp->tx_mw_dma_addr = 0; if (qp->tx_dma_chan) { qp->tx_mw_dma_addr = dma_map_resource(qp->tx_dma_chan->device->dev, @@ -2268,6 +2431,11 @@ static void ntb_transport_doorbell_callback(void *data, int vector) u64 db_bits; unsigned int qp_num; + if (ntb_db_read(nt->ndev) & nt->msi_db_mask) { + ntb_transport_msi_peer_desc_changed(nt); + ntb_db_clear(nt->ndev, nt->msi_db_mask); + } + db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free & ntb_db_vector_mask(nt->ndev, vector)); diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig index a8db00a7e087..516b991f33b9 100644 --- a/drivers/ntb/test/Kconfig +++ b/drivers/ntb/test/Kconfig @@ -26,3 +26,12 @@ config NTB_PERF to and from the window without additional software interaction. If unsure, say N. + +config NTB_MSI_TEST + tristate "NTB MSI Test Client" + depends on NTB_MSI + help + This tool demonstrates the use of the NTB MSI library to + send MSI interrupts between peers. + + If unsure, say N. diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile index cbfd67622ef7..19ed91d8a3b1 100644 --- a/drivers/ntb/test/Makefile +++ b/drivers/ntb/test/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o obj-$(CONFIG_NTB_TOOL) += ntb_tool.o obj-$(CONFIG_NTB_PERF) += ntb_perf.o +obj-$(CONFIG_NTB_MSI_TEST) += ntb_msi_test.o diff --git a/drivers/ntb/test/ntb_msi_test.c b/drivers/ntb/test/ntb_msi_test.c new file mode 100644 index 000000000000..99d826ed9c34 --- /dev/null +++ b/drivers/ntb/test/ntb_msi_test.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) + +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/ntb.h> +#include <linux/pci.h> +#include <linux/radix-tree.h> +#include <linux/workqueue.h> + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION("0.1"); +MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>"); +MODULE_DESCRIPTION("Test for sending MSI interrupts over an NTB memory window"); + +static int num_irqs = 4; +module_param(num_irqs, int, 0644); +MODULE_PARM_DESC(num_irqs, "number of irqs to use"); + +struct ntb_msit_ctx { + struct ntb_dev *ntb; + struct dentry *dbgfs_dir; + struct work_struct setup_work; + + struct ntb_msit_isr_ctx { + int irq_idx; + int irq_num; + int occurrences; + struct ntb_msit_ctx *nm; + struct ntb_msi_desc desc; + } *isr_ctx; + + struct ntb_msit_peer { + struct ntb_msit_ctx *nm; + int pidx; + int num_irqs; + struct completion init_comp; + struct ntb_msi_desc *msi_desc; + } peers[]; +}; + +static struct dentry *ntb_msit_dbgfs_topdir; + +static irqreturn_t ntb_msit_isr(int irq, void *dev) +{ + struct ntb_msit_isr_ctx *isr_ctx = dev; + struct ntb_msit_ctx *nm = isr_ctx->nm; + + dev_dbg(&nm->ntb->dev, "Interrupt Occurred: %d", + isr_ctx->irq_idx); + + isr_ctx->occurrences++; + + return IRQ_HANDLED; +} + +static void ntb_msit_setup_work(struct work_struct *work) +{ + struct ntb_msit_ctx *nm = container_of(work, struct ntb_msit_ctx, + setup_work); + int irq_count = 0; + int irq; + int ret; + uintptr_t i; + + ret = ntb_msi_setup_mws(nm->ntb); + if (ret) { + dev_err(&nm->ntb->dev, "Unable to setup MSI windows: %d\n", + ret); + return; + } + + for (i = 0; i < num_irqs; i++) { + nm->isr_ctx[i].irq_idx = i; + nm->isr_ctx[i].nm = nm; + + if (!nm->isr_ctx[i].irq_num) { + irq = ntbm_msi_request_irq(nm->ntb, ntb_msit_isr, + KBUILD_MODNAME, + &nm->isr_ctx[i], + &nm->isr_ctx[i].desc); + if (irq < 0) + break; + + nm->isr_ctx[i].irq_num = irq; + } + + ret = ntb_spad_write(nm->ntb, 2 * i + 1, + nm->isr_ctx[i].desc.addr_offset); + if (ret) + break; + + ret = ntb_spad_write(nm->ntb, 2 * i + 2, + nm->isr_ctx[i].desc.data); + if (ret) + break; + + irq_count++; + } + + ntb_spad_write(nm->ntb, 0, irq_count); + ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb))); +} + +static void ntb_msit_desc_changed(void *ctx) +{ + struct ntb_msit_ctx *nm = ctx; + int i; + + dev_dbg(&nm->ntb->dev, "MSI Descriptors Changed\n"); + + for (i = 0; i < num_irqs; i++) { + ntb_spad_write(nm->ntb, 2 * i + 1, + nm->isr_ctx[i].desc.addr_offset); + ntb_spad_write(nm->ntb, 2 * i + 2, + nm->isr_ctx[i].desc.data); + } + + ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb))); +} + +static void ntb_msit_link_event(void *ctx) +{ + struct ntb_msit_ctx *nm = ctx; + + if (!ntb_link_is_up(nm->ntb, NULL, NULL)) + return; + + schedule_work(&nm->setup_work); +} + +static void ntb_msit_copy_peer_desc(struct ntb_msit_ctx *nm, int peer) +{ + int i; + struct ntb_msi_desc *desc = nm->peers[peer].msi_desc; + int irq_count = nm->peers[peer].num_irqs; + + for (i = 0; i < irq_count; i++) { + desc[i].addr_offset = ntb_peer_spad_read(nm->ntb, peer, + 2 * i + 1); + desc[i].data = ntb_peer_spad_read(nm->ntb, peer, 2 * i + 2); + } + + dev_info(&nm->ntb->dev, "Found %d interrupts on peer %d\n", + irq_count, peer); + + complete_all(&nm->peers[peer].init_comp); +} + +static void ntb_msit_db_event(void *ctx, int vec) +{ + struct ntb_msit_ctx *nm = ctx; + struct ntb_msi_desc *desc; + u64 peer_mask = ntb_db_read(nm->ntb); + u32 irq_count; + int peer; + + ntb_db_clear(nm->ntb, peer_mask); + + for (peer = 0; peer < sizeof(peer_mask) * 8; peer++) { + if (!(peer_mask & BIT(peer))) + continue; + + irq_count = ntb_peer_spad_read(nm->ntb, peer, 0); + if (irq_count == -1) + continue; + + desc = kcalloc(irq_count, sizeof(*desc), GFP_ATOMIC); + if (!desc) + continue; + + kfree(nm->peers[peer].msi_desc); + nm->peers[peer].msi_desc = desc; + nm->peers[peer].num_irqs = irq_count; + + ntb_msit_copy_peer_desc(nm, peer); + } +} + +static const struct ntb_ctx_ops ntb_msit_ops = { + .link_event = ntb_msit_link_event, + .db_event = ntb_msit_db_event, +}; + +static int ntb_msit_dbgfs_trigger(void *data, u64 idx) +{ + struct ntb_msit_peer *peer = data; + + if (idx >= peer->num_irqs) + return -EINVAL; + + dev_dbg(&peer->nm->ntb->dev, "trigger irq %llu on peer %u\n", + idx, peer->pidx); + + return ntb_msi_peer_trigger(peer->nm->ntb, peer->pidx, + &peer->msi_desc[idx]); +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_trigger_fops, NULL, + ntb_msit_dbgfs_trigger, "%llu\n"); + +static int ntb_msit_dbgfs_port_get(void *data, u64 *port) +{ + struct ntb_msit_peer *peer = data; + + *port = ntb_peer_port_number(peer->nm->ntb, peer->pidx); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_port_fops, ntb_msit_dbgfs_port_get, + NULL, "%llu\n"); + +static int ntb_msit_dbgfs_count_get(void *data, u64 *count) +{ + struct ntb_msit_peer *peer = data; + + *count = peer->num_irqs; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_count_fops, ntb_msit_dbgfs_count_get, + NULL, "%llu\n"); + +static int ntb_msit_dbgfs_ready_get(void *data, u64 *ready) +{ + struct ntb_msit_peer *peer = data; + + *ready = try_wait_for_completion(&peer->init_comp); + + return 0; +} + +static int ntb_msit_dbgfs_ready_set(void *data, u64 ready) +{ + struct ntb_msit_peer *peer = data; + + return wait_for_completion_interruptible(&peer->init_comp); +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_ready_fops, ntb_msit_dbgfs_ready_get, + ntb_msit_dbgfs_ready_set, "%llu\n"); + +static int ntb_msit_dbgfs_occurrences_get(void *data, u64 *occurrences) +{ + struct ntb_msit_isr_ctx *isr_ctx = data; + + *occurrences = isr_ctx->occurrences; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_occurrences_fops, + ntb_msit_dbgfs_occurrences_get, + NULL, "%llu\n"); + +static int ntb_msit_dbgfs_local_port_get(void *data, u64 *port) +{ + struct ntb_msit_ctx *nm = data; + + *port = ntb_port_number(nm->ntb); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_local_port_fops, + ntb_msit_dbgfs_local_port_get, + NULL, "%llu\n"); + +static void ntb_msit_create_dbgfs(struct ntb_msit_ctx *nm) +{ + struct pci_dev *pdev = nm->ntb->pdev; + char buf[32]; + int i; + struct dentry *peer_dir; + + nm->dbgfs_dir = debugfs_create_dir(pci_name(pdev), + ntb_msit_dbgfs_topdir); + debugfs_create_file("port", 0400, nm->dbgfs_dir, nm, + &ntb_msit_local_port_fops); + + for (i = 0; i < ntb_peer_port_count(nm->ntb); i++) { + nm->peers[i].pidx = i; + nm->peers[i].nm = nm; + init_completion(&nm->peers[i].init_comp); + + snprintf(buf, sizeof(buf), "peer%d", i); + peer_dir = debugfs_create_dir(buf, nm->dbgfs_dir); + + debugfs_create_file_unsafe("trigger", 0200, peer_dir, + &nm->peers[i], + &ntb_msit_trigger_fops); + + debugfs_create_file_unsafe("port", 0400, peer_dir, + &nm->peers[i], &ntb_msit_port_fops); + + debugfs_create_file_unsafe("count", 0400, peer_dir, + &nm->peers[i], + &ntb_msit_count_fops); + + debugfs_create_file_unsafe("ready", 0600, peer_dir, + &nm->peers[i], + &ntb_msit_ready_fops); + } + + for (i = 0; i < num_irqs; i++) { + snprintf(buf, sizeof(buf), "irq%d_occurrences", i); + debugfs_create_file_unsafe(buf, 0400, nm->dbgfs_dir, + &nm->isr_ctx[i], + &ntb_msit_occurrences_fops); + } +} + +static void ntb_msit_remove_dbgfs(struct ntb_msit_ctx *nm) +{ + debugfs_remove_recursive(nm->dbgfs_dir); +} + +static int ntb_msit_probe(struct ntb_client *client, struct ntb_dev *ntb) +{ + struct ntb_msit_ctx *nm; + size_t struct_size; + int peers; + int ret; + + peers = ntb_peer_port_count(ntb); + if (peers <= 0) + return -EINVAL; + + if (ntb_spad_is_unsafe(ntb) || ntb_spad_count(ntb) < 2 * num_irqs + 1) { + dev_err(&ntb->dev, "NTB MSI test requires at least %d spads for %d irqs\n", + 2 * num_irqs + 1, num_irqs); + return -EFAULT; + } + + ret = ntb_spad_write(ntb, 0, -1); + if (ret) { + dev_err(&ntb->dev, "Unable to write spads: %d\n", ret); + return ret; + } + + ret = ntb_db_clear_mask(ntb, GENMASK(peers - 1, 0)); + if (ret) { + dev_err(&ntb->dev, "Unable to clear doorbell mask: %d\n", ret); + return ret; + } + + ret = ntb_msi_init(ntb, ntb_msit_desc_changed); + if (ret) { + dev_err(&ntb->dev, "Unable to initialize MSI library: %d\n", + ret); + return ret; + } + + struct_size = sizeof(*nm) + sizeof(*nm->peers) * peers; + + nm = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL); + if (!nm) + return -ENOMEM; + + nm->isr_ctx = devm_kcalloc(&ntb->dev, num_irqs, sizeof(*nm->isr_ctx), + GFP_KERNEL); + if (!nm->isr_ctx) + return -ENOMEM; + + INIT_WORK(&nm->setup_work, ntb_msit_setup_work); + nm->ntb = ntb; + + ntb_msit_create_dbgfs(nm); + + ret = ntb_set_ctx(ntb, nm, &ntb_msit_ops); + if (ret) + goto remove_dbgfs; + + if (!nm->isr_ctx) + goto remove_dbgfs; + + ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + + return 0; + +remove_dbgfs: + ntb_msit_remove_dbgfs(nm); + devm_kfree(&ntb->dev, nm->isr_ctx); + devm_kfree(&ntb->dev, nm); + return ret; +} + +static void ntb_msit_remove(struct ntb_client *client, struct ntb_dev *ntb) +{ + struct ntb_msit_ctx *nm = ntb->ctx; + int i; + + ntb_link_disable(ntb); + ntb_db_set_mask(ntb, ntb_db_valid_mask(ntb)); + ntb_msi_clear_mws(ntb); + + for (i = 0; i < ntb_peer_port_count(ntb); i++) + kfree(nm->peers[i].msi_desc); + + ntb_clear_ctx(ntb); + ntb_msit_remove_dbgfs(nm); +} + +static struct ntb_client ntb_msit_client = { + .ops = { + .probe = ntb_msit_probe, + .remove = ntb_msit_remove + } +}; + +static int __init ntb_msit_init(void) +{ + int ret; + + if (debugfs_initialized()) + ntb_msit_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, + NULL); + + ret = ntb_register_client(&ntb_msit_client); + if (ret) + debugfs_remove_recursive(ntb_msit_dbgfs_topdir); + + return ret; +} +module_init(ntb_msit_init); + +static void __exit ntb_msit_exit(void) +{ + ntb_unregister_client(&ntb_msit_client); + debugfs_remove_recursive(ntb_msit_dbgfs_topdir); +} +module_exit(ntb_msit_exit); diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 11a6cd374004..d028331558ea 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -100,7 +100,7 @@ MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool"); #define DMA_TRIES 100 #define DMA_MDELAY 10 -#define MSG_TRIES 500 +#define MSG_TRIES 1000 #define MSG_UDELAY_LOW 1000 #define MSG_UDELAY_HIGH 2000 @@ -734,8 +734,6 @@ static void perf_disable_service(struct perf_ctx *perf) { int pidx; - ntb_link_disable(perf->ntb); - if (perf->cmd_send == perf_msg_cmd_send) { u64 inbits; @@ -752,6 +750,16 @@ static void perf_disable_service(struct perf_ctx *perf) for (pidx = 0; pidx < perf->pcnt; pidx++) flush_work(&perf->peers[pidx].service); + + for (pidx = 0; pidx < perf->pcnt; pidx++) { + struct perf_peer *peer = &perf->peers[pidx]; + + ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 0); + } + + ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx)); + + ntb_link_disable(perf->ntb); } /*============================================================================== diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 59a6d232f77a..0884bedcfc7a 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) { + if (desc->msi_attrib.is_virtual) + return NULL; + return desc->mask_base + desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; } @@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) { u32 mask_bits = desc->masked; + void __iomem *desc_addr; if (pci_msi_ignore_mask) return 0; + desc_addr = pci_msix_desc_addr(desc); + if (!desc_addr) + return 0; mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; if (flag) mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; - writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL); + + writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL); return mask_bits; } @@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) if (entry->msi_attrib.is_msix) { void __iomem *base = pci_msix_desc_addr(entry); + if (!base) { + WARN_ON(1); + return; + } + msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); msg->data = readl(base + PCI_MSIX_ENTRY_DATA); @@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) } else if (entry->msi_attrib.is_msix) { void __iomem *base = pci_msix_desc_addr(entry); + if (!base) + goto skip; + writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); writel(msg->data, base + PCI_MSIX_ENTRY_DATA); @@ -327,7 +343,13 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) msg->data); } } + +skip: entry->msg = *msg; + + if (entry->write_msi_msg) + entry->write_msi_msg(entry, entry->write_msi_msg_data); + } void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) @@ -550,6 +572,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd) entry->msi_attrib.is_msix = 0; entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); + entry->msi_attrib.is_virtual = 0; entry->msi_attrib.entry_nr = 0; entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT); entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ @@ -674,6 +697,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, struct irq_affinity_desc *curmsk, *masks = NULL; struct msi_desc *entry; int ret, i; + int vec_count = pci_msix_vec_count(dev); if (affd) masks = irq_create_affinity_masks(nvec, affd); @@ -696,6 +720,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, entry->msi_attrib.entry_nr = entries[i].entry; else entry->msi_attrib.entry_nr = i; + + entry->msi_attrib.is_virtual = + entry->msi_attrib.entry_nr >= vec_count; + entry->msi_attrib.default_irq = dev->irq; entry->mask_base = base; @@ -714,12 +742,19 @@ static void msix_program_entries(struct pci_dev *dev, { struct msi_desc *entry; int i = 0; + void __iomem *desc_addr; for_each_pci_msi_entry(entry, dev) { if (entries) entries[i++].vector = entry->irq; - entry->masked = readl(pci_msix_desc_addr(entry) + - PCI_MSIX_ENTRY_VECTOR_CTRL); + + desc_addr = pci_msix_desc_addr(entry); + if (desc_addr) + entry->masked = readl(desc_addr + + PCI_MSIX_ENTRY_VECTOR_CTRL); + else + entry->masked = 0; + msix_mask_irq(entry, 1); } } @@ -932,7 +967,7 @@ int pci_msix_vec_count(struct pci_dev *dev) EXPORT_SYMBOL(pci_msix_vec_count); static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, - int nvec, struct irq_affinity *affd) + int nvec, struct irq_affinity *affd, int flags) { int nr_entries; int i, j; @@ -943,7 +978,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, nr_entries = pci_msix_vec_count(dev); if (nr_entries < 0) return nr_entries; - if (nvec > nr_entries) + if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL)) return nr_entries; if (entries) { @@ -1079,7 +1114,8 @@ EXPORT_SYMBOL(pci_enable_msi); static int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, - int maxvec, struct irq_affinity *affd) + int maxvec, struct irq_affinity *affd, + int flags) { int rc, nvec = maxvec; @@ -1096,7 +1132,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, return -ENOSPC; } - rc = __pci_enable_msix(dev, entries, nvec, affd); + rc = __pci_enable_msix(dev, entries, nvec, affd, flags); if (rc == 0) return nvec; @@ -1127,7 +1163,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec) { - return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL); + return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0); } EXPORT_SYMBOL(pci_enable_msix_range); @@ -1167,7 +1203,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, if (flags & PCI_IRQ_MSIX) { msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs, - max_vecs, affd); + max_vecs, affd, flags); if (msix_vecs > 0) return msix_vecs; } diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index bebbde4ebec0..8c94cd3fd1f2 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -30,6 +30,10 @@ module_param(use_dma_mrpc, bool, 0644); MODULE_PARM_DESC(use_dma_mrpc, "Enable the use of the DMA MRPC feature"); +static int nirqs = 32; +module_param(nirqs, int, 0644); +MODULE_PARM_DESC(nirqs, "number of interrupts to allocate (more may be useful for NTB applications)"); + static dev_t switchtec_devt; static DEFINE_IDA(switchtec_minor_ida); @@ -1263,8 +1267,12 @@ static int switchtec_init_isr(struct switchtec_dev *stdev) int dma_mrpc_irq; int rc; - nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, 4, - PCI_IRQ_MSIX | PCI_IRQ_MSI); + if (nirqs < 4) + nirqs = 4; + + nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, nirqs, + PCI_IRQ_MSIX | PCI_IRQ_MSI | + PCI_IRQ_VIRTUAL); if (nvecs < 0) return nvecs; diff --git a/include/linux/msi.h b/include/linux/msi.h index d48e919d55ae..8ad679e9d9c0 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -64,6 +64,10 @@ struct ti_sci_inta_msi_desc { * @msg: The last set MSI message cached for reuse * @affinity: Optional pointer to a cpu affinity mask for this descriptor * + * @write_msi_msg: Callback that may be called when the MSI message + * address or data changes + * @write_msi_msg_data: Data parameter for the callback. + * * @masked: [PCI MSI/X] Mask bits * @is_msix: [PCI MSI/X] True if MSI-X * @multiple: [PCI MSI/X] log2 num of messages allocated @@ -90,6 +94,9 @@ struct msi_desc { const void *iommu_cookie; #endif + void (*write_msi_msg)(struct msi_desc *entry, void *data); + void *write_msi_msg_data; + union { /* PCI MSI/X specific data */ struct { @@ -100,6 +107,7 @@ struct msi_desc { u8 multi_cap : 3; u8 maskbit : 1; u8 is_64 : 1; + u8 is_virtual : 1; u16 entry_nr; unsigned default_irq; } msi_attrib; diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 56a92e3ae3ae..8c13538aeffe 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -58,9 +58,11 @@ #include <linux/completion.h> #include <linux/device.h> +#include <linux/interrupt.h> struct ntb_client; struct ntb_dev; +struct ntb_msi; struct pci_dev; /** @@ -205,7 +207,7 @@ static inline int ntb_ctx_ops_is_valid(const struct ntb_ctx_ops *ops) } /** - * struct ntb_ctx_ops - ntb device operations + * struct ntb_dev_ops - ntb device operations * @port_number: See ntb_port_number(). * @peer_port_count: See ntb_peer_port_count(). * @peer_port_number: See ntb_peer_port_number(). @@ -404,7 +406,7 @@ struct ntb_client { #define drv_ntb_client(__drv) container_of((__drv), struct ntb_client, drv) /** - * struct ntb_device - ntb device + * struct ntb_dev - ntb device * @dev: Linux device object. * @pdev: PCI device entry of the ntb. * @topo: Detected topology of the ntb. @@ -426,6 +428,10 @@ struct ntb_dev { spinlock_t ctx_lock; /* block unregister until device is fully released */ struct completion released; + +#ifdef CONFIG_NTB_MSI + struct ntb_msi *msi; +#endif }; #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev) @@ -616,7 +622,6 @@ static inline int ntb_port_number(struct ntb_dev *ntb) return ntb->ops->port_number(ntb); } - /** * ntb_peer_port_count() - get the number of peer device ports * @ntb: NTB device context. @@ -654,6 +659,58 @@ static inline int ntb_peer_port_number(struct ntb_dev *ntb, int pidx) } /** + * ntb_logical_port_number() - get the logical port number of the local port + * @ntb: NTB device context. + * + * The Logical Port Number is defined to be a unique number for each + * port starting from zero through to the number of ports minus one. + * This is in contrast to the Port Number where each port can be assigned + * any unique physical number by the hardware. + * + * The logical port number is useful for calculating the resource indexes + * used by peers. + * + * Return: the logical port number or negative value indicating an error + */ +static inline int ntb_logical_port_number(struct ntb_dev *ntb) +{ + int lport = ntb_port_number(ntb); + int pidx; + + if (lport < 0) + return lport; + + for (pidx = 0; pidx < ntb_peer_port_count(ntb); pidx++) + if (lport <= ntb_peer_port_number(ntb, pidx)) + return pidx; + + return pidx; +} + +/** + * ntb_peer_logical_port_number() - get the logical peer port by given index + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * The Logical Port Number is defined to be a unique number for each + * port starting from zero through to the number of ports minus one. + * This is in contrast to the Port Number where each port can be assigned + * any unique physical number by the hardware. + * + * The logical port number is useful for calculating the resource indexes + * used by peers. + * + * Return: the peer's logical port number or negative value indicating an error + */ +static inline int ntb_peer_logical_port_number(struct ntb_dev *ntb, int pidx) +{ + if (ntb_peer_port_number(ntb, pidx) < ntb_port_number(ntb)) + return pidx; + else + return pidx + 1; +} + +/** * ntb_peer_port_idx() - get the peer device port index by given port number * @ntb: NTB device context. * @port: Peer port number. @@ -1506,4 +1563,141 @@ static inline int ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx, return ntb->ops->peer_msg_write(ntb, pidx, midx, msg); } +/** + * ntb_peer_resource_idx() - get a resource index for a given peer idx + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * When constructing a graph of peers, each remote peer must use a different + * resource index (mw, doorbell, etc) to communicate with each other + * peer. + * + * In a two peer system, this function should always return 0 such that + * resource 0 points to the remote peer on both ports. + * + * In a 5 peer system, this function will return the following matrix + * + * pidx \ port 0 1 2 3 4 + * 0 0 0 1 2 3 + * 1 0 1 1 2 3 + * 2 0 1 2 2 3 + * 3 0 1 2 3 3 + * + * For example, if this function is used to program peer's memory + * windows, port 0 will program MW 0 on all it's peers to point to itself. + * port 1 will program MW 0 in port 0 to point to itself and MW 1 on all + * other ports. etc. + * + * For the legacy two host case, ntb_port_number() and ntb_peer_port_number() + * both return zero and therefore this function will always return zero. + * So MW 0 on each host would be programmed to point to the other host. + * + * Return: the resource index to use for that peer. + */ +static inline int ntb_peer_resource_idx(struct ntb_dev *ntb, int pidx) +{ + int local_port, peer_port; + + if (pidx >= ntb_peer_port_count(ntb)) + return -EINVAL; + + local_port = ntb_logical_port_number(ntb); + peer_port = ntb_peer_logical_port_number(ntb, pidx); + + if (peer_port < local_port) + return local_port - 1; + else + return local_port; +} + +/** + * ntb_peer_highest_mw_idx() - get a memory window index for a given peer idx + * using the highest index memory windows first + * + * @ntb: NTB device context. + * @pidx: Peer port index. + * + * Like ntb_peer_resource_idx(), except it returns indexes starting with + * last memory window index. + * + * Return: the resource index to use for that peer. + */ +static inline int ntb_peer_highest_mw_idx(struct ntb_dev *ntb, int pidx) +{ + int ret; + + ret = ntb_peer_resource_idx(ntb, pidx); + if (ret < 0) + return ret; + + return ntb_mw_count(ntb, pidx) - ret - 1; +} + +struct ntb_msi_desc { + u32 addr_offset; + u32 data; +}; + +#ifdef CONFIG_NTB_MSI + +int ntb_msi_init(struct ntb_dev *ntb, void (*desc_changed)(void *ctx)); +int ntb_msi_setup_mws(struct ntb_dev *ntb); +void ntb_msi_clear_mws(struct ntb_dev *ntb); +int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler, + irq_handler_t thread_fn, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc); +void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id); +int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc); +int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc, + phys_addr_t *msi_addr); + +#else /* not CONFIG_NTB_MSI */ + +static inline int ntb_msi_init(struct ntb_dev *ntb, + void (*desc_changed)(void *ctx)) +{ + return -EOPNOTSUPP; +} +static inline int ntb_msi_setup_mws(struct ntb_dev *ntb) +{ + return -EOPNOTSUPP; +} +static inline void ntb_msi_clear_mws(struct ntb_dev *ntb) {} +static inline int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, + irq_handler_t handler, + irq_handler_t thread_fn, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc) +{ + return -EOPNOTSUPP; +} +static inline void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, + void *dev_id) {} +static inline int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc) +{ + return -EOPNOTSUPP; +} +static inline int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer, + struct ntb_msi_desc *desc, + phys_addr_t *msi_addr) +{ + return -EOPNOTSUPP; + +} + +#endif /* CONFIG_NTB_MSI */ + +static inline int ntbm_msi_request_irq(struct ntb_dev *ntb, + irq_handler_t handler, + const char *name, void *dev_id, + struct ntb_msi_desc *msi_desc) +{ + return ntbm_msi_request_threaded_irq(ntb, handler, NULL, name, + dev_id, msi_desc); +} + #endif diff --git a/include/linux/pci.h b/include/linux/pci.h index 2972793e3028..9e700d9f9f28 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1412,6 +1412,15 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode, #define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */ #define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */ #define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */ + +/* + * Virtual interrupts allow for more interrupts to be allocated + * than the device has interrupts for. These are not programmed + * into the device's MSI-X table and must be handled by some + * other driver means. + */ +#define PCI_IRQ_VIRTUAL (1 << 4) + #define PCI_IRQ_ALL_TYPES \ (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX) diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 8a20e03d4cb7..9c60337317c6 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -78,10 +78,10 @@ set -e function _modprobe() { - modprobe "$@" + modprobe "$@" || return 1 if [[ "$REMOTE_HOST" != "" ]]; then - ssh "$REMOTE_HOST" modprobe "$@" + ssh "$REMOTE_HOST" modprobe "$@" || return 1 fi } @@ -442,6 +442,30 @@ function pingpong_test() echo " Passed" } +function msi_test() +{ + LOC=$1 + REM=$2 + + write_file 1 $LOC/ready + + echo "Running MSI interrupt tests on: $(subdirname $LOC) / $(subdirname $REM)" + + CNT=$(read_file "$LOC/count") + for ((i = 0; i < $CNT; i++)); do + START=$(read_file $REM/../irq${i}_occurrences) + write_file $i $LOC/trigger + END=$(read_file $REM/../irq${i}_occurrences) + + if [[ $(($END - $START)) != 1 ]]; then + echo "MSI did not trigger the interrupt on the remote side!" >&2 + exit 1 + fi + done + + echo " Passed" +} + function perf_test() { USE_DMA=$1 @@ -520,6 +544,29 @@ function ntb_pingpong_tests() _modprobe -r ntb_pingpong } +function ntb_msi_tests() +{ + LOCAL_MSI="$DEBUGFS/ntb_msi_test/$LOCAL_DEV" + REMOTE_MSI="$REMOTE_HOST:$DEBUGFS/ntb_msi_test/$REMOTE_DEV" + + echo "Starting ntb_msi_test tests..." + + if ! _modprobe ntb_msi_test 2> /dev/null; then + echo " Not doing MSI tests seeing the module is not available." + return + fi + + port_test $LOCAL_MSI $REMOTE_MSI + + LOCAL_PEER="$LOCAL_MSI/peer$LOCAL_PIDX" + REMOTE_PEER="$REMOTE_MSI/peer$REMOTE_PIDX" + + msi_test $LOCAL_PEER $REMOTE_PEER + msi_test $REMOTE_PEER $LOCAL_PEER + + _modprobe -r ntb_msi_test +} + function ntb_perf_tests() { LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV" @@ -541,6 +588,7 @@ function cleanup() _modprobe -r ntb_perf 2> /dev/null _modprobe -r ntb_pingpong 2> /dev/null _modprobe -r ntb_transport 2> /dev/null + _modprobe -r ntb_msi_test 2> /dev/null set -e } @@ -577,5 +625,7 @@ ntb_tool_tests echo ntb_pingpong_tests echo +ntb_msi_tests +echo ntb_perf_tests echo |