diff options
Diffstat (limited to 'drivers/infiniband')
29 files changed, 560 insertions, 292 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index e0ef5fdc361e..4ffc224faa7f 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -204,7 +204,7 @@ static int addr4_resolve(struct sockaddr_in *src_in, /* If the device does ARP internally, return 'done' */ if (rt->dst.dev->flags & IFF_NOARP) { - rdma_copy_addr(addr, rt->dst.dev, NULL); + ret = rdma_copy_addr(addr, rt->dst.dev, NULL); goto put; } diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 91916a8d5de4..2bc7f5af64f4 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -101,7 +101,8 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, agent = port_priv->agent[qpn]; ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); if (IS_ERR(ah)) { - printk(KERN_ERR SPFX "ib_create_ah_from_wc error\n"); + printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n", + PTR_ERR(ah)); return; } diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c index 62af74295dbe..24f9e3a90e8e 100644 --- a/drivers/infiniband/hw/amso1100/c2_ae.c +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -157,7 +157,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) int status; /* - * retreive the message + * retrieve the message */ wr = c2_mq_consume(mq); if (!wr) diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c index d8f4bb8bf42e..0d7b6f23caff 100644 --- a/drivers/infiniband/hw/amso1100/c2_qp.c +++ b/drivers/infiniband/hw/amso1100/c2_qp.c @@ -612,7 +612,7 @@ void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp) c2_unlock_cqs(send_cq, recv_cq); /* - * Destory qp in the rnic... + * Destroy qp in the rnic... */ destroy_qp(c2dev, qp); diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h index c65fbdd6e469..8d4b4ca463ca 100644 --- a/drivers/infiniband/hw/amso1100/c2_wr.h +++ b/drivers/infiniband/hw/amso1100/c2_wr.h @@ -131,7 +131,7 @@ enum c2wr_ids { * All the preceding IDs are fixed, and must not change. * You can add new IDs, but must not remove or reorder * any IDs. If you do, YOU will ruin any hope of - * compatability between versions. + * compatibility between versions. */ CCWR_LAST, @@ -242,7 +242,7 @@ enum c2_acf { /* * to fix bug 1815 we define the max size allowable of the * terminate message (per the IETF spec).Refer to the IETF - * protocal specification, section 12.1.6, page 64) + * protocol specification, section 12.1.6, page 64) * The message is prefixed by 20 types of DDP info. * * Then the message has 6 bytes for the terminate control diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 47db4bf34628..58c0e417bc30 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -2392,7 +2392,7 @@ void ipath_shutdown_device(struct ipath_devdata *dd) /* * clear SerdesEnable and turn the leds off; do this here because * we are unloading, so don't count on interrupts to move along - * Turn the LEDs off explictly for the same reason. + * Turn the LEDs off explicitly for the same reason. */ dd->ipath_f_quiet_serdes(dd); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 6d4b29c4cd89..ee79a2d97b14 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1972,7 +1972,7 @@ static int ipath_do_user_init(struct file *fp, * 0 to 1. So for those chips, we turn it off and then back on. * This will (very briefly) affect any other open ports, but the * duration is very short, and therefore isn't an issue. We - * explictly set the in-memory tail copy to 0 beforehand, so we + * explicitly set the in-memory tail copy to 0 beforehand, so we * don't have to wait to be sure the DMA update has happened * (chip resets head/tail to 0 on transition to enable). */ diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index fef0f4201257..7c1eebe8c7c9 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -335,7 +335,7 @@ done: * @dd: the infinipath device * * sanity check at least some of the values after reset, and - * ensure no receive or transmit (explictly, in case reset + * ensure no receive or transmit (explicitly, in case reset * failed */ static int init_chip_reset(struct ipath_devdata *dd) diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 7420715256a9..e8a2a915251e 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -86,7 +86,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) } /* - * A GRH is expected to preceed the data even if not + * A GRH is expected to precede the data even if not * present on the wire. */ length = swqe->length; @@ -515,7 +515,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, } /* - * A GRH is expected to preceed the data even if not + * A GRH is expected to precede the data even if not * present on the wire. */ wc.byte_len = tlen + sizeof(struct ib_grh); diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index be78f6643c06..f5cb13b21445 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -236,7 +236,7 @@ static int ipath_user_sdma_num_pages(const struct iovec *iov) return 1 + ((epage - spage) >> PAGE_SHIFT); } -/* truncate length to page boundry */ +/* truncate length to page boundary */ static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len) { const unsigned long offset = addr & ~PAGE_MASK; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c7a6213c6996..fbe1973f77b0 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -625,7 +625,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), - MLX4_PROTOCOL_IB); + MLX4_PROT_IB_IPV6); if (err) return err; @@ -636,7 +636,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return 0; err_add: - mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROTOCOL_IB); + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); return err; } @@ -666,7 +666,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct mlx4_ib_gid_entry *ge; err = mlx4_multicast_detach(mdev->dev, - &mqp->mqp, gid->raw, MLX4_PROTOCOL_IB); + &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6); if (err) return err; @@ -721,7 +721,6 @@ static int init_node_data(struct mlx4_ib_dev *dev) if (err) goto out; - dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); out: @@ -954,7 +953,7 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event mlx4_foreach_ib_transport_port(port, ibdev->dev) { oldnd = iboe->netdevs[port - 1]; iboe->netdevs[port - 1] = - mlx4_get_protocol_dev(ibdev->dev, MLX4_PROTOCOL_EN, port); + mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); if (oldnd != iboe->netdevs[port - 1]) { if (iboe->netdevs[port - 1]) netdev_added(ibdev, port); @@ -1207,7 +1206,7 @@ static struct mlx4_interface mlx4_ib_interface = { .add = mlx4_ib_add, .remove = mlx4_ib_remove, .event = mlx4_ib_event, - .protocol = MLX4_PROTOCOL_IB + .protocol = MLX4_PROT_IB_IPV6 }; static int __init mlx4_ib_init(void) diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 8a40cd539ab1..f24b79b805f2 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1043,6 +1043,9 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) } } + /* We can handle large RDMA requests, so allow larger segments. */ + dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); + mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev); if (!mdev) { dev_err(&pdev->dev, "Device struct alloc failed, " diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 3d7f3664b67b..13de1192927c 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -694,7 +694,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i nesdev->netdev_count++; nesdev->nesadapter->netdev_count++; - printk(KERN_ERR PFX "%s: NetEffect RNIC driver successfully loaded.\n", + printk(KERN_INFO PFX "%s: NetEffect RNIC driver successfully loaded.\n", pci_name(pcidev)); return 0; diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index ef3291551bc6..33c7eedaba6c 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1116,7 +1116,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi return rc; } - if (netif_is_bond_slave(netdev)) + if (netif_is_bond_slave(nesvnic->netdev)) netdev = nesvnic->netdev->master; else netdev = nesvnic->netdev; @@ -1397,7 +1397,7 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node) cleanup_retrans_entry(cm_node); cm_node->state = NES_CM_STATE_CLOSING; send_ack(cm_node, NULL); - /* Wait for ACK as this is simultanous close.. + /* Wait for ACK as this is simultaneous close.. * After we receive ACK, do not send anything.. * Just rm the node.. Done.. */ break; diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 08c194861af5..10d0a5ec9add 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -80,7 +80,7 @@ static void nes_terminate_start_timer(struct nes_qp *nesqp); #ifdef CONFIG_INFINIBAND_NES_DEBUG static unsigned char *nes_iwarp_state_str[] = { - "Non-Existant", + "Non-Existent", "Idle", "RTS", "Closing", @@ -91,7 +91,7 @@ static unsigned char *nes_iwarp_state_str[] = { }; static unsigned char *nes_tcp_state_str[] = { - "Non-Existant", + "Non-Existent", "Closed", "Listen", "SYN Sent", diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 2c9c1933bbe3..e96b8fb5d44c 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -902,7 +902,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); } - nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscous = %d, All Multicast = %d.\n", + nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscuous = %d, All Multicast = %d.\n", mc_count, !!(netdev->flags & IFF_PROMISC), !!(netdev->flags & IFF_ALLMULTI)); if (!mc_all_on) { diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 73225eee3cc6..769a1d9da4b7 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -653,7 +653,7 @@ struct diag_observer_list_elt; /* device data struct now contains only "general per-device" info. * fields related to a physical IB port are in a qib_pportdata struct, - * described above) while fields only used by a particualr chip-type are in + * described above) while fields only used by a particular chip-type are in * a qib_chipdata struct, whose contents are opaque to this file. */ struct qib_devdata { diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 75bfad16c114..406fca50d036 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1539,7 +1539,7 @@ done_chk_sdma: /* * If process has NOT already set it's affinity, select and - * reserve a processor for it, as a rendevous for all + * reserve a processor for it, as a rendezvous for all * users of the driver. If they don't actually later * set affinity to this cpu, or set it to some other cpu, * it just means that sooner or later we don't recommend @@ -1657,7 +1657,7 @@ static int qib_do_user_init(struct file *fp, * 0 to 1. So for those chips, we turn it off and then back on. * This will (very briefly) affect any other open ctxts, but the * duration is very short, and therefore isn't an issue. We - * explictly set the in-memory tail copy to 0 beforehand, so we + * explicitly set the in-memory tail copy to 0 beforehand, so we * don't have to wait to be sure the DMA update has happened * (chip resets head/tail to 0 on transition to enable). */ diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 774dea897e9c..7de4b7ebffc5 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1799,7 +1799,7 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) /* * Keep chip from being accessed until we are ready. Use * writeq() directly, to allow the write even though QIB_PRESENT - * isnt' set. + * isn't' set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); dd->int_counter = 0; /* so we check interrupts work again */ @@ -2171,7 +2171,7 @@ static void rcvctrl_6120_mod(struct qib_pportdata *ppd, unsigned int op, * Init the context registers also; if we were * disabled, tail and head should both be zero * already from the enable, but since we don't - * know, we have to do it explictly. + * know, we have to do it explicitly. */ val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index de799f17cb9e..74fe0360bec7 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -2111,7 +2111,7 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) /* * Keep chip from being accessed until we are ready. Use * writeq() directly, to allow the write even though QIB_PRESENT - * isnt' set. + * isn't' set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); dd->int_counter = 0; /* so we check interrupts work again */ @@ -2479,7 +2479,7 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) * we command the link down. As with width, only write the * actual register if the link is currently down, otherwise * takes effect on next link change. Since setting is being - * explictly requested (via MAD or sysfs), clear autoneg + * explicitly requested (via MAD or sysfs), clear autoneg * failure status if speed autoneg is enabled. */ ppd->link_speed_enabled = val; @@ -2778,7 +2778,7 @@ static void rcvctrl_7220_mod(struct qib_pportdata *ppd, unsigned int op, * Init the context registers also; if we were * disabled, tail and head should both be zero * already from the enable, but since we don't - * know, we have to do it explictly. + * know, we have to do it explicitly. */ val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 4a2d21e15a70..55de3cf3441c 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3299,7 +3299,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) /* * Keep chip from being accessed until we are ready. Use * writeq() directly, to allow the write even though QIB_PRESENT - * isnt' set. + * isn't' set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT | QIB_BADINTR); dd->flags |= QIB_DOING_RESET; @@ -3727,7 +3727,7 @@ static int qib_7322_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) /* * As with width, only write the actual register if the * link is currently down, otherwise takes effect on next - * link change. Since setting is being explictly requested + * link change. Since setting is being explicitly requested * (via MAD or sysfs), clear autoneg failure status if speed * autoneg is enabled. */ @@ -4163,7 +4163,7 @@ static void rcvctrl_7322_mod(struct qib_pportdata *ppd, unsigned int op, * Init the context registers also; if we were * disabled, tail and head should both be zero * already from the enable, but since we don't - * know, we have to do it explictly. + * know, we have to do it explicitly. */ val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); @@ -7483,7 +7483,7 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) /* Baseline Wander Correction Gain [13:4-0] (leave as default) */ /* Baseline Wander Correction Gain [3:7-5] (leave as default) */ /* Data Rate Select [5:7-6] (leave as default) */ - /* RX Parralel Word Width [3:10-8] (leave as default) */ + /* RX Parallel Word Width [3:10-8] (leave as default) */ /* RX REST */ /* Single- or Multi-channel reset */ diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index ffefb78b8949..a01f3fce8eb3 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -346,7 +346,7 @@ done: * @dd: the qlogic_ib device * * sanity check at least some of the values after reset, and - * ensure no receive or transmit (explictly, in case reset + * ensure no receive or transmit (explicitly, in case reset * failed */ static int init_after_reset(struct qib_devdata *dd) diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h index 147aff9117d7..7840ab593bcf 100644 --- a/drivers/infiniband/hw/qib/qib_mad.h +++ b/drivers/infiniband/hw/qib/qib_mad.h @@ -73,7 +73,7 @@ struct ib_mad_notice_attr { struct { __be16 reserved; - __be16 lid; /* LID where change occured */ + __be16 lid; /* LID where change occurred */ u8 reserved2; u8 local_changes; /* low bit - local changes */ __be32 new_cap_mask; /* new capability mask */ diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c index 6f31ca5039db..ddde72e11edb 100644 --- a/drivers/infiniband/hw/qib/qib_twsi.c +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -41,7 +41,7 @@ * QLogic_IB "Two Wire Serial Interface" driver. * Originally written for a not-quite-i2c serial eeprom, which is * still used on some supported boards. Later boards have added a - * variety of other uses, most board-specific, so teh bit-boffing + * variety of other uses, most board-specific, so the bit-boffing * part has been split off to this file, while the other parts * have been moved to chip-specific files. * diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 4a51fd1e9cb7..828609fa4d28 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -116,7 +116,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) } /* - * A GRH is expected to preceed the data even if not + * A GRH is expected to precede the data even if not * present on the wire. */ length = swqe->length; @@ -520,7 +520,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, goto drop; /* - * A GRH is expected to preceed the data even if not + * A GRH is expected to precede the data even if not * present on the wire. */ wc.byte_len = tlen + sizeof(struct ib_grh); diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 66208bcd7c13..82442085cbe6 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -239,7 +239,7 @@ static int qib_user_sdma_num_pages(const struct iovec *iov) } /* - * Truncate length to page boundry. + * Truncate length to page boundary. */ static int qib_user_sdma_page_length(unsigned long addr, unsigned long len) { diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index f1df01567bb6..2f02ab0ccc1e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -91,7 +91,7 @@ #define SIZE_4K (1UL << SHIFT_4K) #define MASK_4K (~(SIZE_4K-1)) - /* support upto 512KB in one RDMA */ + /* support up to 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) #define ISER_DEF_CMD_PER_LUN 128 diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 83664ed2804f..376d640487d2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -59,25 +59,31 @@ MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " "v" DRV_VERSION " (" DRV_RELDATE ")"); MODULE_LICENSE("Dual BSD/GPL"); -static int srp_sg_tablesize = SRP_DEF_SG_TABLESIZE; -static int srp_max_iu_len; +static unsigned int srp_sg_tablesize; +static unsigned int cmd_sg_entries; +static unsigned int indirect_sg_entries; +static bool allow_ext_sg; +static int topspin_workarounds = 1; -module_param(srp_sg_tablesize, int, 0444); -MODULE_PARM_DESC(srp_sg_tablesize, - "Max number of gather/scatter entries per I/O (default is 12, max 255)"); +module_param(srp_sg_tablesize, uint, 0444); +MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries"); -static int topspin_workarounds = 1; +module_param(cmd_sg_entries, uint, 0444); +MODULE_PARM_DESC(cmd_sg_entries, + "Default number of gather/scatter entries in the SRP command (default is 12, max 255)"); + +module_param(indirect_sg_entries, uint, 0444); +MODULE_PARM_DESC(indirect_sg_entries, + "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")"); + +module_param(allow_ext_sg, bool, 0444); +MODULE_PARM_DESC(allow_ext_sg, + "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)"); module_param(topspin_workarounds, int, 0444); MODULE_PARM_DESC(topspin_workarounds, "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); -static int mellanox_workarounds = 1; - -module_param(mellanox_workarounds, int, 0444); -MODULE_PARM_DESC(mellanox_workarounds, - "Enable workarounds for Mellanox SRP target bugs if != 0"); - static void srp_add_one(struct ib_device *device); static void srp_remove_one(struct ib_device *device); static void srp_recv_completion(struct ib_cq *cq, void *target_ptr); @@ -114,14 +120,6 @@ static int srp_target_is_topspin(struct srp_target_port *target) !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui)); } -static int srp_target_is_mellanox(struct srp_target_port *target) -{ - static const u8 mellanox_oui[3] = { 0x00, 0x02, 0xc9 }; - - return mellanox_workarounds && - !memcmp(&target->ioc_guid, mellanox_oui, sizeof mellanox_oui); -} - static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size, gfp_t gfp_mask, enum dma_data_direction direction) @@ -378,7 +376,7 @@ static int srp_send_req(struct srp_target_port *target) req->priv.opcode = SRP_LOGIN_REQ; req->priv.tag = 0; - req->priv.req_it_iu_len = cpu_to_be32(srp_max_iu_len); + req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len); req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); /* @@ -456,6 +454,24 @@ static bool srp_change_state(struct srp_target_port *target, return changed; } +static void srp_free_req_data(struct srp_target_port *target) +{ + struct ib_device *ibdev = target->srp_host->srp_dev->dev; + struct srp_request *req; + int i; + + for (i = 0, req = target->req_ring; i < SRP_CMD_SQ_SIZE; ++i, ++req) { + kfree(req->fmr_list); + kfree(req->map_page); + if (req->indirect_dma_addr) { + ib_dma_unmap_single(ibdev, req->indirect_dma_addr, + target->indirect_size, + DMA_TO_DEVICE); + } + kfree(req->indirect_desc); + } +} + static void srp_remove_work(struct work_struct *work) { struct srp_target_port *target = @@ -472,6 +488,7 @@ static void srp_remove_work(struct work_struct *work) scsi_remove_host(target->scsi_host); ib_destroy_cm_id(target->cm_id); srp_free_target_ib(target); + srp_free_req_data(target); scsi_host_put(target->scsi_host); } @@ -535,18 +552,20 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, struct srp_request *req) { + struct ib_device *ibdev = target->srp_host->srp_dev->dev; + struct ib_pool_fmr **pfmr; + if (!scsi_sglist(scmnd) || (scmnd->sc_data_direction != DMA_TO_DEVICE && scmnd->sc_data_direction != DMA_FROM_DEVICE)) return; - if (req->fmr) { - ib_fmr_pool_unmap(req->fmr); - req->fmr = NULL; - } + pfmr = req->fmr_list; + while (req->nfmr--) + ib_fmr_pool_unmap(*pfmr++); - ib_dma_unmap_sg(target->srp_host->srp_dev->dev, scsi_sglist(scmnd), - scsi_sg_count(scmnd), scmnd->sc_data_direction); + ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), + scmnd->sc_data_direction); } static void srp_remove_req(struct srp_target_port *target, @@ -645,96 +664,151 @@ err: return ret; } -static int srp_map_fmr(struct srp_target_port *target, struct scatterlist *scat, - int sg_cnt, struct srp_request *req, - struct srp_direct_buf *buf) +static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, + unsigned int dma_len, u32 rkey) { - u64 io_addr = 0; - u64 *dma_pages; - u32 len; - int page_cnt; - int i, j; - int ret; - struct srp_device *dev = target->srp_host->srp_dev; - struct ib_device *ibdev = dev->dev; - struct scatterlist *sg; + struct srp_direct_buf *desc = state->desc; - if (!dev->fmr_pool) - return -ENODEV; + desc->va = cpu_to_be64(dma_addr); + desc->key = cpu_to_be32(rkey); + desc->len = cpu_to_be32(dma_len); - if (srp_target_is_mellanox(target) && - (ib_sg_dma_address(ibdev, &scat[0]) & ~dev->fmr_page_mask)) - return -EINVAL; + state->total_len += dma_len; + state->desc++; + state->ndesc++; +} - len = page_cnt = 0; - scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) { - unsigned int dma_len = ib_sg_dma_len(ibdev, sg); +static int srp_map_finish_fmr(struct srp_map_state *state, + struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_pool_fmr *fmr; + u64 io_addr = 0; - if (ib_sg_dma_address(ibdev, sg) & ~dev->fmr_page_mask) { - if (i > 0) - return -EINVAL; - else - ++page_cnt; - } - if ((ib_sg_dma_address(ibdev, sg) + dma_len) & - ~dev->fmr_page_mask) { - if (i < sg_cnt - 1) - return -EINVAL; - else - ++page_cnt; - } + if (!state->npages) + return 0; - len += dma_len; + if (state->npages == 1) { + srp_map_desc(state, state->base_dma_addr, state->fmr_len, + target->rkey); + state->npages = state->fmr_len = 0; + return 0; } - page_cnt += len >> dev->fmr_page_shift; - if (page_cnt > SRP_FMR_SIZE) - return -ENOMEM; + fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages, + state->npages, io_addr); + if (IS_ERR(fmr)) + return PTR_ERR(fmr); - dma_pages = kmalloc(sizeof (u64) * page_cnt, GFP_ATOMIC); - if (!dma_pages) - return -ENOMEM; + *state->next_fmr++ = fmr; + state->nfmr++; - page_cnt = 0; - scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) { - unsigned int dma_len = ib_sg_dma_len(ibdev, sg); + srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey); + state->npages = state->fmr_len = 0; + return 0; +} - for (j = 0; j < dma_len; j += dev->fmr_page_size) - dma_pages[page_cnt++] = - (ib_sg_dma_address(ibdev, sg) & - dev->fmr_page_mask) + j; +static void srp_map_update_start(struct srp_map_state *state, + struct scatterlist *sg, int sg_index, + dma_addr_t dma_addr) +{ + state->unmapped_sg = sg; + state->unmapped_index = sg_index; + state->unmapped_addr = dma_addr; +} + +static int srp_map_sg_entry(struct srp_map_state *state, + struct srp_target_port *target, + struct scatterlist *sg, int sg_index, + int use_fmr) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; + dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg); + unsigned int dma_len = ib_sg_dma_len(ibdev, sg); + unsigned int len; + int ret; + + if (!dma_len) + return 0; + + if (use_fmr == SRP_MAP_NO_FMR) { + /* Once we're in direct map mode for a request, we don't + * go back to FMR mode, so no need to update anything + * other than the descriptor. + */ + srp_map_desc(state, dma_addr, dma_len, target->rkey); + return 0; } - req->fmr = ib_fmr_pool_map_phys(dev->fmr_pool, - dma_pages, page_cnt, io_addr); - if (IS_ERR(req->fmr)) { - ret = PTR_ERR(req->fmr); - req->fmr = NULL; - goto out; + /* If we start at an offset into the FMR page, don't merge into + * the current FMR. Finish it out, and use the kernel's MR for this + * sg entry. This is to avoid potential bugs on some SRP targets + * that were never quite defined, but went away when the initiator + * avoided using FMR on such page fragments. + */ + if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) { + ret = srp_map_finish_fmr(state, target); + if (ret) + return ret; + + srp_map_desc(state, dma_addr, dma_len, target->rkey); + srp_map_update_start(state, NULL, 0, 0); + return 0; } - buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, &scat[0]) & - ~dev->fmr_page_mask); - buf->key = cpu_to_be32(req->fmr->fmr->rkey); - buf->len = cpu_to_be32(len); + /* If this is the first sg to go into the FMR, save our position. + * We need to know the first unmapped entry, its index, and the + * first unmapped address within that entry to be able to restart + * mapping after an error. + */ + if (!state->unmapped_sg) + srp_map_update_start(state, sg, sg_index, dma_addr); - ret = 0; + while (dma_len) { + if (state->npages == SRP_FMR_SIZE) { + ret = srp_map_finish_fmr(state, target); + if (ret) + return ret; -out: - kfree(dma_pages); + srp_map_update_start(state, sg, sg_index, dma_addr); + } + + len = min_t(unsigned int, dma_len, dev->fmr_page_size); + if (!state->npages) + state->base_dma_addr = dma_addr; + state->pages[state->npages++] = dma_addr; + state->fmr_len += len; + dma_addr += len; + dma_len -= len; + } + + /* If the last entry of the FMR wasn't a full page, then we need to + * close it out and start a new one -- we can only merge at page + * boundries. + */ + ret = 0; + if (len != dev->fmr_page_size) { + ret = srp_map_finish_fmr(state, target); + if (!ret) + srp_map_update_start(state, NULL, 0, 0); + } return ret; } static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, struct srp_request *req) { - struct scatterlist *scat; + struct scatterlist *scat, *sg; struct srp_cmd *cmd = req->cmd->buf; - int len, nents, count; - u8 fmt = SRP_DATA_DESC_DIRECT; + int i, len, nents, count, use_fmr; struct srp_device *dev; struct ib_device *ibdev; + struct srp_map_state state; + struct srp_indirect_buf *indirect_hdr; + u32 table_len; + u8 fmt; if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) return sizeof (struct srp_cmd); @@ -754,6 +828,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, ibdev = dev->dev; count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); + if (unlikely(count == 0)) + return -EIO; fmt = SRP_DATA_DESC_DIRECT; len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); @@ -770,49 +846,99 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); buf->key = cpu_to_be32(target->rkey); buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); - } else if (srp_map_fmr(target, scat, count, req, - (void *) cmd->add_data)) { - /* - * FMR mapping failed, and the scatterlist has more - * than one entry. Generate an indirect memory - * descriptor. - */ - struct srp_indirect_buf *buf = (void *) cmd->add_data; - struct scatterlist *sg; - u32 datalen = 0; - int i; - - fmt = SRP_DATA_DESC_INDIRECT; - len = sizeof (struct srp_cmd) + - sizeof (struct srp_indirect_buf) + - count * sizeof (struct srp_direct_buf); - - scsi_for_each_sg(scmnd, sg, count, i) { - unsigned int dma_len = ib_sg_dma_len(ibdev, sg); - - buf->desc_list[i].va = - cpu_to_be64(ib_sg_dma_address(ibdev, sg)); - buf->desc_list[i].key = - cpu_to_be32(target->rkey); - buf->desc_list[i].len = cpu_to_be32(dma_len); - datalen += dma_len; + + req->nfmr = 0; + goto map_complete; + } + + /* We have more than one scatter/gather entry, so build our indirect + * descriptor table, trying to merge as many entries with FMR as we + * can. + */ + indirect_hdr = (void *) cmd->add_data; + + ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, + target->indirect_size, DMA_TO_DEVICE); + + memset(&state, 0, sizeof(state)); + state.desc = req->indirect_desc; + state.pages = req->map_page; + state.next_fmr = req->fmr_list; + + use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR; + + for_each_sg(scat, sg, count, i) { + if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) { + /* FMR mapping failed, so backtrack to the first + * unmapped entry and continue on without using FMR. + */ + dma_addr_t dma_addr; + unsigned int dma_len; + +backtrack: + sg = state.unmapped_sg; + i = state.unmapped_index; + + dma_addr = ib_sg_dma_address(ibdev, sg); + dma_len = ib_sg_dma_len(ibdev, sg); + dma_len -= (state.unmapped_addr - dma_addr); + dma_addr = state.unmapped_addr; + use_fmr = SRP_MAP_NO_FMR; + srp_map_desc(&state, dma_addr, dma_len, target->rkey); } + } - if (scmnd->sc_data_direction == DMA_TO_DEVICE) - cmd->data_out_desc_cnt = count; - else - cmd->data_in_desc_cnt = count; + if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target)) + goto backtrack; - buf->table_desc.va = - cpu_to_be64(req->cmd->dma + sizeof *cmd + sizeof *buf); - buf->table_desc.key = - cpu_to_be32(target->rkey); - buf->table_desc.len = - cpu_to_be32(count * sizeof (struct srp_direct_buf)); + /* We've mapped the request, now pull as much of the indirect + * descriptor table as we can into the command buffer. If this + * target is not using an external indirect table, we are + * guaranteed to fit into the command, as the SCSI layer won't + * give us more S/G entries than we allow. + */ + req->nfmr = state.nfmr; + if (state.ndesc == 1) { + /* FMR mapping was able to collapse this to one entry, + * so use a direct descriptor. + */ + struct srp_direct_buf *buf = (void *) cmd->add_data; - buf->len = cpu_to_be32(datalen); + *buf = req->indirect_desc[0]; + goto map_complete; + } + + if (unlikely(target->cmd_sg_cnt < state.ndesc && + !target->allow_ext_sg)) { + shost_printk(KERN_ERR, target->scsi_host, + "Could not fit S/G list into SRP_CMD\n"); + return -EIO; } + count = min(state.ndesc, target->cmd_sg_cnt); + table_len = state.ndesc * sizeof (struct srp_direct_buf); + + fmt = SRP_DATA_DESC_INDIRECT; + len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf); + len += count * sizeof (struct srp_direct_buf); + + memcpy(indirect_hdr->desc_list, req->indirect_desc, + count * sizeof (struct srp_direct_buf)); + + indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); + indirect_hdr->table_desc.key = cpu_to_be32(target->rkey); + indirect_hdr->table_desc.len = cpu_to_be32(table_len); + indirect_hdr->len = cpu_to_be32(state.total_len); + + if (scmnd->sc_data_direction == DMA_TO_DEVICE) + cmd->data_out_desc_cnt = count; + else + cmd->data_in_desc_cnt = count; + + ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len, + DMA_TO_DEVICE); + +map_complete: if (scmnd->sc_data_direction == DMA_TO_DEVICE) cmd->buf_fmt = fmt << 4; else @@ -1140,7 +1266,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) spin_unlock_irqrestore(&target->lock, flags); dev = target->srp_host->srp_dev->dev; - ib_dma_sync_single_for_cpu(dev, iu->dma, srp_max_iu_len, + ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, DMA_TO_DEVICE); scmnd->result = 0; @@ -1164,7 +1290,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) goto err_iu; } - ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len, + ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len, DMA_TO_DEVICE); if (srp_post_send(target, iu, len)) { @@ -1204,7 +1330,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target) for (i = 0; i < SRP_SQ_SIZE; ++i) { target->tx_ring[i] = srp_alloc_iu(target->srp_host, - srp_max_iu_len, + target->max_iu_len, GFP_KERNEL, DMA_TO_DEVICE); if (!target->tx_ring[i]) goto err; @@ -1228,6 +1354,78 @@ err: return -ENOMEM; } +static void srp_cm_rep_handler(struct ib_cm_id *cm_id, + struct srp_login_rsp *lrsp, + struct srp_target_port *target) +{ + struct ib_qp_attr *qp_attr = NULL; + int attr_mask = 0; + int ret; + int i; + + if (lrsp->opcode == SRP_LOGIN_RSP) { + target->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); + target->req_lim = be32_to_cpu(lrsp->req_lim_delta); + + /* + * Reserve credits for task management so we don't + * bounce requests back to the SCSI mid-layer. + */ + target->scsi_host->can_queue + = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE, + target->scsi_host->can_queue); + } else { + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); + ret = -ECONNRESET; + goto error; + } + + if (!target->rx_ring[0]) { + ret = srp_alloc_iu_bufs(target); + if (ret) + goto error; + } + + ret = -ENOMEM; + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) + goto error; + + qp_attr->qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (ret) + goto error_free; + + ret = ib_modify_qp(target->qp, qp_attr, attr_mask); + if (ret) + goto error_free; + + for (i = 0; i < SRP_RQ_SIZE; i++) { + struct srp_iu *iu = target->rx_ring[i]; + ret = srp_post_recv(target, iu); + if (ret) + goto error_free; + } + + qp_attr->qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (ret) + goto error_free; + + ret = ib_modify_qp(target->qp, qp_attr, attr_mask); + if (ret) + goto error_free; + + ret = ib_send_cm_rtu(cm_id, NULL, 0); + +error_free: + kfree(qp_attr); + +error: + target->status = ret; +} + static void srp_cm_rej_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event, struct srp_target_port *target) @@ -1311,11 +1509,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { struct srp_target_port *target = cm_id->context; - struct ib_qp_attr *qp_attr = NULL; - int attr_mask = 0; int comp = 0; - int opcode = 0; - int i; switch (event->event) { case IB_CM_REQ_ERROR: @@ -1327,71 +1521,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) case IB_CM_REP_RECEIVED: comp = 1; - opcode = *(u8 *) event->private_data; - - if (opcode == SRP_LOGIN_RSP) { - struct srp_login_rsp *rsp = event->private_data; - - target->max_ti_iu_len = be32_to_cpu(rsp->max_ti_iu_len); - target->req_lim = be32_to_cpu(rsp->req_lim_delta); - - /* - * Reserve credits for task management so we don't - * bounce requests back to the SCSI mid-layer. - */ - target->scsi_host->can_queue - = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE, - target->scsi_host->can_queue); - } else { - shost_printk(KERN_WARNING, target->scsi_host, - PFX "Unhandled RSP opcode %#x\n", opcode); - target->status = -ECONNRESET; - break; - } - - if (!target->rx_ring[0]) { - target->status = srp_alloc_iu_bufs(target); - if (target->status) - break; - } - - qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); - if (!qp_attr) { - target->status = -ENOMEM; - break; - } - - qp_attr->qp_state = IB_QPS_RTR; - target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); - if (target->status) - break; - - target->status = ib_modify_qp(target->qp, qp_attr, attr_mask); - if (target->status) - break; - - for (i = 0; i < SRP_RQ_SIZE; i++) { - struct srp_iu *iu = target->rx_ring[i]; - target->status = srp_post_recv(target, iu); - if (target->status) - break; - } - if (target->status) - break; - - qp_attr->qp_state = IB_QPS_RTS; - target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); - if (target->status) - break; - - target->status = ib_modify_qp(target->qp, qp_attr, attr_mask); - if (target->status) - break; - - target->status = ib_send_cm_rtu(cm_id, NULL, 0); - if (target->status) - break; - + srp_cm_rep_handler(cm_id, event->private_data, target); break; case IB_CM_REJ_RECEIVED: @@ -1431,8 +1561,6 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) if (comp) complete(&target->done); - kfree(qp_attr); - return 0; } @@ -1658,6 +1786,22 @@ static ssize_t show_local_ib_device(struct device *dev, return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); } +static ssize_t show_cmd_sg_entries(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%u\n", target->cmd_sg_cnt); +} + +static ssize_t show_allow_ext_sg(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); +} + static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); @@ -1668,6 +1812,8 @@ static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); +static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); +static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); static struct device_attribute *srp_host_attrs[] = { &dev_attr_id_ext, @@ -1680,6 +1826,8 @@ static struct device_attribute *srp_host_attrs[] = { &dev_attr_zero_req_lim, &dev_attr_local_ib_port, &dev_attr_local_ib_device, + &dev_attr_cmd_sg_entries, + &dev_attr_allow_ext_sg, NULL }; @@ -1692,6 +1840,7 @@ static struct scsi_host_template srp_template = { .eh_abort_handler = srp_abort, .eh_device_reset_handler = srp_reset_device, .eh_host_reset_handler = srp_reset_host, + .sg_tablesize = SRP_DEF_SG_TABLESIZE, .can_queue = SRP_CMD_SQ_SIZE, .this_id = -1, .cmd_per_lun = SRP_CMD_SQ_SIZE, @@ -1763,6 +1912,9 @@ enum { SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, SRP_OPT_IO_CLASS = 1 << 7, SRP_OPT_INITIATOR_EXT = 1 << 8, + SRP_OPT_CMD_SG_ENTRIES = 1 << 9, + SRP_OPT_ALLOW_EXT_SG = 1 << 10, + SRP_OPT_SG_TABLESIZE = 1 << 11, SRP_OPT_ALL = (SRP_OPT_ID_EXT | SRP_OPT_IOC_GUID | SRP_OPT_DGID | @@ -1780,6 +1932,9 @@ static const match_table_t srp_opt_tokens = { { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, { SRP_OPT_IO_CLASS, "io_class=%x" }, { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, + { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, + { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, + { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, { SRP_OPT_ERR, NULL } }; @@ -1907,6 +2062,31 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) kfree(p); break; + case SRP_OPT_CMD_SG_ENTRIES: + if (match_int(args, &token) || token < 1 || token > 255) { + printk(KERN_WARNING PFX "bad max cmd_sg_entries parameter '%s'\n", p); + goto out; + } + target->cmd_sg_cnt = token; + break; + + case SRP_OPT_ALLOW_EXT_SG: + if (match_int(args, &token)) { + printk(KERN_WARNING PFX "bad allow_ext_sg parameter '%s'\n", p); + goto out; + } + target->allow_ext_sg = !!token; + break; + + case SRP_OPT_SG_TABLESIZE: + if (match_int(args, &token) || token < 1 || + token > SCSI_MAX_SG_CHAIN_SEGMENTS) { + printk(KERN_WARNING PFX "bad max sg_tablesize parameter '%s'\n", p); + goto out; + } + target->sg_tablesize = token; + break; + default: printk(KERN_WARNING PFX "unknown parameter or missing value " "'%s' in target creation request\n", p); @@ -1937,39 +2117,73 @@ static ssize_t srp_create_target(struct device *dev, container_of(dev, struct srp_host, dev); struct Scsi_Host *target_host; struct srp_target_port *target; - int ret; - int i; + struct ib_device *ibdev = host->srp_dev->dev; + dma_addr_t dma_addr; + int i, ret; target_host = scsi_host_alloc(&srp_template, sizeof (struct srp_target_port)); if (!target_host) return -ENOMEM; - target_host->transportt = ib_srp_transport_template; + target_host->transportt = ib_srp_transport_template; target_host->max_lun = SRP_MAX_LUN; target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; target = host_to_target(target_host); - target->io_class = SRP_REV16A_IB_IO_CLASS; - target->scsi_host = target_host; - target->srp_host = host; - target->lkey = host->srp_dev->mr->lkey; - target->rkey = host->srp_dev->mr->rkey; + target->io_class = SRP_REV16A_IB_IO_CLASS; + target->scsi_host = target_host; + target->srp_host = host; + target->lkey = host->srp_dev->mr->lkey; + target->rkey = host->srp_dev->mr->rkey; + target->cmd_sg_cnt = cmd_sg_entries; + target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; + target->allow_ext_sg = allow_ext_sg; + + ret = srp_parse_options(buf, target); + if (ret) + goto err; + + if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && + target->cmd_sg_cnt < target->sg_tablesize) { + printk(KERN_WARNING PFX "No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); + target->sg_tablesize = target->cmd_sg_cnt; + } + + target_host->sg_tablesize = target->sg_tablesize; + target->indirect_size = target->sg_tablesize * + sizeof (struct srp_direct_buf); + target->max_iu_len = sizeof (struct srp_cmd) + + sizeof (struct srp_indirect_buf) + + target->cmd_sg_cnt * sizeof (struct srp_direct_buf); spin_lock_init(&target->lock); INIT_LIST_HEAD(&target->free_tx); INIT_LIST_HEAD(&target->free_reqs); for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { - target->req_ring[i].index = i; - list_add_tail(&target->req_ring[i].list, &target->free_reqs); - } + struct srp_request *req = &target->req_ring[i]; - ret = srp_parse_options(buf, target); - if (ret) - goto err; + req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof (void *), + GFP_KERNEL); + req->map_page = kmalloc(SRP_FMR_SIZE * sizeof (void *), + GFP_KERNEL); + req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); + if (!req->fmr_list || !req->map_page || !req->indirect_desc) + goto err_free_mem; + + dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, + target->indirect_size, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(ibdev, dma_addr)) + goto err_free_mem; + + req->indirect_dma_addr = dma_addr; + req->index = i; + list_add_tail(&req->list, &target->free_reqs); + } - ib_query_gid(host->srp_dev->dev, host->port, 0, &target->path.sgid); + ib_query_gid(ibdev, host->port, 0, &target->path.sgid); shost_printk(KERN_DEBUG, target->scsi_host, PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " @@ -1982,11 +2196,11 @@ static ssize_t srp_create_target(struct device *dev, ret = srp_create_target_ib(target); if (ret) - goto err; + goto err_free_mem; ret = srp_new_cm_id(target); if (ret) - goto err_free; + goto err_free_ib; target->qp_in_error = 0; ret = srp_connect_target(target); @@ -2008,9 +2222,12 @@ err_disconnect: err_cm_id: ib_destroy_cm_id(target->cm_id); -err_free: +err_free_ib: srp_free_target_ib(target); +err_free_mem: + srp_free_req_data(target); + err: scsi_host_put(target_host); @@ -2083,7 +2300,7 @@ static void srp_add_one(struct ib_device *device) struct ib_device_attr *dev_attr; struct ib_fmr_pool_param fmr_param; struct srp_host *host; - int s, e, p; + int max_pages_per_fmr, fmr_page_shift, s, e, p; dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); if (!dev_attr) @@ -2101,12 +2318,13 @@ static void srp_add_one(struct ib_device *device) /* * Use the smallest page size supported by the HCA, down to a - * minimum of 512 bytes (which is the smallest sector that a - * SCSI command will ever carry). + * minimum of 4096 bytes. We're unlikely to build large sglists + * out of smaller entries. */ - srp_dev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1); - srp_dev->fmr_page_size = 1 << srp_dev->fmr_page_shift; - srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1); + fmr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1); + srp_dev->fmr_page_size = 1 << fmr_page_shift; + srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1); + srp_dev->fmr_max_size = srp_dev->fmr_page_size * SRP_FMR_SIZE; INIT_LIST_HEAD(&srp_dev->dev_list); @@ -2122,17 +2340,24 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->mr)) goto err_pd; - memset(&fmr_param, 0, sizeof fmr_param); - fmr_param.pool_size = SRP_FMR_POOL_SIZE; - fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE; - fmr_param.cache = 1; - fmr_param.max_pages_per_fmr = SRP_FMR_SIZE; - fmr_param.page_shift = srp_dev->fmr_page_shift; - fmr_param.access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - - srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param); + for (max_pages_per_fmr = SRP_FMR_SIZE; + max_pages_per_fmr >= SRP_FMR_MIN_SIZE; + max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) { + memset(&fmr_param, 0, sizeof fmr_param); + fmr_param.pool_size = SRP_FMR_POOL_SIZE; + fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE; + fmr_param.cache = 1; + fmr_param.max_pages_per_fmr = max_pages_per_fmr; + fmr_param.page_shift = fmr_page_shift; + fmr_param.access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); + + srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param); + if (!IS_ERR(srp_dev->fmr_pool)) + break; + } + if (IS_ERR(srp_dev->fmr_pool)) srp_dev->fmr_pool = NULL; @@ -2207,6 +2432,7 @@ static void srp_remove_one(struct ib_device *device) srp_disconnect_target(target); ib_destroy_cm_id(target->cm_id); srp_free_target_ib(target); + srp_free_req_data(target); scsi_host_put(target->scsi_host); } @@ -2230,9 +2456,25 @@ static int __init srp_init_module(void) BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *)); - if (srp_sg_tablesize > 255) { - printk(KERN_WARNING PFX "Clamping srp_sg_tablesize to 255\n"); - srp_sg_tablesize = 255; + if (srp_sg_tablesize) { + printk(KERN_WARNING PFX "srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); + if (!cmd_sg_entries) + cmd_sg_entries = srp_sg_tablesize; + } + + if (!cmd_sg_entries) + cmd_sg_entries = SRP_DEF_SG_TABLESIZE; + + if (cmd_sg_entries > 255) { + printk(KERN_WARNING PFX "Clamping cmd_sg_entries to 255\n"); + cmd_sg_entries = 255; + } + + if (!indirect_sg_entries) + indirect_sg_entries = cmd_sg_entries; + else if (indirect_sg_entries < cmd_sg_entries) { + printk(KERN_WARNING PFX "Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", cmd_sg_entries); + indirect_sg_entries = cmd_sg_entries; } ib_srp_transport_template = @@ -2240,11 +2482,6 @@ static int __init srp_init_module(void) if (!ib_srp_transport_template) return -ENOMEM; - srp_template.sg_tablesize = srp_sg_tablesize; - srp_max_iu_len = (sizeof (struct srp_cmd) + - sizeof (struct srp_indirect_buf) + - srp_sg_tablesize * 16); - ret = class_register(&srp_class); if (ret) { printk(KERN_ERR PFX "couldn't register class infiniband_srp\n"); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 9dc6fc3fd894..020caf0c3789 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -69,9 +69,13 @@ enum { SRP_TAG_NO_REQ = ~0U, SRP_TAG_TSK_MGMT = 1U << 31, - SRP_FMR_SIZE = 256, + SRP_FMR_SIZE = 512, + SRP_FMR_MIN_SIZE = 128, SRP_FMR_POOL_SIZE = 1024, - SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4 + SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4, + + SRP_MAP_ALLOW_FMR = 0, + SRP_MAP_NO_FMR = 1, }; enum srp_target_state { @@ -93,9 +97,9 @@ struct srp_device { struct ib_pd *pd; struct ib_mr *mr; struct ib_fmr_pool *fmr_pool; - int fmr_page_shift; - int fmr_page_size; u64 fmr_page_mask; + int fmr_page_size; + int fmr_max_size; }; struct srp_host { @@ -112,7 +116,11 @@ struct srp_request { struct list_head list; struct scsi_cmnd *scmnd; struct srp_iu *cmd; - struct ib_pool_fmr *fmr; + struct ib_pool_fmr **fmr_list; + u64 *map_page; + struct srp_direct_buf *indirect_desc; + dma_addr_t indirect_dma_addr; + short nfmr; short index; }; @@ -130,6 +138,10 @@ struct srp_target_port { u32 lkey; u32 rkey; enum srp_target_state state; + unsigned int max_iu_len; + unsigned int cmd_sg_cnt; + unsigned int indirect_size; + bool allow_ext_sg; /* Everything above this point is used in the hot path of * command processing. Try to keep them packed into cachelines. @@ -144,6 +156,7 @@ struct srp_target_port { struct Scsi_Host *scsi_host; char target_name[32]; unsigned int scsi_id; + unsigned int sg_tablesize; struct ib_sa_path_rec path; __be16 orig_dgid[8]; @@ -179,4 +192,19 @@ struct srp_iu { enum dma_data_direction direction; }; +struct srp_map_state { + struct ib_pool_fmr **next_fmr; + struct srp_direct_buf *desc; + u64 *pages; + dma_addr_t base_dma_addr; + u32 fmr_len; + u32 total_len; + unsigned int npages; + unsigned int nfmr; + unsigned int ndesc; + struct scatterlist *unmapped_sg; + int unmapped_index; + dma_addr_t unmapped_addr; +}; + #endif /* IB_SRP_H */ |