diff options
Diffstat (limited to 'drivers/infiniband/ulp/ipoib')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 1 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 236 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 50 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 22 |
5 files changed, 274 insertions, 39 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 79859c4d43c9..ca2873698d75 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -342,7 +342,6 @@ struct ipoib_dev_priv { u16 pkey; u16 pkey_index; struct ib_pd *pd; - struct ib_mr *mr; struct ib_cq *recv_cq; struct ib_cq *send_cq; struct ib_qp *qp; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index ee39be6ccfb0..c78dc1638030 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -332,7 +332,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev, int i; for (i = 0; i < priv->cm.num_frags; ++i) - sge[i].lkey = priv->mr->lkey; + sge[i].lkey = priv->pd->local_dma_lkey; sge[0].length = IPOIB_CM_HEAD_SIZE; for (i = 1; i < priv->cm.num_frags; ++i) @@ -848,7 +848,7 @@ int ipoib_cm_dev_open(struct net_device *dev) } ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), - 0, NULL); + 0); if (ret) { printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name, IPOIB_CM_IETF_ID | priv->qp->qp_num); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b2943c84a5dd..36536ce5a3e2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -48,6 +48,9 @@ #include <linux/jhash.h> #include <net/arp.h> +#include <net/addrconf.h> +#include <linux/inetdevice.h> +#include <rdma/ib_cache.h> #define DRV_VERSION "1.0.0" @@ -89,13 +92,18 @@ struct workqueue_struct *ipoib_workqueue; struct ib_sa_client ipoib_sa_client; static void ipoib_add_one(struct ib_device *device); -static void ipoib_remove_one(struct ib_device *device); +static void ipoib_remove_one(struct ib_device *device, void *client_data); static void ipoib_neigh_reclaim(struct rcu_head *rp); +static struct net_device *ipoib_get_net_dev_by_params( + struct ib_device *dev, u8 port, u16 pkey, + const union ib_gid *gid, const struct sockaddr *addr, + void *client_data); static struct ib_client ipoib_client = { .name = "ipoib", .add = ipoib_add_one, - .remove = ipoib_remove_one + .remove = ipoib_remove_one, + .get_net_dev_by_params = ipoib_get_net_dev_by_params, }; int ipoib_open(struct net_device *dev) @@ -222,6 +230,225 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } +/* Called with an RCU read lock taken */ +static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr, + struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct in_device *in_dev; + struct sockaddr_in *addr_in = (struct sockaddr_in *)addr; + struct sockaddr_in6 *addr_in6 = (struct sockaddr_in6 *)addr; + __be32 ret_addr; + + switch (addr->sa_family) { + case AF_INET: + in_dev = in_dev_get(dev); + if (!in_dev) + return false; + + ret_addr = inet_confirm_addr(net, in_dev, 0, + addr_in->sin_addr.s_addr, + RT_SCOPE_HOST); + in_dev_put(in_dev); + if (ret_addr) + return true; + + break; + case AF_INET6: + if (IS_ENABLED(CONFIG_IPV6) && + ipv6_chk_addr(net, &addr_in6->sin6_addr, dev, 1)) + return true; + + break; + } + return false; +} + +/** + * Find the master net_device on top of the given net_device. + * @dev: base IPoIB net_device + * + * Returns the master net_device with a reference held, or the same net_device + * if no master exists. + */ +static struct net_device *ipoib_get_master_net_dev(struct net_device *dev) +{ + struct net_device *master; + + rcu_read_lock(); + master = netdev_master_upper_dev_get_rcu(dev); + if (master) + dev_hold(master); + rcu_read_unlock(); + + if (master) + return master; + + dev_hold(dev); + return dev; +} + +/** + * Find a net_device matching the given address, which is an upper device of + * the given net_device. + * @addr: IP address to look for. + * @dev: base IPoIB net_device + * + * If found, returns the net_device with a reference held. Otherwise return + * NULL. + */ +static struct net_device *ipoib_get_net_dev_match_addr( + const struct sockaddr *addr, struct net_device *dev) +{ + struct net_device *upper, + *result = NULL; + struct list_head *iter; + + rcu_read_lock(); + if (ipoib_is_dev_match_addr_rcu(addr, dev)) { + dev_hold(dev); + result = dev; + goto out; + } + + netdev_for_each_all_upper_dev_rcu(dev, upper, iter) { + if (ipoib_is_dev_match_addr_rcu(addr, upper)) { + dev_hold(upper); + result = upper; + break; + } + } +out: + rcu_read_unlock(); + return result; +} + +/* returns the number of IPoIB netdevs on top a given ipoib device matching a + * pkey_index and address, if one exists. + * + * @found_net_dev: contains a matching net_device if the return value >= 1, + * with a reference held. */ +static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv, + const union ib_gid *gid, + u16 pkey_index, + const struct sockaddr *addr, + int nesting, + struct net_device **found_net_dev) +{ + struct ipoib_dev_priv *child_priv; + struct net_device *net_dev = NULL; + int matches = 0; + + if (priv->pkey_index == pkey_index && + (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) { + if (!addr) { + net_dev = ipoib_get_master_net_dev(priv->dev); + } else { + /* Verify the net_device matches the IP address, as + * IPoIB child devices currently share a GID. */ + net_dev = ipoib_get_net_dev_match_addr(addr, priv->dev); + } + if (net_dev) { + if (!*found_net_dev) + *found_net_dev = net_dev; + else + dev_put(net_dev); + ++matches; + } + } + + /* Check child interfaces */ + down_read_nested(&priv->vlan_rwsem, nesting); + list_for_each_entry(child_priv, &priv->child_intfs, list) { + matches += ipoib_match_gid_pkey_addr(child_priv, gid, + pkey_index, addr, + nesting + 1, + found_net_dev); + if (matches > 1) + break; + } + up_read(&priv->vlan_rwsem); + + return matches; +} + +/* Returns the number of matching net_devs found (between 0 and 2). Also + * return the matching net_device in the @net_dev parameter, holding a + * reference to the net_device, if the number of matches >= 1 */ +static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port, + u16 pkey_index, + const union ib_gid *gid, + const struct sockaddr *addr, + struct net_device **net_dev) +{ + struct ipoib_dev_priv *priv; + int matches = 0; + + *net_dev = NULL; + + list_for_each_entry(priv, dev_list, list) { + if (priv->port != port) + continue; + + matches += ipoib_match_gid_pkey_addr(priv, gid, pkey_index, + addr, 0, net_dev); + if (matches > 1) + break; + } + + return matches; +} + +static struct net_device *ipoib_get_net_dev_by_params( + struct ib_device *dev, u8 port, u16 pkey, + const union ib_gid *gid, const struct sockaddr *addr, + void *client_data) +{ + struct net_device *net_dev; + struct list_head *dev_list = client_data; + u16 pkey_index; + int matches; + int ret; + + if (!rdma_protocol_ib(dev, port)) + return NULL; + + ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index); + if (ret) + return NULL; + + if (!dev_list) + return NULL; + + /* See if we can find a unique device matching the L2 parameters */ + matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, + gid, NULL, &net_dev); + + switch (matches) { + case 0: + return NULL; + case 1: + return net_dev; + } + + dev_put(net_dev); + + /* Couldn't find a unique device with L2 parameters only. Use L3 + * address to uniquely match the net device */ + matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index, + gid, addr, &net_dev); + switch (matches) { + case 0: + return NULL; + default: + dev_warn_ratelimited(&dev->dev, + "duplicate IP address detected\n"); + /* Fall through */ + case 1: + return net_dev; + } +} + int ipoib_set_mode(struct net_device *dev, const char *buf) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -1715,12 +1942,11 @@ static void ipoib_add_one(struct ib_device *device) ib_set_client_data(device, &ipoib_client, dev_list); } -static void ipoib_remove_one(struct ib_device *device) +static void ipoib_remove_one(struct ib_device *device, void *client_data) { struct ipoib_dev_priv *priv, *tmp; - struct list_head *dev_list; + struct list_head *dev_list = client_data; - dev_list = ib_get_client_data(device, &ipoib_client); if (!dev_list) return; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 0d23e0568deb..09a1748f9d13 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -393,8 +393,13 @@ static int ipoib_mcast_join_complete(int status, goto out_locked; } } else { - if (mcast->logcount++ < 20) { - if (status == -ETIMEDOUT || status == -EAGAIN) { + bool silent_fail = + test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && + status == -EINVAL; + + if (mcast->logcount < 20) { + if (status == -ETIMEDOUT || status == -EAGAIN || + silent_fail) { ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n", test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "", mcast->mcmember.mgid.raw, status); @@ -403,6 +408,9 @@ static int ipoib_mcast_join_complete(int status, test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "", mcast->mcmember.mgid.raw, status); } + + if (!silent_fail) + mcast->logcount++; } if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && @@ -448,8 +456,7 @@ out_locked: return status; } -static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, - int create) +static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_sa_multicast *multicast; @@ -471,7 +478,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE; - if (create) { + if (mcast != priv->broadcast) { + /* + * RFC 4391: + * The MGID MUST use the same P_Key, Q_Key, SL, MTU, + * and HopLimit as those used in the broadcast-GID. The rest + * of attributes SHOULD follow the values used in the + * broadcast-GID as well. + */ comp_mask |= IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_MTU_SELECTOR | @@ -492,6 +506,22 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, rec.sl = priv->broadcast->mcmember.sl; rec.flow_label = priv->broadcast->mcmember.flow_label; rec.hop_limit = priv->broadcast->mcmember.hop_limit; + + /* + * Historically Linux IPoIB has never properly supported SEND + * ONLY join. It emulated it by not providing all the required + * attributes, which is enough to prevent group creation and + * detect if there are full members or not. A major problem + * with supporting SEND ONLY is detecting when the group is + * auto-destroyed as IPoIB will cache the MLID.. + */ +#if 1 + if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) + comp_mask &= ~IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; +#else + if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) + rec.join_state = 4; +#endif } multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, @@ -517,7 +547,6 @@ void ipoib_mcast_join_task(struct work_struct *work) struct ib_port_attr port_attr; unsigned long delay_until = 0; struct ipoib_mcast *mcast = NULL; - int create = 1; if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) return; @@ -566,7 +595,6 @@ void ipoib_mcast_join_task(struct work_struct *work) if (IS_ERR_OR_NULL(priv->broadcast->mc) && !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) { mcast = priv->broadcast; - create = 0; if (mcast->backoff > 1 && time_before(jiffies, mcast->delay_until)) { delay_until = mcast->delay_until; @@ -590,12 +618,8 @@ void ipoib_mcast_join_task(struct work_struct *work) /* Found the next unjoined group */ init_completion(&mcast->done); set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); - if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) - create = 0; - else - create = 1; spin_unlock_irq(&priv->lock); - ipoib_mcast_join(dev, mcast, create); + ipoib_mcast_join(dev, mcast); spin_lock_irq(&priv->lock); } else if (!delay_until || time_before(mcast->delay_until, delay_until)) @@ -618,7 +642,7 @@ out: } spin_unlock_irq(&priv->lock); if (mcast) - ipoib_mcast_join(dev, mcast, create); + ipoib_mcast_join(dev, mcast); } int ipoib_mcast_start_thread(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 851c8219d501..78845b6e8b81 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -152,12 +152,6 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) return -ENODEV; } - priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(priv->mr)) { - printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name); - goto out_free_pd; - } - /* * the various IPoIB tasks assume they will never race against * themselves, so always use a single thread workqueue @@ -165,7 +159,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->wq = create_singlethread_workqueue("ipoib_wq"); if (!priv->wq) { printk(KERN_WARNING "ipoib: failed to allocate device WQ\n"); - goto out_free_mr; + goto out_free_pd; } size = ipoib_recvq_size + 1; @@ -225,13 +219,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff; for (i = 0; i < MAX_SKB_FRAGS + 1; ++i) - priv->tx_sge[i].lkey = priv->mr->lkey; + priv->tx_sge[i].lkey = priv->pd->local_dma_lkey; priv->tx_wr.opcode = IB_WR_SEND; priv->tx_wr.sg_list = priv->tx_sge; priv->tx_wr.send_flags = IB_SEND_SIGNALED; - priv->rx_sge[0].lkey = priv->mr->lkey; + priv->rx_sge[0].lkey = priv->pd->local_dma_lkey; priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); priv->rx_wr.num_sge = 1; @@ -254,9 +248,6 @@ out_free_wq: destroy_workqueue(priv->wq); priv->wq = NULL; -out_free_mr: - ib_dereg_mr(priv->mr); - out_free_pd: ib_dealloc_pd(priv->pd); @@ -289,12 +280,7 @@ void ipoib_transport_dev_cleanup(struct net_device *dev) priv->wq = NULL; } - if (ib_dereg_mr(priv->mr)) - ipoib_warn(priv, "ib_dereg_mr failed\n"); - - if (ib_dealloc_pd(priv->pd)) - ipoib_warn(priv, "ib_dealloc_pd failed\n"); - + ib_dealloc_pd(priv->pd); } void ipoib_event(struct ib_event_handler *handler, |