From 6266ed6e4164466177238b11ecb825a3a108a3e4 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:55:22 -0800 Subject: RDMA/cma: Replace net_device pointer with index Provide the device interface when resolving route information to ensure that the correct outbound device is used. This will also simplify processing of sin6_scope_id for IPv6 support. Based on work from: David Wilder Jason Gunthorpe Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/ib_addr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/rdma/ib_addr.h') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 483057b2f4b4..27f17cc2c919 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -61,7 +61,7 @@ struct rdma_dev_addr { unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; enum rdma_node_type dev_type; - struct net_device *src_dev; + int bound_dev_if; }; /** -- cgit v1.2.3 From c4315d85f9b76834289fd503796c01b8311c4b84 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:57:18 -0800 Subject: IB/addr: Store net_device type instead of translating to RDMA transport The struct rdma_dev_addr stores net_device address information: the source device address, destination hardware address, and broadcast address. For consistency, store the net_device type rather than converting it to the rdma_node_type. The type indicates the format of the various hardware addresses, which is what we're concerned with, and not the RDMA node type that the address may map to. 
Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 13 +------------ drivers/infiniband/core/cma.c | 6 +++--- include/rdma/ib_addr.h | 3 ++- 3 files changed, 6 insertions(+), 16 deletions(-) (limited to 'include/rdma/ib_addr.h') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index b59ba7ccef0e..de5fe161a1b9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -92,17 +91,7 @@ EXPORT_SYMBOL(rdma_addr_unregister_client); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr) { - switch (dev->type) { - case ARPHRD_INFINIBAND: - dev_addr->dev_type = RDMA_NODE_IB_CA; - break; - case ARPHRD_ETHER: - dev_addr->dev_type = RDMA_NODE_RNIC; - break; - default: - return -EADDRNOTAVAIL; - } - + dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 699ad12b3a2f..b305b5c17f8d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -330,11 +330,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) union ib_gid gid; int ret = -ENODEV; - switch (rdma_node_get_transport(dev_addr->dev_type)) { - case RDMA_TRANSPORT_IB: + switch (dev_addr->dev_type) { + case ARPHRD_INFINIBAND: ib_addr_get_sgid(dev_addr, &gid); break; - case RDMA_TRANSPORT_IWARP: + case ARPHRD_ETHER: iw_addr_get_sgid(dev_addr, &gid); break; default: diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 27f17cc2c919..3a39c55d2b9a 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -60,7 +61,7 @@ struct rdma_dev_addr { unsigned char 
src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; - enum rdma_node_type dev_type; + unsigned short dev_type; int bound_dev_if; }; -- cgit v1.2.3 From 6f8372b69c3198e06cecb1df2cb9682d0c55e657 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 13:26:06 -0800 Subject: RDMA/cm: fix loopback address support The RDMA CM is intended to support the use of a loopback address when establishing a connection; however, the behavior of the CM when loopback addresses are used is confusing and does not always work, depending on whether loopback was specified by the server, the client, or both. The defined behavior of rdma_bind_addr is to associate an RDMA device with an rdma_cm_id, as long as the user specified a non-zero address. (i.e., they weren't just trying to reserve a port) Currently, if the loopback address is passed to rdma_bind_addr, no device is associated with the rdma_cm_id. Fix this. If a loopback address is specified by the client as the destination address for a connection, it will fail to establish a connection. This is true even if the server is listening across all addresses or on the loopback address itself. The issue is that the server tries to translate the IP address carried in the REQ message to a local net_device address, which fails. The translation is not needed in this case, since the REQ carries the actual HW address that should be used. Finally, clean up loopback support to be more transport neutral. Replace separate calls to get/set the sgid and dgid from the device address to a single call that behaves correctly depending on the format of the device address. And support both IPv4 and IPv6 address formats. 
Signed-off-by: Sean Hefty [ Fixed RDS build by s/ib_addr_get/rdma_addr_get/ - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 77 +++++++++++++++++++++++------------------- drivers/infiniband/core/ucma.c | 8 ++--- include/rdma/ib_addr.h | 31 ++++++----------- net/rds/ib.c | 4 +-- net/rds/iw.c | 4 +-- 5 files changed, 61 insertions(+), 63 deletions(-) (limited to 'include/rdma/ib_addr.h') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b305b5c17f8d..38867a46d39e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -330,17 +330,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) union ib_gid gid; int ret = -ENODEV; - switch (dev_addr->dev_type) { - case ARPHRD_INFINIBAND: - ib_addr_get_sgid(dev_addr, &gid); - break; - case ARPHRD_ETHER: - iw_addr_get_sgid(dev_addr, &gid); - break; - default: - return -ENODEV; - } - + rdma_addr_get_sgid(dev_addr, &gid); list_for_each_entry(cma_dev, &dev_list, list) { ret = ib_find_cached_gid(cma_dev->device, &gid, &id_priv->id.port_num, NULL); @@ -1032,11 +1022,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; - ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); - ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); - if (ret) - goto destroy_id; + if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { + rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; + rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); + ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey); + } else { + ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, + &rt->addr.dev_addr); + if (ret) + goto destroy_id; + } + rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ 
-1071,10 +1067,12 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); - ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); - if (ret) - goto err; + if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { + ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, + &id->route.addr.dev_addr); + if (ret) + goto err; + } id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = CMA_CONNECT; @@ -1565,8 +1563,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct sockaddr_in6 *sin6; memset(&path_rec, 0, sizeof path_rec); - ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); - ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); + rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); + rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; @@ -1781,7 +1779,11 @@ port_found: if (ret) goto out; - ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); + id_priv->id.route.addr.dev_addr.dev_type = + (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ? 
+ ARPHRD_INFINIBAND : ARPHRD_ETHER; + + rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); @@ -1839,7 +1841,7 @@ out: static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; - struct sockaddr_in *src_in, *dst_in; + struct sockaddr *src, *dst; union ib_gid gid; int ret; @@ -1853,14 +1855,19 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) goto err; } - ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); - ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); + rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); - if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) { - src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr; - dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr; - src_in->sin_family = dst_in->sin_family; - src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr; + src = (struct sockaddr *) &id_priv->id.route.addr.src_addr; + if (cma_zero_addr(src)) { + dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; + if ((src->sa_family = dst->sa_family) == AF_INET) { + ((struct sockaddr_in *) src)->sin_addr.s_addr = + ((struct sockaddr_in *) dst)->sin_addr.s_addr; + } else { + ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr, + &((struct sockaddr_in6 *) dst)->sin6_addr); + } } work->id = id_priv; @@ -2089,7 +2096,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) return -EINVAL; - if (!cma_any_addr(addr)) { + if (cma_loopback_addr(addr)) { + ret = cma_bind_loopback(id_priv); + } else if (!cma_zero_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); if (ret) goto err1; @@ -2108,7 +2117,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) return 
0; err2: - if (!cma_any_addr(addr)) { + if (id_priv->cma_dev) { mutex_lock(&lock); cma_detach_from_dev(id_priv); mutex_unlock(&lock); @@ -2721,7 +2730,7 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); if (id_priv->id.ps == RDMA_PS_UDP) rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); - ib_addr_get_sgid(dev_addr, &rec.port_gid); + rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index f1cbd26a9de0..b2e16c332d5b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -563,10 +563,10 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, switch (route->num_paths) { case 0: dev_addr = &route->addr.dev_addr; - ib_addr_get_dgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].dgid); - ib_addr_get_sgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].sgid); + rdma_addr_get_dgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].dgid); + rdma_addr_get_sgid(dev_addr, + (union ib_gid *) &resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); break; case 2: diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 3a39c55d2b9a..fa0d52b8e622 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -122,40 +122,29 @@ static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } -static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { - memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid); + return dev_addr->dev_type == ARPHRD_INFINIBAND ? 
4 : 0; } -static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid); + memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } -static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid); + memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } -static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid); + memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } -static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) -{ - memcpy(gid, dev_addr->src_dev_addr, sizeof *gid); -} - -static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid); + memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } #endif /* IB_ADDR_H */ diff --git a/net/rds/ib.c b/net/rds/ib.c index 536ebe5d3f6b..3b8992361042 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -182,8 +182,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, ic = conn->c_transport_data; dev_addr = &ic->i_cm_id->route.addr.dev_addr; - ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); - ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + rdma_addr_get_dgid(dev_addr, (union ib_gid *) 
&iinfo->dst_gid); rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); iinfo->max_send_wr = ic->i_send_ring.w_nr; diff --git a/net/rds/iw.c b/net/rds/iw.c index db224f7c2937..b28fa8525b24 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -184,8 +184,8 @@ static int rds_iw_conn_info_visitor(struct rds_connection *conn, ic = conn->c_transport_data; dev_addr = &ic->i_cm_id->route.addr.dev_addr; - ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); - ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); iinfo->max_send_wr = ic->i_send_ring.w_nr; -- cgit v1.2.3 From 3c86aa70bf677a31b71c8292e349242e26cbc743 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 13 Oct 2010 21:26:51 +0200 Subject: RDMA/cm: Add RDMA CM support for IBoE devices Add support for IBoE device binding and IP --> GID resolution. Path resolving and multicast joining are implemented within cma.c by filling in the responses and running callbacks in the CMA work queue. IP --> GID resolution always yields IPv6 link local addresses; remote GIDs are derived from the destination MAC address of the remote port. Multicast GIDs are always mapped to multicast MACs as is done in IPv6. (IPv4 multicast is enabled by translating IPv4 multicast addresses to IPv6 multicast as described in .) Some helper functions are added to ib_addr.h. 
Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 309 ++++++++++++++++++++++++++++++++++--- drivers/infiniband/core/sa_query.c | 5 +- drivers/infiniband/core/ucma.c | 45 +++++- include/rdma/ib_addr.h | 99 +++++++++++- 4 files changed, 431 insertions(+), 27 deletions(-) (limited to 'include/rdma/ib_addr.h') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index b930b8110a63..f61bc0738488 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -59,6 +59,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) +#define CMA_IBOE_PACKET_LIFETIME 18 static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); @@ -157,6 +158,7 @@ struct cma_multicast { struct list_head list; void *context; struct sockaddr_storage addr; + struct kref mcref; }; struct cma_work { @@ -173,6 +175,12 @@ struct cma_ndev_work { struct rdma_cm_event event; }; +struct iboe_mcast_work { + struct work_struct work; + struct rdma_id_private *id; + struct cma_multicast *mc; +}; + union cma_ip_addr { struct in6_addr ip6; struct { @@ -281,6 +289,8 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, atomic_inc(&cma_dev->refcount); id_priv->cma_dev = cma_dev; id_priv->id.device = cma_dev->device; + id_priv->id.route.addr.dev_addr.transport = + rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); } @@ -290,6 +300,14 @@ static inline void cma_deref_dev(struct cma_device *cma_dev) complete(&cma_dev->comp); } +static inline void release_mc(struct kref *kref) +{ + struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); + + kfree(mc->multicast.ib); + kfree(mc); +} + static void cma_detach_from_dev(struct rdma_id_private *id_priv) { list_del(&id_priv->list); @@ -323,22 +341,63 @@ static int 
cma_set_qkey(struct rdma_id_private *id_priv) return ret; } +static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num) +{ + int i; + int err; + struct ib_port_attr props; + union ib_gid tmp; + + err = ib_query_port(device, port_num, &props); + if (err) + return 1; + + for (i = 0; i < props.gid_tbl_len; ++i) { + err = ib_query_gid(device, port_num, i, &tmp); + if (err) + return 1; + if (!memcmp(&tmp, gid, sizeof tmp)) + return 0; + } + + return -EAGAIN; +} + static int cma_acquire_dev(struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; - union ib_gid gid; + union ib_gid gid, iboe_gid; int ret = -ENODEV; + u8 port; + enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? + IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; - rdma_addr_get_sgid(dev_addr, &gid); + iboe_addr_get_sgid(dev_addr, &iboe_gid); + memcpy(&gid, dev_addr->src_dev_addr + + rdma_addr_gid_offset(dev_addr), sizeof gid); list_for_each_entry(cma_dev, &dev_list, list) { - ret = ib_find_cached_gid(cma_dev->device, &gid, - &id_priv->id.port_num, NULL); - if (!ret) { - cma_attach_to_dev(id_priv, cma_dev); - break; + for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { + if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { + if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && + rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) + ret = find_gid_port(cma_dev->device, &iboe_gid, port); + else + ret = find_gid_port(cma_dev->device, &gid, port); + + if (!ret) { + id_priv->id.port_num = port; + goto out; + } else if (ret == 1) + break; + } } } + +out: + if (!ret) + cma_attach_to_dev(id_priv, cma_dev); + return ret; } @@ -556,10 +615,16 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int ret; + u16 pkey; + + if 
(rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) == + IB_LINK_LAYER_INFINIBAND) + pkey = ib_addr_get_pkey(dev_addr); + else + pkey = 0xffff; ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, - ib_addr_get_pkey(dev_addr), - &qp_attr->pkey_index); + pkey, &qp_attr->pkey_index); if (ret) return ret; @@ -737,8 +802,8 @@ static inline int cma_user_data_offset(enum rdma_port_space ps) static void cma_cancel_route(struct rdma_id_private *id_priv) { - switch (rdma_node_get_transport(id_priv->id.device->node_type)) { - case RDMA_TRANSPORT_IB: + switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) { + case IB_LINK_LAYER_INFINIBAND: if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); break; @@ -816,8 +881,17 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) mc = container_of(id_priv->mc_list.next, struct cma_multicast, list); list_del(&mc->list); - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); + switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + break; + case IB_LINK_LAYER_ETHERNET: + kref_put(&mc->mcref, release_mc); + break; + default: + break; + } } } @@ -833,7 +907,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) mutex_lock(&lock); if (id_priv->cma_dev) { mutex_unlock(&lock); - switch (rdma_node_get_transport(id->device->node_type)) { + switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) ib_destroy_cm_id(id_priv->cm_id.ib); @@ -1708,6 +1782,77 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) return 0; } +static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) +{ + struct rdma_route *route = &id_priv->id.route; + struct rdma_addr *addr = &route->addr; + struct cma_work *work; + int ret; + struct sockaddr_in 
*src_addr = (struct sockaddr_in *)&route->addr.src_addr; + struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr; + struct net_device *ndev = NULL; + + if (src_addr->sin_family != dst_addr->sin_family) + return -EINVAL; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + + route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); + if (!route->path_rec) { + ret = -ENOMEM; + goto err1; + } + + route->num_paths = 1; + + iboe_mac_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr); + iboe_mac_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr); + + route->path_rec->hop_limit = 1; + route->path_rec->reversible = 1; + route->path_rec->pkey = cpu_to_be16(0xffff); + route->path_rec->mtu_selector = IB_SA_EQ; + + if (addr->dev_addr.bound_dev_if) + ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); + if (!ndev) { + ret = -ENODEV; + goto err2; + } + + route->path_rec->mtu = iboe_get_mtu(ndev->mtu); + route->path_rec->rate_selector = IB_SA_EQ; + route->path_rec->rate = iboe_get_rate(ndev); + dev_put(ndev); + route->path_rec->packet_life_time_selector = IB_SA_EQ; + route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; + if (!route->path_rec->mtu) { + ret = -EINVAL; + goto err2; + } + + work->old_state = CMA_ROUTE_QUERY; + work->new_state = CMA_ROUTE_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + work->event.status = 0; + + queue_work(cma_wq, &work->work); + + return 0; + +err2: + kfree(route->path_rec); + route->path_rec = NULL; +err1: + kfree(work); + return ret; +} + int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; @@ -1720,7 +1865,16 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) atomic_inc(&id_priv->refcount); switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - ret = cma_resolve_ib_route(id_priv, 
timeout_ms); + switch (rdma_port_get_link_layer(id->device, id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ret = cma_resolve_ib_route(id_priv, timeout_ms); + break; + case IB_LINK_LAYER_ETHERNET: + ret = cma_resolve_iboe_route(id_priv); + break; + default: + ret = -ENOSYS; + } break; case RDMA_TRANSPORT_IWARP: ret = cma_resolve_iw_route(id_priv, timeout_ms); @@ -1773,7 +1927,7 @@ port_found: goto out; id_priv->id.route.addr.dev_addr.dev_type = - (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ? + (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ? ARPHRD_INFINIBAND : ARPHRD_ETHER; rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); @@ -2758,6 +2912,102 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, return 0; } +static void iboe_mcast_work_handler(struct work_struct *work) +{ + struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); + struct cma_multicast *mc = mw->mc; + struct ib_sa_multicast *m = mc->multicast.ib; + + mc->multicast.ib->context = mc; + cma_ib_mc_handler(0, m); + kref_put(&mc->mcref, release_mc); + kfree(mw); +} + +static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; + + if (cma_any_addr(addr)) { + memset(mgid, 0, sizeof *mgid); + } else if (addr->sa_family == AF_INET6) { + memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else { + mgid->raw[0] = 0xff; + mgid->raw[1] = 0x0e; + mgid->raw[2] = 0; + mgid->raw[3] = 0; + mgid->raw[4] = 0; + mgid->raw[5] = 0; + mgid->raw[6] = 0; + mgid->raw[7] = 0; + mgid->raw[8] = 0; + mgid->raw[9] = 0; + mgid->raw[10] = 0xff; + mgid->raw[11] = 0xff; + *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; + } +} + +static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, + struct cma_multicast *mc) +{ + struct iboe_mcast_work *work; + struct rdma_dev_addr *dev_addr = 
&id_priv->id.route.addr.dev_addr; + int err; + struct sockaddr *addr = (struct sockaddr *)&mc->addr; + struct net_device *ndev = NULL; + + if (cma_zero_addr((struct sockaddr *)&mc->addr)) + return -EINVAL; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); + if (!mc->multicast.ib) { + err = -ENOMEM; + goto out1; + } + + cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); + + mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); + if (id_priv->id.ps == RDMA_PS_UDP) + mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (!ndev) { + err = -ENODEV; + goto out2; + } + mc->multicast.ib->rec.rate = iboe_get_rate(ndev); + mc->multicast.ib->rec.hop_limit = 1; + mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu); + dev_put(ndev); + if (!mc->multicast.ib->rec.mtu) { + err = -EINVAL; + goto out2; + } + iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid); + work->id = id_priv; + work->mc = mc; + INIT_WORK(&work->work, iboe_mcast_work_handler); + kref_get(&mc->mcref); + queue_work(cma_wq, &work->work); + + return 0; + +out2: + kfree(mc->multicast.ib); +out1: + kfree(work); + return err; +} + int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, void *context) { @@ -2784,7 +3034,17 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - ret = cma_join_ib_multicast(id_priv, mc); + switch (rdma_port_get_link_layer(id->device, id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ret = cma_join_ib_multicast(id_priv, mc); + break; + case IB_LINK_LAYER_ETHERNET: + kref_init(&mc->mcref); + ret = cma_iboe_join_multicast(id_priv, mc); + break; + default: + ret = -EINVAL; + } break; default: ret = -ENOSYS; @@ -2817,8 +3077,19 @@ void rdma_leave_multicast(struct 
rdma_cm_id *id, struct sockaddr *addr) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, mc->multicast.ib->rec.mlid); - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); + if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { + switch (rdma_port_get_link_layer(id->device, id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ib_sa_free_multicast(mc->multicast.ib); + kfree(mc); + break; + case IB_LINK_LAYER_ETHERNET: + kref_put(&mc->mcref, release_mc); + break; + default: + break; + } + } return; } } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 27674c790a73..91a660310b7c 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -496,6 +496,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, { int ret; u16 gid_index; + int force_grh; memset(ah_attr, 0, sizeof *ah_attr); ah_attr->dlid = be16_to_cpu(rec->dlid); @@ -505,7 +506,9 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->port_num = port_num; ah_attr->static_rate = rec->rate; - if (rec->hop_limit > 1) { + force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET; + + if (rec->hop_limit > 1 || force_grh) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->dgid; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ac7edc24165c..3d3c9264c450 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -583,6 +583,34 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, } } +static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, + struct rdma_route *route) +{ + struct rdma_dev_addr *dev_addr; + + resp->num_paths = route->num_paths; + switch (route->num_paths) { + case 0: + dev_addr = &route->addr.dev_addr; + iboe_mac_to_ll((union ib_gid *) &resp->ib_route[0].dgid, + dev_addr->dst_dev_addr); + iboe_addr_get_sgid(dev_addr, + (union ib_gid *) 
&resp->ib_route[0].sgid); + resp->ib_route[0].pkey = cpu_to_be16(0xffff); + break; + case 2: + ib_copy_path_rec_to_user(&resp->ib_route[1], + &route->path_rec[1]); + /* fall through */ + case 1: + ib_copy_path_rec_to_user(&resp->ib_route[0], + &route->path_rec[0]); + break; + default: + break; + } +} + static ssize_t ucma_query_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) @@ -617,12 +645,17 @@ static ssize_t ucma_query_route(struct ucma_file *file, resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.port_num = ctx->cm_id->port_num; - switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) { - case RDMA_TRANSPORT_IB: - ucma_copy_ib_route(&resp, &ctx->cm_id->route); - break; - default: - break; + if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) { + switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) { + case IB_LINK_LAYER_INFINIBAND: + ucma_copy_ib_route(&resp, &ctx->cm_id->route); + break; + case IB_LINK_LAYER_ETHERNET: + ucma_copy_iboe_route(&resp, &ctx->cm_id->route); + break; + default: + break; + } } out: diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index fa0d52b8e622..904ffa92fc93 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -40,6 +40,7 @@ #include #include #include +#include struct rdma_addr_client { atomic_t refcount; @@ -63,6 +64,7 @@ struct rdma_dev_addr { unsigned char broadcast[MAX_ADDR_LEN]; unsigned short dev_type; int bound_dev_if; + enum rdma_transport_type transport; }; /** @@ -127,9 +129,31 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) return dev_addr->dev_type == ARPHRD_INFINIBAND ? 
4 : 0; } +static inline void iboe_mac_to_ll(union ib_gid *gid, u8 *mac) +{ + memset(gid->raw, 0, 16); + *((__be32 *) gid->raw) = cpu_to_be32(0xfe800000); + gid->raw[12] = 0xfe; + gid->raw[11] = 0xff; + memcpy(gid->raw + 13, mac + 3, 3); + memcpy(gid->raw + 8, mac, 3); + gid->raw[8] ^= 2; +} + +static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, + union ib_gid *gid) +{ + iboe_mac_to_ll(gid, dev_addr->src_dev_addr); +} + static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); + if (dev_addr->transport == RDMA_TRANSPORT_IB && + dev_addr->dev_type != ARPHRD_INFINIBAND) + iboe_addr_get_sgid(dev_addr, gid); + else + memcpy(gid, dev_addr->src_dev_addr + + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) @@ -147,4 +171,77 @@ static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_g memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } +static inline enum ib_mtu iboe_get_mtu(int mtu) +{ + /* + * reduce IB headers from effective IBoE MTU. 
28 stands for + * atomic header which is the biggest possible header after BTH + */ + mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; + + if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) + return IB_MTU_4096; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048)) + return IB_MTU_2048; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024)) + return IB_MTU_1024; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512)) + return IB_MTU_512; + else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256)) + return IB_MTU_256; + else + return 0; +} + +static inline int iboe_get_rate(struct net_device *dev) +{ + struct ethtool_cmd cmd; + + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings || + dev->ethtool_ops->get_settings(dev, &cmd)) + return IB_RATE_PORT_CURRENT; + + if (cmd.speed >= 40000) + return IB_RATE_40_GBPS; + else if (cmd.speed >= 30000) + return IB_RATE_30_GBPS; + else if (cmd.speed >= 20000) + return IB_RATE_20_GBPS; + else if (cmd.speed >= 10000) + return IB_RATE_10_GBPS; + else + return IB_RATE_PORT_CURRENT; +} + +static inline int rdma_link_local_addr(struct in6_addr *addr) +{ + if (addr->s6_addr32[0] == htonl(0xfe800000) && + addr->s6_addr32[1] == 0) + return 1; + + return 0; +} + +static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) +{ + memcpy(mac, &addr->s6_addr[8], 3); + memcpy(mac + 3, &addr->s6_addr[13], 3); + mac[0] ^= 2; +} + +static inline int rdma_is_multicast_addr(struct in6_addr *addr) +{ + return addr->s6_addr[0] == 0xff; +} + +static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) +{ + int i; + + mac[0] = 0x33; + mac[1] = 0x33; + for (i = 2; i < 6; ++i) + mac[i] = addr->s6_addr[i + 10]; +} + #endif /* IB_ADDR_H */ -- cgit v1.2.3 From af7bd463761c6abd8ca8d831f9cc0ac19f3b7d4b Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 26 Aug 2010 17:18:59 +0300 Subject: IB/core: Add VLAN support for IBoE Add 802.1q VLAN support to IBoE. 
The VLAN tag is encoded within the GID derived from a link local address in the following way: GID[11] and GID[12] contain the VLAN ID when the GID contains a VLAN. The 3-bit user priority field of the packets is identical to the 3 bits of the SL. In case of rdma_cm apps, the TOS field is used to generate the SL field by doing a shift right of 5 bits, effectively taking the 3 MS bits of the TOS field. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 20 +++++++++------- drivers/infiniband/core/ucma.c | 12 ++++++++-- drivers/infiniband/core/ud_header.c | 23 ++++++++++++++++++ drivers/infiniband/hw/mlx4/qp.c | 2 +- drivers/infiniband/hw/mthca/mthca_qp.c | 2 +- include/rdma/ib_addr.h | 43 ++++++++++++++++++++++++++++++---- include/rdma/ib_pack.h | 9 +++++++ 7 files changed, 95 insertions(+), 16 deletions(-) (limited to 'include/rdma/ib_addr.h') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f61bc0738488..6884da24fde1 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1791,6 +1791,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr; struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr; struct net_device *ndev = NULL; + u16 vid; if (src_addr->sin_family != dst_addr->sin_family) return -EINVAL; @@ -1810,14 +1811,6 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->num_paths = 1; - iboe_mac_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr); - iboe_mac_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr); - - route->path_rec->hop_limit = 1; - route->path_rec->reversible = 1; - route->path_rec->pkey = cpu_to_be16(0xffff); - route->path_rec->mtu_selector = IB_SA_EQ; - if (addr->dev_addr.bound_dev_if) ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); if (!ndev) { @@ -1825,6 +1818,17 @@ static int 
cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err2; } + vid = rdma_vlan_dev_vlan_id(ndev); + + iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid); + iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid); + + route->path_rec->hop_limit = 1; + route->path_rec->reversible = 1; + route->path_rec->pkey = cpu_to_be16(0xffff); + route->path_rec->mtu_selector = IB_SA_EQ; + route->path_rec->sl = id_priv->tos >> 5; + route->path_rec->mtu = iboe_get_mtu(ndev->mtu); route->path_rec->rate_selector = IB_SA_EQ; route->path_rec->rate = iboe_get_rate(ndev); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 3d3c9264c450..357a766bd220 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -587,13 +587,21 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { struct rdma_dev_addr *dev_addr; + struct net_device *dev; + u16 vid = 0; resp->num_paths = route->num_paths; switch (route->num_paths) { case 0: dev_addr = &route->addr.dev_addr; - iboe_mac_to_ll((union ib_gid *) &resp->ib_route[0].dgid, - dev_addr->dst_dev_addr); + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (dev) { + vid = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + } + + iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid, + dev_addr->dst_dev_addr, vid); iboe_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(0xffff); diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index cb0dd5ae2777..bb7e19280821 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -33,6 +33,7 @@ #include #include +#include #include @@ -103,6 +104,17 @@ static const struct ib_field eth_table[] = { .size_bits = 16 } }; +static const struct ib_field vlan_table[] = { + { STRUCT_FIELD(vlan, tag), + .offset_words = 0, + 
.offset_bits = 0, + .size_bits = 16 }, + { STRUCT_FIELD(vlan, type), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 16 } +}; + static const struct ib_field grh_table[] = { { STRUCT_FIELD(grh, ip_version), .offset_words = 0, @@ -205,6 +217,7 @@ static const struct ib_field deth_table[] = { * @payload_bytes:Length of packet payload * @lrh_present: specify if LRH is present * @eth_present: specify if Eth header is present + * @vlan_present: packet is tagged vlan * @grh_present:GRH flag (if non-zero, GRH will be included) * @immediate_present: specify if immediate data is present * @header:Structure to initialize @@ -212,6 +225,7 @@ static const struct ib_field deth_table[] = { void ib_ud_header_init(int payload_bytes, int lrh_present, int eth_present, + int vlan_present, int grh_present, int immediate_present, struct ib_ud_header *header) @@ -234,6 +248,9 @@ void ib_ud_header_init(int payload_bytes, header->lrh.packet_length = cpu_to_be16(packet_length); } + if (vlan_present) + header->eth.type = cpu_to_be16(ETH_P_8021Q); + if (grh_present) { header->grh.ip_version = 6; header->grh.payload_length = @@ -254,6 +271,7 @@ void ib_ud_header_init(int payload_bytes, header->lrh_present = lrh_present; header->eth_present = eth_present; + header->vlan_present = vlan_present; header->grh_present = grh_present; header->immediate_present = immediate_present; } @@ -312,6 +330,11 @@ int ib_ud_header_pack(struct ib_ud_header *header, &header->eth, buf + len); len += IB_ETH_BYTES; } + if (header->vlan_present) { + ib_pack(vlan_table, ARRAY_SIZE(vlan_table), + &header->vlan, buf + len); + len += IB_VLAN_BYTES; + } if (header->grh_present) { ib_pack(grh_table, ARRAY_SIZE(grh_table), &header->grh, buf + len); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 17f60fe6e5b6..269648445113 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1291,7 +1291,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, 
struct ib_send_wr *wr, is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); - ib_ud_header_init(send_size, !is_eth, is_eth, is_grh, 0, &sqp->ud_header); + ib_ud_header_init(send_size, !is_eth, is_eth, 0, is_grh, 0, &sqp->ud_header); if (!is_eth) { sqp->ud_header.lrh.service_level = diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 1a1c55fb13f3..a34c9d38e822 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1493,7 +1493,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, int err; u16 pkey; - ib_ud_header_init(256, /* assume a MAD */ 1, 0, + ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, &sqp->ud_header); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 904ffa92fc93..b5fc9f39122b 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -129,21 +130,41 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } -static inline void iboe_mac_to_ll(union ib_gid *gid, u8 *mac) +static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid) { memset(gid->raw, 0, 16); *((__be32 *) gid->raw) = cpu_to_be32(0xfe800000); - gid->raw[12] = 0xfe; - gid->raw[11] = 0xff; + if (vid < 0x1000) { + gid->raw[12] = vid & 0xff; + gid->raw[11] = vid >> 8; + } else { + gid->raw[12] = 0xfe; + gid->raw[11] = 0xff; + } memcpy(gid->raw + 13, mac + 3, 3); memcpy(gid->raw + 8, mac, 3); gid->raw[8] ^= 2; } +static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) +{ + return dev->priv_flags & IFF_802_1Q_VLAN ? 
+ vlan_dev_vlan_id(dev) : 0xffff; +} + static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - iboe_mac_to_ll(gid, dev_addr->src_dev_addr); + struct net_device *dev; + u16 vid = 0xffff; + + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (dev) { + vid = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + } + + iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vid); } static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) @@ -244,4 +265,18 @@ static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) mac[i] = addr->s6_addr[i + 10]; } +static inline u16 rdma_get_vlan_id(union ib_gid *dgid) +{ + u16 vid; + + vid = dgid->raw[11] << 8 | dgid->raw[12]; + return vid < 0x1000 ? vid : 0xffff; +} + +static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) +{ + return dev->priv_flags & IFF_802_1Q_VLAN ? + vlan_dev_real_dev(dev) : 0; +} + #endif /* IB_ADDR_H */ diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h index 6b91d8e7a1fa..b37fe3b10a9d 100644 --- a/include/rdma/ib_pack.h +++ b/include/rdma/ib_pack.h @@ -38,6 +38,7 @@ enum { IB_LRH_BYTES = 8, IB_ETH_BYTES = 14, + IB_VLAN_BYTES = 4, IB_GRH_BYTES = 40, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8 @@ -219,11 +220,18 @@ struct ib_unpacked_eth { __be16 type; }; +struct ib_unpacked_vlan { + __be16 tag; + __be16 type; +}; + struct ib_ud_header { int lrh_present; struct ib_unpacked_lrh lrh; int eth_present; struct ib_unpacked_eth eth; + int vlan_present; + struct ib_unpacked_vlan vlan; int grh_present; struct ib_unpacked_grh grh; struct ib_unpacked_bth bth; @@ -245,6 +253,7 @@ void ib_unpack(const struct ib_field *desc, void ib_ud_header_init(int payload_bytes, int lrh_present, int eth_present, + int vlan_present, int grh_present, int immediate_present, struct ib_ud_header *header); -- cgit v1.2.3