diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-16 00:04:25 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-16 00:04:25 +0200 |
commit | 9a76aba02a37718242d7cdc294f0a3901928aa57 (patch) | |
tree | 2040d038f85d2120f21af83b0793efd5af1864e3 /net/smc/smc_pnet.c | |
parent | x86: i8259: Add missing include file (diff) | |
parent | bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT" (diff) | |
download | linux-9a76aba02a37718242d7cdc294f0a3901928aa57.tar.xz linux-9a76aba02a37718242d7cdc294f0a3901928aa57.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
- Gustavo A. R. Silva keeps working on the implicit switch fallthru
changes.
- Support 802.11ax High-Efficiency wireless in cfg80211 et al, From
Luca Coelho.
- Re-enable ASPM in r8169, from Kai-Heng Feng.
- Add virtual XFRM interfaces, which avoids all of the limitations of
existing IPSEC tunnels. From Steffen Klassert.
- Convert GRO over to use a hash table, so that when we have many
flows active we don't traverse a long list during accumluation.
- Many new self tests for routing, TC, tunnels, etc. Too many
contributors to mention them all, but I'm really happy to keep
seeing this stuff.
- Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu.
- Lots of cleanups and fixes in L2TP code from Guillaume Nault.
- Add IPSEC offload support to netdevsim, from Shannon Nelson.
- Add support for slotting with non-uniform distribution to netem
packet scheduler, from Yousuk Seung.
- Add UDP GSO support to mlx5e, from Boris Pismenny.
- Support offloading of Team LAG in NFP, from John Hurley.
- Allow to configure TX queue selection based upon RX queue, from
Amritha Nambiar.
- Support ethtool ring size configuration in aquantia, from Anton
Mikaev.
- Support DSCP and flowlabel per-transport in SCTP, from Xin Long.
- Support list based batching and stack traversal of SKBs, this is
very exciting work. From Edward Cree.
- Busyloop optimizations in vhost_net, from Toshiaki Makita.
- Introduce the ETF qdisc, which allows time based transmissions. IGB
can offload this in hardware. From Vinicius Costa Gomes.
- Add parameter support to devlink, from Moshe Shemesh.
- Several multiplication and division optimizations for BPF JIT in
nfp driver, from Jiong Wang.
- Lots of prepatory work to make more of the packet scheduler layer
lockless, when possible, from Vlad Buslov.
- Add ACK filter and NAT awareness to sch_cake packet scheduler, from
Toke Høiland-Jørgensen.
- Support regions and region snapshots in devlink, from Alex Vesker.
- Allow to attach XDP programs to both HW and SW at the same time on
a given device, with initial support in nfp. From Jakub Kicinski.
- Add TLS RX offload and support in mlx5, from Ilya Lesokhin.
- Use PHYLIB in r8169 driver, from Heiner Kallweit.
- All sorts of changes to support Spectrum 2 in mlxsw driver, from
Ido Schimmel.
- PTP support in mv88e6xxx DSA driver, from Andrew Lunn.
- Make TCP_USER_TIMEOUT socket option more accurate, from Jon
Maxwell.
- Support for templates in packet scheduler classifier, from Jiri
Pirko.
- IPV6 support in RDS, from Ka-Cheong Poon.
- Native tproxy support in nf_tables, from Máté Eckl.
- Maintain IP fragment queue in an rbtree, but optimize properly for
in-order frags. From Peter Oskolkov.
- Improvde handling of ACKs on hole repairs, from Yuchung Cheng"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits)
bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT"
hv/netvsc: Fix NULL dereference at single queue mode fallback
net: filter: mark expected switch fall-through
xen-netfront: fix warn message as irq device name has '/'
cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0
net: dsa: mv88e6xxx: missing unlock on error path
rds: fix building with IPV6=m
inet/connection_sock: prefer _THIS_IP_ to current_text_addr
net: dsa: mv88e6xxx: bitwise vs logical bug
net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd()
ieee802154: hwsim: using right kind of iteration
net: hns3: Add vlan filter setting by ethtool command -K
net: hns3: Set tx ring' tc info when netdev is up
net: hns3: Remove tx ring BD len register in hns3_enet
net: hns3: Fix desc num set to default when setting channel
net: hns3: Fix for phy link issue when using marvell phy driver
net: hns3: Fix for information of phydev lost problem when down/up
net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero
net: hns3: Add support for serdes loopback selftest
bnxt_en: take coredump_record structure off stack
...
Diffstat (limited to 'net/smc/smc_pnet.c')
-rw-r--r-- | net/smc/smc_pnet.c | 171 |
1 files changed, 146 insertions, 25 deletions
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index d7b88b2d1b22..01c6ce042a1c 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -22,13 +22,12 @@ #include "smc_pnet.h" #include "smc_ib.h" - -#define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */ +#include "smc_ism.h" static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, - .len = SMC_MAX_PNET_ID_LEN - 1 + .len = SMC_MAX_PNETID_LEN - 1 }, [SMC_PNETID_ETHNAME] = { .type = NLA_NUL_STRING, @@ -65,7 +64,7 @@ static struct smc_pnettable { */ struct smc_pnetentry { struct list_head list; - char pnet_name[SMC_MAX_PNET_ID_LEN + 1]; + char pnet_name[SMC_MAX_PNETID_LEN + 1]; struct net_device *ndev; struct smc_ib_device *smcibdev; u8 ib_port; @@ -209,7 +208,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid) return false; while (--end >= bf && isspace(*end)) ; - if (end - bf >= SMC_MAX_PNET_ID_LEN) + if (end - bf >= SMC_MAX_PNETID_LEN) return false; while (bf <= end) { if (!isalnum(*bf)) @@ -358,9 +357,6 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) kfree(pnetelem); return rc; } - rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port); - if (rc) - smc_pnet_remove_by_pnetid(pnetelem->pnet_name); return rc; } @@ -485,10 +481,10 @@ static int smc_pnet_netdev_event(struct notifier_block *this, case NETDEV_REBOOT: case NETDEV_UNREGISTER: smc_pnet_remove_by_ndev(event_dev); + return NOTIFY_OK; default: - break; + return NOTIFY_DONE; } - return NOTIFY_DONE; } static struct notifier_block smc_netdev_notifier = { @@ -515,28 +511,104 @@ void smc_pnet_exit(void) genl_unregister_family(&smc_pnet_nl_family); } -/* PNET table analysis for a given sock: - * determine ib_device and port belonging to used internal TCP socket - * ethernet interface. +/* Determine one base device for stacked net devices. + * If the lower device level contains more than one devices + * (for instance with bonding slaves), just the first device + * is used to reach a base device. */ -void smc_pnet_find_roce_resource(struct sock *sk, - struct smc_ib_device **smcibdev, u8 *ibport) +static struct net_device *pnet_find_base_ndev(struct net_device *ndev) { - struct dst_entry *dst = sk_dst_get(sk); - struct smc_pnetentry *pnetelem; + int i, nest_lvl; - *smcibdev = NULL; - *ibport = 0; + rtnl_lock(); + nest_lvl = dev_get_nest_level(ndev); + for (i = 0; i < nest_lvl; i++) { + struct list_head *lower = &ndev->adj_list.lower; + + if (list_empty(lower)) + break; + lower = lower->next; + ndev = netdev_lower_get_next(ndev, &lower); + } + rtnl_unlock(); + return ndev; +} + +/* Determine the corresponding IB device port based on the hardware PNETID. + * Searching stops at the first matching active IB device port with vlan_id + * configured. + */ +static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, + struct smc_ib_device **smcibdev, + u8 *ibport, unsigned short vlan_id, + u8 gid[]) +{ + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct smc_ib_device *ibdev; + int i; + + ndev = pnet_find_base_ndev(ndev); + if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, + ndev_pnetid)) + return; /* pnetid could not be determined */ + + spin_lock(&smc_ib_devices.lock); + list_for_each_entry(ibdev, &smc_ib_devices.list, list) { + for (i = 1; i <= SMC_MAX_PORTS; i++) { + if (!rdma_is_port_valid(ibdev->ibdev, i)) + continue; + if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid, + SMC_MAX_PNETID_LEN) && + smc_ib_port_active(ibdev, i) && + !smc_ib_determine_gid(ibdev, i, vlan_id, gid, + NULL)) { + *smcibdev = ibdev; + *ibport = i; + goto out; + } + } + } +out: + spin_unlock(&smc_ib_devices.lock); +} + +static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, + struct smcd_dev **smcismdev) +{ + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + struct smcd_dev *ismdev; + + ndev = pnet_find_base_ndev(ndev); + if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, + ndev_pnetid)) + return; /* pnetid could not be determined */ + + spin_lock(&smcd_dev_list.lock); + list_for_each_entry(ismdev, &smcd_dev_list.list, list) { + if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) { + *smcismdev = ismdev; + break; + } + } + spin_unlock(&smcd_dev_list.lock); +} + +/* Lookup of coupled ib_device via SMC pnet table */ +static void smc_pnet_find_roce_by_table(struct net_device *netdev, + struct smc_ib_device **smcibdev, + u8 *ibport, unsigned short vlan_id, + u8 gid[]) +{ + struct smc_pnetentry *pnetelem; - if (!dst) - return; - if (!dst->dev) - goto out_rel; read_lock(&smc_pnettable.lock); list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { - if (dst->dev == pnetelem->ndev) { + if (netdev == pnetelem->ndev) { if (smc_ib_port_active(pnetelem->smcibdev, - pnetelem->ib_port)) { + pnetelem->ib_port) && + !smc_ib_determine_gid(pnetelem->smcibdev, + pnetelem->ib_port, vlan_id, + gid, NULL)) { *smcibdev = pnetelem->smcibdev; *ibport = pnetelem->ib_port; } @@ -544,6 +616,55 @@ void smc_pnet_find_roce_resource(struct sock *sk, } } read_unlock(&smc_pnettable.lock); +} + +/* PNET table analysis for a given sock: + * determine ib_device and port belonging to used internal TCP socket + * ethernet interface. + */ +void smc_pnet_find_roce_resource(struct sock *sk, + struct smc_ib_device **smcibdev, u8 *ibport, + unsigned short vlan_id, u8 gid[]) +{ + struct dst_entry *dst = sk_dst_get(sk); + + *smcibdev = NULL; + *ibport = 0; + + if (!dst) + goto out; + if (!dst->dev) + goto out_rel; + + /* if possible, lookup via hardware-defined pnetid */ + smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid); + if (*smcibdev) + goto out_rel; + + /* lookup via SMC PNET table */ + smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid); + +out_rel: + dst_release(dst); +out: + return; +} + +void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev) +{ + struct dst_entry *dst = sk_dst_get(sk); + + *smcismdev = NULL; + if (!dst) + goto out; + if (!dst->dev) + goto out_rel; + + /* if possible, lookup via hardware-defined pnetid */ + smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev); + out_rel: dst_release(dst); +out: + return; } |