Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking updates from David Miller: 1) Support offloading wireless authentication to userspace via NL80211_CMD_EXTERNAL_AUTH, from Srinivas Dasari. 2) A lot of work on network namespace setup/teardown from Kirill Tkhai. Setup and cleanup of namespaces now all run asynchronously and thus performance is significantly increased. 3) Add rx/tx timestamping support to mv88e6xxx driver, from Brandon Streiff. 4) Support zerocopy on RDS sockets, from Sowmini Varadhan. 5) Use denser instruction encoding in x86 eBPF JIT, from Daniel Borkmann. 6) Support hw offload of vlan filtering in mvpp2 dreiver, from Maxime Chevallier. 7) Support grafting of child qdiscs in mlxsw driver, from Nogah Frankel. 8) Add packet forwarding tests to selftests, from Ido Schimmel. 9) Deal with sub-optimal GSO packets better in BBR congestion control, from Eric Dumazet. 10) Support 5-tuple hashing in ipv6 multipath routing, from David Ahern. 11) Add path MTU tests to selftests, from Stefano Brivio. 12) Various bits of IPSEC offloading support for mlx5, from Aviad Yehezkel, Yossi Kuperman, and Saeed Mahameed. 13) Support RSS spreading on ntuple filters in SFC driver, from Edward Cree. 14) Lots of sockmap work from John Fastabend. Applications can use eBPF to filter sendmsg and sendpage operations. 15) In-kernel receive TLS support, from Dave Watson. 16) Add XDP support to ixgbevf, this is significant because it should allow optimized XDP usage in various cloud environments. From Tony Nguyen. 17) Add new Intel E800 series "ice" ethernet driver, from Anirudh Venkataramanan et al. 18) IP fragmentation match offload support in nfp driver, from Pieter Jansen van Vuuren. 19) Support XDP redirect in i40e driver, from Björn Töpel. 20) Add BPF_RAW_TRACEPOINT program type for accessing the arguments of tracepoints in their raw form, from Alexei Starovoitov. 21) Lots of striding RQ improvements to mlx5 driver with many performance improvements, from Tariq Toukan. 22) Use rhashtable for inet frag reassembly, from Eric Dumazet. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1678 commits) net: mvneta: improve suspend/resume net: mvneta: split rxq/txq init and txq deinit into SW and HW parts ipv6: frags: fix /proc/sys/net/ipv6/ip6frag_low_thresh net: bgmac: Fix endian access in bgmac_dma_tx_ring_free() net: bgmac: Correctly annotate register space route: check sysctl_fib_multipath_use_neigh earlier than hash fix typo in command value in drivers/net/phy/mdio-bitbang. sky2: Increase D3 delay to sky2 stops working after suspend net/mlx5e: Set EQE based as default TX interrupt moderation mode ibmvnic: Disable irqs before exiting reset from closed state net: sched: do not emit messages while holding spinlock vlan: also check phy_driver ts_info for vlan's real device Bluetooth: Mark expected switch fall-throughs Bluetooth: Set HCI_QUIRK_SIMULTANEOUS_DISCOVERY for BTUSB_QCA_ROME Bluetooth: btrsi: remove unused including <linux/version.h> Bluetooth: hci_bcm: Remove DMI quirk for the MINIX Z83-4 sh_eth: kill useless check in __sh_eth_get_regs() sh_eth: add sh_eth_cpu_data::no_xdfar flag ipv6: factorize sk_wmem_alloc updates done by __ip6_append_data() ipv4: factorize sk_wmem_alloc updates done by __ip_append_data() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-04-03 23:04:18 +0200
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-04-03 23:04:18 +0200
commit: 5bb053bef82523a8fd78d650bca81c9f114fa276 (patch)
tree: 58c2fe47f60bb69230bb05d57a6c9e3f47f7b1fe /lib
parent: Merge tag 'docs-4.17' of git://git.lwn.net/linux (diff)
parent: Merge branch 'net-mvneta-improve-suspend-resume' (diff)
download: linux-5bb053bef82523a8fd78d650bca81c9f114fa276.tar.xz
linux-5bb053bef82523a8fd78d650bca81c9f114fa276.zip
4 files changed, 179 insertions, 21 deletions
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9fe6ec8fda28..15ea216a67ce 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -25,6 +25,7 @@
 #include <linux/uuid.h>
 #include <linux/ctype.h>
 #include <net/sock.h>
+#include <net/netlink.h>
 #include <net/net_namespace.h>
 
 
@@ -32,11 +33,13 @@ u64 uevent_seqnum;
 #ifdef CONFIG_UEVENT_HELPER
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
 #endif
-#ifdef CONFIG_NET
+
 struct uevent_sock {
 	struct list_head list;
 	struct sock *sk;
 };
+
+#ifdef CONFIG_NET
 static LIST_HEAD(uevent_sock_list);
 #endif
 
@@ -602,12 +605,88 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
 EXPORT_SYMBOL_GPL(add_uevent_var);
 
 #if defined(CONFIG_NET)
+static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb,
+				struct netlink_ext_ack *extack)
+{
+	/* u64 to chars: 2^64 - 1 = 21 chars */
+	char buf[sizeof("SEQNUM=") + 21];
+	struct sk_buff *skbc;
+	int ret;
+
+	/* bump and prepare sequence number */
+	ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", ++uevent_seqnum);
+	if (ret < 0 || (size_t)ret >= sizeof(buf))
+		return -ENOMEM;
+	ret++;
+
+	/* verify message does not overflow */
+	if ((skb->len + ret) > UEVENT_BUFFER_SIZE) {
+		NL_SET_ERR_MSG(extack, "uevent message too big");
+		return -EINVAL;
+	}
+
+	/* copy skb and extend to accommodate sequence number */
+	skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL);
+	if (!skbc)
+		return -ENOMEM;
+
+	/* append sequence number */
+	skb_put_data(skbc, buf, ret);
+
+	/* remove msg header */
+	skb_pull(skbc, NLMSG_HDRLEN);
+
+	/* set portid 0 to inform userspace message comes from kernel */
+	NETLINK_CB(skbc).portid = 0;
+	NETLINK_CB(skbc).dst_group = 1;
+
+	ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL);
+	/* ENOBUFS should be handled in userspace */
+	if (ret == -ENOBUFS || ret == -ESRCH)
+		ret = 0;
+
+	return ret;
+}
+
+static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh,
+			      struct netlink_ext_ack *extack)
+{
+	struct net *net;
+	int ret;
+
+	if (!nlmsg_data(nlh))
+		return -EINVAL;
+
+	/*
+	 * Verify that we are allowed to send messages to the target
+	 * network namespace. The caller must have CAP_SYS_ADMIN in the
+	 * owning user namespace of the target network namespace.
+	 */
+	net = sock_net(NETLINK_CB(skb).sk);
+	if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) {
+		NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability");
+		return -EPERM;
+	}
+
+	mutex_lock(&uevent_sock_mutex);
+	ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack);
+	mutex_unlock(&uevent_sock_mutex);
+
+	return ret;
+}
+
+static void uevent_net_rcv(struct sk_buff *skb)
+{
+	netlink_rcv_skb(skb, &uevent_net_rcv_skb);
+}
+
 static int uevent_net_init(struct net *net)
 {
 	struct uevent_sock *ue_sk;
 	struct netlink_kernel_cfg cfg = {
 		.groups	= 1,
-		.flags	= NL_CFG_F_NONROOT_RECV,
+		.input = uevent_net_rcv,
+		.flags	= NL_CFG_F_NONROOT_RECV
 	};
 
 	ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
@@ -621,6 +700,9 @@ static int uevent_net_init(struct net *net)
 		kfree(ue_sk);
 		return -ENODEV;
 	}
+
+	net->uevent_sock = ue_sk;
+
 	mutex_lock(&uevent_sock_mutex);
 	list_add_tail(&ue_sk->list, &uevent_sock_list);
 	mutex_unlock(&uevent_sock_mutex);
@@ -629,17 +711,9 @@ static int uevent_net_init(struct net *net)
 
 static void uevent_net_exit(struct net *net)
 {
-	struct uevent_sock *ue_sk;
+	struct uevent_sock *ue_sk = net->uevent_sock;
 
 	mutex_lock(&uevent_sock_mutex);
-	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
-		if (sock_net(ue_sk->sk) == net)
-			goto found;
-	}
-	mutex_unlock(&uevent_sock_mutex);
-	return;
-
-found:
 	list_del(&ue_sk->list);
 	mutex_unlock(&uevent_sock_mutex);
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 47de025b6245..2b2b79974b61 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -333,6 +333,7 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 		err = rhashtable_rehash_chain(ht, old_hash);
 		if (err)
 			return err;
+		cond_resched();
 	}
 
 	/* Publish the new table pointer. */
@@ -1112,6 +1113,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 		for (i = 0; i < tbl->size; i++) {
 			struct rhash_head *pos, *next;
 
+			cond_resched();
 			for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
 			     next = !rht_is_a_nulls(pos) ?
 					rht_dereference(pos->next, ht) : NULL;
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 53728d391d3a..06dad7a072fd 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -132,14 +132,7 @@ EXPORT_SYMBOL(sg_last);
 void sg_init_table(struct scatterlist *sgl, unsigned int nents)
 {
 	memset(sgl, 0, sizeof(*sgl) * nents);
-#ifdef CONFIG_DEBUG_SG
-	{
-		unsigned int i;
-		for (i = 0; i < nents; i++)
-			sgl[i].sg_magic = SG_MAGIC;
-	}
-#endif
-	sg_mark_end(&sgl[nents - 1]);
+	sg_init_marker(sgl, nents);
 }
 EXPORT_SYMBOL(sg_init_table);
 
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 3e9335493fe4..8e157806df7a 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6574,6 +6574,93 @@ static bool exclude_test(int test_id)
 	return test_id < test_range[0] || test_id > test_range[1];
 }
 
+static __init struct sk_buff *build_test_skb(void)
+{
+	u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
+	struct sk_buff *skb[2];
+	struct page *page[2];
+	int i, data_size = 8;
+
+	for (i = 0; i < 2; i++) {
+		page[i] = alloc_page(GFP_KERNEL);
+		if (!page[i]) {
+			if (i == 0)
+				goto err_page0;
+			else
+				goto err_page1;
+		}
+
+		/* this will set skb[i]->head_frag */
+		skb[i] = dev_alloc_skb(headroom + data_size);
+		if (!skb[i]) {
+			if (i == 0)
+				goto err_skb0;
+			else
+				goto err_skb1;
+		}
+
+		skb_reserve(skb[i], headroom);
+		skb_put(skb[i], data_size);
+		skb[i]->protocol = htons(ETH_P_IP);
+		skb_reset_network_header(skb[i]);
+		skb_set_mac_header(skb[i], -ETH_HLEN);
+
+		skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64);
+		// skb_headlen(skb[i]): 8, skb[i]->head_frag = 1
+	}
+
+	/* setup shinfo */
+	skb_shinfo(skb[0])->gso_size = 1448;
+	skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
+	skb_shinfo(skb[0])->gso_segs = 0;
+	skb_shinfo(skb[0])->frag_list = skb[1];
+
+	/* adjust skb[0]'s len */
+	skb[0]->len += skb[1]->len;
+	skb[0]->data_len += skb[1]->data_len;
+	skb[0]->truesize += skb[1]->truesize;
+
+	return skb[0];
+
+err_skb1:
+	__free_page(page[1]);
+err_page1:
+	kfree_skb(skb[0]);
+err_skb0:
+	__free_page(page[0]);
+err_page0:
+	return NULL;
+}
+
+static __init int test_skb_segment(void)
+{
+	netdev_features_t features;
+	struct sk_buff *skb, *segs;
+	int ret = -1;
+
+	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
+		   NETIF_F_IPV6_CSUM;
+	features |= NETIF_F_RXCSUM;
+	skb = build_test_skb();
+	if (!skb) {
+		pr_info("%s: failed to build_test_skb", __func__);
+		goto done;
+	}
+
+	segs = skb_segment(skb, features);
+	if (!IS_ERR(segs)) {
+		kfree_skb_list(segs);
+		ret = 0;
+		pr_info("%s: success in skb_segment!", __func__);
+	} else {
+		pr_info("%s: failed in skb_segment!", __func__);
+	}
+	kfree_skb(skb);
+done:
+	return ret;
+}
+
 static __init int test_bpf(void)
 {
 	int i, err_cnt = 0, pass_cnt = 0;
@@ -6632,9 +6719,11 @@ static int __init test_bpf_init(void)
 		return ret;
 
 	ret = test_bpf();
-
 	destroy_bpf_tests();
-	return ret;
+	if (ret)
+		return ret;
+
+	return test_skb_segment();
 }
 
 static void __exit test_bpf_exit(void)
author	Linus Torvalds <torvalds@linux-foundation.org>	2018-04-03 23:04:18 +0200
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-04-03 23:04:18 +0200
commit	5bb053bef82523a8fd78d650bca81c9f114fa276 (patch)
tree	58c2fe47f60bb69230bb05d57a6c9e3f47f7b1fe /lib
parent	Merge tag 'docs-4.17' of git://git.lwn.net/linux (diff)
parent	Merge branch 'net-mvneta-improve-suspend-resume' (diff)
download	linux-5bb053bef82523a8fd78d650bca81c9f114fa276.tar.xz linux-5bb053bef82523a8fd78d650bca81c9f114fa276.zip