bpf: cpumap do bulk allocation of SKBs

As cpumap now batch consume xdp_frame's from the ptr_ring, it knows how many SKBs it need to allocate. Thus, lets bulk allocate these SKBs via kmem_cache_alloc_bulk() API, and use the previously introduced function build_skb_around(). Notice that the flag __GFP_ZERO asks the slab/slub allocator to clear the memory for us. This does clear a larger area than needed, but my micro benchmarks on Intel CPUs show that this is slightly faster due to being a cacheline aligned area is cleared for the SKBs. (For SLUB allocator, there is a future optimization potential, because SKBs will with high probability originate from same page. If we can find/identify continuous memory areas then the Intel CPU memset rep stos will have a real performance gain.) Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Acked-by: Song Liu <songliubraving@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
author: Jesper Dangaard Brouer <brouer@redhat.com> 2019-04-12 17:07:43 +0200
committer: Alexei Starovoitov <ast@kernel.org> 2019-04-18 04:09:25 +0200
commit: 8f0504a97e1ba6b70e1c8b5a88255c280f263287 (patch)
tree: 762bb6478b65c42ba2e0e79f52d8421cc0dcb707 /kernel/bpf
parent: net: core: introduce build_skb_around (diff)
download: linux-8f0504a97e1ba6b70e1c8b5a88255c280f263287.tar.xz
linux-8f0504a97e1ba6b70e1c8b5a88255c280f263287.zip
1 files changed, 15 insertions, 7 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 430103e182a0..732d6ced3987 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -160,12 +160,12 @@ static void cpu_map_kthread_stop(struct work_struct *work)
 }
 
 static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
-					 struct xdp_frame *xdpf)
+					 struct xdp_frame *xdpf,
+					 struct sk_buff *skb)
 {
 	unsigned int hard_start_headroom;
 	unsigned int frame_size;
 	void *pkt_data_start;
-	struct sk_buff *skb;
 
 	/* Part of headroom was reserved to xdpf */
 	hard_start_headroom = sizeof(struct xdp_frame) +  xdpf->headroom;
@@ -191,8 +191,8 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
 		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	pkt_data_start = xdpf->data - hard_start_headroom;
-	skb = build_skb(pkt_data_start, frame_size);
-	if (!skb)
+	skb = build_skb_around(skb, pkt_data_start, frame_size);
+	if (unlikely(!skb))
 		return NULL;
 
 	skb_reserve(skb, hard_start_headroom);
@@ -256,7 +256,9 @@ static int cpu_map_kthread_run(void *data)
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
 		unsigned int drops = 0, sched = 0;
 		void *frames[CPUMAP_BATCH];
-		int i, n;
+		void *skbs[CPUMAP_BATCH];
+		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+		int i, n, m;
 
 		/* Release CPU reschedule checks */
 		if (__ptr_ring_empty(rcpu->queue)) {
@@ -278,14 +280,20 @@ static int cpu_map_kthread_run(void *data)
 		 * consume side valid as no-resize allowed of queue.
 		 */
 		n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
+		m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
+		if (unlikely(m == 0)) {
+			for (i = 0; i < n; i++)
+				skbs[i] = NULL; /* effect: xdp_return_frame */
+			drops = n;
+		}
 
 		local_bh_disable();
 		for (i = 0; i < n; i++) {
 			struct xdp_frame *xdpf = frames[i];
-			struct sk_buff *skb;
+			struct sk_buff *skb = skbs[i];
 			int ret;
 
-			skb = cpu_map_build_skb(rcpu, xdpf);
+			skb = cpu_map_build_skb(rcpu, xdpf, skb);
 			if (!skb) {
 				xdp_return_frame(xdpf);
 				continue;
author	Jesper Dangaard Brouer <brouer@redhat.com>	2019-04-12 17:07:43 +0200
committer	Alexei Starovoitov <ast@kernel.org>	2019-04-18 04:09:25 +0200
commit	8f0504a97e1ba6b70e1c8b5a88255c280f263287 (patch)
tree	762bb6478b65c42ba2e0e79f52d8421cc0dcb707 /kernel/bpf
parent	net: core: introduce build_skb_around (diff)
download	linux-8f0504a97e1ba6b70e1c8b5a88255c280f263287.tar.xz linux-8f0504a97e1ba6b70e1c8b5a88255c280f263287.zip