net: bulk free infrastructure for NAPI context, use napi_consume_skb

Discovered that network stack were hitting the kmem_cache/SLUB slowpath when freeing SKBs. Doing bulk free with kmem_cache_free_bulk can speedup this slowpath. NAPI context is a bit special, lets take advantage of that for bulk free'ing SKBs. In NAPI context we are running in softirq, which gives us certain protection. A softirq can run on several CPUs at once. BUT the important part is a softirq will never preempt another softirq running on the same CPU. This gives us the opportunity to access per-cpu variables in softirq context. Extend napi_alloc_cache (before only contained page_frag_cache) to be a struct with a small array based stack for holding SKBs. Introduce a SKB defer and flush API for accessing this. Introduce napi_consume_skb() as replacement for e.g. dev_consume_skb_any() when running in NAPI context. A small trick to handle/detect if we are called from netpoll is to see if budget is 0. In that case, we need to invoke dev_consume_skb_irq(). Joint work with Alexander Duyck. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Jesper Dangaard Brouer <brouer@redhat.com> 2016-02-08 13:14:59 +0100
committer: David S. Miller <davem@davemloft.net> 2016-02-11 17:59:09 +0100
commit: 795bb1c00dd338aa0d12f9a7f1f4776fb3160416 (patch)
tree: ab5da980a221b054236b2800d3c4d80358879d86 /net/core/skbuff.c
parent: Merge branch 'virtio_net-ethtool-validation' (diff)
download: linux-795bb1c00dd338aa0d12f9a7f1f4776fb3160416.tar.xz
linux-795bb1c00dd338aa0d12f9a7f1f4776fb3160416.zip
1 files changed, 77 insertions, 6 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b0cce744e2a0..b64187b87773 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -347,8 +347,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 }
 EXPORT_SYMBOL(build_skb);
 
+#define NAPI_SKB_CACHE_SIZE	64
+
+struct napi_alloc_cache {
+	struct page_frag_cache page;
+	size_t skb_count;
+	void *skb_cache[NAPI_SKB_CACHE_SIZE];
+};
+
 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
+static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
@@ -378,9 +386,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
 
 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 
-	return __alloc_page_frag(nc, fragsz, gfp_mask);
+	return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
 }
 
 void *napi_alloc_frag(unsigned int fragsz)
@@ -474,7 +482,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 				 gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 	struct sk_buff *skb;
 	void *data;
 
@@ -494,7 +502,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
-	data = __alloc_page_frag(nc, len, gfp_mask);
+	data = __alloc_page_frag(&nc->page, len, gfp_mask);
 	if (unlikely(!data))
 		return NULL;
 
@@ -505,7 +513,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	}
 
 	/* use OR instead of assignment to avoid clearing of bits in mask */
-	if (nc->pfmemalloc)
+	if (nc->page.pfmemalloc)
 		skb->pfmemalloc = 1;
 	skb->head_frag = 1;
 
@@ -747,6 +755,69 @@ void consume_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(consume_skb);
 
+void __kfree_skb_flush(void)
+{
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+	/* flush skb_cache if containing objects */
+	if (nc->skb_count) {
+		kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
+				     nc->skb_cache);
+		nc->skb_count = 0;
+	}
+}
+
+static void __kfree_skb_defer(struct sk_buff *skb)
+{
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+	/* drop skb->head and call any destructors for packet */
+	skb_release_all(skb);
+
+	/* record skb to CPU local list */
+	nc->skb_cache[nc->skb_count++] = skb;
+
+#ifdef CONFIG_SLUB
+	/* SLUB writes into objects when freeing */
+	prefetchw(skb);
+#endif
+
+	/* flush skb_cache if it is filled */
+	if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
+		kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
+				     nc->skb_cache);
+		nc->skb_count = 0;
+	}
+}
+
+void napi_consume_skb(struct sk_buff *skb, int budget)
+{
+	if (unlikely(!skb))
+		return;
+
+	/* if budget is 0 assume netpoll w/ IRQs disabled */
+	if (unlikely(!budget)) {
+		dev_consume_skb_irq(skb);
+		return;
+	}
+
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+	/* if reaching here SKB is ready to free */
+	trace_consume_skb(skb);
+
+	/* if SKB is a clone, don't handle this case */
+	if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
+		__kfree_skb(skb);
+		return;
+	}
+
+	__kfree_skb_defer(skb);
+}
+EXPORT_SYMBOL(napi_consume_skb);
+
 /* Make sure a field is enclosed inside headers_start/headers_end section */
 #define CHECK_SKB_FIELD(field) \
 	BUILD_BUG_ON(offsetof(struct sk_buff, field) <		\
author	Jesper Dangaard Brouer <brouer@redhat.com>	2016-02-08 13:14:59 +0100
committer	David S. Miller <davem@davemloft.net>	2016-02-11 17:59:09 +0100
commit	795bb1c00dd338aa0d12f9a7f1f4776fb3160416 (patch)
tree	ab5da980a221b054236b2800d3c4d80358879d86 /net/core/skbuff.c
parent	Merge branch 'virtio_net-ethtool-validation' (diff)
download	linux-795bb1c00dd338aa0d12f9a7f1f4776fb3160416.tar.xz linux-795bb1c00dd338aa0d12f9a7f1f4776fb3160416.zip