From bc9d3a9f2afca189a6ae40225b6985e3c775375e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner <tglx@linutronix.de> Date: Thu, 23 Mar 2023 21:55:32 +0100 Subject: net: dst: Switch to rcuref_t reference counting Under high contention dst_entry::__refcnt becomes a significant bottleneck. atomic_inc_not_zero() is implemented with a cmpxchg() loop, which goes into high retry rates on contention. Switch the reference count to rcuref_t which results in a significant performance gain. Rename the reference count member to __rcuref to reflect the change. The gain depends on the micro-architecture and the number of concurrent operations and has been measured in the range of +25% to +130% with a localhost memtier/memcached benchmark which amplifies the problem massively. Running the memtier/memcached benchmark over a real (1Gb) network connection the conversion on top of the false sharing fix for struct dst_entry::__refcnt results in a total gain in the 2%-5% range over the upstream baseline. Reported-by: Wangyang Guo <wangyang.guo@intel.com> Reported-by: Arjan Van De Ven <arjan.van.de.ven@intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/r/20230307125538.989175656@linutronix.de Link: https://lore.kernel.org/r/20230323102800.215027837@linutronix.de Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- include/net/dst.h | 19 ++++++++++--------- include/net/sock.h | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 81f2279ea911..78884429deed 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -16,6 +16,7 @@ #include <linux/bug.h> #include <linux/jiffies.h> #include <linux/refcount.h> +#include <linux/rcuref.h> #include <net/neighbour.h> #include <asm/processor.h> #include <linux/indirect_call_wrapper.h> @@ -61,11 +62,11 @@ struct dst_entry { unsigned short trailer_len; /* space to reserve at tail */ /* - * __refcnt wants to be on a different cache line from + * __rcuref wants to be on a different cache line from * input/output/ops or performance tanks badly */ #ifdef CONFIG_64BIT - atomic_t __refcnt; /* 64-bit offset 64 */ + rcuref_t __rcuref; /* 64-bit offset 64 */ #endif int __use; unsigned long lastuse; @@ -75,16 +76,16 @@ struct dst_entry { __u32 tclassid; #ifndef CONFIG_64BIT struct lwtunnel_state *lwtstate; - atomic_t __refcnt; /* 32-bit offset 64 */ + rcuref_t __rcuref; /* 32-bit offset 64 */ #endif netdevice_tracker dev_tracker; /* * Used by rtable and rt6_info. Moves lwtstate into the next cache * line on 64bit so that lwtstate does not cause false sharing with - * __refcnt under contention of __refcnt. This also puts the + * __rcuref under contention of __rcuref. This also puts the * frequently accessed members of rtable and rt6_info out of the - * __refcnt cache line. + * __rcuref cache line. */ struct list_head rt_uncached; struct uncached_list *rt_uncached_list; @@ -238,10 +239,10 @@ static inline void dst_hold(struct dst_entry *dst) { /* * If your kernel compilation stops here, please check - * the placement of __refcnt in struct dst_entry + * the placement of __rcuref in struct dst_entry */ - BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); - WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0); + BUILD_BUG_ON(offsetof(struct dst_entry, __rcuref) & 63); + WARN_ON(!rcuref_get(&dst->__rcuref)); } static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) @@ -305,7 +306,7 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb */ static inline bool dst_hold_safe(struct dst_entry *dst) { - return atomic_inc_not_zero(&dst->__refcnt); + return rcuref_get(&dst->__rcuref); } /** diff --git a/include/net/sock.h b/include/net/sock.h index 573f2bf7e0de..5edf0038867c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2131,7 +2131,7 @@ sk_dst_get(struct sock *sk) rcu_read_lock(); dst = rcu_dereference(sk->sk_dst_cache); - if (dst && !atomic_inc_not_zero(&dst->__refcnt)) + if (dst && !rcuref_get(&dst->__rcuref)) dst = NULL; rcu_read_unlock(); return dst; -- cgit v1.2.3