diff options
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- | include/net/sock.h | 173 |
1 files changed, 64 insertions, 109 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index 54ca8dcbfb43..b5e00702acc1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -378,14 +378,10 @@ struct sock { #define sk_flags __sk_common.skc_flags #define sk_rxhash __sk_common.skc_rxhash - /* early demux fields */ - struct dst_entry __rcu *sk_rx_dst; - int sk_rx_dst_ifindex; - u32 sk_rx_dst_cookie; + __cacheline_group_begin(sock_write_rx); - socket_lock_t sk_lock; atomic_t sk_drops; - int sk_rcvlowat; + __s32 sk_peek_off; struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* @@ -402,18 +398,24 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; - #define sk_rmem_alloc sk_backlog.rmem_alloc - int sk_forward_alloc; - u32 sk_reserved_mem; + __cacheline_group_end(sock_write_rx); + + __cacheline_group_begin(sock_read_rx); + /* early demux fields */ + struct dst_entry __rcu *sk_rx_dst; + int sk_rx_dst_ifindex; + u32 sk_rx_dst_cookie; + #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_ll_usec; - /* ===== mostly read cache line ===== */ unsigned int sk_napi_id; + u16 sk_busy_poll_budget; + u8 sk_prefer_busy_poll; #endif + u8 sk_userlocks; int sk_rcvbuf; - int sk_disconnects; struct sk_filter __rcu *sk_filter; union { @@ -422,15 +424,33 @@ struct sock { struct socket_wq *sk_wq_raw; /* public: */ }; + + void (*sk_data_ready)(struct sock *sk); + long sk_rcvtimeo; + int sk_rcvlowat; + __cacheline_group_end(sock_read_rx); + + __cacheline_group_begin(sock_read_rxtx); + int sk_err; + struct socket *sk_socket; + struct mem_cgroup *sk_memcg; #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif + __cacheline_group_end(sock_read_rxtx); - struct dst_entry __rcu *sk_dst_cache; + __cacheline_group_begin(sock_write_rxtx); + socket_lock_t sk_lock; + u32 sk_reserved_mem; + int sk_forward_alloc; + u32 sk_tsflags; + __cacheline_group_end(sock_write_rxtx); + + __cacheline_group_begin(sock_write_tx); + int sk_write_pending; atomic_t sk_omem_alloc; int sk_sndbuf; - /* ===== cache line for TX ===== */ int sk_wmem_queued; refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; @@ -439,22 +459,36 @@ struct sock { struct rb_root tcp_rtx_queue; }; struct sk_buff_head sk_write_queue; - __s32 sk_peek_off; - int sk_write_pending; - __u32 sk_dst_pending_confirm; + u32 sk_dst_pending_confirm; u32 sk_pacing_status; /* see enum sk_pacing */ - long sk_sndtimeo; + struct page_frag sk_frag; struct timer_list sk_timer; - __u32 sk_priority; - __u32 sk_mark; + unsigned long sk_pacing_rate; /* bytes per second */ + atomic_t sk_zckey; + atomic_t sk_tskey; + __cacheline_group_end(sock_write_tx); + + __cacheline_group_begin(sock_read_tx); unsigned long sk_max_pacing_rate; - struct page_frag sk_frag; + long sk_sndtimeo; + u32 sk_priority; + u32 sk_mark; + struct dst_entry __rcu *sk_dst_cache; netdev_features_t sk_route_caps; - int sk_gso_type; +#ifdef CONFIG_SOCK_VALIDATE_XMIT + struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb); +#endif + u16 sk_gso_type; + u16 sk_gso_max_segs; unsigned int sk_gso_max_size; gfp_t sk_allocation; - __u32 sk_txhash; + u32 sk_txhash; + u8 sk_pacing_shift; + bool sk_use_task_frag; + __cacheline_group_end(sock_read_tx); /* * Because of non atomicity rules, all @@ -463,64 +497,44 @@ struct sock { u8 sk_gso_disabled : 1, sk_kern_sock : 1, sk_no_check_tx : 1, - sk_no_check_rx : 1, - sk_userlocks : 4; - u8 sk_pacing_shift; + sk_no_check_rx : 1; + u8 sk_shutdown; u16 sk_type; u16 sk_protocol; - u16 sk_gso_max_segs; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; - int sk_err, - sk_err_soft; + int sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; - u8 sk_txrehash; -#ifdef CONFIG_NET_RX_BUSY_POLL - u8 sk_prefer_busy_poll; - u16 sk_busy_poll_budget; -#endif spinlock_t sk_peer_lock; int sk_bind_phc; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; - long sk_rcvtimeo; ktime_t sk_stamp; #if BITS_PER_LONG==32 seqlock_t sk_stamp_seq; #endif - atomic_t sk_tskey; - atomic_t sk_zckey; - u32 sk_tsflags; - u8 sk_shutdown; + int sk_disconnects; + u8 sk_txrehash; u8 sk_clockid; u8 sk_txtime_deadline_mode : 1, sk_txtime_report_errors : 1, sk_txtime_unused : 6; - bool sk_use_task_frag; - struct socket *sk_socket; void *sk_user_data; #ifdef CONFIG_SECURITY void *sk_security; #endif struct sock_cgroup_data sk_cgrp_data; - struct mem_cgroup *sk_memcg; void (*sk_state_change)(struct sock *sk); - void (*sk_data_ready)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); -#ifdef CONFIG_SOCK_VALIDATE_XMIT - struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, - struct net_device *dev, - struct sk_buff *skb); -#endif void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; #ifdef CONFIG_BPF_SYSCALL @@ -1103,41 +1117,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk) WRITE_ONCE(sk->sk_incoming_cpu, cpu); } -static inline void sock_rps_record_flow_hash(__u32 hash) -{ -#ifdef CONFIG_RPS - struct rps_sock_flow_table *sock_flow_table; - - rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_record_sock_flow(sock_flow_table, hash); - rcu_read_unlock(); -#endif -} - -static inline void sock_rps_record_flow(const struct sock *sk) -{ -#ifdef CONFIG_RPS - if (static_branch_unlikely(&rfs_needed)) { - /* Reading sk->sk_rxhash might incur an expensive cache line - * miss. - * - * TCP_ESTABLISHED does cover almost all states where RFS - * might be useful, and is cheaper [1] than testing : - * IPv4: inet_sk(sk)->inet_daddr - * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) - * OR an additional socket flag - * [1] : sk_state and sk_prot are in the same cache line. - */ - if (sk->sk_state == TCP_ESTABLISHED) { - /* This READ_ONCE() is paired with the WRITE_ONCE() - * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). - */ - sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); - } - } -#endif -} static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) @@ -1429,6 +1408,7 @@ sk_memory_allocated(const struct sock *sk) /* 1 MB per cpu, in page units */ #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) +extern int sysctl_mem_pcpu_rsv; static inline void sk_memory_allocated_add(struct sock *sk, int amt) @@ -1437,7 +1417,7 @@ sk_memory_allocated_add(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= SK_MEMORY_PCPU_RESERVE) { + if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } @@ -1451,7 +1431,7 @@ sk_memory_allocated_sub(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) { + if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } @@ -2830,31 +2810,6 @@ sk_is_refcounted(struct sock *sk) return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE); } -/** - * skb_steal_sock - steal a socket from an sk_buff - * @skb: sk_buff to steal the socket from - * @refcounted: is set to true if the socket is reference-counted - * @prefetched: is set to true if the socket was assigned from bpf - */ -static inline struct sock * -skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched) -{ - if (skb->sk) { - struct sock *sk = skb->sk; - - *refcounted = true; - *prefetched = skb_sk_is_prefetched(skb); - if (*prefetched) - *refcounted = sk_is_refcounted(sk); - skb->destructor = NULL; - skb->sk = NULL; - return sk; - } - *prefetched = false; - *refcounted = false; - return NULL; -} - /* Checks if this SKB belongs to an HW offloaded socket * and whether any SW fallbacks are required based on dev. * Check decrypted mark in case skb_orphan() cleared socket. |