summaryrefslogtreecommitdiffstats
path: root/include/net/sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--include/net/sock.h176
1 files changed, 132 insertions, 44 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 255d3e03727b..649d2a8c17fc 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -178,7 +178,7 @@ struct sock_common {
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
- struct hlist_nulls_node skc_portaddr_node;
+ struct hlist_node skc_portaddr_node;
};
struct proto *skc_prot;
possible_net_t skc_net;
@@ -382,8 +382,13 @@ struct sock {
atomic_t sk_omem_alloc;
int sk_sndbuf;
struct sk_buff_head sk_write_queue;
+
+ /*
+ * Because of non atomicity rules, all
+ * changes are protected by socket lock.
+ */
kmemcheck_bitfield_begin(flags);
- unsigned int sk_shutdown : 2,
+ unsigned int sk_padding : 2,
sk_no_check_tx : 1,
sk_no_check_rx : 1,
sk_userlocks : 4,
@@ -391,6 +396,7 @@ struct sock {
sk_type : 16;
#define SK_PROTOCOL_MAX U8_MAX
kmemcheck_bitfield_end(flags);
+
int sk_wmem_queued;
gfp_t sk_allocation;
u32 sk_pacing_rate; /* bytes per second */
@@ -418,6 +424,7 @@ struct sock {
struct timer_list sk_timer;
ktime_t sk_stamp;
u16 sk_tsflags;
+ u8 sk_shutdown;
u32 sk_tskey;
struct socket *sk_socket;
void *sk_user_data;
@@ -438,6 +445,7 @@ struct sock {
struct sk_buff *skb);
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
+ struct rcu_head sk_rcu;
};
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
@@ -456,28 +464,32 @@ struct sock {
#define SK_CAN_REUSE 1
#define SK_FORCE_REUSE 2
+int sk_set_peek_off(struct sock *sk, int val);
+
static inline int sk_peek_offset(struct sock *sk, int flags)
{
- if ((flags & MSG_PEEK) && (sk->sk_peek_off >= 0))
- return sk->sk_peek_off;
- else
- return 0;
+ if (unlikely(flags & MSG_PEEK)) {
+ s32 off = READ_ONCE(sk->sk_peek_off);
+ if (off >= 0)
+ return off;
+ }
+
+ return 0;
}
static inline void sk_peek_offset_bwd(struct sock *sk, int val)
{
- if (sk->sk_peek_off >= 0) {
- if (sk->sk_peek_off >= val)
- sk->sk_peek_off -= val;
- else
- sk->sk_peek_off = 0;
+ s32 off = READ_ONCE(sk->sk_peek_off);
+
+ if (unlikely(off >= 0)) {
+ off = max_t(s32, off - val, 0);
+ WRITE_ONCE(sk->sk_peek_off, off);
}
}
static inline void sk_peek_offset_fwd(struct sock *sk, int val)
{
- if (sk->sk_peek_off >= 0)
- sk->sk_peek_off += val;
+ sk_peek_offset_bwd(sk, -val);
}
/*
@@ -564,7 +576,7 @@ static inline bool __sk_del_node_init(struct sock *sk)
modifications.
*/
-static inline void sock_hold(struct sock *sk)
+static __always_inline void sock_hold(struct sock *sk)
{
atomic_inc(&sk->sk_refcnt);
}
@@ -572,7 +584,7 @@ static inline void sock_hold(struct sock *sk)
/* Ungrab socket in the context, which assumes that socket refcnt
cannot hit zero, f.e. it is true in context of any socketcall.
*/
-static inline void __sock_put(struct sock *sk)
+static __always_inline void __sock_put(struct sock *sk)
{
atomic_dec(&sk->sk_refcnt);
}
@@ -625,12 +637,20 @@ static inline void sk_add_node(struct sock *sk, struct hlist_head *list)
static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
{
sock_hold(sk);
- hlist_add_head_rcu(&sk->sk_node, list);
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+ hlist_add_tail_rcu(&sk->sk_node, list);
+ else
+ hlist_add_head_rcu(&sk->sk_node, list);
}
static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
- hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
+ else
+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
}
static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
@@ -669,18 +689,18 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_for_each_entry(__sk, list, sk_bind_node)
/**
- * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset
+ * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_node to use as a loop cursor.
* @head: the head for your list.
* @offset: offset of hlist_node within the struct.
*
*/
-#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \
- for (pos = (head)->first; \
- (!is_a_nulls(pos)) && \
+#define sk_for_each_entry_offset_rcu(tpos, pos, head, offset) \
+ for (pos = rcu_dereference((head)->first); \
+ pos != NULL && \
({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
- pos = pos->next)
+ pos = rcu_dereference(pos->next))
static inline struct user_namespace *sk_user_ns(struct sock *sk)
{
@@ -720,6 +740,7 @@ enum sock_flags {
*/
SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
+ SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
};
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -912,6 +933,17 @@ void sk_stream_kill_queues(struct sock *sk);
void sk_set_memalloc(struct sock *sk);
void sk_clear_memalloc(struct sock *sk);
+void __sk_flush_backlog(struct sock *sk);
+
+static inline bool sk_flush_backlog(struct sock *sk)
+{
+ if (unlikely(READ_ONCE(sk->sk_backlog.tail))) {
+ __sk_flush_backlog(sk);
+ return true;
+ }
+ return false;
+}
+
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb);
struct request_sock_ops;
@@ -1310,24 +1342,14 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
-/* Used by processes to "lock" a socket state, so that
- * interrupts and bottom half handlers won't change it
- * from under us. It essentially blocks any incoming
- * packets, so that we won't get any new data or any
- * packets that change the state of the socket.
- *
- * While locked, BH processing will add new packets to
- * the backlog queue. This queue is processed by the
- * owner of the socket lock right before it is released.
- *
- * Since ~2.3.5 it is also exclusive sleep lock serializing
- * accesses from user process context.
- */
-#define sock_owned_by_user(sk) ((sk)->sk_lock.owned)
-
static inline void sock_release_ownership(struct sock *sk)
{
- sk->sk_lock.owned = 0;
+ if (sk->sk_lock.owned) {
+ sk->sk_lock.owned = 0;
+
+ /* The sk_lock has mutex_unlock() semantics: */
+ mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+ }
}
/*
@@ -1349,6 +1371,16 @@ do { \
lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \
} while (0)
+#ifdef CONFIG_LOCKDEP
+static inline bool lockdep_sock_is_held(const struct sock *csk)
+{
+ struct sock *sk = (struct sock *)csk;
+
+ return lockdep_is_held(&sk->sk_lock) ||
+ lockdep_is_held(&sk->sk_lock.slock);
+}
+#endif
+
void lock_sock_nested(struct sock *sk, int subclass);
static inline void lock_sock(struct sock *sk)
@@ -1382,6 +1414,40 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
spin_unlock_bh(&sk->sk_lock.slock);
}
+/* Used by processes to "lock" a socket state, so that
+ * interrupts and bottom half handlers won't change it
+ * from under us. It essentially blocks any incoming
+ * packets, so that we won't get any new data or any
+ * packets that change the state of the socket.
+ *
+ * While locked, BH processing will add new packets to
+ * the backlog queue. This queue is processed by the
+ * owner of the socket lock right before it is released.
+ *
+ * Since ~2.3.5 it is also exclusive sleep lock serializing
+ * accesses from user process context.
+ */
+
+static inline void sock_owned_by_me(const struct sock *sk)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(!lockdep_sock_is_held(sk) && debug_locks);
+#endif
+}
+
+static inline bool sock_owned_by_user(const struct sock *sk)
+{
+ sock_owned_by_me(sk);
+ return sk->sk_lock.owned;
+}
+
+/* no reclassification while locks are held */
+static inline bool sock_allow_reclassification(const struct sock *csk)
+{
+ struct sock *sk = (struct sock *)csk;
+
+ return !sk->sk_lock.owned && !spin_is_locked(&sk->sk_lock.slock);
+}
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot, int kern);
@@ -1391,6 +1457,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority);
+void __sock_wfree(struct sk_buff *skb);
void sock_wfree(struct sk_buff *skb);
void skb_orphan_partial(struct sk_buff *skb);
void sock_rfree(struct sk_buff *skb);
@@ -1418,8 +1485,11 @@ void sk_send_sigurg(struct sock *sk);
struct sockcm_cookie {
u32 mark;
+ u16 tsflags;
};
+int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
+ struct sockcm_cookie *sockc);
int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
struct sockcm_cookie *sockc);
@@ -1584,8 +1654,8 @@ static inline void sk_rethink_txhash(struct sock *sk)
static inline struct dst_entry *
__sk_dst_get(struct sock *sk)
{
- return rcu_dereference_check(sk->sk_dst_cache, sock_owned_by_user(sk) ||
- lockdep_is_held(&sk->sk_lock.slock));
+ return rcu_dereference_check(sk->sk_dst_cache,
+ lockdep_sock_is_held(sk));
}
static inline struct dst_entry *
@@ -1857,6 +1927,7 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer,
void sk_stop_timer(struct sock *sk, struct timer_list *timer);
+int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
@@ -1893,11 +1964,19 @@ static inline unsigned long sock_wspace(struct sock *sk)
*/
static inline void sk_set_bit(int nr, struct sock *sk)
{
+ if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) &&
+ !sock_flag(sk, SOCK_FASYNC))
+ return;
+
set_bit(nr, &sk->sk_wq_raw->flags);
}
static inline void sk_clear_bit(int nr, struct sock *sk)
{
+ if ((nr == SOCKWQ_ASYNC_NOSPACE || nr == SOCKWQ_ASYNC_WAITDATA) &&
+ !sock_flag(sk, SOCK_FASYNC))
+ return;
+
clear_bit(nr, &sk->sk_wq_raw->flags);
}
@@ -2007,6 +2086,13 @@ sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
}
+static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
+{
+ int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+
+ atomic_add(segs, &sk->sk_drops);
+}
+
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb);
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
@@ -2054,19 +2140,21 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
sk->sk_stamp = skb->tstamp;
}
-void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags);
+void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
/**
* sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
* @sk: socket sending this packet
+ * @tsflags: timestamping flags to use
* @tx_flags: completed with instructions for time stamping
*
* Note : callers should take care of initial *tx_flags value (usually 0)
*/
-static inline void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
+static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags,
+ __u8 *tx_flags)
{
- if (unlikely(sk->sk_tsflags))
- __sock_tx_timestamp(sk, tx_flags);
+ if (unlikely(tsflags))
+ __sock_tx_timestamp(tsflags, tx_flags);
if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
*tx_flags |= SKBTX_WIFI_STATUS;
}