diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-07-20 21:34:24 +0200 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-07-20 21:34:24 +0200 |
commit | 7998c0ad4c89cbb0bcd1594d9f989c8a5853daf7 (patch) | |
tree | bc0434cfbd9a1d6ecf48ff10f073e031e60808e9 | |
parent | Merge branch 'net-support-stp-on-bridge-in-non-root-netns' (diff) | |
parent | tcp: annotate data-races around fastopenq.max_qlen (diff) | |
download | linux-7998c0ad4c89cbb0bcd1594d9f989c8a5853daf7.tar.xz linux-7998c0ad4c89cbb0bcd1594d9f989c8a5853daf7.zip |
Merge branch 'tcp-add-missing-annotations'
Eric Dumazet says:
====================
tcp: add missing annotations
This series was inspired by one syzbot (KCSAN) report.
do_tcp_getsockopt() does not lock the socket, we need to
annotate most of the reads there (and other places as well).
This is a first round, another series will come later.
====================
Link: https://lore.kernel.org/r/20230719212857.3943972-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | include/linux/tcp.h | 2 | ||||
-rw-r--r-- | include/net/tcp.h | 31 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 57 | ||||
-rw-r--r-- | net/ipv4/tcp_fastopen.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 5 |
6 files changed, 63 insertions, 40 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b4c08ac86983..91a37c99ba66 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -513,7 +513,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog) struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn); - queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn); + WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn)); } static inline void tcp_move_syn(struct tcp_sock *tp, diff --git a/include/net/tcp.h b/include/net/tcp.h index 226bce6d1e8c..0ca972ebd3dd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1509,25 +1509,38 @@ void tcp_leave_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); + int val; - return tp->keepalive_intvl ? : - READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); + /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl() + * and do_tcp_setsockopt(). + */ + val = READ_ONCE(tp->keepalive_intvl); + + return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); } static inline int keepalive_time_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); + int val; - return tp->keepalive_time ? : - READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); + /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */ + val = READ_ONCE(tp->keepalive_time); + + return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } static inline int keepalive_probes(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); + int val; - return tp->keepalive_probes ? : - READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); + /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt() + * and do_tcp_setsockopt(). + */ + val = READ_ONCE(tp->keepalive_probes); + + return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) @@ -2048,7 +2061,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); + u32 val; + + val = READ_ONCE(tp->notsent_lowat); + + return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } bool tcp_stream_memory_free(const struct sock *sk, int wake); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0cc19cfbb673..aeebe8816689 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1019,7 +1019,7 @@ static void reqsk_timer_handler(struct timer_list *t) icsk = inet_csk(sk_listener); net = sock_net(sk_listener); - max_syn_ack_retries = icsk->icsk_syn_retries ? : + max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? : READ_ONCE(net->ipv4.sysctl_tcp_synack_retries); /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e03e08745308..8ed52e1e3c99 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3291,7 +3291,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val) return -EINVAL; lock_sock(sk); - inet_csk(sk)->icsk_syn_retries = val; + WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val); release_sock(sk); return 0; } @@ -3300,7 +3300,7 @@ EXPORT_SYMBOL(tcp_sock_set_syncnt); void tcp_sock_set_user_timeout(struct sock *sk, u32 val) { lock_sock(sk); - inet_csk(sk)->icsk_user_timeout = val; + WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val); release_sock(sk); } EXPORT_SYMBOL(tcp_sock_set_user_timeout); @@ -3312,7 +3312,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val) if (val < 1 || val > MAX_TCP_KEEPIDLE) return -EINVAL; - tp->keepalive_time = val * HZ; + /* Paired with WRITE_ONCE() in keepalive_time_when() */ + WRITE_ONCE(tp->keepalive_time, val * HZ); if (sock_flag(sk, SOCK_KEEPOPEN) && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { u32 elapsed = keepalive_time_elapsed(tp); @@ -3344,7 +3345,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val) return -EINVAL; lock_sock(sk); - tcp_sk(sk)->keepalive_intvl = val * HZ; + WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ); release_sock(sk); return 0; } @@ -3356,7 +3357,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val) return -EINVAL; lock_sock(sk); - tcp_sk(sk)->keepalive_probes = val; + /* Paired with READ_ONCE() in keepalive_probes() */ + WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val); release_sock(sk); return 0; } @@ -3558,19 +3560,19 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, if (val < 1 || val > MAX_TCP_KEEPINTVL) err = -EINVAL; else - tp->keepalive_intvl = val * HZ; + WRITE_ONCE(tp->keepalive_intvl, val * HZ); break; case TCP_KEEPCNT: if (val < 1 || val > MAX_TCP_KEEPCNT) err = -EINVAL; else - tp->keepalive_probes = val; + WRITE_ONCE(tp->keepalive_probes, val); break; case TCP_SYNCNT: if (val < 1 || val > MAX_TCP_SYNCNT) err = -EINVAL; else - icsk->icsk_syn_retries = val; + WRITE_ONCE(icsk->icsk_syn_retries, val); break; case TCP_SAVE_SYN: @@ -3583,18 +3585,18 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_LINGER2: if (val < 0) - tp->linger2 = -1; + WRITE_ONCE(tp->linger2, -1); else if (val > TCP_FIN_TIMEOUT_MAX / HZ) - tp->linger2 = TCP_FIN_TIMEOUT_MAX; + WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX); else - tp->linger2 = val * HZ; + WRITE_ONCE(tp->linger2, val * HZ); break; case TCP_DEFER_ACCEPT: /* Translate value in seconds to number of retransmits */ - icsk->icsk_accept_queue.rskq_defer_accept = - secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, - TCP_RTO_MAX / HZ); + WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, + secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ)); break; case TCP_WINDOW_CLAMP: @@ -3618,7 +3620,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, if (val < 0) err = -EINVAL; else - icsk->icsk_user_timeout = val; + WRITE_ONCE(icsk->icsk_user_timeout, val); break; case TCP_FASTOPEN: @@ -3656,13 +3658,13 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, if (!tp->repair) err = -EPERM; else - tp->tsoffset = val - tcp_time_stamp_raw(); + WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw()); break; case TCP_REPAIR_WINDOW: err = tcp_repair_set_window(tp, optval, optlen); break; case TCP_NOTSENT_LOWAT: - tp->notsent_lowat = val; + WRITE_ONCE(tp->notsent_lowat, val); sk->sk_write_space(sk); break; case TCP_INQ: @@ -3674,7 +3676,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_TX_DELAY: if (val) tcp_enable_tx_delay(); - tp->tcp_tx_delay = val; + WRITE_ONCE(tp->tcp_tx_delay, val); break; default: err = -ENOPROTOOPT; @@ -3991,17 +3993,18 @@ int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : + val = READ_ONCE(icsk->icsk_syn_retries) ? : READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); break; case TCP_LINGER2: - val = tp->linger2; + val = READ_ONCE(tp->linger2); if (val >= 0) val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; break; case TCP_DEFER_ACCEPT: - val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, - TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); + val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept); + val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; @@ -4138,11 +4141,11 @@ int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_USER_TIMEOUT: - val = icsk->icsk_user_timeout; + val = READ_ONCE(icsk->icsk_user_timeout); break; case TCP_FASTOPEN: - val = icsk->icsk_accept_queue.fastopenq.max_qlen; + val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen); break; case TCP_FASTOPEN_CONNECT: @@ -4154,14 +4157,14 @@ int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_TX_DELAY: - val = tp->tcp_tx_delay; + val = READ_ONCE(tp->tcp_tx_delay); break; case TCP_TIMESTAMP: - val = tcp_time_stamp_raw() + tp->tsoffset; + val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset); break; case TCP_NOTSENT_LOWAT: - val = tp->notsent_lowat; + val = READ_ONCE(tp->notsent_lowat); break; case TCP_INQ: val = tp->recvmsg_inq; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 45cc7f1ca296..85e4953f1182 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -296,6 +296,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, static bool tcp_fastopen_queue_check(struct sock *sk) { struct fastopen_queue *fastopenq; + int max_qlen; /* Make sure the listener has enabled fastopen, and we don't * exceed the max # of pending TFO requests allowed before trying @@ -308,10 +309,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * temporarily vs a server not supporting Fast Open at all. */ fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; - if (fastopenq->max_qlen == 0) + max_qlen = READ_ONCE(fastopenq->max_qlen); + if (max_qlen == 0) return false; - if (fastopenq->qlen >= fastopenq->max_qlen) { + if (fastopenq->qlen >= max_qlen) { struct request_sock *req1; spin_lock(&fastopenq->lock); req1 = fastopenq->rskq_rst_head; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b5c81cf5b86f..069642014636 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -307,8 +307,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, usin->sin_port)); - tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr, - inet->inet_daddr); + WRITE_ONCE(tp->tsoffset, + secure_tcp_ts_off(net, inet->inet_saddr, + inet->inet_daddr)); } inet->inet_id = get_random_u16(); |