diff options
author | Eric Dumazet <edumazet@google.com> | 2023-10-18 11:00:13 +0200 |
---|---|---|
committer | Paolo Abeni <pabeni@redhat.com> | 2023-10-19 13:13:13 +0200 |
commit | 878d951c6712b655c38e78ac1ee63c35cd913b22 (patch) | |
tree | 21e6d2da284a82ad41e971ccaf1ef3ed75bc6c17 | |
parent | Merge branch 'net-stmmac-use-correct-pps-input-indexing' (diff) | |
download | linux-878d951c6712b655c38e78ac1ee63c35cd913b22.tar.xz linux-878d951c6712b655c38e78ac1ee63c35cd913b22.zip |
inet: lock the socket in ip_sock_set_tos()
Christoph Paasch reported a panic in TCP stack [1]
Indeed, we should not call sk_dst_reset() without holding
the socket lock, as __sk_dst_get() callers do not all rely
on bare RCU.
[1]
BUG: kernel NULL pointer dereference, address: 0000000000000000
PGD 12bad6067 P4D 12bad6067 PUD 12bad5067 PMD 0
Oops: 0000 [#1] PREEMPT SMP
CPU: 1 PID: 2750 Comm: syz-executor.5 Not tainted 6.6.0-rc4-g7a5720a344e7 #49
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
RIP: 0010:tcp_get_metrics+0x118/0x8f0 net/ipv4/tcp_metrics.c:321
Code: c7 44 24 70 02 00 8b 03 89 44 24 48 c7 44 24 4c 00 00 00 00 66 c7 44 24 58 02 00 66 ba 02 00 b1 01 89 4c 24 04 4c 89 7c 24 10 <49> 8b 0f 48 8b 89 50 05 00 00 48 89 4c 24 30 33 81 00 02 00 00 69
RSP: 0018:ffffc90000af79b8 EFLAGS: 00010293
RAX: 000000000100007f RBX: ffff88812ae8f500 RCX: ffff88812b5f8f01
RDX: 0000000000000002 RSI: ffffffff8300f080 RDI: 0000000000000002
RBP: 0000000000000002 R08: 0000000000000003 R09: ffffffff8205eca0
R10: 0000000000000002 R11: ffff88812b5f8f00 R12: ffff88812a9e0580
R13: 0000000000000000 R14: ffff88812ae8fbd2 R15: 0000000000000000
FS: 00007f70a006b640(0000) GS:ffff88813bd00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 000000012bad7003 CR4: 0000000000170ee0
Call Trace:
<TASK>
tcp_fastopen_cache_get+0x32/0x140 net/ipv4/tcp_metrics.c:567
tcp_fastopen_cookie_check+0x28/0x180 net/ipv4/tcp_fastopen.c:419
tcp_connect+0x9c8/0x12a0 net/ipv4/tcp_output.c:3839
tcp_v4_connect+0x645/0x6e0 net/ipv4/tcp_ipv4.c:323
__inet_stream_connect+0x120/0x590 net/ipv4/af_inet.c:676
tcp_sendmsg_fastopen+0x2d6/0x3a0 net/ipv4/tcp.c:1021
tcp_sendmsg_locked+0x1957/0x1b00 net/ipv4/tcp.c:1073
tcp_sendmsg+0x30/0x50 net/ipv4/tcp.c:1336
__sock_sendmsg+0x83/0xd0 net/socket.c:730
__sys_sendto+0x20a/0x2a0 net/socket.c:2194
__do_sys_sendto net/socket.c:2206 [inline]
Fixes: e08d0b3d1723 ("inet: implement lockless IP_TOS")
Reported-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20231018090014.345158-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r-- | include/net/ip.h | 1 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 11 | ||||
-rw-r--r-- | net/mptcp/sockopt.c | 4 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 |
4 files changed, 13 insertions, 5 deletions
diff --git a/include/net/ip.h b/include/net/ip.h index 6fbc0dcf4b97..1fc4c8d69e33 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -810,5 +810,6 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val); void ip_sock_set_pktinfo(struct sock *sk); void ip_sock_set_recverr(struct sock *sk); void ip_sock_set_tos(struct sock *sk, int val); +void __ip_sock_set_tos(struct sock *sk, int val); #endif /* _IP_H */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0b74ac49d6a6..9c68b6b74d9f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -585,9 +585,9 @@ out: return err; } -void ip_sock_set_tos(struct sock *sk, int val) +void __ip_sock_set_tos(struct sock *sk, int val) { - u8 old_tos = READ_ONCE(inet_sk(sk)->tos); + u8 old_tos = inet_sk(sk)->tos; if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -599,6 +599,13 @@ void ip_sock_set_tos(struct sock *sk, int val) sk_dst_reset(sk); } } + +void ip_sock_set_tos(struct sock *sk, int val) +{ + lock_sock(sk); + __ip_sock_set_tos(sk, val); + release_sock(sk); +} EXPORT_SYMBOL(ip_sock_set_tos); void ip_sock_set_freebind(struct sock *sk) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 18ce624bfde2..59bd5e114392 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -738,7 +738,7 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - ip_sock_set_tos(ssk, val); + __ip_sock_set_tos(ssk, val); } release_sock(sk); @@ -1411,7 +1411,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) ssk->sk_bound_dev_if = sk->sk_bound_dev_if; ssk->sk_incoming_cpu = sk->sk_incoming_cpu; ssk->sk_ipv6only = sk->sk_ipv6only; - ip_sock_set_tos(ssk, inet_sk(sk)->tos); + __ip_sock_set_tos(ssk, inet_sk(sk)->tos); if (sk->sk_userlocks & tx_rx_locks) { ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 61a2a1988ce6..b1fc8afd072d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -716,7 +716,7 @@ run_test_transparent() # the required infrastructure in MPTCP sockopt code. To support TOS, the # following function has been exported (T). Not great but better than # checking for a specific kernel version. - if ! mptcp_lib_kallsyms_has "T ip_sock_set_tos$"; then + if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then echo "INFO: ${msg} not supported by the kernel: SKIP" mptcp_lib_result_skip "${TEST_GROUP}" return |