From e40b801b3603a8f90b46acbacdea3505c27f01c0 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Thu, 16 Feb 2023 14:37:36 +0800 Subject: net/smc: fix potential panic dues to unprotected smc_llc_srv_add_link() There is a certain chance to trigger the following panic: PID: 5900 TASK: ffff88c1c8af4100 CPU: 1 COMMAND: "kworker/1:48" #0 [ffff9456c1cc79a0] machine_kexec at ffffffff870665b7 #1 [ffff9456c1cc79f0] __crash_kexec at ffffffff871b4c7a #2 [ffff9456c1cc7ab0] crash_kexec at ffffffff871b5b60 #3 [ffff9456c1cc7ac0] oops_end at ffffffff87026ce7 #4 [ffff9456c1cc7ae0] page_fault_oops at ffffffff87075715 #5 [ffff9456c1cc7b58] exc_page_fault at ffffffff87ad0654 #6 [ffff9456c1cc7b80] asm_exc_page_fault at ffffffff87c00b62 [exception RIP: ib_alloc_mr+19] RIP: ffffffffc0c9cce3 RSP: ffff9456c1cc7c38 RFLAGS: 00010202 RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000000004 RDX: 0000000000000010 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88c1ea281d00 R8: 000000020a34ffff R9: ffff88c1350bbb20 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000010 R14: ffff88c1ab040a50 R15: ffff88c1ea281d00 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffff9456c1cc7c60] smc_ib_get_memory_region at ffffffffc0aff6df [smc] #8 [ffff9456c1cc7c88] smcr_buf_map_link at ffffffffc0b0278c [smc] #9 [ffff9456c1cc7ce0] __smc_buf_create at ffffffffc0b03586 [smc] The reason here is that when the server tries to create a second link, smc_llc_srv_add_link() has no protection and may add a new link to link group. This breaks the security environment protected by llc_conf_mutex. Fixes: 2d2209f20189 ("net/smc: first part of add link processing as SMC server") Signed-off-by: D. Wythe Reviewed-by: Larysa Zaremba Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller --- net/smc/af_smc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e12d4fa5aece..d9413d43b104 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1826,8 +1826,10 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) smc_llc_link_active(link); smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); + mutex_lock(&link->lgr->llc_conf_mutex); /* initial contact - try to establish second link */ smc_llc_srv_add_link(link, NULL); + mutex_unlock(&link->lgr->llc_conf_mutex); return 0; } -- cgit v1.2.3 From 475f9ff63ee8c296aa46c6e9e9ad9bdd301c6bdf Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Thu, 16 Feb 2023 14:39:05 +0800 Subject: net/smc: fix application data exception There is a certain probability that following exceptions will occur in the wrk benchmark test: Running 10s test @ http://11.213.45.6:80 8 threads and 64 connections Thread Stats Avg Stdev Max +/- Stdev Latency 3.72ms 13.94ms 245.33ms 94.17% Req/Sec 1.96k 713.67 5.41k 75.16% 155262 requests in 10.10s, 23.10MB read Non-2xx or 3xx responses: 3 We will find that the error is HTTP 400 error, which is a serious exception in our test, which means the application data was corrupted. Consider the following scenarios: CPU0 CPU1 buf_desc->used = 0; cmpxchg(buf_desc->used, 0, 1) deal_with(buf_desc) memset(buf_desc->cpu_addr,0); This will cause the data received by a victim connection to be cleared, thus triggering an HTTP 400 error in the server. This patch exchange the order between clear used and memset, add barrier to ensure memory consistency. Fixes: 1c5526968e27 ("net/smc: Clear memory when release and reuse buffer") Signed-off-by: D. Wythe Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller --- net/smc/smc_core.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index c305d8dd23f8..c19d4b7c1f28 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1120,8 +1120,9 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb, smc_buf_free(lgr, is_rmb, buf_desc); } else { - buf_desc->used = 0; - memset(buf_desc->cpu_addr, 0, buf_desc->len); + /* memzero_explicit provides potential memory barrier semantics */ + memzero_explicit(buf_desc->cpu_addr, buf_desc->len); + WRITE_ONCE(buf_desc->used, 0); } } @@ -1132,19 +1133,17 @@ static void smc_buf_unuse(struct smc_connection *conn, if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) { smcr_buf_unuse(conn->sndbuf_desc, false, lgr); } else { - conn->sndbuf_desc->used = 0; - memset(conn->sndbuf_desc->cpu_addr, 0, - conn->sndbuf_desc->len); + memzero_explicit(conn->sndbuf_desc->cpu_addr, conn->sndbuf_desc->len); + WRITE_ONCE(conn->sndbuf_desc->used, 0); } } if (conn->rmb_desc) { if (!lgr->is_smcd) { smcr_buf_unuse(conn->rmb_desc, true, lgr); } else { - conn->rmb_desc->used = 0; - memset(conn->rmb_desc->cpu_addr, 0, - conn->rmb_desc->len + - sizeof(struct smcd_cdc_msg)); + memzero_explicit(conn->rmb_desc->cpu_addr, + conn->rmb_desc->len + sizeof(struct smcd_cdc_msg)); + WRITE_ONCE(conn->rmb_desc->used, 0); } } } -- cgit v1.2.3 From 9ca5e7ecab064f1f47da07f7c1ddf40e4bc0e5ac Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Fri, 17 Feb 2023 01:37:10 +0900 Subject: l2tp: Avoid possible recursive deadlock in l2tp_tunnel_register() When a file descriptor of pppol2tp socket is passed as file descriptor of UDP socket, a recursive deadlock occurs in l2tp_tunnel_register(). This situation is reproduced by the following program: int main(void) { int sock; struct sockaddr_pppol2tp addr; sock = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP); if (sock < 0) { perror("socket"); return 1; } addr.sa_family = AF_PPPOX; addr.sa_protocol = PX_PROTO_OL2TP; addr.pppol2tp.pid = 0; addr.pppol2tp.fd = sock; addr.pppol2tp.addr.sin_family = PF_INET; addr.pppol2tp.addr.sin_port = htons(0); addr.pppol2tp.addr.sin_addr.s_addr = inet_addr("192.168.0.1"); addr.pppol2tp.s_tunnel = 1; addr.pppol2tp.s_session = 0; addr.pppol2tp.d_tunnel = 0; addr.pppol2tp.d_session = 0; if (connect(sock, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { perror("connect"); return 1; } return 0; } This program causes the following lockdep warning: ============================================ WARNING: possible recursive locking detected 6.2.0-rc5-00205-gc96618275234 #56 Not tainted -------------------------------------------- repro/8607 is trying to acquire lock: ffff8880213c8130 (sk_lock-AF_PPPOX){+.+.}-{0:0}, at: l2tp_tunnel_register+0x2b7/0x11c0 but task is already holding lock: ffff8880213c8130 (sk_lock-AF_PPPOX){+.+.}-{0:0}, at: pppol2tp_connect+0xa82/0x1a30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(sk_lock-AF_PPPOX); lock(sk_lock-AF_PPPOX); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by repro/8607: #0: ffff8880213c8130 (sk_lock-AF_PPPOX){+.+.}-{0:0}, at: pppol2tp_connect+0xa82/0x1a30 stack backtrace: CPU: 0 PID: 8607 Comm: repro Not tainted 6.2.0-rc5-00205-gc96618275234 #56 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 Call Trace: dump_stack_lvl+0x100/0x178 __lock_acquire.cold+0x119/0x3b9 ? lockdep_hardirqs_on_prepare+0x410/0x410 lock_acquire+0x1e0/0x610 ? l2tp_tunnel_register+0x2b7/0x11c0 ? lock_downgrade+0x710/0x710 ? __fget_files+0x283/0x3e0 lock_sock_nested+0x3a/0xf0 ? l2tp_tunnel_register+0x2b7/0x11c0 l2tp_tunnel_register+0x2b7/0x11c0 ? sprintf+0xc4/0x100 ? l2tp_tunnel_del_work+0x6b0/0x6b0 ? debug_object_deactivate+0x320/0x320 ? lockdep_init_map_type+0x16d/0x7a0 ? lockdep_init_map_type+0x16d/0x7a0 ? l2tp_tunnel_create+0x2bf/0x4b0 ? l2tp_tunnel_create+0x3c6/0x4b0 pppol2tp_connect+0x14e1/0x1a30 ? pppol2tp_put_sk+0xd0/0xd0 ? aa_sk_perm+0x2b7/0xa80 ? aa_af_perm+0x260/0x260 ? bpf_lsm_socket_connect+0x9/0x10 ? pppol2tp_put_sk+0xd0/0xd0 __sys_connect_file+0x14f/0x190 __sys_connect+0x133/0x160 ? __sys_connect_file+0x190/0x190 ? lockdep_hardirqs_on+0x7d/0x100 ? ktime_get_coarse_real_ts64+0x1b7/0x200 ? ktime_get_coarse_real_ts64+0x147/0x200 ? __audit_syscall_entry+0x396/0x500 __x64_sys_connect+0x72/0xb0 do_syscall_64+0x38/0xb0 entry_SYSCALL_64_after_hwframe+0x63/0xcd This patch fixes the issue by getting/creating the tunnel before locking the pppol2tp socket. Fixes: 0b2c59720e65 ("l2tp: close all race conditions in l2tp_tunnel_register()") Cc: Cong Wang Signed-off-by: Shigeru Yoshida Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 125 ++++++++++++++++++++++++++++------------------------ 1 file changed, 67 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index db2e584c625e..f011af6601c9 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -650,54 +650,22 @@ static int pppol2tp_tunnel_mtu(const struct l2tp_tunnel *tunnel) return mtu - PPPOL2TP_HEADER_OVERHEAD; } -/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket - */ -static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, - int sockaddr_len, int flags) +static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net, + const struct l2tp_connect_info *info, + bool *new_tunnel) { - struct sock *sk = sock->sk; - struct pppox_sock *po = pppox_sk(sk); - struct l2tp_session *session = NULL; - struct l2tp_connect_info info; struct l2tp_tunnel *tunnel; - struct pppol2tp_session *ps; - struct l2tp_session_cfg cfg = { 0, }; - bool drop_refcnt = false; - bool drop_tunnel = false; - bool new_session = false; - bool new_tunnel = false; int error; - error = pppol2tp_sockaddr_get_info(uservaddr, sockaddr_len, &info); - if (error < 0) - return error; + *new_tunnel = false; - lock_sock(sk); - - /* Check for already bound sockets */ - error = -EBUSY; - if (sk->sk_state & PPPOX_CONNECTED) - goto end; - - /* We don't supporting rebinding anyway */ - error = -EALREADY; - if (sk->sk_user_data) - goto end; /* socket is already attached */ - - /* Don't bind if tunnel_id is 0 */ - error = -EINVAL; - if (!info.tunnel_id) - goto end; - - tunnel = l2tp_tunnel_get(sock_net(sk), info.tunnel_id); - if (tunnel) - drop_tunnel = true; + tunnel = l2tp_tunnel_get(net, info->tunnel_id); /* Special case: create tunnel context if session_id and * peer_session_id is 0. Otherwise look up tunnel using supplied * tunnel id. */ - if (!info.session_id && !info.peer_session_id) { + if (!info->session_id && !info->peer_session_id) { if (!tunnel) { struct l2tp_tunnel_cfg tcfg = { .encap = L2TP_ENCAPTYPE_UDP, @@ -706,40 +674,82 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, /* Prevent l2tp_tunnel_register() from trying to set up * a kernel socket. */ - if (info.fd < 0) { - error = -EBADF; - goto end; - } + if (info->fd < 0) + return ERR_PTR(-EBADF); - error = l2tp_tunnel_create(info.fd, - info.version, - info.tunnel_id, - info.peer_tunnel_id, &tcfg, + error = l2tp_tunnel_create(info->fd, + info->version, + info->tunnel_id, + info->peer_tunnel_id, &tcfg, &tunnel); if (error < 0) - goto end; + return ERR_PTR(error); l2tp_tunnel_inc_refcount(tunnel); - error = l2tp_tunnel_register(tunnel, sock_net(sk), - &tcfg); + error = l2tp_tunnel_register(tunnel, net, &tcfg); if (error < 0) { kfree(tunnel); - goto end; + return ERR_PTR(error); } - drop_tunnel = true; - new_tunnel = true; + + *new_tunnel = true; } } else { /* Error if we can't find the tunnel */ - error = -ENOENT; if (!tunnel) - goto end; + return ERR_PTR(-ENOENT); /* Error if socket is not prepped */ - if (!tunnel->sock) - goto end; + if (!tunnel->sock) { + l2tp_tunnel_dec_refcount(tunnel); + return ERR_PTR(-ENOENT); + } } + return tunnel; +} + +/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket + */ +static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len, int flags) +{ + struct sock *sk = sock->sk; + struct pppox_sock *po = pppox_sk(sk); + struct l2tp_session *session = NULL; + struct l2tp_connect_info info; + struct l2tp_tunnel *tunnel; + struct pppol2tp_session *ps; + struct l2tp_session_cfg cfg = { 0, }; + bool drop_refcnt = false; + bool new_session = false; + bool new_tunnel = false; + int error; + + error = pppol2tp_sockaddr_get_info(uservaddr, sockaddr_len, &info); + if (error < 0) + return error; + + /* Don't bind if tunnel_id is 0 */ + if (!info.tunnel_id) + return -EINVAL; + + tunnel = pppol2tp_tunnel_get(sock_net(sk), &info, &new_tunnel); + if (IS_ERR(tunnel)) + return PTR_ERR(tunnel); + + lock_sock(sk); + + /* Check for already bound sockets */ + error = -EBUSY; + if (sk->sk_state & PPPOX_CONNECTED) + goto end; + + /* We don't supporting rebinding anyway */ + error = -EALREADY; + if (sk->sk_user_data) + goto end; /* socket is already attached */ + if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = info.peer_tunnel_id; @@ -840,8 +850,7 @@ end: } if (drop_refcnt) l2tp_session_dec_refcount(session); - if (drop_tunnel) - l2tp_tunnel_dec_refcount(tunnel); + l2tp_tunnel_dec_refcount(tunnel); release_sock(sk); return error; -- cgit v1.2.3