From e5e1a4bc916d29958c3b587354293738fcb984d7 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 27 Oct 2020 13:32:01 +0100 Subject: xsk: Fix possible memory leak at socket close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a possible memory leak at xsk socket close that is caused by the refcounting of the umem object being wrong. The reference count of the umem was decremented only after the pool had been freed. Note that if the buffer pool is destroyed, it is important that the umem is destroyed after the pool, otherwise the umem would disappear while the driver is still running. And as the buffer pool needs to be destroyed in a work queue, the umem is also (if its refcount reaches zero) destroyed after the buffer pool in that same work queue. What was missing is that the refcount also needs to be decremented when the pool is not freed and when the pool has not even been created. The first case happens when the refcount of the pool is higher than 1, i.e. it is still being used by some other socket using the same device and queue id. In this case, it is safe to decrement the refcount of the umem outside of the work queue as the umem will never be freed because the refcount of the umem is always greater than or equal to the refcount of the buffer pool. The second case is if the buffer pool has not been created yet, i.e. the socket was closed before it was bound but after the umem was created. In this case, it is safe to destroy the umem outside of the work queue, since there is no pool that can use it by definition. Fixes: 1c1efc2af158 ("xsk: Create and free buffer pool independently from umem") Reported-by: syzbot+eb71df123dc2be2c1456@syzkaller.appspotmail.com Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/1603801921-2712-1-git-send-email-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 3 ++- net/xdp/xsk_buff_pool.c | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index b71a32eeae65..cfbec3989a76 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1146,7 +1146,8 @@ static void xsk_destruct(struct sock *sk) if (!sock_flag(sk, SOCK_DEAD)) return; - xp_put_pool(xs->pool); + if (!xp_put_pool(xs->pool)) + xdp_put_umem(xs->umem); sk_refcnt_debug_dec(sk); } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 64c9e55d4d4e..8a3bf4e1318e 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -251,15 +251,18 @@ void xp_get_pool(struct xsk_buff_pool *pool) refcount_inc(&pool->users); } -void xp_put_pool(struct xsk_buff_pool *pool) +bool xp_put_pool(struct xsk_buff_pool *pool) { if (!pool) - return; + return false; if (refcount_dec_and_test(&pool->users)) { INIT_WORK(&pool->work, xp_release_deferred); schedule_work(&pool->work); + return true; } + + return false; } static struct xsk_dma_map *xp_find_dma_map(struct xsk_buff_pool *pool) -- cgit v1.2.3 From 8ef9ba4d666614497a057d09b0a6eafc1e34eadf Mon Sep 17 00:00:00 2001 From: Oliver Herms Date: Tue, 3 Nov 2020 11:41:33 +0100 Subject: IPv6: Set SIT tunnel hard_header_len to zero Due to the legacy usage of hard_header_len for SIT tunnels while already using infrastructure from net/ipv4/ip_tunnel.c the calculation of the path MTU in tnl_update_pmtu is incorrect. This leads to unnecessary creation of MTU exceptions for any flow going over a SIT tunnel. As SIT tunnels do not have a header themsevles other than their transport (L3, L2) headers we're leaving hard_header_len set to zero as tnl_update_pmtu is already taking care of the transport headers sizes. This will also help avoiding unnecessary IPv6 GC runs and spinlock contention seen when using SIT tunnels and for more than net.ipv6.route.gc_thresh flows. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: Oliver Herms Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20201103104133.GA1573211@tws Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5e2c34c0ac97..5e7983cb6154 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1128,7 +1128,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); - dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); dev->mtu = tdev->mtu - t_hlen; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1426,7 +1425,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->priv_destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; - dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - t_hlen; -- cgit v1.2.3 From 77a2d673d5c9d1d359b5652ff75043273c5dea28 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Fri, 6 Nov 2020 17:59:52 +0100 Subject: tunnels: Fix off-by-one in lower MTU bounds for ICMP/ICMPv6 replies Jianlin reports that a bridged IPv6 VXLAN endpoint, carrying IPv6 packets over a link with a PMTU estimation of exactly 1350 bytes, won't trigger ICMPv6 Packet Too Big replies when the encapsulated datagrams exceed said PMTU value. VXLAN over IPv6 adds 70 bytes of overhead, so an ICMPv6 reply indicating 1280 bytes as inner MTU would be legitimate and expected. This comes from an off-by-one error I introduced in checks added as part of commit 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets"), whose purpose was to prevent sending ICMPv6 Packet Too Big messages with an MTU lower than the smallest permissible IPv6 link MTU, i.e. 1280 bytes. In iptunnel_pmtud_check_icmpv6(), avoid triggering a reply only if the advertised MTU would be less than, and not equal to, 1280 bytes. Also fix the analogous comparison for IPv4, that is, skip the ICMP reply only if the resulting MTU is strictly less than 576 bytes. This becomes apparent while running the net/pmtu.sh bridged VXLAN or GENEVE selftests with adjusted lower-link MTU values. Using e.g. GENEVE, setting ll_mtu to the values reported below, in the test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() test function, we can see failures on the following tests: test | ll_mtu -------------------------------|-------- pmtu_ipv4_br_geneve4_exception | 626 pmtu_ipv6_br_geneve4_exception | 1330 pmtu_ipv6_br_geneve6_exception | 1350 owing to the different tunneling overheads implied by the corresponding configurations. Reported-by: Jianlin Shi Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Signed-off-by: Stefano Brivio Link: https://lore.kernel.org/r/4f5fc2f33bfdf8409549fafd4f952b008bf04d63.1604681709.git.sbrivio@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 25f1caf5abf9..e25be2d01a7a 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -263,7 +263,7 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) const struct icmphdr *icmph = icmp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); - if (mtu <= 576 || iph->frag_off != htons(IP_DF)) + if (mtu < 576 || iph->frag_off != htons(IP_DF)) return 0; if (ipv4_is_lbcast(iph->daddr) || ipv4_is_multicast(iph->daddr) || @@ -359,7 +359,7 @@ static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu) __be16 frag_off; int offset; - if (mtu <= IPV6_MIN_MTU) + if (mtu < IPV6_MIN_MTU) return 0; if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST || -- cgit v1.2.3 From 413691384a37fe27f43460226c4160e33140e638 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 8 Nov 2020 00:46:15 +0000 Subject: ethtool: netlink: add missing netdev_features_change() call After updating userspace Ethtool from 5.7 to 5.9, I noticed that NETDEV_FEAT_CHANGE is no more raised when changing netdev features through Ethtool. That's because the old Ethtool ioctl interface always calls netdev_features_change() at the end of user request processing to inform the kernel that our netdevice has some features changed, but the new Netlink interface does not. Instead, it just notifies itself with ETHTOOL_MSG_FEATURES_NTF. Replace this ethtool_notify() call with netdev_features_change(), so the kernel will be aware of any features changes, just like in case with the ioctl interface. This does not omit Ethtool notifications, as Ethtool itself listens to NETDEV_FEAT_CHANGE and drops ETHTOOL_MSG_FEATURES_NTF on it (net/ethtool/netlink.c:ethnl_netdev_event()). From v1 [1]: - dropped extra new line as advised by Jakub; - no functional changes. [1] https://lore.kernel.org/netdev/AlZXQ2o5uuTVHCfNGOiGgJ8vJ3KgO5YIWAnQjH0cDE@cp3-web-009.plabs.ch Fixes: 0980bfcd6954 ("ethtool: set netdev features with FEATURES_SET request") Signed-off-by: Alexander Lobakin Reviewed-by: Michal Kubecek Link: https://lore.kernel.org/r/ahA2YWXYICz5rbUSQqNG4roJ8OlJzzYQX7PTiG80@cp4-web-028.plabs.ch Signed-off-by: Jakub Kicinski --- net/ethtool/features.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ethtool/features.c b/net/ethtool/features.c index 8ee4cdbd6b82..1c9f4df273bd 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -280,7 +280,7 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) active_diff_mask, compact); } if (mod) - ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); + netdev_features_change(dev); out_rtnl: rtnl_unlock(); -- cgit v1.2.3 From 989ef49bdf100cc772b3a8737089df36b1ab1e30 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sun, 8 Nov 2020 19:49:59 +0100 Subject: mptcp: provide rmem[0] limit The mptcp proto struct currently does not provide the required limit for forward memory scheduling. Under pressure sk_rmem_schedule() will unconditionally try to use such field and will oops. Address the issue inheriting the tcp limit, as we already do for the wmem one. Fixes: 9c3f94e1681b ("mptcp: add missing memory scheduling in the rx path") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Link: https://lore.kernel.org/r/37af798bd46f402fb7c79f57ebbdd00614f5d7fa.1604861097.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e7419fd15d84..88f2a7a0ccb8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2467,6 +2467,7 @@ static struct proto mptcp_prot = { .memory_pressure = &tcp_memory_pressure, .stream_memory_free = mptcp_memory_free, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .sysctl_mem = sysctl_tcp_mem, .obj_size = sizeof(struct mptcp_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, -- cgit v1.2.3 From 866358ec331f8faa394995fb4b511af1db0247c8 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Sun, 8 Nov 2020 09:08:26 -0500 Subject: netlabel: fix our progress tracking in netlbl_unlabel_staticlist() The current NetLabel code doesn't correctly keep track of the netlink dump state in some cases, in particular when multiple interfaces with large configurations are loaded. The problem manifests itself by not reporting the full configuration to userspace, even though it is loaded and active in the kernel. This patch fixes this by ensuring that the dump state is properly reset when necessary inside the netlbl_unlabel_staticlist() function. Fixes: 8cc44579d1bd ("NetLabel: Introduce static network labels for unlabeled connections") Signed-off-by: Paul Moore Link: https://lore.kernel.org/r/160484450633.3752.16512718263560813473.stgit@sifl Signed-off-by: Jakub Kicinski --- net/netlabel/netlabel_unlabeled.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 2e8e3f7b2111..fc55c9116da0 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1166,12 +1166,13 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlbl_unlhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; - u32 iter_bkt; - u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; + u32 skip_addr4 = cb->args[2]; + u32 iter_bkt, iter_chain, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) + u32 skip_addr6 = cb->args[3]; struct netlbl_af6list *addr6; #endif @@ -1182,7 +1183,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, rcu_read_lock(); for (iter_bkt = skip_bkt; iter_bkt < rcu_dereference(netlbl_unlhsh)->size; - iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) { + iter_bkt++) { iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt]; list_for_each_entry_rcu(iface, iter_list, list) { if (!iface->valid || @@ -1190,7 +1191,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, continue; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { - if (iter_addr4++ < cb->args[2]) + if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1203,10 +1204,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, goto unlabel_staticlist_return; } } + iter_addr4 = 0; + skip_addr4 = 0; #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { - if (iter_addr6++ < cb->args[3]) + if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1219,8 +1222,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, goto unlabel_staticlist_return; } } + iter_addr6 = 0; + skip_addr6 = 0; #endif /* IPv6 */ } + iter_chain = 0; + skip_chain = 0; } unlabel_staticlist_return: -- cgit v1.2.3 From 909172a149749242990a6e64cb55d55460d4e417 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Tue, 10 Nov 2020 08:16:31 +0800 Subject: net: Update window_clamp if SOCK_RCVBUF is set When net.ipv4.tcp_syncookies=1 and syn flood is happened, cookie_v4_check or cookie_v6_check tries to redo what tcp_v4_send_synack or tcp_v6_send_synack did, rsk_window_clamp will be changed if SOCK_RCVBUF is set, which will make rcv_wscale is different, the client still operates with initial window scale and can overshot granted window, the client use the initial scale but local server use new scale to advertise window value, and session work abnormally. Fixes: e88c64f0a425 ("tcp: allow effective reduction of TCP's rcv-buffer via setsockopt") Signed-off-by: Mao Wenan Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/1604967391-123737-1-git-send-email-wenan.mao@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/ipv4/syncookies.c | 9 +++++++-- net/ipv6/syncookies.c | 10 ++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 6ac473b47f30..00dc3f943c80 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -331,7 +331,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; - int mss; + int full_space, mss; struct rtable *rt; __u8 rcv_wscale; struct flowi4 fl4; @@ -427,8 +427,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); + /* limit the window selection if the user enforce a smaller rx buffer */ + full_space = tcp_full_space(sk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; - tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index e796a64be308..9b6cae1e49d9 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -136,7 +136,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; - int mss; + int full_space, mss; struct dst_entry *dst; __u8 rcv_wscale; u32 tsoff = 0; @@ -241,7 +241,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) } req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); - tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, + /* limit the window selection if the user enforce a smaller rx buffer */ + full_space = tcp_full_space(sk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; + + tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); -- cgit v1.2.3 From 4031eeafa71eaf22ae40a15606a134ae86345daf Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 9 Nov 2020 08:57:05 +0100 Subject: net/af_iucv: fix null pointer dereference on shutdown syzbot reported the following KASAN finding: BUG: KASAN: nullptr-dereference in iucv_send_ctrl+0x390/0x3f0 net/iucv/af_iucv.c:385 Read of size 2 at addr 000000000000021e by task syz-executor907/519 CPU: 0 PID: 519 Comm: syz-executor907 Not tainted 5.9.0-syzkaller-07043-gbcf9877ad213 #0 Hardware name: IBM 3906 M04 701 (KVM/Linux) Call Trace: [<00000000c576af60>] unwind_start arch/s390/include/asm/unwind.h:65 [inline] [<00000000c576af60>] show_stack+0x180/0x228 arch/s390/kernel/dumpstack.c:135 [<00000000c9dcd1f8>] __dump_stack lib/dump_stack.c:77 [inline] [<00000000c9dcd1f8>] dump_stack+0x268/0x2f0 lib/dump_stack.c:118 [<00000000c5fed016>] print_address_description.constprop.0+0x5e/0x218 mm/kasan/report.c:383 [<00000000c5fec82a>] __kasan_report mm/kasan/report.c:517 [inline] [<00000000c5fec82a>] kasan_report+0x11a/0x168 mm/kasan/report.c:534 [<00000000c98b5b60>] iucv_send_ctrl+0x390/0x3f0 net/iucv/af_iucv.c:385 [<00000000c98b6262>] iucv_sock_shutdown+0x44a/0x4c0 net/iucv/af_iucv.c:1457 [<00000000c89d3a54>] __sys_shutdown+0x12c/0x1c8 net/socket.c:2204 [<00000000c89d3b70>] __do_sys_shutdown net/socket.c:2212 [inline] [<00000000c89d3b70>] __s390x_sys_shutdown+0x38/0x48 net/socket.c:2210 [<00000000c9e36eac>] system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 There is nothing to shutdown if a connection has never been established. Besides that iucv->hs_dev is not yet initialized if a socket is in IUCV_OPEN state and iucv->path is not yet initialized if socket is in IUCV_BOUND state. So, just skip the shutdown calls for a socket in these states. Fixes: eac3731bd04c ("[S390]: Add AF_IUCV socket support") Fixes: 82492a355fac ("af_iucv: add shutdown for HS transport") Reviewed-by: Vasily Gorbik Signed-off-by: Ursula Braun [jwi: correct one Fixes tag] Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- net/iucv/af_iucv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index d80572074667..047238f01ba6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1434,7 +1434,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how) break; } - if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) { + if ((how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) && + sk->sk_state == IUCV_CONNECTED) { if (iucv->transport == AF_IUCV_TRANS_IUCV) { txmsg.class = 0; txmsg.tag = 0; -- cgit v1.2.3 From fa6882c63621821f73cc806f291208e1c6ea6187 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 9 Nov 2020 22:09:13 +0800 Subject: tipc: fix memory leak in tipc_topsrv_start() kmemleak report a memory leak as follows: unreferenced object 0xffff88810a596800 (size 512): comm "ip", pid 21558, jiffies 4297568990 (age 112.120s) hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 00 83 60 b0 ff ff ff ff ..........`..... backtrace: [<0000000022bbe21f>] tipc_topsrv_init_net+0x1f3/0xa70 [<00000000fe15ddf7>] ops_init+0xa8/0x3c0 [<00000000138af6f2>] setup_net+0x2de/0x7e0 [<000000008c6807a3>] copy_net_ns+0x27d/0x530 [<000000006b21adbd>] create_new_namespaces+0x382/0xa30 [<00000000bb169746>] unshare_nsproxy_namespaces+0xa1/0x1d0 [<00000000fe2e42bc>] ksys_unshare+0x39c/0x780 [<0000000009ba3b19>] __x64_sys_unshare+0x2d/0x40 [<00000000614ad866>] do_syscall_64+0x56/0xa0 [<00000000a1b5ca3c>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 'srv' is malloced in tipc_topsrv_start() but not free before leaving from the error handling cases. We need to free it. Fixes: 5c45ab24ac77 ("tipc: make struct tipc_server private for server.c") Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20201109140913.47370-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- net/tipc/topsrv.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 5f6f86051c83..13f3143609f9 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -664,12 +664,18 @@ static int tipc_topsrv_start(struct net *net) ret = tipc_topsrv_work_start(srv); if (ret < 0) - return ret; + goto err_start; ret = tipc_topsrv_create_listener(srv); if (ret < 0) - tipc_topsrv_work_stop(srv); + goto err_create; + return 0; + +err_create: + tipc_topsrv_work_stop(srv); +err_start: + kfree(srv); return ret; } -- cgit v1.2.3 From 361182308766a265b6c521879b34302617a8c209 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Mon, 9 Nov 2020 07:54:49 +0100 Subject: net/x25: Fix null-ptr-deref in x25_connect This fixes a regression for blocking connects introduced by commit 4becb7ee5b3d ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect"). The x25->neighbour is already set to "NULL" by x25_disconnect() now, while a blocking connect is waiting in x25_wait_for_connection_establishment(). Therefore x25->neighbour must not be accessed here again and x25->state is also already set to X25_STATE_0 by x25_disconnect(). Fixes: 4becb7ee5b3d ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect") Signed-off-by: Martin Schiller Reviewed-by: Xie He Link: https://lore.kernel.org/r/20201109065449.9014-1-ms@dev.tdt.de Signed-off-by: Jakub Kicinski --- net/x25/af_x25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 0bbb283f23c9..046d3fee66a9 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -825,7 +825,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; rc = 0; out_put_neigh: - if (rc) { + if (rc && x25->neighbour) { read_lock_bh(&x25_list_lock); x25_neigh_put(x25->neighbour); x25->neighbour = NULL; -- cgit v1.2.3 From 9f73bd1c2c4c304b238051fc92b3f807326f0a89 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 11 Nov 2020 05:47:44 +0200 Subject: devlink: Avoid overwriting port attributes of registered port Cited commit in fixes tag overwrites the port attributes for the registered port. Avoid such error by checking registered flag before setting attributes. Fixes: 71ad8d55f8e5 ("devlink: Replace devlink_port_attrs_set parameters with a struct") Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20201111034744.35554-1-parav@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index a932d95be798..ab4b1368904f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8254,8 +8254,6 @@ static int __devlink_port_attrs_set(struct devlink_port *devlink_port, { struct devlink_port_attrs *attrs = &devlink_port->attrs; - if (WARN_ON(devlink_port->registered)) - return -EEXIST; devlink_port->attrs_set = true; attrs->flavour = flavour; if (attrs->switch_id.id_len) { @@ -8279,6 +8277,8 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, { int ret; + if (WARN_ON(devlink_port->registered)) + return; devlink_port->attrs = *attrs; ret = __devlink_port_attrs_set(devlink_port, attrs->flavour); if (ret) @@ -8301,6 +8301,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; + if (WARN_ON(devlink_port->registered)) + return; ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_PF); if (ret) @@ -8326,6 +8328,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; + if (WARN_ON(devlink_port->registered)) + return; ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_VF); if (ret) -- cgit v1.2.3 From 4b1a86281cc1d0de46df3ad2cb8c1f86ac07681c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Nov 2020 20:45:25 +0000 Subject: net: udp: fix UDP header access on Fast/frag0 UDP GRO UDP GRO uses udp_hdr(skb) in its .gro_receive() callback. While it's probably OK for non-frag0 paths (when all headers or even the entire frame are already in skb head), this inline points to junk when using Fast GRO (napi_gro_frags() or napi_gro_receive() with only Ethernet header in skb head and all the rest in the frags) and breaks GRO packet compilation and the packet flow itself. To support both modes, skb_gro_header_fast() + skb_gro_header_slow() are typically used. UDP even has an inline helper that makes use of them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr() to get rid of the out-of-order delivers. Present since the introduction of plain UDP GRO in 5.0-rc1. Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Cc: Eric Dumazet Signed-off-by: Alexander Lobakin Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index e67a66fbf27b..13740e9fe6ec 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -366,7 +366,7 @@ out: static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *skb) { - struct udphdr *uh = udp_hdr(skb); + struct udphdr *uh = udp_gro_udphdr(skb); struct sk_buff *pp = NULL; struct udphdr *uh2; struct sk_buff *p; -- cgit v1.2.3 From 55e729889bb07d68ab071660ce3f5e7a7872ebe8 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Nov 2020 20:45:38 +0000 Subject: net: udp: fix IP header access and skb lookup on Fast/frag0 UDP GRO udp{4,6}_lib_lookup_skb() use ip{,v6}_hdr() to get IP header of the packet. While it's probably OK for non-frag0 paths, this helpers will also point to junk on Fast/frag0 GRO when all headers are located in frags. As a result, sk/skb lookup may fail or give wrong results. To support both GRO modes, skb_gro_network_header() might be used. To not modify original functions, add private versions of udp{4,6}_lib_lookup_skb() only to perform correct sk lookups on GRO. Present since the introduction of "application-level" UDP GRO in 4.7-rc1. Misc: replace totally unneeded ternaries with plain ifs. Fixes: a6024562ffd7 ("udp: Add GRO functions to UDP socket") Suggested-by: Willem de Bruijn Cc: Eric Dumazet Signed-off-by: Alexander Lobakin Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 17 +++++++++++++++-- net/ipv6/udp_offload.c | 17 +++++++++++++++-- 2 files changed, 30 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 13740e9fe6ec..c62805cd3131 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -500,12 +500,22 @@ out: } EXPORT_SYMBOL(udp_gro_receive); +static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, + __be16 dport) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + + return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + inet_sdif(skb), &udp_table, NULL); +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); + struct sock *sk = NULL; struct sk_buff *pp; - struct sock *sk; if (unlikely(!uh)) goto flush; @@ -523,7 +533,10 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) skip: NAPI_GRO_CB(skb)->is_ipv6 = 0; rcu_read_lock(); - sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL; + + if (static_branch_unlikely(&udp_encap_needed_key)) + sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest); + pp = udp_gro_receive(head, skb, uh, sk); rcu_read_unlock(); return pp; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 584157a07759..f9e888d1b9af 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -111,12 +111,22 @@ out: return segs; } +static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, + __be16 dport) +{ + const struct ipv6hdr *iph = skb_gro_network_header(skb); + + return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, + &iph->daddr, dport, inet6_iif(skb), + inet6_sdif(skb), &udp_table, NULL); +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); + struct sock *sk = NULL; struct sk_buff *pp; - struct sock *sk; if (unlikely(!uh)) goto flush; @@ -135,7 +145,10 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) skip: NAPI_GRO_CB(skb)->is_ipv6 = 1; rcu_read_lock(); - sk = static_branch_unlikely(&udpv6_encap_needed_key) ? udp6_lib_lookup_skb(skb, uh->source, uh->dest) : NULL; + + if (static_branch_unlikely(&udpv6_encap_needed_key)) + sk = udp6_gro_lookup_skb(skb, uh->source, uh->dest); + pp = udp_gro_receive(head, skb, uh, sk); rcu_read_unlock(); return pp; -- cgit v1.2.3