From bcf141b2eb551b3477b24997ebc09c65f117a803 Mon Sep 17 00:00:00 2001 From: Ghalem Boudour Date: Fri, 19 Nov 2021 18:20:16 +0100 Subject: xfrm: fix policy lookup for ipv6 gre packets On egress side, xfrm lookup is called from __gre6_xmit() with the fl6_gre_key field not initialized leading to policies selectors check failure. Consequently, gre packets are sent without encryption. On ingress side, INET6_PROTO_NOPOLICY was set, thus packets were not checked against xfrm policies. Like for egress side, fl6_gre_key should be correctly set, this is now done in decode_session6(). Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Cc: stable@vger.kernel.org Signed-off-by: Ghalem Boudour Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/ipv6/ip6_gre.c | 5 ++++- net/xfrm/xfrm_policy.c | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d831d2439693..f5a511c57aa2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -755,6 +755,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, fl6->daddr = key->u.ipv6.dst; fl6->flowlabel = key->label; fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; flags = key->tun_flags & @@ -990,6 +991,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) @@ -1098,6 +1100,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) fl6->flowi6_oif = p->link; fl6->flowlabel = 0; fl6->flowi6_proto = IPPROTO_GRE; + fl6->fl6_gre_key = t->parms.o_key; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; @@ -1544,7 +1547,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = gre_rcv, .err_handler = ip6gre_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, + .flags = INET6_PROTO_FINAL, }; static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1a06585022ab..84d2361da015 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -33,6 +33,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6_MIP6) #include #endif @@ -3422,6 +3423,26 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) } fl6->flowi6_proto = nexthdr; return; + case IPPROTO_GRE: + if (!onlyproto && + (nh + offset + 12 < skb->data || + pskb_may_pull(skb, nh + offset + 12 - skb->data))) { + struct gre_base_hdr *gre_hdr; + __be32 *gre_key; + + nh = skb_network_header(skb); + gre_hdr = (struct gre_base_hdr *)(nh + offset); + gre_key = (__be32 *)(gre_hdr + 1); + + if (gre_hdr->flags & GRE_KEY) { + if (gre_hdr->flags & GRE_CSUM) + gre_key++; + fl6->fl6_gre_key = *gre_key; + } + } + fl6->flowi6_proto = nexthdr; + return; + #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: offset += ipv6_optlen(exthdr); -- cgit v1.2.3 From 7770a39d7c63faec6c4f33666d49a8cb664d0482 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Dec 2021 12:20:19 -0800 Subject: xfrm: fix a small bug in xfrm_sa_len() copy_user_offload() will actually push a struct struct xfrm_user_offload, which is different than (struct xfrm_state *)->xso (struct xfrm_state_offload) Fixes: d77e38e612a01 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7c36cc1f3d79..0a2d2bae2831 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3058,7 +3058,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); if (x->xso.dev) - l += nla_total_size(sizeof(x->xso)); + l += nla_total_size(sizeof(struct xfrm_user_offload)); if (x->props.smark.v | x->props.smark.m) { l += nla_total_size(sizeof(x->props.smark.v)); l += nla_total_size(sizeof(x->props.smark.m)); -- cgit v1.2.3 From 8dce43919566f06e865f7e8949f5c10d8c2493f5 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Sun, 12 Dec 2021 11:34:30 +0100 Subject: xfrm: interface with if_id 0 should return error xfrm interface if_id = 0 would cause xfrm policy lookup errors since Commit 9f8550e4bd9d. Now explicitly fail to create an xfrm interface when if_id = 0 With this commit: ip link add ipsec0 type xfrm dev lo if_id 0 Error: if_id must be non zero. v1->v2 change: - add Fixes: tag Fixes: 9f8550e4bd9d ("xfrm: fix disable_xfrm sysctl when used on xfrm interfaces") Signed-off-by: Antony Antony Reviewed-by: Eyal Birger Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 41de46b5ffa9..57448fc519fc 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -637,11 +637,16 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; struct xfrm_if *xi; int err; xfrmi_netlink_parms(data, &p); + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (xi) return -EEXIST; @@ -666,7 +671,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], { struct xfrm_if *xi = netdev_priv(dev); struct net *net = xi->net; - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; + + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } xfrmi_netlink_parms(data, &p); xi = xfrmi_locate(net, &p); -- cgit v1.2.3 From 68ac0f3810e76a853b5f7b90601a05c3048b8b54 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Sun, 12 Dec 2021 11:35:00 +0100 Subject: xfrm: state and policy should fail if XFRMA_IF_ID 0 xfrm ineterface does not allow xfrm if_id = 0 fail to create or update xfrm state and policy. With this commit: ip xfrm policy add src 192.0.2.1 dst 192.0.2.2 dir out if_id 0 RTNETLINK answers: Invalid argument ip xfrm state add src 192.0.2.1 dst 192.0.2.2 proto esp spi 1 \ reqid 1 mode tunnel aead 'rfc4106(gcm(aes))' \ 0x1111111111111111111111111111111111111111 96 if_id 0 RTNETLINK answers: Invalid argument v1->v2 change: - add Fixes: tag Fixes: 9f8550e4bd9d ("xfrm: fix disable_xfrm sysctl when used on xfrm interfaces") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0a2d2bae2831..b571892fe10f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -621,8 +621,13 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, xfrm_smark_init(attrs, &x->props.smark); - if (attrs[XFRMA_IF_ID]) + if (attrs[XFRMA_IF_ID]) { x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (!x->if_id) { + err = -EINVAL; + goto error; + } + } err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]); if (err) @@ -1413,8 +1418,13 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, mark = xfrm_mark_get(attrs, &m); - if (attrs[XFRMA_IF_ID]) + if (attrs[XFRMA_IF_ID]) { if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (!if_id) { + err = -EINVAL; + goto out_noput; + } + } if (p->info.seq) { x = xfrm_find_acq_byseq(net, mark, p->info.seq); @@ -1727,8 +1737,13 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us xfrm_mark_get(attrs, &xp->mark); - if (attrs[XFRMA_IF_ID]) + if (attrs[XFRMA_IF_ID]) { xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (!xp->if_id) { + err = -EINVAL; + goto error; + } + } return xp; error: -- cgit v1.2.3 From 45a98ef4922def8c679ca7c454403d1957fe70e7 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Mon, 3 Jan 2022 13:19:29 +0200 Subject: net/xfrm: IPsec tunnel mode fix inner_ipproto setting in sec_path The inner_ipproto saves the inner IP protocol of the plain text packet. This allows vendor's IPsec feature making offload decision at skb's features_check and configuring hardware at ndo_start_xmit, current code implenetation did not handle the case where IPsec is used in tunnel mode. Fix by handling the case when IPsec is used in tunnel mode by reading the protocol of the plain text packet IP protocol. Fixes: fa4535238fb5 ("net/xfrm: Add inner_ipproto into sec_path") Signed-off-by: Raed Salem Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 229544bc70c2..4dc4a7bbe51c 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -647,10 +647,12 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb * This requires hardware to know the inner packet type to calculate * the inner header checksum. Save inner ip protocol here to avoid * traversing the packet in the vendor's xmit code. - * If the encap type is IPIP, just save skb->inner_ipproto. Otherwise, - * get the ip protocol from the IP header. + * For IPsec tunnel mode save the ip protocol from the IP header of the + * plain text packet. Otherwise If the encap type is IPIP, just save + * skb->inner_ipproto in any other case get the ip protocol from the IP + * header. */ -static void xfrm_get_inner_ipproto(struct sk_buff *skb) +static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x) { struct xfrm_offload *xo = xfrm_offload(skb); const struct ethhdr *eth; @@ -658,6 +660,25 @@ static void xfrm_get_inner_ipproto(struct sk_buff *skb) if (!xo) return; + if (x->outer_mode.encap == XFRM_MODE_TUNNEL) { + switch (x->outer_mode.family) { + case AF_INET: + xo->inner_ipproto = ip_hdr(skb)->protocol; + break; + case AF_INET6: + xo->inner_ipproto = ipv6_hdr(skb)->nexthdr; + break; + default: + break; + } + + return; + } + + /* non-Tunnel Mode */ + if (!skb->encapsulation) + return; + if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) { xo->inner_ipproto = skb->inner_ipproto; return; @@ -712,8 +733,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) sp->xvec[sp->len++] = x; xfrm_state_hold(x); - if (skb->encapsulation) - xfrm_get_inner_ipproto(skb); + xfrm_get_inner_ipproto(skb, x); skb->encapsulation = 1; if (skb_is_gso(skb)) { -- cgit v1.2.3 From d94a69cb2cfa77294921aae9afcfb866e723a2da Mon Sep 17 00:00:00 2001 From: Xin Xiong Date: Thu, 23 Dec 2021 10:48:12 +0800 Subject: netfilter: ipt_CLUSTERIP: fix refcount leak in clusterip_tg_check() The issue takes place in one error path of clusterip_tg_check(). When memcmp() returns nonzero, the function simply returns the error code, forgetting to decrease the reference count of a clusterip_config object, which is bumped earlier by clusterip_config_find_get(). This may incur reference count leak. Fix this issue by decrementing the refcount of the object in specific error path. Fixes: 06aa151ad1fc74 ("netfilter: ipt_CLUSTERIP: check MAC address when duplicate config is set") Signed-off-by: Xin Xiong Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 8fd1aba8af31..b518f20c9a24 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -520,8 +520,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (IS_ERR(config)) return PTR_ERR(config); } - } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) + } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) { + clusterip_config_entry_put(config); + clusterip_config_put(config); return -EINVAL; + } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { -- cgit v1.2.3 From 5f33a09e769a9da0482f20a6770a342842443776 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 5 Jan 2022 14:01:12 +0100 Subject: can: isotp: convert struct tpcon::{idx,len} to unsigned int In isotp_rcv_ff() 32 bit of data received over the network is assigned to struct tpcon::len. Later in that function the length is checked for the maximal supported length against MAX_MSG_LENGTH. As struct tpcon::len is an "int" this check does not work, if the provided length overflows the "int". Later on struct tpcon::idx is compared against struct tpcon::len. To fix this problem this patch converts both struct tpcon::{idx,len} to unsigned int. Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Link: https://lore.kernel.org/all/20220105132429.1170627-1-mkl@pengutronix.de Cc: stable@vger.kernel.org Acked-by: Oliver Hartkopp Reported-by: syzbot+4c63f36709a642f801c5@syzkaller.appspotmail.com Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/can/isotp.c b/net/can/isotp.c index df6968b28bf4..02cbcb2ecf0d 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -119,8 +119,8 @@ enum { }; struct tpcon { - int idx; - int len; + unsigned int idx; + unsigned int len; u32 state; u8 bs; u8 sn; -- cgit v1.2.3 From 4e1860a3863707e8177329c006d10f9e37e097a8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 5 Jan 2022 16:09:57 +0100 Subject: netfilter: nft_payload: do not update layer 4 checksum when mangling fragments IP fragments do not come with the transport header, hence skip bogus layer 4 checksum updates. Fixes: 1814096980bb ("netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields") Reported-and-tested-by: Steffen Weinreich Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_payload.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index bd689938a2e0..58e96a0fe0b4 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -546,6 +546,9 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, struct sk_buff *skb, unsigned int *l4csum_offset) { + if (pkt->fragoff) + return -1; + switch (pkt->tprot) { case IPPROTO_TCP: *l4csum_offset = offsetof(struct tcphdr, check); -- cgit v1.2.3 From 23c54263efd7cb605e2f7af72717a2a951999217 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 5 Jan 2022 14:19:54 +0100 Subject: netfilter: nft_set_pipapo: allocate pcpu scratch maps on clone This is needed in case a new transaction is made that doesn't insert any new elements into an already existing set. Else, after second 'nft -f ruleset.txt', lookups in such a set will fail because ->lookup() encounters raw_cpu_ptr(m->scratch) == NULL. For the initial rule load, insertion of elements takes care of the allocation, but for rule reloads this isn't guaranteed: we might not have additions to the set. Fixes: 3c4287f62044a90e ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: etkaar Signed-off-by: Florian Westphal Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index dce866d93fee..2c8051d8cca6 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1290,6 +1290,11 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch_aligned) goto out_scratch; #endif + for_each_possible_cpu(i) + *per_cpu_ptr(new->scratch, i) = NULL; + + if (pipapo_realloc_scratch(new, old->bsize_max)) + goto out_scratch_realloc; rcu_head_init(&new->rcu); @@ -1334,6 +1339,9 @@ out_lt: kvfree(dst->lt); dst--; } +out_scratch_realloc: + for_each_possible_cpu(i) + kfree(*per_cpu_ptr(new->scratch, i)); #ifdef NFT_PIPAPO_ALIGN free_percpu(new->scratch_aligned); #endif -- cgit v1.2.3 From 36595d8ad46d9e4c41cc7c48c4405b7c3322deac Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 6 Jan 2022 20:42:08 +0800 Subject: net/smc: Reset conn->lgr when link group registration fails SMC connections might fail to be registered in a link group due to unable to find a usable link during its creation. As a result, smc_conn_create() will return a failure and most resources related to the connection won't be applied or initialized, such as conn->abort_work or conn->lnk. If smc_conn_free() is invoked later, it will try to access the uninitialized resources related to the connection, thus causing a warning or crash. This patch tries to fix this by resetting conn->lgr to NULL if an abnormal exit occurs in smc_lgr_register_conn(), thus avoiding the access to uninitialized resources in smc_conn_free(). Meanwhile, the new created link group should be terminated if smc connections can't be registered in it. So smc_lgr_cleanup_early() is modified to take care of link group only and invoked to terminate unusable link group by smc_conn_create(). The call to smc_conn_free() is moved out from smc_lgr_cleanup_early() to smc_conn_abort(). Fixes: 56bc3b2094b4 ("net/smc: assign link to a new connection") Suggested-by: Karsten Graul Signed-off-by: Wen Gu Acked-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 8 +++++--- net/smc/smc_core.c | 12 +++++++----- net/smc/smc_core.h | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 1c9289f56dc4..211cd91b6c40 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -632,10 +632,12 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code, static void smc_conn_abort(struct smc_sock *smc, int local_first) { + struct smc_connection *conn = &smc->conn; + struct smc_link_group *lgr = conn->lgr; + + smc_conn_free(conn); if (local_first) - smc_lgr_cleanup_early(&smc->conn); - else - smc_conn_free(&smc->conn); + smc_lgr_cleanup_early(lgr); } /* check if there is a rdma device available for this connection. */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index a6849362f4dd..c9a8092c4ac9 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -171,8 +171,10 @@ static int smc_lgr_register_conn(struct smc_connection *conn, bool first) if (!conn->lgr->is_smcd) { rc = smcr_lgr_conn_assign_link(conn, first); - if (rc) + if (rc) { + conn->lgr = NULL; return rc; + } } /* find a new alert_token_local value not yet used by some connection * in this link group @@ -622,15 +624,13 @@ int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -void smc_lgr_cleanup_early(struct smc_connection *conn) +void smc_lgr_cleanup_early(struct smc_link_group *lgr) { - struct smc_link_group *lgr = conn->lgr; spinlock_t *lgr_lock; if (!lgr) return; - smc_conn_free(conn); smc_lgr_list_head(lgr, &lgr_lock); spin_lock_bh(lgr_lock); /* do not use this link group for new connections */ @@ -1832,8 +1832,10 @@ create: write_lock_bh(&lgr->conns_lock); rc = smc_lgr_register_conn(conn, true); write_unlock_bh(&lgr->conns_lock); - if (rc) + if (rc) { + smc_lgr_cleanup_early(lgr); goto out; + } } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index d63b08274197..73d0c35d3eb7 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -468,7 +468,7 @@ static inline void smc_set_pci_values(struct pci_dev *pci_dev, struct smc_sock; struct smc_clc_msg_accept_confirm; -void smc_lgr_cleanup_early(struct smc_connection *conn); +void smc_lgr_cleanup_early(struct smc_link_group *lgr); void smc_lgr_terminate_sched(struct smc_link_group *lgr); void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); -- cgit v1.2.3 From 04fac2cae9422a3401c172571afbcfdd58fa5c7e Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 6 Jan 2022 14:06:36 -0800 Subject: mptcp: fix opt size when sending DSS + MP_FAIL When these two options had to be sent -- which is not common -- the DSS size was not being taken into account in the remaining size. Additionally in this situation, the reported size was only the one of the MP_FAIL which can cause issue if at the end, we need to write more in the TCP options than previously said. Here we use a dedicated variable for MP_FAIL size to keep the WARN_ON_ONCE() just after. Fixes: c25aeb4e0953 ("mptcp: MP_FAIL suboption sending") Acked-and-tested-by: Geliang Tang Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index fe98e4f475ba..96c6efdd48bc 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -821,10 +821,13 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts)) ret = true; else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) { + unsigned int mp_fail_size; + ret = true; - if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { - *size += opt_size; - remaining -= opt_size; + if (mptcp_established_options_mp_fail(sk, &mp_fail_size, + remaining - opt_size, opts)) { + *size += opt_size + mp_fail_size; + remaining -= opt_size - mp_fail_size; return true; } } -- cgit v1.2.3 From 110b6d1fe98fd7af9893992459b651594d789293 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 6 Jan 2022 14:06:37 -0800 Subject: mptcp: fix a DSS option writing error 'ptr += 1;' was omitted in the original code. If the DSS is the last option -- which is what we have most of the time -- that's not an issue. But it is if we need to send something else after like a RM_ADDR or an MP_PRIO. Fixes: 1bff1e43a30e ("mptcp: optimize out option generation") Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 96c6efdd48bc..6661b1d6520f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1319,6 +1319,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, put_unaligned_be32(mpext->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } + ptr += 1; } } else if (OPTIONS_MPTCP_MPC & opts->suboptions) { u8 len, flag = MPTCP_CAP_HMAC_SHA256; -- cgit v1.2.3 From 269bda9e7da48eafb599d01c96199caa2f7547e5 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 6 Jan 2022 14:06:38 -0800 Subject: mptcp: Check reclaim amount before reducing allocation syzbot found a page counter underflow that was triggered by MPTCP's reclaim code: page_counter underflow: -4294964789 nr_pages=4294967295 WARNING: CPU: 2 PID: 3785 at mm/page_counter.c:56 page_counter_cancel+0xcf/0xe0 mm/page_counter.c:56 Modules linked in: CPU: 2 PID: 3785 Comm: kworker/2:6 Not tainted 5.16.0-rc1-syzkaller #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014 Workqueue: events mptcp_worker RIP: 0010:page_counter_cancel+0xcf/0xe0 mm/page_counter.c:56 Code: c7 04 24 00 00 00 00 45 31 f6 eb 97 e8 2a 2b b5 ff 4c 89 ea 48 89 ee 48 c7 c7 00 9e b8 89 c6 05 a0 c1 ba 0b 01 e8 95 e4 4b 07 <0f> 0b eb a8 4c 89 e7 e8 25 5a fb ff eb c7 0f 1f 00 41 56 41 55 49 RSP: 0018:ffffc90002d4f918 EFLAGS: 00010082 RAX: 0000000000000000 RBX: ffff88806a494120 RCX: 0000000000000000 RDX: ffff8880688c41c0 RSI: ffffffff815e8f28 RDI: fffff520005a9f15 RBP: ffffffff000009cb R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815e2cfe R11: 0000000000000000 R12: ffff88806a494120 R13: 00000000ffffffff R14: 0000000000000000 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff88802cc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2de21000 CR3: 000000005ad59000 CR4: 0000000000150ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: page_counter_uncharge+0x2e/0x60 mm/page_counter.c:160 drain_stock+0xc1/0x180 mm/memcontrol.c:2219 refill_stock+0x139/0x2f0 mm/memcontrol.c:2271 __sk_mem_reduce_allocated+0x24d/0x550 net/core/sock.c:2945 __mptcp_rmem_reclaim net/mptcp/protocol.c:167 [inline] __mptcp_mem_reclaim_partial+0x124/0x410 net/mptcp/protocol.c:975 mptcp_mem_reclaim_partial net/mptcp/protocol.c:982 [inline] mptcp_alloc_tx_skb net/mptcp/protocol.c:1212 [inline] mptcp_sendmsg_frag+0x18c6/0x2190 net/mptcp/protocol.c:1279 __mptcp_push_pending+0x232/0x720 net/mptcp/protocol.c:1545 mptcp_release_cb+0xfe/0x200 net/mptcp/protocol.c:2975 release_sock+0xb4/0x1b0 net/core/sock.c:3306 mptcp_worker+0x51e/0xc10 net/mptcp/protocol.c:2443 process_one_work+0x9b2/0x1690 kernel/workqueue.c:2298 worker_thread+0x658/0x11f0 kernel/workqueue.c:2445 kthread+0x405/0x4f0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 __mptcp_mem_reclaim_partial() could call __mptcp_rmem_reclaim() with a negative value, which passed that negative value to __sk_mem_reduce_allocated() and triggered the splat above. Check for a reclaim amount that is positive and large enough for __mptcp_rmem_reclaim() to actually adjust rmem_fwd_alloc (much like the sk_mem_reclaim_partial() code the function is based on). v2: Use '>' instead of '>=', since SK_MEM_QUANTUM - 1 would get right-shifted into nothing by __mptcp_rmem_reclaim. Fixes: 6511882cdd82 ("mptcp: allocate fwd memory separately on the rx and tx path") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/252 Reported-and-tested-by: syzbot+bc9e2d2dbcb347dd215a@syzkaller.appspotmail.com Cc: Andrew Morton Cc: Michal Hocko Acked-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 54613f5b7521..0cd55e4c30fa 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -972,7 +972,9 @@ static void __mptcp_mem_reclaim_partial(struct sock *sk) lockdep_assert_held_once(&sk->sk_lock.slock); - __mptcp_rmem_reclaim(sk, reclaimable - 1); + if (reclaimable > SK_MEM_QUANTUM) + __mptcp_rmem_reclaim(sk, reclaimable - 1); + sk_mem_reclaim_partial(sk); } -- cgit v1.2.3 From 9371937092d5fd502032c1bb4475b36b39b1f1b3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 7 Jan 2022 10:13:12 +0300 Subject: ax25: uninitialized variable in ax25_setsockopt() The "opt" variable is unsigned long but we only copy 4 bytes from the user so the lower 4 bytes are uninitialized. I have changed the integer overflow checks from ULONG to UINT as well. This is a slight API change but I don't expect it to break anything. Fixes: a7b75c5a8c41 ("net: pass a sockptr_t into ->setsockopt") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index cfca99e295b8..02f43f3e2c56 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -536,7 +536,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, ax25_cb *ax25; struct net_device *dev; char devname[IFNAMSIZ]; - unsigned long opt; + unsigned int opt; int res = 0; if (level != SOL_AX25) @@ -568,7 +568,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T1: - if (opt < 1 || opt > ULONG_MAX / HZ) { + if (opt < 1 || opt > UINT_MAX / HZ) { res = -EINVAL; break; } @@ -577,7 +577,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T2: - if (opt < 1 || opt > ULONG_MAX / HZ) { + if (opt < 1 || opt > UINT_MAX / HZ) { res = -EINVAL; break; } @@ -593,7 +593,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T3: - if (opt < 1 || opt > ULONG_MAX / HZ) { + if (opt < 1 || opt > UINT_MAX / HZ) { res = -EINVAL; break; } @@ -601,7 +601,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_IDLE: - if (opt > ULONG_MAX / (60 * HZ)) { + if (opt > UINT_MAX / (60 * HZ)) { res = -EINVAL; break; } -- cgit v1.2.3 From dc35616e6c2907b0c0c391a205802d8880f7fd85 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 7 Jan 2022 10:12:10 +0300 Subject: netrom: fix api breakage in nr_setsockopt() This needs to copy an unsigned int from user space instead of a long to avoid breaking user space with an API change. I have updated all the integer overflow checks from ULONG to UINT as well. This is a slight API change but I do not expect it to affect anything in real life. Fixes: 3087a6f36ee0 ("netrom: fix copying in user data in nr_setsockopt") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index f1ba7dd3d253..fa9dc2ba3941 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -298,7 +298,7 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); - unsigned long opt; + unsigned int opt; if (level != SOL_NETROM) return -ENOPROTOOPT; @@ -306,18 +306,18 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(unsigned int)) return -EINVAL; - if (copy_from_sockptr(&opt, optval, sizeof(unsigned long))) + if (copy_from_sockptr(&opt, optval, sizeof(opt))) return -EFAULT; switch (optname) { case NETROM_T1: - if (opt < 1 || opt > ULONG_MAX / HZ) + if (opt < 1 || opt > UINT_MAX / HZ) return -EINVAL; nr->t1 = opt * HZ; return 0; case NETROM_T2: - if (opt < 1 || opt > ULONG_MAX / HZ) + if (opt < 1 || opt > UINT_MAX / HZ) return -EINVAL; nr->t2 = opt * HZ; return 0; @@ -329,13 +329,13 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, return 0; case NETROM_T4: - if (opt < 1 || opt > ULONG_MAX / HZ) + if (opt < 1 || opt > UINT_MAX / HZ) return -EINVAL; nr->t4 = opt * HZ; return 0; case NETROM_IDLE: - if (opt > ULONG_MAX / (60 * HZ)) + if (opt > UINT_MAX / (60 * HZ)) return -EINVAL; nr->idle = opt * 60 * HZ; return 0; -- cgit v1.2.3 From 6f022c2ddbcefaee79502ce5386dfe351d457070 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 6 Jan 2022 17:38:04 +0200 Subject: net: openvswitch: Fix ct_state nat flags for conns arriving from tc Netfilter conntrack maintains NAT flags per connection indicating whether NAT was configured for the connection. Openvswitch maintains NAT flags on the per packet flow key ct_state field, indicating whether NAT was actually executed on the packet. When a packet misses from tc to ovs the conntrack NAT flags are set. However, NAT was not necessarily executed on the packet because the connection's state might still be in NEW state. As such, openvswitch wrongly assumes that NAT was executed and sets an incorrect flow key NAT flags. Fix this, by flagging to openvswitch which NAT was actually done in act_ct via tc_skb_ext and tc_skb_cb to the openvswitch module, so the packet flow key NAT flags will be correctly set. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: Paul Blakey Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20220106153804.26451-1-paulb@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 +++- include/net/pkt_sched.h | 4 +++- net/openvswitch/flow.c | 16 +++++++++++++--- net/sched/act_ct.c | 6 ++++++ net/sched/cls_api.c | 2 ++ 5 files changed, 27 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4507d77d6941..60ab0c2fe567 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -287,7 +287,9 @@ struct tc_skb_ext { __u32 chain; __u16 mru; __u16 zone; - bool post_ct; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; }; #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 9e71691c491b..9e7b21c0b3a6 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -197,7 +197,9 @@ struct tc_skb_cb { struct qdisc_skb_cb qdisc_cb; u16 mru; - bool post_ct; + u8 post_ct:1; + u8 post_ct_snat:1; + u8 post_ct_dnat:1; u16 zone; /* Only valid if post_ct = true */ }; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 6d262d9aa10e..02096f2ec678 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -859,7 +859,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) struct tc_skb_ext *tc_ext; #endif - bool post_ct = false; + bool post_ct = false, post_ct_snat = false, post_ct_dnat = false; int res, err; u16 zone = 0; @@ -900,6 +900,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->recirc_id = tc_ext ? tc_ext->chain : 0; OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; post_ct = tc_ext ? tc_ext->post_ct : false; + post_ct_snat = post_ct ? tc_ext->post_ct_snat : false; + post_ct_dnat = post_ct ? tc_ext->post_ct_dnat : false; zone = post_ct ? tc_ext->zone : 0; } else { key->recirc_id = 0; @@ -911,8 +913,16 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, err = key_extract(skb, key); if (!err) { ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */ - if (post_ct && !skb_get_nfct(skb)) - key->ct_zone = zone; + if (post_ct) { + if (!skb_get_nfct(skb)) { + key->ct_zone = zone; + } else { + if (!post_ct_dnat) + key->ct_state &= ~OVS_CS_F_DST_NAT; + if (!post_ct_snat) + key->ct_state &= ~OVS_CS_F_SRC_NAT; + } + } } return err; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index ab3591408419..2a17eb77c904 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -839,6 +839,12 @@ static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, } err = nf_nat_packet(ct, ctinfo, hooknum, skb); + if (err == NF_ACCEPT) { + if (maniptype == NF_NAT_MANIP_SRC) + tc_skb_cb(skb)->post_ct_snat = 1; + if (maniptype == NF_NAT_MANIP_DST) + tc_skb_cb(skb)->post_ct_dnat = 1; + } out: return err; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 35c74bdde848..cc9409aa755e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1625,6 +1625,8 @@ int tcf_classify(struct sk_buff *skb, ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; + ext->post_ct_snat = cb->post_ct_snat; + ext->post_ct_dnat = cb->post_ct_dnat; ext->zone = cb->zone; } -- cgit v1.2.3