diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-09-08 03:33:07 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-09-08 03:33:07 +0200 |
commit | 73be7fb14e83d24383f840a22f24d3ed222ca319 (patch) | |
tree | c03cb0058aa829aca0b30cad5e82f400e4561a00 /net/core | |
parent | Merge tag 'devicetree-fixes-for-6.6-1' of git://git.kernel.org/pub/scm/linux/... (diff) | |
parent | net: enetc: distinguish error from valid pointers in enetc_fixup_clear_rss_rfs() (diff) | |
download | linux-73be7fb14e83d24383f840a22f24d3ed222ca319.tar.xz linux-73be7fb14e83d24383f840a22f24d3ed222ca319.zip |
Merge tag 'net-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking updates from Jakub Kicinski:
"Including fixes from netfilter and bpf.
Current release - regressions:
- eth: stmmac: fix failure to probe without MAC interface specified
Current release - new code bugs:
- docs: netlink: fix missing classic_netlink doc reference
Previous releases - regressions:
- deal with integer overflows in kmalloc_reserve()
- use sk_forward_alloc_get() in sk_get_meminfo()
- bpf_sk_storage: fix the missing uncharge in sk_omem_alloc
- fib: avoid warn splat in flow dissector after packet mangling
- skb_segment: call zero copy functions before using skbuff frags
- eth: sfc: check for zero length in EF10 RX prefix
Previous releases - always broken:
- af_unix: fix msg_controllen test in scm_pidfd_recv() for
MSG_CMSG_COMPAT
- xsk: fix xsk_build_skb() dereferencing possible ERR_PTR()
- netfilter:
- nft_exthdr: fix non-linear header modification
- xt_u32, xt_sctp: validate user space input
- nftables: exthdr: fix 4-byte stack OOB write
- nfnetlink_osf: avoid OOB read
- one more fix for the garbage collection work from last release
- igmp: limit igmpv3_newpack() packet size to IP_MAX_MTU
- bpf, sockmap: fix preempt_rt splat when using raw_spin_lock_t
- handshake: fix null-deref in handshake_nl_done_doit()
- ip: ignore dst hint for multipath routes to ensure packets are
hashed across the nexthops
- phy: micrel:
- correct bit assignments for cable test errata
- disable EEE according to the KSZ9477 errata
Misc:
- docs/bpf: document compile-once-run-everywhere (CO-RE) relocations
- Revert "net: macsec: preserve ingress frame ordering": it appears
to have been developed against an older kernel, and the problem
doesn't exist upstream"
* tag 'net-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (95 commits)
net: enetc: distinguish error from valid pointers in enetc_fixup_clear_rss_rfs()
Revert "net: team: do not use dynamic lockdep key"
net: hns3: remove GSO partial feature bit
net: hns3: fix the port information display when sfp is absent
net: hns3: fix invalid mutex between tc qdisc and dcb ets command issue
net: hns3: fix debugfs concurrency issue between kfree buffer and read
net: hns3: fix byte order conversion issue in hclge_dbg_fd_tcam_read()
net: hns3: Support query tx timeout threshold by debugfs
net: hns3: fix tx timeout issue
net: phy: Provide Module 4 KSZ9477 errata (DS80000754C)
netfilter: nf_tables: Unbreak audit log reset
netfilter: ipset: add the missing IP_SET_HASH_WITH_NET0 macro for ip_set_hash_netportnet.c
netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction
netfilter: nf_tables: uapi: Describe NFTA_RULE_CHAIN_ID
netfilter: nfnetlink_osf: avoid OOB read
netfilter: nftables: exthdr: fix 4-byte stack OOB write
selftests/bpf: Check bpf_sk_storage has uncharged sk_omem_alloc
bpf: bpf_sk_storage: Fix the missing uncharge in sk_omem_alloc
bpf: bpf_sk_storage: Fix invalid wait context lockdep report
s390/bpf: Pass through tail call counter in trampolines
...
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/flow_dissector.c | 3 | ||||
-rw-r--r-- | net/core/skbuff.c | 54 | ||||
-rw-r--r-- | net/core/skmsg.c | 12 | ||||
-rw-r--r-- | net/core/sock.c | 27 | ||||
-rw-r--r-- | net/core/sock_map.c | 36 |
5 files changed, 75 insertions, 57 deletions
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 89d15ceaf9af..b3b3af0e7844 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1831,8 +1831,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) memset(&keys, 0, sizeof(keys)); __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, - &keys, NULL, 0, 0, 0, - FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + &keys, NULL, 0, 0, 0, 0); return __flow_hash_from_keys(&keys, &hashrnd); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 45707059082f..4eaf7ed0d1f4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -550,7 +550,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, bool *pfmemalloc) { bool ret_pfmemalloc = false; - unsigned int obj_size; + size_t obj_size; void *obj; obj_size = SKB_HEAD_ALIGN(*size); @@ -567,7 +567,13 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node, obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node); goto out; } - *size = obj_size = kmalloc_size_roundup(obj_size); + + obj_size = kmalloc_size_roundup(obj_size); + /* The following cast might truncate high-order bits of obj_size, this + * is harmless because kmalloc(obj_size >= 2^32) will fail anyway. + */ + *size = (unsigned int)obj_size; + /* * Try a regular allocation, when that fails and we're not entitled * to the reserves, fail. 
@@ -4423,21 +4429,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; - skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); - struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int partial_segs = 0; unsigned int headroom; unsigned int len = head_skb->len; + struct sk_buff *frag_skb; + skb_frag_t *frag; __be16 proto; bool csum, sg; - int nfrags = skb_shinfo(head_skb)->nr_frags; int err = -ENOMEM; int i = 0; - int pos; + int nfrags, pos; if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) && mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) { @@ -4514,6 +4519,13 @@ normal: headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); + if (skb_orphan_frags(head_skb, GFP_ATOMIC)) + return ERR_PTR(-ENOMEM); + + nfrags = skb_shinfo(head_skb)->nr_frags; + frag = skb_shinfo(head_skb)->frags; + frag_skb = head_skb; + do { struct sk_buff *nskb; skb_frag_t *nskb_frag; @@ -4534,6 +4546,10 @@ normal: (skb_headlen(list_skb) == len || sg)) { BUG_ON(skb_headlen(list_skb) > len); + nskb = skb_clone(list_skb, GFP_ATOMIC); + if (unlikely(!nskb)) + goto err; + i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; @@ -4552,12 +4568,8 @@ normal: frag++; } - nskb = skb_clone(list_skb, GFP_ATOMIC); list_skb = list_skb->next; - if (unlikely(!nskb)) - goto err; - if (unlikely(pskb_trim(nskb, len))) { kfree_skb(nskb); goto err; @@ -4633,12 +4645,16 @@ normal: skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags & SKBFL_SHARED_FRAG; - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || - skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) + if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC)) goto err; while (pos < offset + len) { if (i >= nfrags) { + 
if (skb_orphan_frags(list_skb, GFP_ATOMIC) || + skb_zerocopy_clone(nskb, list_skb, + GFP_ATOMIC)) + goto err; + i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; @@ -4652,10 +4668,6 @@ normal: i--; frag--; } - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || - skb_zerocopy_clone(nskb, frag_skb, - GFP_ATOMIC)) - goto err; list_skb = list_skb->next; } @@ -5207,7 +5219,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_info = tstype; serr->opt_stats = opt_stats; serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0; - if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { + if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk_is_tcp(sk)) serr->ee.ee_data -= atomic_read(&sk->sk_tskey); @@ -5263,21 +5275,23 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, { struct sk_buff *skb; bool tsonly, opt_stats = false; + u32 tsflags; if (!sk) return; - if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && + tsflags = READ_ONCE(sk->sk_tsflags); + if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) && skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS) return; - tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; + tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY; if (!skb_may_tx_timestamp(sk, tsonly)) return; if (tsonly) { #ifdef CONFIG_INET - if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && + if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk_is_tcp(sk)) { skb = tcp_get_timestamping_opt_stats(sk, orig_skb, ack_skb); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index a0659fc29bcc..6c31eefbd777 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -612,12 +612,18 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, u32 off, u32 len, bool ingress) { + int err = 0; + if (!ingress) { if (!sock_writeable(psock->sk)) return -EAGAIN; return 
skb_send_sock(psock->sk, skb, off, len); } - return sk_psock_skb_ingress(psock, skb, off, len); + skb_get(skb); + err = sk_psock_skb_ingress(psock, skb, off, len); + if (err < 0) + kfree_skb(skb); + return err; } static void sk_psock_skb_state(struct sk_psock *psock, @@ -685,9 +691,7 @@ static void sk_psock_backlog(struct work_struct *work) } while (len); skb = skb_dequeue(&psock->ingress_skb); - if (!ingress) { - kfree_skb(skb); - } + kfree_skb(skb); } end: mutex_unlock(&psock->work_mutex); diff --git a/net/core/sock.c b/net/core/sock.c index 666a17cab4f5..16584e2dd648 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -765,7 +765,8 @@ bool sk_mc_loop(struct sock *sk) return false; if (!sk) return true; - switch (sk->sk_family) { + /* IPV6_ADDRFORM can change sk->sk_family under us. */ + switch (READ_ONCE(sk->sk_family)) { case AF_INET: return inet_test_bit(MC_LOOP, sk); #if IS_ENABLED(CONFIG_IPV6) @@ -893,7 +894,7 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index) if (!match) return -EINVAL; - sk->sk_bind_phc = phc_index; + WRITE_ONCE(sk->sk_bind_phc, phc_index); return 0; } @@ -936,7 +937,7 @@ int sock_set_timestamping(struct sock *sk, int optname, return ret; } - sk->sk_tsflags = val; + WRITE_ONCE(sk->sk_tsflags, val); sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW); if (val & SOF_TIMESTAMPING_RX_SOFTWARE) @@ -1044,7 +1045,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes) mem_cgroup_uncharge_skmem(sk->sk_memcg, pages); return -ENOMEM; } - sk->sk_forward_alloc += pages << PAGE_SHIFT; + sk_forward_alloc_add(sk, pages << PAGE_SHIFT); WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem + (pages << PAGE_SHIFT)); @@ -1718,8 +1719,8 @@ int sk_getsockopt(struct sock *sk, int level, int optname, case SO_TIMESTAMPING_OLD: lv = sizeof(v.timestamping); - v.timestamping.flags = sk->sk_tsflags; - v.timestamping.bind_phc = sk->sk_bind_phc; + v.timestamping.flags = READ_ONCE(sk->sk_tsflags); + 
v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc); break; case SO_RCVTIMEO_OLD: @@ -2746,9 +2747,9 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) break; - if (sk->sk_shutdown & SEND_SHUTDOWN) + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) break; - if (sk->sk_err) + if (READ_ONCE(sk->sk_err)) break; timeo = schedule_timeout(timeo); } @@ -2776,7 +2777,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, goto failure; err = -EPIPE; - if (sk->sk_shutdown & SEND_SHUTDOWN) + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto failure; if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf)) @@ -3138,10 +3139,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) { int ret, amt = sk_mem_pages(size); - sk->sk_forward_alloc += amt << PAGE_SHIFT; + sk_forward_alloc_add(sk, amt << PAGE_SHIFT); ret = __sk_mem_raise_allocated(sk, size, amt, kind); if (!ret) - sk->sk_forward_alloc -= amt << PAGE_SHIFT; + sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT)); return ret; } EXPORT_SYMBOL(__sk_mem_schedule); @@ -3173,7 +3174,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount) void __sk_mem_reclaim(struct sock *sk, int amount) { amount >>= PAGE_SHIFT; - sk->sk_forward_alloc -= amount << PAGE_SHIFT; + sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT)); __sk_mem_reduce_allocated(sk, amount); } EXPORT_SYMBOL(__sk_mem_reclaim); @@ -3742,7 +3743,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); - mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; + mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk); mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] 
= READ_ONCE(sk->sk_backlog.len); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 8f07fea39d9e..cb11750b1df5 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -18,7 +18,7 @@ struct bpf_stab { struct bpf_map map; struct sock **sks; struct sk_psock_progs progs; - raw_spinlock_t lock; + spinlock_t lock; }; #define SOCK_CREATE_FLAG_MASK \ @@ -44,7 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&stab->map, attr); - raw_spin_lock_init(&stab->lock); + spin_lock_init(&stab->lock); stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries * sizeof(struct sock *), @@ -411,7 +411,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock *sk; int err = 0; - raw_spin_lock_bh(&stab->lock); + spin_lock_bh(&stab->lock); sk = *psk; if (!sk_test || sk_test == sk) sk = xchg(psk, NULL); @@ -421,7 +421,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, else err = -EINVAL; - raw_spin_unlock_bh(&stab->lock); + spin_unlock_bh(&stab->lock); return err; } @@ -487,7 +487,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, psock = sk_psock(sk); WARN_ON_ONCE(!psock); - raw_spin_lock_bh(&stab->lock); + spin_lock_bh(&stab->lock); osk = stab->sks[idx]; if (osk && flags == BPF_NOEXIST) { ret = -EEXIST; @@ -501,10 +501,10 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, stab->sks[idx] = sk; if (osk) sock_map_unref(osk, &stab->sks[idx]); - raw_spin_unlock_bh(&stab->lock); + spin_unlock_bh(&stab->lock); return 0; out_unlock: - raw_spin_unlock_bh(&stab->lock); + spin_unlock_bh(&stab->lock); if (psock) sk_psock_put(sk, psock); out_free: @@ -835,7 +835,7 @@ struct bpf_shtab_elem { struct bpf_shtab_bucket { struct hlist_head head; - raw_spinlock_t lock; + spinlock_t lock; }; struct bpf_shtab { @@ -910,7 +910,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, * is okay since it's going 
away only after RCU grace period. * However, we need to check whether it's still present. */ - raw_spin_lock_bh(&bucket->lock); + spin_lock_bh(&bucket->lock); elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash, elem->key, map->key_size); if (elem_probe && elem_probe == elem) { @@ -918,7 +918,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, sock_map_unref(elem->sk, elem); sock_hash_free_elem(htab, elem); } - raw_spin_unlock_bh(&bucket->lock); + spin_unlock_bh(&bucket->lock); } static long sock_hash_delete_elem(struct bpf_map *map, void *key) @@ -932,7 +932,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key) hash = sock_hash_bucket_hash(key, key_size); bucket = sock_hash_select_bucket(htab, hash); - raw_spin_lock_bh(&bucket->lock); + spin_lock_bh(&bucket->lock); elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size); if (elem) { hlist_del_rcu(&elem->node); @@ -940,7 +940,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key) sock_hash_free_elem(htab, elem); ret = 0; } - raw_spin_unlock_bh(&bucket->lock); + spin_unlock_bh(&bucket->lock); return ret; } @@ -1000,7 +1000,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, hash = sock_hash_bucket_hash(key, key_size); bucket = sock_hash_select_bucket(htab, hash); - raw_spin_lock_bh(&bucket->lock); + spin_lock_bh(&bucket->lock); elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size); if (elem && flags == BPF_NOEXIST) { ret = -EEXIST; @@ -1026,10 +1026,10 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, sock_map_unref(elem->sk, elem); sock_hash_free_elem(htab, elem); } - raw_spin_unlock_bh(&bucket->lock); + spin_unlock_bh(&bucket->lock); return 0; out_unlock: - raw_spin_unlock_bh(&bucket->lock); + spin_unlock_bh(&bucket->lock); sk_psock_put(sk, psock); out_free: sk_psock_free_link(link); @@ -1115,7 +1115,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) for (i 
= 0; i < htab->buckets_num; i++) { INIT_HLIST_HEAD(&htab->buckets[i].head); - raw_spin_lock_init(&htab->buckets[i].lock); + spin_lock_init(&htab->buckets[i].lock); } return &htab->map; @@ -1147,11 +1147,11 @@ static void sock_hash_free(struct bpf_map *map) * exists, psock exists and holds a ref to socket. That * lets us to grab a socket ref too. */ - raw_spin_lock_bh(&bucket->lock); + spin_lock_bh(&bucket->lock); hlist_for_each_entry(elem, &bucket->head, node) sock_hold(elem->sk); hlist_move_list(&bucket->head, &unlink_list); - raw_spin_unlock_bh(&bucket->lock); + spin_unlock_bh(&bucket->lock); /* Process removed entries out of atomic context to * block for socket lock before deleting the psock's |