summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-01-11 19:07:29 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2024-01-11 19:07:29 +0100
commit3e7aeb78ab01c2c2f0e1f784e5ddec88fcd3d106 (patch)
treebdbfd45f8d8e967b06ed2d9cb92f67f686d02659 /net/ipv6
parentMerge tag 's390-6.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/... (diff)
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net (diff)
downloadlinux-3e7aeb78ab01c2c2f0e1f784e5ddec88fcd3d106.tar.xz
linux-3e7aeb78ab01c2c2f0e1f784e5ddec88fcd3d106.zip
Merge tag 'net-next-6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "The most interesting thing is probably the networking structs reorganization and a significant amount of changes is around self-tests. Core & protocols: - Analyze and reorganize core networking structs (socks, netdev, netns, mibs) to optimize cacheline consumption and set up build time warnings to safeguard against future header changes This improves TCP performances with many concurrent connections up to 40% - Add page-pool netlink-based introspection, exposing the memory usage and recycling stats. This helps indentify bad PP users and possible leaks - Refine TCP/DCCP source port selection to no longer favor even source port at connect() time when IP_LOCAL_PORT_RANGE is set. This lowers the time taken by connect() for hosts having many active connections to the same destination - Refactor the TCP bind conflict code, shrinking related socket structs - Refactor TCP SYN-Cookie handling, as a preparation step to allow arbitrary SYN-Cookie processing via eBPF - Tune optmem_max for 0-copy usage, increasing the default value to 128KB and namespecifying it - Allow coalescing for cloned skbs coming from page pools, improving RX performances with some common configurations - Reduce extension header parsing overhead at GRO time - Add bridge MDB bulk deletion support, allowing user-space to request the deletion of matching entries - Reorder nftables struct members, to keep data accessed by the datapath first - Introduce TC block ports tracking and use. This allows supporting multicast-like behavior at the TC layer - Remove UAPI support for retired TC qdiscs (dsmark, CBQ and ATM) and classifiers (RSVP and tcindex) - More data-race annotations - Extend the diag interface to dump TCP bound-only sockets - Conditional notification of events for TC qdisc class and actions - Support for WPAN dynamic associations with nearby devices, to form a sub-network using a specific PAN ID - Implement SMCv2.1 virtual ISM device support - Add support for Batman-avd mulicast packet type BPF: - Tons of verifier improvements: - BPF register bounds logic and range support along with a large test suite - log improvements - complete precision tracking support for register spills - track aligned STACK_ZERO cases as imprecise spilled registers. This improves the verifier "instructions processed" metric from single digit to 50-60% for some programs - support for user's global BPF subprogram arguments with few commonly requested annotations for a better developer experience - support tracking of BPF_JNE which helps cases when the compiler transforms (unsigned) "a > 0" into "if a == 0 goto xxx" and the like - several fixes - Add initial TX metadata implementation for AF_XDP with support in mlx5 and stmmac drivers. Two types of offloads are supported right now, that is, TX timestamp and TX checksum offload - Fix kCFI bugs in BPF all forms of indirect calls from BPF into kernel and from kernel into BPF work with CFI enabled. This allows BPF to work with CONFIG_FINEIBT=y - Change BPF verifier logic to validate global subprograms lazily instead of unconditionally before the main program, so they can be guarded using BPF CO-RE techniques - Support uid/gid options when mounting bpffs - Add a new kfunc which acquires the associated cgroup of a task within a specific cgroup v1 hierarchy where the latter is identified by its id - Extend verifier to allow bpf_refcount_acquire() of a map value field obtained via direct load which is a use-case needed in sched_ext - Add BPF link_info support for uprobe multi link along with bpftool integration for the latter - Support for VLAN tag in XDP hints - Remove deprecated bpfilter kernel leftovers given the project is developed in user-space (https://github.com/facebook/bpfilter) Misc: - Support for parellel TC self-tests execution - Increase MPTCP self-tests coverage - Updated the bridge documentation, including several so-far undocumented features - Convert all the net self-tests to run in unique netns, to avoid random failures due to conflict and allow concurrent runs - Add TCP-AO self-tests - Add kunit tests for both cfg80211 and mac80211 - Autogenerate Netlink families documentation from YAML spec - Add yml-gen support for fixed headers and recursive nests, the tool can now generate user-space code for all genetlink families for which we have specs - A bunch of additional module descriptions fixes - Catch incorrect freeing of pages belonging to a page pool Driver API: - Rust abstractions for network PHY drivers; do not cover yet the full C API, but already allow implementing functional PHY drivers in rust - Introduce queue and NAPI support in the netdev Netlink interface, allowing complete access to the device <> NAPIs <> queues relationship - Introduce notifications filtering for devlink to allow control application scale to thousands of instances - Improve PHY validation, requesting rate matching information for each ethtool link mode supported by both the PHY and host - Add support for ethtool symmetric-xor RSS hash - ACPI based Wifi band RFI (WBRF) mitigation feature for the AMD platform - Expose pin fractional frequency offset value over new DPLL generic netlink attribute - Convert older drivers to platform remove callback returning void - Add support for PHY package MMD read/write New hardware / drivers: - Ethernet: - Octeon CN10K devices - Broadcom 5760X P7 - Qualcomm SM8550 SoC - Texas Instrument DP83TG720S PHY - Bluetooth: - IMC Networks Bluetooth radio Removed: - WiFi: - libertas 16-bit PCMCIA support - Atmel at76c50x drivers - HostAP ISA/PCMCIA style 802.11b driver - zd1201 802.11b USB dongles - Orinoco ISA/PCMCIA 802.11b driver - Aviator/Raytheon driver - Planet WL3501 driver - RNDIS USB 802.11b driver Driver updates: - Ethernet high-speed NICs: - Intel (100G, ice, idpf): - allow one by one port representors creation and removal - add temperature and clock information reporting - add get/set for ethtool's header split ringparam - add again FW logging - adds support switchdev hardware packet mirroring - iavf: implement symmetric-xor RSS hash - igc: add support for concurrent physical and free-running timers - i40e: increase the allowable descriptors - nVidia/Mellanox: - Preparation for Socket-Direct multi-dev netdev. That will allow in future releases combining multiple PFs devices attached to different NUMA nodes under the same netdev - Broadcom (bnxt): - TX completion handling improvements - add basic ntuple filter support - reduce MSIX vectors usage for MQPRIO offload - add VXLAN support, USO offload and TX coalesce completion for P7 - Marvell Octeon EP: - xmit-more support - add PF-VF mailbox support and use it for FW notifications for VFs - Wangxun (ngbe/txgbe): - implement ethtool functions to operate pause param, ring param, coalesce channel number and msglevel - Netronome/Corigine (nfp): - add flow-steering support - support UDP segmentation offload - Ethernet NICs embedded, slower, virtual: - Xilinx AXI: remove duplicate DMA code adopting the dma engine driver - stmmac: add support for HW-accelerated VLAN stripping - TI AM654x sw: add mqprio, frame preemption & coalescing - gve: add support for non-4k page sizes. - virtio-net: support dynamic coalescing moderation - nVidia/Mellanox Ethernet datacenter switches: - allow firmware upgrade without a reboot - more flexible support for bridge flooding via the compressed FID flooding mode - Ethernet embedded switches: - Microchip: - fine-tune flow control and speed configurations in KSZ8xxx - KSZ88X3: enable setting rmii reference - Renesas: - add jumbo frames support - Marvell: - 88E6xxx: add "eth-mac" and "rmon" stats support - Ethernet PHYs: - aquantia: add firmware load support - at803x: refactor the driver to simplify adding support for more chip variants - NXP C45 TJA11xx: Add MACsec offload support - Wifi: - MediaTek (mt76): - NVMEM EEPROM improvements - mt7996 Extremely High Throughput (EHT) improvements - mt7996 Wireless Ethernet Dispatcher (WED) support - mt7996 36-bit DMA support - Qualcomm (ath12k): - support for a single MSI vector - WCN7850: support AP mode - Intel (iwlwifi): - new debugfs file fw_dbg_clear - allow concurrent P2P operation on DFS channels - Bluetooth: - QCA2066: support HFP offload - ISO: more broadcast-related improvements - NXP: better recovery in case receiver/transmitter get out of sync" * tag 'net-next-6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1714 commits) lan78xx: remove redundant statement in lan78xx_get_eee lan743x: remove redundant statement in lan743x_ethtool_get_eee bnxt_en: Fix RCU locking for ntuple filters in bnxt_rx_flow_steer() bnxt_en: Fix RCU locking for ntuple filters in bnxt_srxclsrldel() bnxt_en: Remove unneeded variable in bnxt_hwrm_clear_vnic_filter() tcp: Revert no longer abort SYN_SENT when receiving some ICMP Revert "mlx5 updates 2023-12-20" Revert "net: stmmac: Enable Per DMA Channel interrupt" ipvlan: Remove usage of the deprecated ida_simple_xx() API ipvlan: Fix a typo in a comment net/sched: Remove ipt action tests net: stmmac: Use interrupt mode INTM=1 for per channel irq net: stmmac: Add support for TX/RX channel interrupt net: stmmac: Make MSI interrupt routine generic dt-bindings: net: snps,dwmac: per channel irq net: phy: at803x: make read_status more generic net: phy: at803x: add support for cdt cross short test for qca808x net: phy: at803x: refactor qca808x cable test get status function net: phy: at803x: generalize cdt fault length function net: ethernet: cortina: Drop TSO support ...
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/datagram.c6
-rw-r--r--net/ipv6/exthdrs_offload.c11
-rw-r--r--net/ipv6/fib6_rules.c4
-rw-r--r--net/ipv6/icmp.c8
-rw-r--r--net/ipv6/ip6_offload.c76
-rw-r--r--net/ipv6/ip6_tunnel.c26
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c136
-rw-r--r--net/ipv6/ping.c8
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/syncookies.c108
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/ipv6/udp.c4
13 files changed, 209 insertions, 186 deletions
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index cc6a502db39d..fff78496803d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -60,9 +60,9 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6,
if (!oif) {
if (ipv6_addr_is_multicast(&fl6->daddr))
- oif = np->mcast_oif;
+ oif = READ_ONCE(np->mcast_oif);
else
- oif = np->ucast_oif;
+ oif = READ_ONCE(np->ucast_oif);
}
fl6->flowi6_oif = oif;
@@ -229,7 +229,7 @@ ipv4_connected:
}
if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST))
- WRITE_ONCE(sk->sk_bound_dev_if, np->mcast_oif);
+ WRITE_ONCE(sk->sk_bound_dev_if, READ_ONCE(np->mcast_oif));
/* Connect to link-local address requires an interface */
if (!sk->sk_bound_dev_if) {
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index 06750d65d480..4c00398f4dca 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -16,6 +16,10 @@ static const struct net_offload dstopt_offload = {
.flags = INET6_PROTO_GSO_EXTHDR,
};
+static const struct net_offload hbh_offload = {
+ .flags = INET6_PROTO_GSO_EXTHDR,
+};
+
int __init ipv6_exthdrs_offload_init(void)
{
int ret;
@@ -28,9 +32,16 @@ int __init ipv6_exthdrs_offload_init(void)
if (ret)
goto out_rt;
+ ret = inet6_add_offload(&hbh_offload, IPPROTO_HOPOPTS);
+ if (ret)
+ goto out_dstopts;
+
out:
return ret;
+out_dstopts:
+ inet6_del_offload(&dstopt_offload, IPPROTO_DSTOPTS);
+
out_rt:
inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
goto out;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 7c2003833010..7523c4baef35 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -475,11 +475,11 @@ static int __net_init fib6_rules_net_init(struct net *net)
if (IS_ERR(ops))
return PTR_ERR(ops);
- err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0);
+ err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL);
if (err)
goto out_fib6_rules_ops;
- err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0);
+ err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN);
if (err)
goto out_fib6_rules_ops;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f62427097126..1635da07285f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -584,9 +584,9 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
tmp_hdr.icmp6_pointer = htonl(info);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
ipcm6_init_sk(&ipc6, sk);
ipc6.sockc.mark = mark;
@@ -770,9 +770,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
np = inet6_sk(sk);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
if (ip6_dst_lookup(net, sk, &dst, &fl6))
goto out;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d6314287338d..cca64c7809be 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -37,6 +37,40 @@
INDIRECT_CALL_L4(cb, f2, f1, head, skb); \
})
+static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
+{
+ const struct net_offload *ops = NULL;
+ struct ipv6_opt_hdr *opth;
+
+ for (;;) {
+ int len;
+
+ ops = rcu_dereference(inet6_offloads[proto]);
+
+ if (unlikely(!ops))
+ break;
+
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+
+ opth = skb_gro_header(skb, off + sizeof(*opth), off);
+ if (unlikely(!opth))
+ break;
+
+ len = ipv6_optlen(opth);
+
+ opth = skb_gro_header(skb, off + len, off);
+ if (unlikely(!opth))
+ break;
+ proto = opth->nexthdr;
+
+ off += len;
+ }
+
+ skb_gro_pull(skb, off - skb_network_offset(skb));
+ return proto;
+}
+
static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
{
const struct net_offload *ops = NULL;
@@ -45,15 +79,13 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
struct ipv6_opt_hdr *opth;
int len;
- if (proto != NEXTHDR_HOP) {
- ops = rcu_dereference(inet6_offloads[proto]);
+ ops = rcu_dereference(inet6_offloads[proto]);
- if (unlikely(!ops))
- break;
+ if (unlikely(!ops))
+ break;
- if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
if (unlikely(!pskb_may_pull(skb, 8)))
break;
@@ -171,13 +203,12 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
proto = iph->nexthdr;
for (;;) {
- if (proto != NEXTHDR_HOP) {
- *opps = rcu_dereference(inet6_offloads[proto]);
- if (unlikely(!(*opps)))
- break;
- if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
+ *opps = rcu_dereference(inet6_offloads[proto]);
+ if (unlikely(!(*opps)))
+ break;
+ if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+
opth = (void *)opth + optlen;
optlen = ipv6_optlen(opth);
len += optlen;
@@ -206,28 +237,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
goto out;
skb_set_network_header(skb, off);
- skb_gro_pull(skb, sizeof(*iph));
- skb_set_transport_header(skb, skb_gro_offset(skb));
- flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+ flush += ntohs(iph->payload_len) != skb->len - hlen;
proto = iph->nexthdr;
ops = rcu_dereference(inet6_offloads[proto]);
if (!ops || !ops->callbacks.gro_receive) {
- pskb_pull(skb, skb_gro_offset(skb));
- skb_gro_frag0_invalidate(skb);
- proto = ipv6_gso_pull_exthdrs(skb, proto);
- skb_gro_pull(skb, -skb_transport_offset(skb));
- skb_reset_transport_header(skb);
- __skb_push(skb, skb_gro_offset(skb));
+ proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
ops = rcu_dereference(inet6_offloads[proto]);
if (!ops || !ops->callbacks.gro_receive)
goto out;
- iph = ipv6_hdr(skb);
+ iph = skb_gro_network_header(skb);
+ } else {
+ skb_gro_pull(skb, sizeof(*iph));
}
+ skb_set_transport_header(skb, skb_gro_offset(skb));
+
NAPI_GRO_CB(skb)->proto = proto;
flush--;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5e80e517f071..46c19bd48990 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -399,7 +399,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
unsigned int nhoff = raw - skb->data;
unsigned int off = nhoff + sizeof(*ipv6h);
- u8 next, nexthdr = ipv6h->nexthdr;
+ u8 nexthdr = ipv6h->nexthdr;
while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
struct ipv6_opt_hdr *hdr;
@@ -410,25 +410,25 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
hdr = (struct ipv6_opt_hdr *)(skb->data + off);
if (nexthdr == NEXTHDR_FRAGMENT) {
- struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
- if (frag_hdr->frag_off)
- break;
optlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
optlen = ipv6_authlen(hdr);
} else {
optlen = ipv6_optlen(hdr);
}
- /* cache hdr->nexthdr, since pskb_may_pull() might
- * invalidate hdr
- */
- next = hdr->nexthdr;
- if (nexthdr == NEXTHDR_DEST) {
- u16 i = 2;
- /* Remember : hdr is no longer valid at this point. */
- if (!pskb_may_pull(skb, off + optlen))
+ if (!pskb_may_pull(skb, off + optlen))
+ break;
+
+ hdr = (struct ipv6_opt_hdr *)(skb->data + off);
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ struct frag_hdr *frag_hdr = (struct frag_hdr *)hdr;
+
+ if (frag_hdr->frag_off)
break;
+ }
+ if (nexthdr == NEXTHDR_DEST) {
+ u16 i = 2;
while (1) {
struct ipv6_tlv_tnl_enc_lim *tel;
@@ -449,7 +449,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
i++;
}
}
- nexthdr = next;
+ nexthdr = hdr->nexthdr;
off += optlen;
}
return 0;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 30ca064b76ef..9782c180fee6 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -242,7 +242,7 @@ static int __net_init ip6mr_rules_init(struct net *net)
goto err1;
}
- err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
+ err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
if (err < 0)
goto err2;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 7d661735cb9d..56c3c467f9de 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max))
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -509,6 +509,59 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
return ip6_sock_set_addr_preferences(sk, val);
+ case IPV6_MULTICAST_IF:
+ if (sk->sk_type == SOCK_STREAM)
+ return -ENOPROTOOPT;
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val) {
+ struct net_device *dev;
+ int bound_dev_if, midx;
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, val);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ midx = l3mdev_master_ifindex_rcu(dev);
+
+ rcu_read_unlock();
+
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if &&
+ bound_dev_if != val &&
+ (!midx || midx != bound_dev_if))
+ return -EINVAL;
+ }
+ WRITE_ONCE(np->mcast_oif, val);
+ return 0;
+ case IPV6_UNICAST_IF:
+ {
+ struct net_device *dev;
+ int ifindex;
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+
+ ifindex = (__force int)ntohl((__force __be32)val);
+ if (!ifindex) {
+ WRITE_ONCE(np->ucast_oif, 0);
+ return 0;
+ }
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev)
+ return -EADDRNOTAVAIL;
+ dev_put(dev);
+
+ if (READ_ONCE(sk->sk_bound_dev_if))
+ return -EINVAL;
+
+ WRITE_ONCE(np->ucast_oif, ifindex);
+ return 0;
+ }
}
if (needs_rtnl)
rtnl_lock();
@@ -829,67 +882,6 @@ done:
break;
}
-
- case IPV6_UNICAST_IF:
- {
- struct net_device *dev = NULL;
- int ifindex;
-
- if (optlen != sizeof(int))
- goto e_inval;
-
- ifindex = (__force int)ntohl((__force __be32)val);
- if (ifindex == 0) {
- np->ucast_oif = 0;
- retv = 0;
- break;
- }
-
- dev = dev_get_by_index(net, ifindex);
- retv = -EADDRNOTAVAIL;
- if (!dev)
- break;
- dev_put(dev);
-
- retv = -EINVAL;
- if (sk->sk_bound_dev_if)
- break;
-
- np->ucast_oif = ifindex;
- retv = 0;
- break;
- }
-
- case IPV6_MULTICAST_IF:
- if (sk->sk_type == SOCK_STREAM)
- break;
- if (optlen < sizeof(int))
- goto e_inval;
-
- if (val) {
- struct net_device *dev;
- int midx;
-
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, val);
- if (!dev) {
- rcu_read_unlock();
- retv = -ENODEV;
- break;
- }
- midx = l3mdev_master_ifindex_rcu(dev);
-
- rcu_read_unlock();
-
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != val &&
- (!midx || midx != sk->sk_bound_dev_if))
- goto e_inval;
- }
- np->mcast_oif = val;
- retv = 0;
- break;
case IPV6_ADD_MEMBERSHIP:
case IPV6_DROP_MEMBERSHIP:
{
@@ -1161,10 +1153,12 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
sockopt_release_sock(sk);
if (!skb) {
if (np->rxopt.bits.rxinfo) {
+ int mcast_oif = READ_ONCE(np->mcast_oif);
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+
+ src_info.ipi6_ifindex = mcast_oif ? :
np->sticky_pktinfo.ipi6_ifindex;
- src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
+ src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
@@ -1178,11 +1172,13 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
}
if (np->rxopt.bits.rxoinfo) {
+ int mcast_oif = READ_ONCE(np->mcast_oif);
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+
+ src_info.ipi6_ifindex = mcast_oif ? :
np->sticky_pktinfo.ipi6_ifindex;
- src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr :
- np->sticky_pktinfo.ipi6_addr;
+ src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr :
+ np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
@@ -1359,7 +1355,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_MULTICAST_IF:
- val = np->mcast_oif;
+ val = READ_ONCE(np->mcast_oif);
break;
case IPV6_MULTICAST_ALL:
@@ -1367,7 +1363,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_UNICAST_IF:
- val = (__force int)htonl((__u32) np->ucast_oif);
+ val = (__force int)htonl((__u32) READ_ONCE(np->ucast_oif));
break;
case IPV6_MTU_DISCOVER:
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index d2098dd4ceae..ef2059c88955 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -107,9 +107,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
oif = np->sticky_pktinfo.ipi6_ifindex;
if (!oif && ipv6_addr_is_multicast(daddr))
- oif = np->mcast_oif;
+ oif = READ_ONCE(np->mcast_oif);
else if (!oif)
- oif = np->ucast_oif;
+ oif = READ_ONCE(np->ucast_oif);
addr_type = ipv6_addr_type(daddr);
if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
@@ -157,9 +157,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rt = (struct rt6_info *) dst;
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
pfh.icmph.type = user_icmph.icmp6_type;
pfh.icmph.code = user_icmph.icmp6_code;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dd0a4e73e602..03dbb874c363 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -876,9 +876,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
final_p = fl6_update_dst(&fl6, opt, &final);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
if (hdrincl)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 12eedc6ca2cc..c8d2ca27220c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -114,76 +114,82 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp)
return __cookie_v6_init_sequence(iph, th, mssp);
}
-int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
- __u32 cookie)
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th)
{
+ __u32 cookie = ntohl(th->ack_seq) - 1;
__u32 seq = ntohl(th->seq) - 1;
- __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
- th->source, th->dest, seq);
+ __u32 mssind;
+
+ mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
EXPORT_SYMBOL_GPL(__cookie_v6_check);
-struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct tcp_options_received tcp_opt;
- struct inet_request_sock *ireq;
- struct tcp_request_sock *treq;
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- const struct tcphdr *th = tcp_hdr(skb);
- __u32 cookie = ntohl(th->ack_seq) - 1;
- struct sock *ret = sk;
- struct request_sock *req;
- int full_space, mss;
- struct dst_entry *dst;
- __u8 rcv_wscale;
u32 tsoff = 0;
- int l3index;
-
- if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
- !th->ack || th->rst)
- goto out;
+ int mss;
if (tcp_synq_no_recent_overflow(sk))
goto out;
- mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie);
- if (mss == 0) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ mss = __cookie_v6_check(ipv6_hdr(skb), tcp_hdr(skb));
+ if (!mss) {
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcpv6_ts_off(sock_net(sk),
+ tsoff = secure_tcpv6_ts_off(net,
ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32);
tcp_opt.rcv_tsecr -= tsoff;
}
- if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
+ if (!cookie_timestamp_decode(net, &tcp_opt))
goto out;
- ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
- &tcp_request_sock_ipv6_ops, sk, skb);
- if (!req)
+ return cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb,
+ &tcp_opt, mss, tsoff);
+out:
+ return ERR_PTR(-EINVAL);
+}
+
+struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_request_sock *ireq;
+ struct net *net = sock_net(sk);
+ struct request_sock *req;
+ struct dst_entry *dst;
+ struct sock *ret = sk;
+ __u8 rcv_wscale;
+ int full_space;
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ if (!req)
+ goto out_drop;
+
ireq = inet_rsk(req);
- treq = tcp_rsk(req);
- treq->tfo_listener = false;
- req->mss = mss;
- ireq->ir_rmt_port = th->source;
- ireq->ir_num = ntohs(th->dest);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
@@ -197,31 +203,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->pktopts = skb;
}
- ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
- ireq->ir_mark = inet_request_mark(sk, skb);
-
- req->num_retrans = 0;
- ireq->snd_wscale = tcp_opt.snd_wscale;
- ireq->sack_ok = tcp_opt.sack_ok;
- ireq->wscale_ok = tcp_opt.wscale_ok;
- ireq->tstamp_ok = tcp_opt.saw_tstamp;
- req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack = 0;
- treq->rcv_isn = ntohl(th->seq) - 1;
- treq->snt_isn = cookie;
- treq->ts_off = 0;
- treq->txhash = net_tx_rndhash();
-
- l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
- tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index);
-
- if (IS_ENABLED(CONFIG_SMC))
- ireq->smc_ok = 0;
+ tcp_ao_syncookie(sk, skb, req, AF_INET6);
/*
* We need to lookup the dst_entry to get the correct window size.
@@ -243,7 +230,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
fl6.flowi6_uid = sk->sk_uid;
security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
if (IS_ERR(dst))
goto out_free;
}
@@ -261,12 +248,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
dst_metric(dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
- ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
+ ireq->ecn_ok &= cookie_ecn_ok(net, dst);
- ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff);
+ ret = tcp_get_cookie_sock(sk, skb, req, dst);
out:
return ret;
out_free:
reqsk_free(req);
+out_drop:
return NULL;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8c6623496dd7..57b25b1fc9d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1699,7 +1699,7 @@ ipv6_pktoptions:
if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
- np->mcast_oif = tcp_v6_iif(opt_skb);
+ WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
WRITE_ONCE(np->mcast_hops,
ipv6_hdr(opt_skb)->hop_limit);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 622b10a549f7..594e3f23c129 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1541,10 +1541,10 @@ do_udp_sendmsg:
connected = false;
if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) {
- fl6->flowi6_oif = np->mcast_oif;
+ fl6->flowi6_oif = READ_ONCE(np->mcast_oif);
connected = false;
} else if (!fl6->flowi6_oif)
- fl6->flowi6_oif = np->ucast_oif;
+ fl6->flowi6_oif = READ_ONCE(np->ucast_oif);
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));