summaryrefslogtreecommitdiffstats
path: root/net/socket.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-25 21:22:58 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-25 21:22:58 +0200
commit7e062cda7d90543ac8c7700fc7c5527d0c0f22ad (patch)
tree2f1602595d9416be41cc2e88a659ba4c145734b9 /net/socket.c
parentMerge branch 'for-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq (diff)
parentMerge branch 'ptp-ocp-various-updates' (diff)
downloadlinux-7e062cda7d90543ac8c7700fc7c5527d0c0f22ad.tar.xz
linux-7e062cda7d90543ac8c7700fc7c5527d0c0f22ad.zip
Merge tag 'net-next-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core ---- - Support TCPv6 segmentation offload with super-segments larger than 64k bytes using the IPv6 Jumbogram extension header (AKA BIG TCP). - Generalize skb freeing deferral to per-cpu lists, instead of per-socket lists. - Add a netdev statistic for packets dropped due to L2 address mismatch (rx_otherhost_dropped). - Continue work annotating skb drop reasons. - Accept alternative netdev names (ALT_IFNAME) in more netlink requests. - Add VLAN support for AF_PACKET SOCK_RAW GSO. - Allow receiving skb mark from the socket as a cmsg. - Enable memcg accounting for veth queues, sysctl tables and IPv6. BPF --- - Add libbpf support for User Statically-Defined Tracing (USDTs). - Speed up symbol resolution for kprobes multi-link attachments. - Support storing typed pointers to referenced and unreferenced objects in BPF maps. - Add support for BPF link iterator. - Introduce access to remote CPU map elements in BPF per-cpu map. - Allow middle-of-the-road settings for the kernel.unprivileged_bpf_disabled sysctl. - Implement basic types of dynamic pointers e.g. to allow for dynamically sized ringbuf reservations without extra memory copies. Protocols --------- - Retire port only listening_hash table, add a second bind table hashed by port and address. Avoid linear list walk when binding to very popular ports (e.g. 443). - Add bridge FDB bulk flush filtering support allowing user space to remove all FDB entries matching a condition. - Introduce accept_unsolicited_na sysctl for IPv6 to implement router-side changes for RFC9131. - Support for MPTCP path manager in user space. - Add MPTCP support for fallback to regular TCP for connections that have never connected additional subflows or transmitted out-of-sequence data (partial support for RFC8684 fallback). - Avoid races in MPTCP-level window tracking, stabilize and improve throughput. - Support lockless operation of GRE tunnels with seq numbers enabled. - WiFi support for host based BSS color collision detection. - Add support for SO_TXTIME/SCM_TXTIME on CAN sockets. - Support transmission w/o flow control in CAN ISOTP (ISO 15765-2). - Support zero-copy Tx with TLS 1.2 crypto offload (sendfile). - Allow matching on the number of VLAN tags via tc-flower. - Add tracepoint for tcp_set_ca_state(). Driver API ---------- - Improve error reporting from classifier and action offload. - Add support for listing line cards in switches (devlink). - Add helpers for reporting page pool statistics with ethtool -S. - Add support for reading clock cycles when using PTP virtual clocks, instead of having the driver convert to time before reporting. This makes it possible to report time from different vclocks. - Support configuring low-latency Tx descriptor push via ethtool. - Separate Clause 22 and Clause 45 MDIO accesses more explicitly. New hardware / drivers ---------------------- - Ethernet: - Marvell's Octeon NIC PCI Endpoint support (octeon_ep) - Sunplus SP7021 SoC (sp7021_emac) - Add support for Renesas RZ/V2M (in ravb) - Add support for MediaTek mt7986 switches (in mtk_eth_soc) - Ethernet PHYs: - ADIN1100 industrial PHYs (w/ 10BASE-T1L and SQI reporting) - TI DP83TD510 PHY - Microchip LAN8742/LAN88xx PHYs - WiFi: - Driver for pureLiFi X, XL, XC devices (plfxlc) - Driver for Silicon Labs devices (wfx) - Support for WCN6750 (in ath11k) - Support Realtek 8852ce devices (in rtw89) - Mobile: - MediaTek T700 modems (Intel 5G 5000 M.2 cards) - CAN: - ctucanfd: add support for CTU CAN FD open-source IP core from Czech Technical University in Prague Drivers ------- - Delete a number of old drivers still using virt_to_bus(). - Ethernet NICs: - intel: support TSO on tunnels MPLS - broadcom: support multi-buffer XDP - nfp: support VF rate limiting - sfc: use hardware tx timestamps for more than PTP - mlx5: multi-port eswitch support - hyper-v: add support for XDP_REDIRECT - atlantic: XDP support (including multi-buffer) - macb: improve real-time perf by deferring Tx processing to NAPI - High-speed Ethernet switches: - mlxsw: implement basic line card information querying - prestera: add support for traffic policing on ingress and egress - Embedded Ethernet switches: - lan966x: add support for packet DMA (FDMA) - lan966x: add support for PTP programmable pins - ti: cpsw_new: enable bc/mc storm prevention - Qualcomm 802.11ax WiFi (ath11k): - Wake-on-WLAN support for QCA6390 and WCN6855 - device recovery (firmware restart) support - support setting Specific Absorption Rate (SAR) for WCN6855 - read country code from SMBIOS for WCN6855/QCA6390 - enable keep-alive during WoWLAN suspend - implement remain-on-channel support - MediaTek WiFi (mt76): - support Wireless Ethernet Dispatch offloading packet movement between the Ethernet switch and WiFi interfaces - non-standard VHT MCS10-11 support - mt7921 AP mode support - mt7921 IPv6 NS offload support - Ethernet PHYs: - micrel: ksz9031/ksz9131: cabletest support - lan87xx: SQI support for T1 PHYs - lan937x: add interrupt support for link detection" * tag 'net-next-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1809 commits) ptp: ocp: Add firmware header checks ptp: ocp: fix PPS source selector debugfs reporting ptp: ocp: add .init function for sma_op vector ptp: ocp: vectorize the sma accessor functions ptp: ocp: constify selectors ptp: ocp: parameterize input/output sma selectors ptp: ocp: revise firmware display ptp: ocp: add Celestica timecard PCI ids ptp: ocp: Remove #ifdefs around PCI IDs ptp: ocp: 32-bit fixups for pci start address Revert "net/smc: fix listen processing for SMC-Rv2" ath6kl: Use cc-disable-warning to disable -Wdangling-pointer selftests/bpf: Dynptr tests bpf: Add dynptr data slices bpf: Add bpf_dynptr_read and bpf_dynptr_write bpf: Dynptr support for ring buffers bpf: Add bpf_dynptr_from_mem for local dynptrs bpf: Add verifier support for dynptrs bpf: Suppress 'passing zero to PTR_ERR' warning bpf: Introduce bpf_arch_text_invalidate for bpf_prog_pack ...
Diffstat (limited to 'net/socket.c')
-rw-r--r--net/socket.c75
1 files changed, 61 insertions, 14 deletions
diff --git a/net/socket.c b/net/socket.c
index bb6a1a12fbde..2bc8773d9dc5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -683,9 +683,18 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
{
u8 flags = *tx_flags;
- if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
+ if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
flags |= SKBTX_HW_TSTAMP;
+ /* PTP hardware clocks can provide a free running cycle counter
+ * as a time base for virtual clocks. Tell driver to use the
+ * free running cycle counter for timestamp if socket is bound
+ * to virtual clock.
+ */
+ if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
+ }
+
if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
flags |= SKBTX_SW_TSTAMP;
@@ -796,7 +805,28 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
}
-static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
+{
+ bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
+ struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+ struct net_device *orig_dev;
+ ktime_t hwtstamp;
+
+ rcu_read_lock();
+ orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+ if (orig_dev) {
+ *if_index = orig_dev->ifindex;
+ hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
+ } else {
+ hwtstamp = shhwtstamps->hwtstamp;
+ }
+ rcu_read_unlock();
+
+ return hwtstamp;
+}
+
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
+ int if_index)
{
struct scm_ts_pktinfo ts_pktinfo;
struct net_device *orig_dev;
@@ -806,11 +836,14 @@ static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
- rcu_read_lock();
- orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
- if (orig_dev)
- ts_pktinfo.if_index = orig_dev->ifindex;
- rcu_read_unlock();
+ if (!if_index) {
+ rcu_read_lock();
+ orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+ if (orig_dev)
+ if_index = orig_dev->ifindex;
+ rcu_read_unlock();
+ }
+ ts_pktinfo.if_index = if_index;
ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
@@ -830,6 +863,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
int empty = 1, false_tstamp = 0;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
+ int if_index;
ktime_t hwtstamp;
/* Race occurred between timestamp enabling and packet
@@ -878,18 +912,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
if (shhwtstamps &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
!skb_is_swtx_tstamp(skb, false_tstamp)) {
- if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
- hwtstamp = ptp_convert_timestamp(shhwtstamps,
- sk->sk_bind_phc);
+ if_index = 0;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
+ hwtstamp = get_timestamp(sk, skb, &if_index);
else
hwtstamp = shhwtstamps->hwtstamp;
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ hwtstamp = ptp_convert_timestamp(&hwtstamp,
+ sk->sk_bind_phc);
+
if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
empty = 0;
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
!skb_is_err_queue(skb))
- put_ts_pktinfo(msg, skb);
+ put_ts_pktinfo(msg, skb, if_index);
}
}
if (!empty) {
@@ -930,13 +968,22 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
}
-void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
- struct sk_buff *skb)
+static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (sock_flag(sk, SOCK_RCVMARK) && skb)
+ put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32),
+ &skb->mark);
+}
+
+void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
+ struct sk_buff *skb)
{
sock_recv_timestamp(msg, sk, skb);
sock_recv_drops(msg, sk, skb);
+ sock_recv_mark(msg, sk, skb);
}
-EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
+EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
size_t, int));