summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bluetooth/af_bluetooth.c2
-rw-r--r--net/bridge/br_stp_if.c11
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c5
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/core/datagram.c92
-rw-r--r--net/core/dev.c90
-rw-r--r--net/core/dev_ioctl.c1
-rw-r--r--net/core/net-sysfs.c8
-rw-r--r--net/core/rtnetlink.c4
-rw-r--r--net/core/skbuff.c30
-rw-r--r--net/core/sock.c4
-rw-r--r--net/dcb/dcbnl.c11
-rw-r--r--net/dccp/ccids/ccid2.c8
-rw-r--r--net/dccp/ccids/ccid2.h2
-rw-r--r--net/dsa/Kconfig8
-rw-r--r--net/dsa/Makefile8
-rw-r--r--net/dsa/dsa.c20
-rw-r--r--net/dsa/dsa2.c16
-rw-r--r--net/dsa/dsa_priv.h102
-rw-r--r--net/dsa/legacy.c12
-rw-r--r--net/dsa/port.c260
-rw-r--r--net/dsa/slave.c367
-rw-r--r--net/dsa/switch.c178
-rw-r--r--net/dsa/tag_brcm.c4
-rw-r--r--net/dsa/tag_dsa.c2
-rw-r--r--net/dsa/tag_edsa.c2
-rw-r--r--net/dsa/tag_lan9303.c2
-rw-r--r--net/dsa/tag_mtk.c2
-rw-r--r--net/dsa/tag_qca.c4
-rw-r--r--net/dsa/tag_trailer.c4
-rw-r--r--net/ipv4/fib_frontend.c18
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_semantics.c133
-rw-r--r--net/ipv4/fib_trie.c4
-rw-r--r--net/ipv4/fou.c82
-rw-r--r--net/ipv4/inet_connection_sock.c1
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv4/syncookies.c8
-rw-r--r--net/ipv4/tcp.c15
-rw-r--r--net/ipv4/tcp_bbr.c43
-rw-r--r--net/ipv4/tcp_bic.c6
-rw-r--r--net/ipv4/tcp_cubic.c14
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_input.c139
-rw-r--r--net/ipv4/tcp_ipv4.c16
-rw-r--r--net/ipv4/tcp_lp.c17
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/ipv4/tcp_minisocks.c8
-rw-r--r--net/ipv4/tcp_nv.c5
-rw-r--r--net/ipv4/tcp_output.c131
-rw-r--r--net/ipv4/tcp_rate.c16
-rw-r--r--net/ipv4/tcp_recovery.c24
-rw-r--r--net/ipv4/tcp_timer.c21
-rw-r--r--net/ipv4/tcp_westwood.c6
-rw-r--r--net/ipv4/udp.c156
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/fou6.c14
-rw-r--r--net/ipv6/ila/ila_lwt.c2
-rw-r--r--net/ipv6/ip6_fib.c18
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c3
-rw-r--r--net/ipv6/route.c97
-rw-r--r--net/ipv6/seg6.c4
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/kcm/kcmsock.c2
-rw-r--r--net/netfilter/nf_synproxy_core.c2
-rw-r--r--net/nfc/af_nfc.c2
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/sched/act_api.c55
-rw-r--r--net/sched/act_csum.c1
-rw-r--r--net/sched/cls_api.c411
-rw-r--r--net/sched/sch_api.c50
-rw-r--r--net/sched/sch_atm.c29
-rw-r--r--net/sched/sch_cbq.c21
-rw-r--r--net/sched/sch_drr.c15
-rw-r--r--net/sched/sch_dsmark.c17
-rw-r--r--net/sched/sch_fq.c8
-rw-r--r--net/sched/sch_fq_codel.c17
-rw-r--r--net/sched/sch_hfsc.c21
-rw-r--r--net/sched/sch_htb.c28
-rw-r--r--net/sched/sch_ingress.c61
-rw-r--r--net/sched/sch_multiq.c16
-rw-r--r--net/sched/sch_prio.c19
-rw-r--r--net/sched/sch_qfq.c16
-rw-r--r--net/sched/sch_sfb.c17
-rw-r--r--net/sched/sch_sfq.c17
-rw-r--r--net/sctp/offload.c7
-rw-r--r--net/sctp/output.c1
-rw-r--r--net/socket.c49
90 files changed, 2102 insertions, 1066 deletions
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 42d0997e2fbb..8a8f77a247e6 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -733,7 +733,7 @@ void bt_procfs_cleanup(struct net *net, const char *name)
EXPORT_SYMBOL(bt_procfs_init);
EXPORT_SYMBOL(bt_procfs_cleanup);
-static struct net_proto_family bt_sock_family_ops = {
+static const struct net_proto_family bt_sock_family_ops = {
.owner = THIS_MODULE,
.family = PF_BLUETOOTH,
.create = bt_sock_create,
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 0db8102995a5..4efd5d54498a 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -150,7 +150,6 @@ static int br_stp_call_user(struct net_bridge *br, char *arg)
static void br_stp_start(struct net_bridge *br)
{
- struct net_bridge_port *p;
int err = -ENOENT;
if (net_eq(dev_net(br->dev), &init_net))
@@ -169,11 +168,6 @@ static void br_stp_start(struct net_bridge *br)
if (!err) {
br->stp_enabled = BR_USER_STP;
br_debug(br, "userspace STP started\n");
-
- /* Stop hello and hold timers */
- del_timer(&br->hello_timer);
- list_for_each_entry(p, &br->port_list, list)
- del_timer(&p->hold_timer);
} else {
br->stp_enabled = BR_KERNEL_STP;
br_debug(br, "using kernel STP\n");
@@ -188,7 +182,6 @@ static void br_stp_start(struct net_bridge *br)
static void br_stp_stop(struct net_bridge *br)
{
- struct net_bridge_port *p;
int err;
if (br->stp_enabled == BR_USER_STP) {
@@ -197,10 +190,6 @@ static void br_stp_stop(struct net_bridge *br)
br_err(br, "failed to stop userspace STP (%d)\n", err);
/* To start timers on any ports left in blocking */
- mod_timer(&br->hello_timer, jiffies + br->hello_time);
- list_for_each_entry(p, &br->port_list, list)
- mod_timer(&p->hold_timer,
- round_jiffies(jiffies + BR_HOLD_TIME));
spin_lock_bh(&br->lock);
br_port_state_selection(br);
spin_unlock_bh(&br->lock);
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 346ef6b00b8f..c16dd3a47fc6 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -111,7 +111,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
__wsum csum;
u8 proto;
- if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
+ if (!nft_bridge_iphdr_validate(oldskb))
return;
/* IP header checks: fragment. */
@@ -226,9 +226,6 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
__be16 fo;
u8 proto = ip6h->nexthdr;
- if (skb->csum_bad)
- return false;
-
if (skb_csum_unnecessary(skb))
return true;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index adcad344c843..4674d17e7c08 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1099,7 +1099,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
}
-static struct net_proto_family caif_family_ops = {
+static const struct net_proto_family caif_family_ops = {
.family = PF_CAIF,
.create = caif_create,
.owner = THIS_MODULE,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index db1866f2ffcf..bc46118486fe 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -161,6 +161,45 @@ done:
return skb;
}
+struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
+ struct sk_buff_head *queue,
+ unsigned int flags,
+ void (*destructor)(struct sock *sk,
+ struct sk_buff *skb),
+ int *peeked, int *off, int *err,
+ struct sk_buff **last)
+{
+ struct sk_buff *skb;
+ int _off = *off;
+
+ *last = queue->prev;
+ skb_queue_walk(queue, skb) {
+ if (flags & MSG_PEEK) {
+ if (_off >= skb->len && (skb->len || _off ||
+ skb->peeked)) {
+ _off -= skb->len;
+ continue;
+ }
+ if (!skb->len) {
+ skb = skb_set_peeked(skb);
+ if (unlikely(IS_ERR(skb))) {
+ *err = PTR_ERR(skb);
+ return NULL;
+ }
+ }
+ *peeked = 1;
+ atomic_inc(&skb->users);
+ } else {
+ __skb_unlink(skb, queue);
+ if (destructor)
+ destructor(sk, skb);
+ }
+ *off = _off;
+ return skb;
+ }
+ return NULL;
+}
+
/**
* __skb_try_recv_datagram - Receive a datagram skbuff
* @sk: socket
@@ -222,40 +261,14 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
* Look at current nfs client by the way...
* However, this function was correct in any case. 8)
*/
- int _off = *off;
-
- *last = (struct sk_buff *)queue;
spin_lock_irqsave(&queue->lock, cpu_flags);
- skb_queue_walk(queue, skb) {
- *last = skb;
- if (flags & MSG_PEEK) {
- if (_off >= skb->len && (skb->len || _off ||
- skb->peeked)) {
- _off -= skb->len;
- continue;
- }
- if (!skb->len) {
- skb = skb_set_peeked(skb);
- if (IS_ERR(skb)) {
- error = PTR_ERR(skb);
- spin_unlock_irqrestore(&queue->lock,
- cpu_flags);
- goto no_packet;
- }
- }
- *peeked = 1;
- atomic_inc(&skb->users);
- } else {
- __skb_unlink(skb, queue);
- if (destructor)
- destructor(sk, skb);
- }
- spin_unlock_irqrestore(&queue->lock, cpu_flags);
- *off = _off;
- return skb;
- }
-
+ skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
+ peeked, off, &error, last);
spin_unlock_irqrestore(&queue->lock, cpu_flags);
+ if (error)
+ goto no_packet;
+ if (skb)
+ return skb;
if (!sk_can_busy_loop(sk))
break;
@@ -335,8 +348,8 @@ void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
}
EXPORT_SYMBOL(__skb_free_datagram_locked);
-int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
- unsigned int flags,
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
+ struct sk_buff *skb, unsigned int flags,
void (*destructor)(struct sock *sk,
struct sk_buff *skb))
{
@@ -344,15 +357,15 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
if (flags & MSG_PEEK) {
err = -ENOENT;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
- __skb_unlink(skb, &sk->sk_receive_queue);
+ spin_lock_bh(&sk_queue->lock);
+ if (skb == skb_peek(sk_queue)) {
+ __skb_unlink(skb, sk_queue);
atomic_dec(&skb->users);
if (destructor)
destructor(sk, skb);
err = 0;
}
- spin_unlock_bh(&sk->sk_receive_queue.lock);
+ spin_unlock_bh(&sk_queue->lock);
}
atomic_inc(&sk->sk_drops);
@@ -383,7 +396,8 @@ EXPORT_SYMBOL(__sk_queue_drop_skb);
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
- int err = __sk_queue_drop_skb(sk, skb, flags, NULL);
+ int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
+ NULL);
kfree_skb(skb);
sk_mem_reclaim_partial(sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index fca407b4a6ea..3d98fbf4cbb0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -105,6 +105,7 @@
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
@@ -142,6 +143,7 @@
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/crash_dump.h>
+#include <linux/sctp.h>
#include "net-sysfs.h"
@@ -161,6 +163,7 @@ static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
struct net_device *dev,
struct netdev_notifier_info *info);
+static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -865,6 +868,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
EXPORT_SYMBOL(dev_get_by_index);
/**
+ * dev_get_by_napi_id - find a device by napi_id
+ * @napi_id: ID of the NAPI struct
+ *
+ * Search for an interface by NAPI ID. Returns %NULL if the device
+ * is not found or a pointer to the device. The device has not had
+ * its reference counter increased so the caller must be careful
+ * about locking. The caller must hold RCU lock.
+ */
+
+struct net_device *dev_get_by_napi_id(unsigned int napi_id)
+{
+ struct napi_struct *napi;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (napi_id < MIN_NAPI_ID)
+ return NULL;
+
+ napi = napi_by_id(napi_id);
+
+ return napi ? napi->dev : NULL;
+}
+EXPORT_SYMBOL(dev_get_by_napi_id);
+
+/**
* netdev_get_name - get a netdevice name, knowing its ifindex.
* @net: network namespace
* @name: a pointer to the buffer where the name will be stored.
@@ -2611,6 +2639,47 @@ out:
}
EXPORT_SYMBOL(skb_checksum_help);
+int skb_crc32c_csum_help(struct sk_buff *skb)
+{
+ __le32 crc32c_csum;
+ int ret = 0, offset, start;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ goto out;
+
+ if (unlikely(skb_is_gso(skb)))
+ goto out;
+
+ /* Before computing a checksum, we should make sure no frag could
+ * be modified by an external entity : checksum could be wrong.
+ */
+ if (unlikely(skb_has_shared_frag(skb))) {
+ ret = __skb_linearize(skb);
+ if (ret)
+ goto out;
+ }
+ start = skb_checksum_start_offset(skb);
+ offset = start + offsetof(struct sctphdr, checksum);
+ if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (skb_cloned(skb) &&
+ !skb_clone_writable(skb, offset + sizeof(__le32))) {
+ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (ret)
+ goto out;
+ }
+ crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
+ skb->len - start, ~(__u32)0,
+ crc32c_csum_stub));
+ *(__le32 *)(skb->data + offset) = crc32c_csum;
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_not_inet = 0;
+out:
+ return ret;
+}
+
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
__be16 type = skb->protocol;
@@ -2953,6 +3022,17 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
return skb;
}
+int skb_csum_hwoffload_help(struct sk_buff *skb,
+ const netdev_features_t features)
+{
+ if (unlikely(skb->csum_not_inet))
+ return !!(features & NETIF_F_SCTP_CRC) ? 0 :
+ skb_crc32c_csum_help(skb);
+
+ return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
+}
+EXPORT_SYMBOL(skb_csum_hwoffload_help);
+
static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
{
netdev_features_t features;
@@ -2991,8 +3071,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
else
skb_set_transport_header(skb,
skb_checksum_start_offset(skb));
- if (!(features & NETIF_F_CSUM_MASK) &&
- skb_checksum_help(skb))
+ if (skb_csum_hwoffload_help(skb, features))
goto out_kfree_skb;
}
}
@@ -3178,7 +3257,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
qdisc_bstats_cpu_update(cl->q, skb);
- switch (tc_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, cl, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -3948,7 +4027,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
skb->tc_at_ingress = 1;
qdisc_bstats_cpu_update(cl->q, skb);
- switch (tc_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, cl, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -4636,9 +4715,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
if (netif_elide_gro(skb->dev))
goto normal;
- if (skb->csum_bad)
- goto normal;
-
gro_list_prepare(napi, skb);
rcu_read_lock();
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d293506..77f04e71100f 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -225,6 +225,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_FILTER_PTP_V2_EVENT:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
rx_filter_valid = 1;
break;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 65ea0ff4017c..58e6cc70500d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -323,7 +323,11 @@ NETDEVICE_SHOW_RW(flags, fmt_hex);
static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
{
- int res, orig_len = dev->tx_queue_len;
+ unsigned int orig_len = dev->tx_queue_len;
+ int res;
+
+ if (new_len != (unsigned int)new_len)
+ return -ERANGE;
if (new_len != orig_len) {
dev->tx_queue_len = new_len;
@@ -349,7 +353,7 @@ static ssize_t tx_queue_len_store(struct device *dev,
return netdev_store(dev, attr, buf, len, change_tx_queue_len);
}
-NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong);
+NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
{
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 49a279a7cc15..dab2834f58f8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2048,8 +2048,8 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_TXQLEN]) {
- unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]);
- unsigned long orig_len = dev->tx_queue_len;
+ unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]);
+ unsigned int orig_len = dev->tx_queue_len;
if (dev->tx_queue_len ^ value) {
dev->tx_queue_len = value;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 346d3e85dfbc..780b7c1563d0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2243,6 +2243,32 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
}
EXPORT_SYMBOL(skb_copy_and_csum_bits);
+static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
+{
+ net_warn_ratelimited(
+ "%s: attempt to compute crc32c without libcrc32c.ko\n",
+ __func__);
+ return 0;
+}
+
+static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
+ int offset, int len)
+{
+ net_warn_ratelimited(
+ "%s: attempt to compute crc32c without libcrc32c.ko\n",
+ __func__);
+ return 0;
+}
+
+static const struct skb_checksum_ops default_crc32c_ops = {
+ .update = warn_crc32c_csum_update,
+ .combine = warn_crc32c_csum_combine,
+};
+
+const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
+ &default_crc32c_ops;
+EXPORT_SYMBOL(crc32c_csum_stub);
+
/**
* skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
* @from: source buffer
@@ -3875,6 +3901,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
+ if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+ skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
+ return;
+
tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
diff --git a/net/core/sock.c b/net/core/sock.c
index 727f924b7f91..bef844127e01 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1038,6 +1038,10 @@ set_rcvbuf:
#endif
case SO_MAX_PACING_RATE:
+ if (val != ~0U)
+ cmpxchg(&sk->sk_pacing_status,
+ SK_PACING_NONE,
+ SK_PACING_NEEDED);
sk->sk_max_pacing_rate = val;
sk->sk_pacing_rate = min(sk->sk_pacing_rate,
sk->sk_max_pacing_rate);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 93106120f987..733f523707ac 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -178,10 +178,6 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = {
[DCB_ATTR_IEEE_QCN_STATS] = {.len = sizeof(struct ieee_qcn_stats)},
};
-static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
- [DCB_ATTR_IEEE_APP] = {.len = sizeof(struct dcb_app)},
-};
-
/* DCB number of traffic classes nested attributes. */
static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = {
[DCB_FEATCFG_ATTR_ALL] = {.type = NLA_FLAG},
@@ -1463,8 +1459,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
struct dcb_app *app_data;
+
if (nla_type(attr) != DCB_ATTR_IEEE_APP)
continue;
+
+ if (nla_len(attr) < sizeof(struct dcb_app)) {
+ err = -ERANGE;
+ goto err;
+ }
+
app_data = nla_data(attr);
if (ops->ieee_setapp)
err = ops->ieee_setapp(netdev, app_data);
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 5e3a7302f774..e1295d5f2c56 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -233,7 +233,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- const u32 now = ccid2_time_stamp;
+ const u32 now = ccid2_jiffies32;
struct ccid2_seq *next;
/* slow-start after idle periods (RFC 2581, RFC 2861) */
@@ -466,7 +466,7 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
* The cleanest solution is to not use the ccid2s_sent field at all
* and instead use DCCP timestamps: requires changes in other places.
*/
- ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
+ ccid2_rtt_estimator(sk, ccid2_jiffies32 - seqp->ccid2s_sent);
}
static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
@@ -478,7 +478,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
return;
}
- hc->tx_last_cong = ccid2_time_stamp;
+ hc->tx_last_cong = ccid2_jiffies32;
hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
@@ -731,7 +731,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
hc->tx_rto = DCCP_TIMEOUT_INIT;
hc->tx_rpdupack = -1;
- hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp;
+ hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
hc->tx_cwnd_used = 0;
setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
(unsigned long)sk);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 18c97543e522..6e50ef2898fb 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -27,7 +27,7 @@
* CCID-2 timestamping faces the same issues as TCP timestamping.
* Hence we reuse/share as much of the code as possible.
*/
-#define ccid2_time_stamp tcp_time_stamp
+#define ccid2_jiffies32 ((u32)jiffies)
/* NUMDUPACK parameter from RFC 4341, p. 6 */
#define NUMDUPACK 3
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 81a0868edb1d..297389b2ab35 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -25,16 +25,16 @@ config NET_DSA_TAG_DSA
config NET_DSA_TAG_EDSA
bool
-config NET_DSA_TAG_TRAILER
+config NET_DSA_TAG_LAN9303
bool
-config NET_DSA_TAG_QCA
+config NET_DSA_TAG_MTK
bool
-config NET_DSA_TAG_MTK
+config NET_DSA_TAG_TRAILER
bool
-config NET_DSA_TAG_LAN9303
+config NET_DSA_TAG_QCA
bool
endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 0b747d75e65a..90e5aa6f7d0f 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,12 +1,12 @@
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o slave.o dsa2.o switch.o legacy.o
+dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
-dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
-dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
-dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
dsa_core-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o
+dsa_core-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o
+dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
+dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 26130ae438da..3288a80d4d6c 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -24,7 +24,7 @@
#include <linux/phy_fixed.h>
#include <linux/gpio/consumer.h>
#include <linux/etherdevice.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
@@ -40,26 +40,26 @@ static const struct dsa_device_ops none_ops = {
};
const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
+#ifdef CONFIG_NET_DSA_TAG_BRCM
+ [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
+#endif
#ifdef CONFIG_NET_DSA_TAG_DSA
[DSA_TAG_PROTO_DSA] = &dsa_netdev_ops,
#endif
#ifdef CONFIG_NET_DSA_TAG_EDSA
[DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops,
#endif
-#ifdef CONFIG_NET_DSA_TAG_TRAILER
- [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_LAN9303
+ [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
#endif
-#ifdef CONFIG_NET_DSA_TAG_BRCM
- [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_MTK
+ [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops,
#endif
#ifdef CONFIG_NET_DSA_TAG_QCA
[DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
#endif
-#ifdef CONFIG_NET_DSA_TAG_MTK
- [DSA_TAG_PROTO_MTK] = &mtk_netdev_ops,
-#endif
-#ifdef CONFIG_NET_DSA_TAG_LAN9303
- [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_TRAILER
+ [DSA_TAG_PROTO_TRAILER] = &trailer_netdev_ops,
#endif
[DSA_TAG_PROTO_NONE] = &none_ops,
};
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 033b3bfb63dc..4301f52e4f5a 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -18,7 +18,7 @@
#include <linux/rtnetlink.h>
#include <linux/of.h>
#include <linux/of_net.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
static LIST_HEAD(dsa_switch_trees);
@@ -443,8 +443,8 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst)
return err;
}
- if (dst->cpu_switch) {
- err = dsa_cpu_port_ethtool_setup(dst->cpu_switch);
+ if (dst->cpu_dp) {
+ err = dsa_cpu_port_ethtool_setup(dst->cpu_dp->ds);
if (err)
return err;
}
@@ -484,8 +484,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
dsa_ds_unapply(dst, ds);
}
- if (dst->cpu_switch)
- dsa_cpu_port_ethtool_restore(dst->cpu_switch);
+ if (dst->cpu_dp)
+ dsa_cpu_port_ethtool_restore(dst->cpu_dp->ds);
pr_info("DSA: tree %d unapplied\n", dst->tree);
dst->applied = false;
@@ -518,10 +518,8 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
if (!dst->master_netdev)
dst->master_netdev = ethernet_dev;
- if (!dst->cpu_switch) {
- dst->cpu_switch = ds;
- dst->cpu_port = index;
- }
+ if (!dst->cpu_dp)
+ dst->cpu_dp = port;
tag_protocol = ds->ops->get_tag_protocol(ds);
dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index f4a88e485213..1d52f9051d0e 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -14,6 +14,57 @@
#include <linux/phy.h>
#include <linux/netdevice.h>
#include <linux/netpoll.h>
+#include <net/dsa.h>
+
+enum {
+ DSA_NOTIFIER_AGEING_TIME,
+ DSA_NOTIFIER_BRIDGE_JOIN,
+ DSA_NOTIFIER_BRIDGE_LEAVE,
+ DSA_NOTIFIER_FDB_ADD,
+ DSA_NOTIFIER_FDB_DEL,
+ DSA_NOTIFIER_MDB_ADD,
+ DSA_NOTIFIER_MDB_DEL,
+ DSA_NOTIFIER_VLAN_ADD,
+ DSA_NOTIFIER_VLAN_DEL,
+};
+
+/* DSA_NOTIFIER_AGEING_TIME */
+struct dsa_notifier_ageing_time_info {
+ struct switchdev_trans *trans;
+ unsigned int ageing_time;
+ int sw_index;
+};
+
+/* DSA_NOTIFIER_BRIDGE_* */
+struct dsa_notifier_bridge_info {
+ struct net_device *br;
+ int sw_index;
+ int port;
+};
+
+/* DSA_NOTIFIER_FDB_* */
+struct dsa_notifier_fdb_info {
+ const struct switchdev_obj_port_fdb *fdb;
+ struct switchdev_trans *trans;
+ int sw_index;
+ int port;
+};
+
+/* DSA_NOTIFIER_MDB_* */
+struct dsa_notifier_mdb_info {
+ const struct switchdev_obj_port_mdb *mdb;
+ struct switchdev_trans *trans;
+ int sw_index;
+ int port;
+};
+
+/* DSA_NOTIFIER_VLAN_* */
+struct dsa_notifier_vlan_info {
+ const struct switchdev_obj_port_vlan *vlan;
+ struct switchdev_trans *trans;
+ int sw_index;
+ int port;
+};
struct dsa_device_ops {
struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
@@ -59,6 +110,39 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds);
int dsa_legacy_register(void);
void dsa_legacy_unregister(void);
+/* port.c */
+int dsa_port_set_state(struct dsa_port *dp, u8 state,
+ struct switchdev_trans *trans);
+void dsa_port_set_state_now(struct dsa_port *dp, u8 state);
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
+void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
+int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
+ struct switchdev_trans *trans);
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
+ struct switchdev_trans *trans);
+int dsa_port_fdb_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb,
+ struct switchdev_trans *trans);
+int dsa_port_fdb_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb);
+int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
+ switchdev_obj_dump_cb_t *cb);
+int dsa_port_mdb_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans);
+int dsa_port_mdb_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
+ switchdev_obj_dump_cb_t *cb);
+int dsa_port_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans);
+int dsa_port_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_vlan_dump(struct dsa_port *dp,
+ struct switchdev_obj_port_vlan *vlan,
+ switchdev_obj_dump_cb_t *cb);
+
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
@@ -75,25 +159,25 @@ void dsa_slave_unregister_notifier(void);
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
+/* tag_brcm.c */
+extern const struct dsa_device_ops brcm_netdev_ops;
+
/* tag_dsa.c */
extern const struct dsa_device_ops dsa_netdev_ops;
/* tag_edsa.c */
extern const struct dsa_device_ops edsa_netdev_ops;
-/* tag_trailer.c */
-extern const struct dsa_device_ops trailer_netdev_ops;
+/* tag_lan9303.c */
+extern const struct dsa_device_ops lan9303_netdev_ops;
-/* tag_brcm.c */
-extern const struct dsa_device_ops brcm_netdev_ops;
+/* tag_mtk.c */
+extern const struct dsa_device_ops mtk_netdev_ops;
/* tag_qca.c */
extern const struct dsa_device_ops qca_netdev_ops;
-/* tag_mtk.c */
-extern const struct dsa_device_ops mtk_netdev_ops;
-
-/* tag_lan9303.c */
-extern const struct dsa_device_ops lan9303_netdev_ops;
+/* tag_trailer.c */
+extern const struct dsa_device_ops trailer_netdev_ops;
#endif
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index ad345c8b0b06..ac4379b8d7ac 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -22,7 +22,7 @@
#include <linux/sysfs.h>
#include <linux/phy_fixed.h>
#include <linux/etherdevice.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
/* switch driver registration ***********************************************/
@@ -115,13 +115,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
continue;
if (!strcmp(name, "cpu")) {
- if (dst->cpu_switch) {
+ if (dst->cpu_dp) {
netdev_err(dst->master_netdev,
"multiple cpu ports?!\n");
return -EINVAL;
}
- dst->cpu_switch = ds;
- dst->cpu_port = i;
+ dst->cpu_dp = &ds->ports[i];
ds->cpu_port_mask |= 1 << i;
} else if (!strcmp(name, "dsa")) {
ds->dsa_port_mask |= 1 << i;
@@ -144,7 +143,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
* tagging protocol to the preferred tagging format of this
* switch.
*/
- if (dst->cpu_switch == ds) {
+ if (dst->cpu_dp->ds == ds) {
enum dsa_tag_protocol tag_protocol;
tag_protocol = ops->get_tag_protocol(ds);
@@ -624,7 +623,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
dst->pd = pd;
dst->master_netdev = dev;
- dst->cpu_port = -1;
for (i = 0; i < pd->nr_chips; i++) {
struct dsa_switch *ds;
@@ -735,7 +733,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
dsa_switch_destroy(ds);
}
- dsa_cpu_port_ethtool_restore(dst->cpu_switch);
+ dsa_cpu_port_ethtool_restore(dst->cpu_dp->ds);
dev_put(dst->master_netdev);
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
new file mode 100644
index 000000000000..c88c0cec8454
--- /dev/null
+++ b/net/dsa/port.c
@@ -0,0 +1,260 @@
+/*
+ * Handling of a single switch port
+ *
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/if_bridge.h>
+#include <linux/notifier.h>
+
+#include "dsa_priv.h"
+
+static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v)
+{
+ struct raw_notifier_head *nh = &dp->ds->dst->nh;
+ int err;
+
+ err = raw_notifier_call_chain(nh, e, v);
+
+ return notifier_to_errno(err);
+}
+
+int dsa_port_set_state(struct dsa_port *dp, u8 state,
+ struct switchdev_trans *trans)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ if (switchdev_trans_ph_prepare(trans))
+ return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
+
+ if (ds->ops->port_stp_state_set)
+ ds->ops->port_stp_state_set(ds, port, state);
+
+ if (ds->ops->port_fast_age) {
+ /* Fast age FDB entries or flush appropriate forwarding database
+ * for the given port, if we are moving it from Learning or
+ * Forwarding state, to Disabled or Blocking or Listening state.
+ */
+
+ if ((dp->stp_state == BR_STATE_LEARNING ||
+ dp->stp_state == BR_STATE_FORWARDING) &&
+ (state == BR_STATE_DISABLED ||
+ state == BR_STATE_BLOCKING ||
+ state == BR_STATE_LISTENING))
+ ds->ops->port_fast_age(ds, port);
+ }
+
+ dp->stp_state = state;
+
+ return 0;
+}
+
+void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+{
+ int err;
+
+ err = dsa_port_set_state(dp, state, NULL);
+ if (err)
+ pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
+}
+
+int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
+{
+ struct dsa_notifier_bridge_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .br = br,
+ };
+ int err;
+
+ /* Here the port is already bridged. Reflect the current configuration
+ * so that drivers can program their chips accordingly.
+ */
+ dp->bridge_dev = br;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info);
+
+ /* The bridging is rolled back on error */
+ if (err)
+ dp->bridge_dev = NULL;
+
+ return err;
+}
+
+void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
+{
+ struct dsa_notifier_bridge_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .br = br,
+ };
+ int err;
+
+ /* Here the port is already unbridged. Reflect the current configuration
+ * so that drivers can program their chips accordingly.
+ */
+ dp->bridge_dev = NULL;
+
+ err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
+ if (err)
+ pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
+
+ /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
+ * so allow it to be in BR_STATE_FORWARDING to be kept functional
+ */
+ dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+}
+
+int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
+ struct switchdev_trans *trans)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ if (ds->ops->port_vlan_filtering)
+ return ds->ops->port_vlan_filtering(ds, dp->index,
+ vlan_filtering);
+
+ return 0;
+}
+
+int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
+ struct switchdev_trans *trans)
+{
+ unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock);
+ unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
+ struct dsa_notifier_ageing_time_info info = {
+ .ageing_time = ageing_time,
+ .sw_index = dp->ds->index,
+ .trans = trans,
+ };
+
+ if (switchdev_trans_ph_prepare(trans))
+ return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
+
+ dp->ageing_time = ageing_time;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
+}
+
+int dsa_port_fdb_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb,
+ struct switchdev_trans *trans)
+{
+ struct dsa_notifier_fdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .trans = trans,
+ .fdb = fdb,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info);
+}
+
+int dsa_port_fdb_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_fdb *fdb)
+{
+ struct dsa_notifier_fdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .fdb = fdb,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
+}
+
+int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb,
+ switchdev_obj_dump_cb_t *cb)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->port_fdb_dump)
+ return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb);
+
+ return -EOPNOTSUPP;
+}
+
+int dsa_port_mdb_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans)
+{
+ struct dsa_notifier_mdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .trans = trans,
+ .mdb = mdb,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info);
+}
+
+int dsa_port_mdb_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct dsa_notifier_mdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .mdb = mdb,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
+}
+
+int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb,
+ switchdev_obj_dump_cb_t *cb)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->port_mdb_dump)
+ return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb);
+
+ return -EOPNOTSUPP;
+}
+
+int dsa_port_vlan_add(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans)
+{
+ struct dsa_notifier_vlan_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .trans = trans,
+ .vlan = vlan,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
+}
+
+int dsa_port_vlan_del(struct dsa_port *dp,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct dsa_notifier_vlan_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .vlan = vlan,
+ };
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
+}
+
+int dsa_port_vlan_dump(struct dsa_port *dp,
+ struct switchdev_obj_port_vlan *vlan,
+ switchdev_obj_dump_cb_t *cb)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->port_vlan_dump)
+ return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb);
+
+ return -EOPNOTSUPP;
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7693182df81e..887e26695519 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -17,28 +17,16 @@
#include <linux/of_mdio.h>
#include <linux/mdio.h>
#include <linux/list.h>
-#include <net/dsa.h>
#include <net/rtnetlink.h>
-#include <net/switchdev.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mirred.h>
#include <linux/if_bridge.h>
#include <linux/netpoll.h>
+
#include "dsa_priv.h"
static bool dsa_slave_dev_check(struct net_device *dev);
-static int dsa_slave_notify(struct net_device *dev, unsigned long e, void *v)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct raw_notifier_head *nh = &p->dp->ds->dst->nh;
- int err;
-
- err = raw_notifier_call_chain(nh, e, v);
-
- return notifier_to_errno(err);
-}
-
/* slave mii_bus handling ***************************************************/
static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
{
@@ -86,33 +74,6 @@ static inline bool dsa_port_is_bridged(struct dsa_port *dp)
return !!dp->bridge_dev;
}
-static void dsa_slave_set_state(struct net_device *dev, u8 state)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
- struct dsa_switch *ds = dp->ds;
- int port = dp->index;
-
- if (ds->ops->port_stp_state_set)
- ds->ops->port_stp_state_set(ds, port, state);
-
- if (ds->ops->port_fast_age) {
- /* Fast age FDB entries or flush appropriate forwarding database
- * for the given port, if we are moving it from Learning or
- * Forwarding state, to Disabled or Blocking or Listening state.
- */
-
- if ((dp->stp_state == BR_STATE_LEARNING ||
- dp->stp_state == BR_STATE_FORWARDING) &&
- (state == BR_STATE_DISABLED ||
- state == BR_STATE_BLOCKING ||
- state == BR_STATE_LISTENING))
- ds->ops->port_fast_age(ds, port);
- }
-
- dp->stp_state = state;
-}
-
static int dsa_slave_open(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -148,7 +109,7 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_promisc;
}
- dsa_slave_set_state(dev, stp_state);
+ dsa_port_set_state_now(p->dp, stp_state);
if (p->phy)
phy_start(p->phy);
@@ -190,7 +151,7 @@ static int dsa_slave_close(struct net_device *dev)
if (ds->ops->port_disable)
ds->ops->port_disable(ds, p->dp->index, p->phy);
- dsa_slave_set_state(dev, BR_STATE_DISABLED);
+ dsa_port_set_state_now(p->dp, BR_STATE_DISABLED);
return 0;
}
@@ -243,140 +204,6 @@ out:
return 0;
}
-static int dsa_slave_port_vlan_add(struct net_device *dev,
- const struct switchdev_obj_port_vlan *vlan,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
- struct dsa_switch *ds = dp->ds;
-
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
- return -EOPNOTSUPP;
-
- return ds->ops->port_vlan_prepare(ds, dp->index, vlan, trans);
- }
-
- ds->ops->port_vlan_add(ds, dp->index, vlan, trans);
-
- return 0;
-}
-
-static int dsa_slave_port_vlan_del(struct net_device *dev,
- const struct switchdev_obj_port_vlan *vlan)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (!ds->ops->port_vlan_del)
- return -EOPNOTSUPP;
-
- return ds->ops->port_vlan_del(ds, p->dp->index, vlan);
-}
-
-static int dsa_slave_port_vlan_dump(struct net_device *dev,
- struct switchdev_obj_port_vlan *vlan,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (ds->ops->port_vlan_dump)
- return ds->ops->port_vlan_dump(ds, p->dp->index, vlan, cb);
-
- return -EOPNOTSUPP;
-}
-
-static int dsa_slave_port_fdb_add(struct net_device *dev,
- const struct switchdev_obj_port_fdb *fdb,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
- return -EOPNOTSUPP;
-
- return ds->ops->port_fdb_prepare(ds, p->dp->index, fdb, trans);
- }
-
- ds->ops->port_fdb_add(ds, p->dp->index, fdb, trans);
-
- return 0;
-}
-
-static int dsa_slave_port_fdb_del(struct net_device *dev,
- const struct switchdev_obj_port_fdb *fdb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
- int ret = -EOPNOTSUPP;
-
- if (ds->ops->port_fdb_del)
- ret = ds->ops->port_fdb_del(ds, p->dp->index, fdb);
-
- return ret;
-}
-
-static int dsa_slave_port_fdb_dump(struct net_device *dev,
- struct switchdev_obj_port_fdb *fdb,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (ds->ops->port_fdb_dump)
- return ds->ops->port_fdb_dump(ds, p->dp->index, fdb, cb);
-
- return -EOPNOTSUPP;
-}
-
-static int dsa_slave_port_mdb_add(struct net_device *dev,
- const struct switchdev_obj_port_mdb *mdb,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (switchdev_trans_ph_prepare(trans)) {
- if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
- return -EOPNOTSUPP;
-
- return ds->ops->port_mdb_prepare(ds, p->dp->index, mdb, trans);
- }
-
- ds->ops->port_mdb_add(ds, p->dp->index, mdb, trans);
-
- return 0;
-}
-
-static int dsa_slave_port_mdb_del(struct net_device *dev,
- const struct switchdev_obj_port_mdb *mdb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (ds->ops->port_mdb_del)
- return ds->ops->port_mdb_del(ds, p->dp->index, mdb);
-
- return -EOPNOTSUPP;
-}
-
-static int dsa_slave_port_mdb_dump(struct net_device *dev,
- struct switchdev_obj_port_mdb *mdb,
- switchdev_obj_dump_cb_t *cb)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (ds->ops->port_mdb_dump)
- return ds->ops->port_mdb_dump(ds, p->dp->index, mdb, cb);
-
- return -EOPNOTSUPP;
-}
-
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -387,96 +214,24 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EOPNOTSUPP;
}
-static int dsa_slave_stp_state_set(struct net_device *dev,
- const struct switchdev_attr *attr,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- if (switchdev_trans_ph_prepare(trans))
- return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
-
- dsa_slave_set_state(dev, attr->u.stp_state);
-
- return 0;
-}
-
-static int dsa_slave_vlan_filtering(struct net_device *dev,
- const struct switchdev_attr *attr,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
-
- /* bridge skips -EOPNOTSUPP, so skip the prepare phase */
- if (switchdev_trans_ph_prepare(trans))
- return 0;
-
- if (ds->ops->port_vlan_filtering)
- return ds->ops->port_vlan_filtering(ds, p->dp->index,
- attr->u.vlan_filtering);
-
- return 0;
-}
-
-static unsigned int dsa_fastest_ageing_time(struct dsa_switch *ds,
- unsigned int ageing_time)
-{
- int i;
-
- for (i = 0; i < ds->num_ports; ++i) {
- struct dsa_port *dp = &ds->ports[i];
-
- if (dp && dp->ageing_time && dp->ageing_time < ageing_time)
- ageing_time = dp->ageing_time;
- }
-
- return ageing_time;
-}
-
-static int dsa_slave_ageing_time(struct net_device *dev,
- const struct switchdev_attr *attr,
- struct switchdev_trans *trans)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
- unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time);
- unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies);
-
- if (switchdev_trans_ph_prepare(trans)) {
- if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
- return -ERANGE;
- if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
- return -ERANGE;
- return 0;
- }
-
- /* Keep the fastest ageing time in case of multiple bridges */
- p->dp->ageing_time = ageing_time;
- ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
-
- if (ds->ops->set_ageing_time)
- return ds->ops->set_ageing_time(ds, ageing_time);
-
- return 0;
-}
-
static int dsa_slave_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct switchdev_trans *trans)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
int ret;
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
- ret = dsa_slave_stp_state_set(dev, attr, trans);
+ ret = dsa_port_set_state(dp, attr->u.stp_state, trans);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
- ret = dsa_slave_vlan_filtering(dev, attr, trans);
+ ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering,
+ trans);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
- ret = dsa_slave_ageing_time(dev, attr, trans);
+ ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans);
break;
default:
ret = -EOPNOTSUPP;
@@ -490,6 +245,8 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
int err;
/* For the prepare phase, ensure the full set of changes is feasable in
@@ -499,18 +256,14 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_slave_port_fdb_add(dev,
- SWITCHDEV_OBJ_PORT_FDB(obj),
- trans);
+ err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
- err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
- trans);
+ err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans);
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = dsa_slave_port_vlan_add(dev,
- SWITCHDEV_OBJ_PORT_VLAN(obj),
- trans);
+ err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj),
+ trans);
break;
default:
err = -EOPNOTSUPP;
@@ -523,19 +276,19 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
static int dsa_slave_port_obj_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
int err;
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_slave_port_fdb_del(dev,
- SWITCHDEV_OBJ_PORT_FDB(obj));
+ err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
- err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
+ err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = dsa_slave_port_vlan_del(dev,
- SWITCHDEV_OBJ_PORT_VLAN(obj));
+ err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj));
break;
default:
err = -EOPNOTSUPP;
@@ -549,22 +302,19 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
struct switchdev_obj *obj,
switchdev_obj_dump_cb_t *cb)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
int err;
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_FDB:
- err = dsa_slave_port_fdb_dump(dev,
- SWITCHDEV_OBJ_PORT_FDB(obj),
- cb);
+ err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb);
break;
case SWITCHDEV_OBJ_ID_PORT_MDB:
- err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
- cb);
+ err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb);
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
- err = dsa_slave_port_vlan_dump(dev,
- SWITCHDEV_OBJ_PORT_VLAN(obj),
- cb);
+ err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb);
break;
default:
err = -EOPNOTSUPP;
@@ -574,57 +324,6 @@ static int dsa_slave_port_obj_dump(struct net_device *dev,
return err;
}
-static int dsa_slave_bridge_port_join(struct net_device *dev,
- struct net_device *br)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_notifier_bridge_info info = {
- .sw_index = p->dp->ds->index,
- .port = p->dp->index,
- .br = br,
- };
- int err;
-
- /* Here the port is already bridged. Reflect the current configuration
- * so that drivers can program their chips accordingly.
- */
- p->dp->bridge_dev = br;
-
- err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_JOIN, &info);
-
- /* The bridging is rolled back on error */
- if (err)
- p->dp->bridge_dev = NULL;
-
- return err;
-}
-
-static void dsa_slave_bridge_port_leave(struct net_device *dev,
- struct net_device *br)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_notifier_bridge_info info = {
- .sw_index = p->dp->ds->index,
- .port = p->dp->index,
- .br = br,
- };
- int err;
-
- /* Here the port is already unbridged. Reflect the current configuration
- * so that drivers can program their chips accordingly.
- */
- p->dp->bridge_dev = NULL;
-
- err = dsa_slave_notify(dev, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
- if (err)
- netdev_err(dev, "failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
-
- /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
- * so allow it to be in BR_STATE_FORWARDING to be kept functional
- */
- dsa_slave_set_state(dev, BR_STATE_FORWARDING);
-}
-
static int dsa_slave_port_attr_get(struct net_device *dev,
struct switchdev_attr *attr)
{
@@ -821,8 +520,8 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
uint64_t *data)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds = dst->cpu_switch;
- s8 cpu_port = dst->cpu_port;
+ struct dsa_switch *ds = dst->cpu_dp->ds;
+ s8 cpu_port = dst->cpu_dp->index;
int count = 0;
if (dst->master_ethtool_ops.get_sset_count) {
@@ -838,7 +537,7 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds = dst->cpu_switch;
+ struct dsa_switch *ds = dst->cpu_dp->ds;
int count = 0;
if (dst->master_ethtool_ops.get_sset_count)
@@ -854,8 +553,8 @@ static void dsa_cpu_port_get_strings(struct net_device *dev,
uint32_t stringset, uint8_t *data)
{
struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds = dst->cpu_switch;
- s8 cpu_port = dst->cpu_port;
+ struct dsa_switch *ds = dst->cpu_dp->ds;
+ s8 cpu_port = dst->cpu_dp->index;
int len = ETH_GSTRING_LEN;
int mcount = 0, count;
unsigned int i;
@@ -1528,14 +1227,16 @@ static bool dsa_slave_dev_check(struct net_device *dev)
static int dsa_slave_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = p->dp;
int err = NOTIFY_DONE;
if (netif_is_bridge_master(info->upper_dev)) {
if (info->linking) {
- err = dsa_slave_bridge_port_join(dev, info->upper_dev);
+ err = dsa_port_bridge_join(dp, info->upper_dev);
err = notifier_from_errno(err);
} else {
- dsa_slave_bridge_port_leave(dev, info->upper_dev);
+ dsa_port_bridge_leave(dp, info->upper_dev);
err = NOTIFY_OK;
}
}
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index ca6e26e514f0..c1e4b2d5a3ae 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -12,7 +12,51 @@
#include <linux/netdevice.h>
#include <linux/notifier.h>
-#include <net/dsa.h>
+#include <net/switchdev.h>
+
+#include "dsa_priv.h"
+
+static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
+ unsigned int ageing_time)
+{
+ int i;
+
+ for (i = 0; i < ds->num_ports; ++i) {
+ struct dsa_port *dp = &ds->ports[i];
+
+ if (dp->ageing_time && dp->ageing_time < ageing_time)
+ ageing_time = dp->ageing_time;
+ }
+
+ return ageing_time;
+}
+
+static int dsa_switch_ageing_time(struct dsa_switch *ds,
+ struct dsa_notifier_ageing_time_info *info)
+{
+ unsigned int ageing_time = info->ageing_time;
+ struct switchdev_trans *trans = info->trans;
+
+ /* Do not care yet about other switch chips of the fabric */
+ if (ds->index != info->sw_index)
+ return 0;
+
+ if (switchdev_trans_ph_prepare(trans)) {
+ if (ds->ageing_time_min && ageing_time < ds->ageing_time_min)
+ return -ERANGE;
+ if (ds->ageing_time_max && ageing_time > ds->ageing_time_max)
+ return -ERANGE;
+ return 0;
+ }
+
+ /* Program the fastest ageing time in case of multiple bridges */
+ ageing_time = dsa_switch_fastest_ageing_time(ds, ageing_time);
+
+ if (ds->ops->set_ageing_time)
+ return ds->ops->set_ageing_time(ds, ageing_time);
+
+ return 0;
+}
static int dsa_switch_bridge_join(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
@@ -40,6 +84,117 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
return 0;
}
+static int dsa_switch_fdb_add(struct dsa_switch *ds,
+ struct dsa_notifier_fdb_info *info)
+{
+ const struct switchdev_obj_port_fdb *fdb = info->fdb;
+ struct switchdev_trans *trans = info->trans;
+
+ /* Do not care yet about other switch chips of the fabric */
+ if (ds->index != info->sw_index)
+ return 0;
+
+ if (switchdev_trans_ph_prepare(trans)) {
+ if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans);
+ }
+
+ ds->ops->port_fdb_add(ds, info->port, fdb, trans);
+
+ return 0;
+}
+
+static int dsa_switch_fdb_del(struct dsa_switch *ds,
+ struct dsa_notifier_fdb_info *info)
+{
+ const struct switchdev_obj_port_fdb *fdb = info->fdb;
+
+ /* Do not care yet about other switch chips of the fabric */
+ if (ds->index != info->sw_index)
+ return 0;
+
+ if (!ds->ops->port_fdb_del)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_fdb_del(ds, info->port, fdb);
+}
+
+static int dsa_switch_mdb_add(struct dsa_switch *ds,
+ struct dsa_notifier_mdb_info *info)
+{
+ const struct switchdev_obj_port_mdb *mdb = info->mdb;
+ struct switchdev_trans *trans = info->trans;
+
+ /* Do not care yet about other switch chips of the fabric */
+ if (ds->index != info->sw_index)
+ return 0;
+
+ if (switchdev_trans_ph_prepare(trans)) {
+ if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_mdb_prepare(ds, info->port, mdb, trans);
+ }
+
+ ds->ops->port_mdb_add(ds, info->port, mdb, trans);
+
+ return 0;
+}
+
+static int dsa_switch_mdb_del(struct dsa_switch *ds,
+ struct dsa_notifier_mdb_info *info)
+{
+ const struct switchdev_obj_port_mdb *mdb = info->mdb;
+
+ /* Do not care yet about other switch chips of the fabric */
+ if (ds->index != info->sw_index)
+ return 0;
+
+ if (!ds->ops->port_mdb_del)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_mdb_del(ds, info->port, mdb);
+}
+
+static int dsa_switch_vlan_add(struct dsa_switch *ds,
+ struct dsa_notifier_vlan_info *info)
+{
+ const struct switchdev_obj_port_vlan *vlan = info->vlan;
+ struct switchdev_trans *trans = info->trans;
+
+ /* Do not care yet about other switch chips of the fabric */
+ if (ds->index != info->sw_index)
+ return 0;
+
+ if (switchdev_trans_ph_prepare(trans)) {
+ if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_vlan_prepare(ds, info->port, vlan, trans);
+ }
+
+ ds->ops->port_vlan_add(ds, info->port, vlan, trans);
+
+ return 0;
+}
+
+static int dsa_switch_vlan_del(struct dsa_switch *ds,
+ struct dsa_notifier_vlan_info *info)
+{
+ const struct switchdev_obj_port_vlan *vlan = info->vlan;
+
+ /* Do not care yet about other switch chips of the fabric */
+ if (ds->index != info->sw_index)
+ return 0;
+
+ if (!ds->ops->port_vlan_del)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_vlan_del(ds, info->port, vlan);
+}
+
static int dsa_switch_event(struct notifier_block *nb,
unsigned long event, void *info)
{
@@ -47,12 +202,33 @@ static int dsa_switch_event(struct notifier_block *nb,
int err;
switch (event) {
+ case DSA_NOTIFIER_AGEING_TIME:
+ err = dsa_switch_ageing_time(ds, info);
+ break;
case DSA_NOTIFIER_BRIDGE_JOIN:
err = dsa_switch_bridge_join(ds, info);
break;
case DSA_NOTIFIER_BRIDGE_LEAVE:
err = dsa_switch_bridge_leave(ds, info);
break;
+ case DSA_NOTIFIER_FDB_ADD:
+ err = dsa_switch_fdb_add(ds, info);
+ break;
+ case DSA_NOTIFIER_FDB_DEL:
+ err = dsa_switch_fdb_del(ds, info);
+ break;
+ case DSA_NOTIFIER_MDB_ADD:
+ err = dsa_switch_mdb_add(ds, info);
+ break;
+ case DSA_NOTIFIER_MDB_DEL:
+ err = dsa_switch_mdb_del(ds, info);
+ break;
+ case DSA_NOTIFIER_VLAN_ADD:
+ err = dsa_switch_vlan_add(ds, info);
+ break;
+ case DSA_NOTIFIER_VLAN_DEL:
+ err = dsa_switch_vlan_del(ds, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 2a9b52c5af86..9f204f18ada3 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -12,7 +12,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
/* This tag length is 4 bytes, older ones were 6 bytes, we do not
@@ -101,7 +101,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
int source_port;
u8 *brcm_tag;
- ds = dst->cpu_switch;
+ ds = dst->cpu_dp->ds;
if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN)))
goto out_drop;
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 1c6633f0de01..3b62a57956a3 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -11,7 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
#define DSA_HLEN 4
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index d9c668aa5e54..f95cafd05702 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -11,7 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
#define DSA_HLEN 4
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 70130ed5c21a..afd59330b5f1 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -14,7 +14,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
/* To define the outgoing port and to discover the incoming port a regular
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 837cdddb53f0..d1258e84cd71 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -13,7 +13,7 @@
*/
#include <linux/etherdevice.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
#define MTK_HDR_LEN 4
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 3ba3f59f7a34..2451007699b7 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -12,7 +12,7 @@
*/
#include <linux/etherdevice.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
#define QCA_HDR_LEN 2
@@ -99,7 +99,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* This protocol doesn't support cascading multiple switches so it's
* safe to assume the switch is first in the tree
*/
- ds = dst->cpu_switch;
+ ds = dst->cpu_dp->ds;
if (!ds)
goto out_drop;
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index aafc2fc74c30..7488ab2932ab 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -11,7 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <net/dsa.h>
+
#include "dsa_priv.h"
static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -67,7 +67,7 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
u8 *trailer;
int source_port;
- ds = dst->cpu_switch;
+ ds = dst->cpu_dp->ds;
if (skb_linearize(skb))
goto out_drop;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 83e3ed258467..14d2f7bd7c76 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -594,7 +594,8 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
} else {
tb = fib_new_table(net, cfg.fc_table);
if (tb)
- err = fib_table_insert(net, tb, &cfg);
+ err = fib_table_insert(net, tb,
+ &cfg, NULL);
else
err = -ENOBUFS;
}
@@ -626,14 +627,15 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct fib_config *cfg)
+ struct nlmsghdr *nlh, struct fib_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct nlattr *attr;
int err, remaining;
struct rtmsg *rtm;
err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy,
- NULL);
+ extack);
if (err < 0)
goto errout;
@@ -654,6 +656,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
cfg->fc_nlinfo.nl_net = net;
if (cfg->fc_type > RTN_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid route type");
err = -EINVAL;
goto errout;
}
@@ -718,12 +721,13 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib_table *tb;
int err;
- err = rtm_to_fib_config(net, skb, nlh, &cfg);
+ err = rtm_to_fib_config(net, skb, nlh, &cfg, extack);
if (err < 0)
goto errout;
tb = fib_get_table(net, cfg.fc_table);
if (!tb) {
+ NL_SET_ERR_MSG(extack, "FIB table does not exist");
err = -ESRCH;
goto errout;
}
@@ -741,7 +745,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib_table *tb;
int err;
- err = rtm_to_fib_config(net, skb, nlh, &cfg);
+ err = rtm_to_fib_config(net, skb, nlh, &cfg, extack);
if (err < 0)
goto errout;
@@ -751,7 +755,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- err = fib_table_insert(net, tb, &cfg);
+ err = fib_table_insert(net, tb, &cfg, extack);
errout:
return err;
}
@@ -845,7 +849,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
cfg.fc_scope = RT_SCOPE_HOST;
if (cmd == RTM_NEWROUTE)
- fib_table_insert(net, tb, &cfg);
+ fib_table_insert(net, tb, &cfg, NULL);
else
fib_table_delete(net, tb, &cfg);
}
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 9c02920725db..2704e08545da 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -28,7 +28,8 @@ static inline void fib_alias_accessed(struct fib_alias *fa)
/* Exported by fib_semantics.c */
void fib_release_info(struct fib_info *);
-struct fib_info *fib_create_info(struct fib_config *cfg);
+struct fib_info *fib_create_info(struct fib_config *cfg,
+ struct netlink_ext_ack *extack);
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id,
u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da449ddb8cc1..4852e183afe0 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -32,6 +32,7 @@
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/netlink.h>
#include <net/arp.h>
#include <net/ip.h>
@@ -454,7 +455,8 @@ static int fib_detect_death(struct fib_info *fi, int order,
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
+static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
+ struct netlink_ext_ack *extack)
{
int nhs = 0;
@@ -464,22 +466,35 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
}
/* leftover implies invalid nexthop configuration, discard it */
- return remaining > 0 ? 0 : nhs;
+ if (remaining > 0) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid nexthop configuration - extra data after nexthops");
+ nhs = 0;
+ }
+
+ return nhs;
}
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
- int remaining, struct fib_config *cfg)
+ int remaining, struct fib_config *cfg,
+ struct netlink_ext_ack *extack)
{
int ret;
change_nexthops(fi) {
int attrlen;
- if (!rtnh_ok(rtnh, remaining))
+ if (!rtnh_ok(rtnh, remaining)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid nexthop configuration - extra data after nexthop");
return -EINVAL;
+ }
- if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
return -EINVAL;
+ }
nexthop_nh->nh_flags =
(cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
@@ -505,8 +520,12 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
nla_entype = nla_find(attrs, attrlen,
RTA_ENCAP_TYPE);
- if (!nla_entype)
+ if (!nla_entype) {
+ NL_SET_BAD_ATTR(extack, nla);
+ NL_SET_ERR_MSG(extack,
+ "Encap type is missing");
goto err_inval;
+ }
ret = lwtunnel_build_state(nla_get_u16(
nla_entype),
@@ -714,7 +733,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
* |-> {local prefix} (terminal node)
*/
static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
- struct fib_nh *nh)
+ struct fib_nh *nh, struct netlink_ext_ack *extack)
{
int err = 0;
struct net *net;
@@ -727,16 +746,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
if (nh->nh_flags & RTNH_F_ONLINK) {
unsigned int addr_type;
- if (cfg->fc_scope >= RT_SCOPE_LINK)
+ if (cfg->fc_scope >= RT_SCOPE_LINK) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop has invalid scope");
return -EINVAL;
+ }
dev = __dev_get_by_index(net, nh->nh_oif);
if (!dev)
return -ENODEV;
- if (!(dev->flags & IFF_UP))
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device is not up");
return -ENETDOWN;
+ }
addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
- if (addr_type != RTN_UNICAST)
+ if (addr_type != RTN_UNICAST) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop has invalid gateway");
return -EINVAL;
+ }
if (!netif_carrier_ok(dev))
nh->nh_flags |= RTNH_F_LINKDOWN;
nh->nh_dev = dev;
@@ -776,18 +804,25 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
}
if (err) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop has invalid gateway");
rcu_read_unlock();
return err;
}
}
err = -EINVAL;
- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
+ if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
goto out;
+ }
nh->nh_scope = res.scope;
nh->nh_oif = FIB_RES_OIF(res);
nh->nh_dev = dev = FIB_RES_DEV(res);
- if (!dev)
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "No egress device for nexthop gateway");
goto out;
+ }
dev_hold(dev);
if (!netif_carrier_ok(dev))
nh->nh_flags |= RTNH_F_LINKDOWN;
@@ -795,17 +830,21 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
} else {
struct in_device *in_dev;
- if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
+ if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
return -EINVAL;
-
+ }
rcu_read_lock();
err = -ENODEV;
in_dev = inetdev_by_index(net, nh->nh_oif);
if (!in_dev)
goto out;
err = -ENETDOWN;
- if (!(in_dev->dev->flags & IFF_UP))
+ if (!(in_dev->dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
goto out;
+ }
nh->nh_dev = in_dev->dev;
dev_hold(nh->nh_dev);
nh->nh_scope = RT_SCOPE_HOST;
@@ -980,7 +1019,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
return 0;
}
-struct fib_info *fib_create_info(struct fib_config *cfg)
+struct fib_info *fib_create_info(struct fib_config *cfg,
+ struct netlink_ext_ack *extack)
{
int err;
struct fib_info *fi = NULL;
@@ -992,15 +1032,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
goto err_inval;
/* Fast check to catch the most weird cases */
- if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
+ if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
+ NL_SET_ERR_MSG(extack, "Invalid scope");
goto err_inval;
+ }
- if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+ if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
goto err_inval;
+ }
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (cfg->fc_mp) {
- nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
+ nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
if (nhs == 0)
goto err_inval;
}
@@ -1062,18 +1107,29 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
+ err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
if (err != 0)
goto failure;
- if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
+ if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device index does not match RTA_OIF");
goto err_inval;
- if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
+ }
+ if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop gateway does not match RTA_GATEWAY");
goto err_inval;
+ }
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
+ if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop class id does not match RTA_FLOW");
goto err_inval;
+ }
#endif
#else
+ NL_SET_ERR_MSG(extack,
+ "Multipath support not enabled in kernel");
goto err_inval;
#endif
} else {
@@ -1082,8 +1138,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
if (cfg->fc_encap) {
struct lwtunnel_state *lwtstate;
- if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE)
+ if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) {
+ NL_SET_ERR_MSG(extack,
+ "LWT encap type not specified");
goto err_inval;
+ }
err = lwtunnel_build_state(cfg->fc_encap_type,
cfg->fc_encap, AF_INET, cfg,
&lwtstate);
@@ -1106,8 +1165,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
}
if (fib_props[cfg->fc_type].error) {
- if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
+ if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) {
+ NL_SET_ERR_MSG(extack,
+ "Gateway, device and multipath can not be specified for this route type");
goto err_inval;
+ }
goto link_it;
} else {
switch (cfg->fc_type) {
@@ -1118,19 +1180,30 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
case RTN_MULTICAST:
break;
default:
+ NL_SET_ERR_MSG(extack, "Invalid route type");
goto err_inval;
}
}
- if (cfg->fc_scope > RT_SCOPE_HOST)
+ if (cfg->fc_scope > RT_SCOPE_HOST) {
+ NL_SET_ERR_MSG(extack, "Invalid scope");
goto err_inval;
+ }
if (cfg->fc_scope == RT_SCOPE_HOST) {
struct fib_nh *nh = fi->fib_nh;
/* Local address is added. */
- if (nhs != 1 || nh->nh_gw)
+ if (nhs != 1) {
+ NL_SET_ERR_MSG(extack,
+ "Route with host scope can not have multiple nexthops");
+ goto err_inval;
+ }
+ if (nh->nh_gw) {
+ NL_SET_ERR_MSG(extack,
+ "Route with host scope can not have a gateway");
goto err_inval;
+ }
nh->nh_scope = RT_SCOPE_NOWHERE;
nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
err = -ENODEV;
@@ -1140,7 +1213,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
int linkdown = 0;
change_nexthops(fi) {
- err = fib_check_nh(cfg, fi, nexthop_nh);
+ err = fib_check_nh(cfg, fi, nexthop_nh, extack);
if (err != 0)
goto failure;
if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
@@ -1150,8 +1223,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi->fib_flags |= RTNH_F_LINKDOWN;
}
- if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc))
+ if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
+ NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
goto err_inval;
+ }
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 51182ff2b441..6d0f6c79d9aa 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1101,7 +1101,7 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp,
/* Caller must hold RTNL. */
int fib_table_insert(struct net *net, struct fib_table *tb,
- struct fib_config *cfg)
+ struct fib_config *cfg, struct netlink_ext_ack *extack)
{
enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
struct trie *t = (struct trie *)tb->tb_data;
@@ -1125,7 +1125,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
if ((plen < KEYLENGTH) && (key << plen))
return -EINVAL;
- fi = fib_create_info(cfg);
+ fi = fib_create_info(cfg, extack);
if (IS_ERR(fi)) {
err = PTR_ERR(fi);
goto err;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 805f6607f8d9..8e0257d01200 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <net/genetlink.h>
#include <net/gue.h>
+#include <net/fou.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/udp.h>
@@ -859,25 +860,6 @@ size_t gue_encap_hlen(struct ip_tunnel_encap *e)
}
EXPORT_SYMBOL(gue_encap_hlen);
-static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
- struct flowi4 *fl4, u8 *protocol, __be16 sport)
-{
- struct udphdr *uh;
-
- skb_push(skb, sizeof(struct udphdr));
- skb_reset_transport_header(skb);
-
- uh = udp_hdr(skb);
-
- uh->dest = e->dport;
- uh->source = sport;
- uh->len = htons(skb->len);
- udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
- fl4->saddr, fl4->daddr, skb->len);
-
- *protocol = IPPROTO_UDP;
-}
-
int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, __be16 *sport, int type)
{
@@ -894,24 +876,6 @@ int __fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
}
EXPORT_SYMBOL(__fou_build_header);
-int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- u8 *protocol, struct flowi4 *fl4)
-{
- int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
- SKB_GSO_UDP_TUNNEL;
- __be16 sport;
- int err;
-
- err = __fou_build_header(skb, e, protocol, &sport, type);
- if (err)
- return err;
-
- fou_build_udp(skb, e, fl4, protocol, sport);
-
- return 0;
-}
-EXPORT_SYMBOL(fou_build_header);
-
int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, __be16 *sport, int type)
{
@@ -985,8 +949,46 @@ int __gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
}
EXPORT_SYMBOL(__gue_build_header);
-int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- u8 *protocol, struct flowi4 *fl4)
+#ifdef CONFIG_NET_FOU_IP_TUNNELS
+
+static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ struct flowi4 *fl4, u8 *protocol, __be16 sport)
+{
+ struct udphdr *uh;
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ uh = udp_hdr(skb);
+
+ uh->dest = e->dport;
+ uh->source = sport;
+ uh->len = htons(skb->len);
+ udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+ fl4->saddr, fl4->daddr, skb->len);
+
+ *protocol = IPPROTO_UDP;
+}
+
+static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
+ SKB_GSO_UDP_TUNNEL;
+ __be16 sport;
+ int err;
+
+ err = __fou_build_header(skb, e, protocol, &sport, type);
+ if (err)
+ return err;
+
+ fou_build_udp(skb, e, fl4, protocol, sport);
+
+ return 0;
+}
+
+static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
{
int type = e->flags & TUNNEL_ENCAP_FLAG_CSUM ? SKB_GSO_UDP_TUNNEL_CSUM :
SKB_GSO_UDP_TUNNEL;
@@ -1001,9 +1003,7 @@ int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
return 0;
}
-EXPORT_SYMBOL(gue_build_header);
-#ifdef CONFIG_NET_FOU_IP_TUNNELS
static const struct ip_tunnel_encap_ops fou_iptun_ops = {
.encap_hlen = fou_encap_hlen,
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1054d330bf9d..82dec8825d28 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -25,6 +25,7 @@
#include <net/xfrm.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
+#include <net/addrconf.h>
#ifdef INET_CSK_DEBUG
const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 7cd8d0d918f8..6f8d9e5e062b 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -172,7 +172,7 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
struct iphdr *iph = ip_hdr(skb_in);
u8 proto;
- if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET))
+ if (iph->frag_off & htons(IP_OFFSET))
return;
if (skb_csum_unnecessary(skb_in)) {
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 0257d965f111..6426250a58ea 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -66,10 +66,10 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
* Since subsequent timestamps use the normal tcp_time_stamp value, we
* must make sure that the resulting initial timestamp is <= tcp_time_stamp.
*/
-__u32 cookie_init_timestamp(struct request_sock *req)
+u64 cookie_init_timestamp(struct request_sock *req)
{
struct inet_request_sock *ireq;
- u32 ts, ts_now = tcp_time_stamp;
+ u32 ts, ts_now = tcp_time_stamp_raw();
u32 options = 0;
ireq = inet_rsk(req);
@@ -88,7 +88,7 @@ __u32 cookie_init_timestamp(struct request_sock *req)
ts <<= TSBITS;
ts |= options;
}
- return ts;
+ return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ);
}
@@ -343,7 +343,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack.v64 = 0;
+ treq->snt_synack = 0;
treq->tfo_listener = false;
ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 842b575f8fdd..aaf663a1e571 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -386,7 +386,7 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
- minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U);
+ minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
@@ -2183,7 +2183,7 @@ adjudge_to_death:
/* Now socket is owned by kernel and we acquire BH lock
- to finish close. No need to check for user refs.
+ * to finish close. No need to check for user refs.
*/
local_bh_disable();
bh_lock_sock(sk);
@@ -2475,7 +2475,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_MAXSEG:
/* Values greater than interface MTU won't take effect. However
* at the point when this call is done we typically don't yet
- * know which interface is going to be used */
+ * know which interface is going to be used
+ */
if (val && (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW)) {
err = -EINVAL;
break;
@@ -2710,7 +2711,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (!tp->repair)
err = -EPERM;
else
- tp->tsoffset = val - tcp_time_stamp;
+ tp->tsoffset = val - tcp_time_stamp_raw();
break;
case TCP_REPAIR_WINDOW:
err = tcp_repair_set_window(tp, optval, optlen);
@@ -2761,7 +2762,7 @@ static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
for (i = TCP_CHRONO_BUSY; i < __TCP_CHRONO_MAX; ++i) {
stats[i] = tp->chrono_stat[i - 1];
if (i == tp->chrono_type)
- stats[i] += tcp_time_stamp - tp->chrono_start;
+ stats[i] += tcp_jiffies32 - tp->chrono_start;
stats[i] *= USEC_PER_SEC / HZ;
total += stats[i];
}
@@ -2845,7 +2846,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_retrans = tp->retrans_out;
info->tcpi_fackets = tp->fackets_out;
- now = tcp_time_stamp;
+ now = tcp_jiffies32;
info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
@@ -3076,7 +3077,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_TIMESTAMP:
- val = tcp_time_stamp + tp->tsoffset;
+ val = tcp_time_stamp_raw() + tp->tsoffset;
break;
case TCP_NOTSENT_LOWAT:
val = tp->notsent_lowat;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index b89bce4c721e..dbcc9352a48f 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -52,10 +52,9 @@
* There is a public e-mail list for discussing BBR development and testing:
* https://groups.google.com/forum/#!forum/bbr-dev
*
- * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled,
- * since pacing is integral to the BBR design and implementation.
- * BBR without pacing would not function properly, and may incur unnecessary
- * high packet loss rates.
+ * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled,
+ * otherwise TCP stack falls back to an internal pacing using one high
+ * resolution timer per TCP socket and may use more resources.
*/
#include <linux/module.h>
#include <net/tcp.h>
@@ -92,7 +91,7 @@ struct bbr {
struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */
u32 rtt_cnt; /* count of packet-timed rounds elapsed */
u32 next_rtt_delivered; /* scb->tx.delivered at end of round */
- struct skb_mstamp cycle_mstamp; /* time of this cycle phase start */
+ u64 cycle_mstamp; /* time of this cycle phase start */
u32 mode:3, /* current bbr_mode in state machine */
prev_ca_state:3, /* CA state on previous ACK */
packet_conservation:1, /* use packet conservation? */
@@ -412,7 +411,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk,
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bool is_full_length =
- skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) >
+ tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
bbr->min_rtt_us;
u32 inflight, bw;
@@ -498,7 +497,7 @@ static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies;
+ bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
bbr->lt_last_delivered = tp->delivered;
bbr->lt_last_lost = tp->lost;
bbr->lt_rtt_cnt = 0;
@@ -552,7 +551,7 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
struct bbr *bbr = inet_csk_ca(sk);
u32 lost, delivered;
u64 bw;
- s32 t;
+ u32 t;
if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */
if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
@@ -604,15 +603,15 @@ static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
return;
/* Find average delivery rate in this sampling interval. */
- t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp);
- if (t < 1)
- return; /* interval is less than one jiffy, so wait */
- t = jiffies_to_usecs(t);
- /* Interval long enough for jiffies_to_usecs() to return a bogus 0? */
- if (t < 1) {
+ t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
+ if ((s32)t < 1)
+ return; /* interval is less than one ms, so wait */
+ /* Check if can multiply without overflow */
+ if (t >= ~0U / USEC_PER_MSEC) {
bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */
return;
}
+ t *= USEC_PER_MSEC;
bw = (u64)delivered * BW_UNIT;
do_div(bw, t);
bbr_lt_bw_interval_done(sk, bw);
@@ -731,12 +730,12 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
bool filter_expired;
/* Track min RTT seen in the min_rtt_win_sec filter window: */
- filter_expired = after(tcp_time_stamp,
+ filter_expired = after(tcp_jiffies32,
bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
if (rs->rtt_us >= 0 &&
(rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
bbr->min_rtt_us = rs->rtt_us;
- bbr->min_rtt_stamp = tcp_time_stamp;
+ bbr->min_rtt_stamp = tcp_jiffies32;
}
if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
@@ -755,7 +754,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
/* Maintain min packets in flight for max(200 ms, 1 round). */
if (!bbr->probe_rtt_done_stamp &&
tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
- bbr->probe_rtt_done_stamp = tcp_time_stamp +
+ bbr->probe_rtt_done_stamp = tcp_jiffies32 +
msecs_to_jiffies(bbr_probe_rtt_mode_ms);
bbr->probe_rtt_round_done = 0;
bbr->next_rtt_delivered = tp->delivered;
@@ -763,8 +762,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
if (bbr->round_start)
bbr->probe_rtt_round_done = 1;
if (bbr->probe_rtt_round_done &&
- after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) {
- bbr->min_rtt_stamp = tcp_time_stamp;
+ after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) {
+ bbr->min_rtt_stamp = tcp_jiffies32;
bbr->restore_cwnd = 1; /* snap to prior_cwnd */
bbr_reset_mode(sk);
}
@@ -811,7 +810,7 @@ static void bbr_init(struct sock *sk)
bbr->probe_rtt_done_stamp = 0;
bbr->probe_rtt_round_done = 0;
bbr->min_rtt_us = tcp_min_rtt(tp);
- bbr->min_rtt_stamp = tcp_time_stamp;
+ bbr->min_rtt_stamp = tcp_jiffies32;
minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
@@ -826,10 +825,12 @@ static void bbr_init(struct sock *sk)
bbr->idle_restart = 0;
bbr->full_bw = 0;
bbr->full_bw_cnt = 0;
- bbr->cycle_mstamp.v64 = 0;
+ bbr->cycle_mstamp = 0;
bbr->cycle_idx = 0;
bbr_reset_lt_bw_sampling(sk);
bbr_reset_startup_mode(sk);
+
+ cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
}
static u32 bbr_sndbuf_expand(struct sock *sk)
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 36087bca9f48..609965f0e298 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -84,14 +84,14 @@ static void bictcp_init(struct sock *sk)
static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
{
if (ca->last_cwnd == cwnd &&
- (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
+ (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
return;
ca->last_cwnd = cwnd;
- ca->last_time = tcp_time_stamp;
+ ca->last_time = tcp_jiffies32;
if (ca->epoch_start == 0) /* record the beginning of an epoch */
- ca->epoch_start = tcp_time_stamp;
+ ca->epoch_start = tcp_jiffies32;
/* start off normal */
if (cwnd <= low_window) {
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 0683ba447d77..57ae5b5ae643 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -155,7 +155,7 @@ static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
struct bictcp *ca = inet_csk_ca(sk);
- u32 now = tcp_time_stamp;
+ u32 now = tcp_jiffies32;
s32 delta;
delta = now - tcp_sk(sk)->lsndtime;
@@ -231,21 +231,21 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
ca->ack_cnt += acked; /* count the number of ACKed packets */
if (ca->last_cwnd == cwnd &&
- (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32)
+ (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32)
return;
/* The CUBIC function can update ca->cnt at most once per jiffy.
* On all cwnd reduction events, ca->epoch_start is set to 0,
* which will force a recalculation of ca->cnt.
*/
- if (ca->epoch_start && tcp_time_stamp == ca->last_time)
+ if (ca->epoch_start && tcp_jiffies32 == ca->last_time)
goto tcp_friendliness;
ca->last_cwnd = cwnd;
- ca->last_time = tcp_time_stamp;
+ ca->last_time = tcp_jiffies32;
if (ca->epoch_start == 0) {
- ca->epoch_start = tcp_time_stamp; /* record beginning */
+ ca->epoch_start = tcp_jiffies32; /* record beginning */
ca->ack_cnt = acked; /* start counting */
ca->tcp_cwnd = cwnd; /* syn with cubic */
@@ -276,7 +276,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
* if the cwnd < 1 million packets !!!
*/
- t = (s32)(tcp_time_stamp - ca->epoch_start);
+ t = (s32)(tcp_jiffies32 - ca->epoch_start);
t += msecs_to_jiffies(ca->delay_min >> 3);
/* change the unit from HZ to bictcp_HZ */
t <<= BICTCP_HZ;
@@ -448,7 +448,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
return;
/* Discard delay samples right after fast recovery */
- if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+ if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
return;
delay = (sample->rtt_us << 3) / USEC_PER_MSEC;
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 4a4d8e76738f..3eb78cde6ff0 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -104,7 +104,7 @@ static void measure_achieved_throughput(struct sock *sk,
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
- u32 now = tcp_time_stamp;
+ u32 now = tcp_jiffies32;
if (icsk->icsk_ca_state == TCP_CA_Open)
ca->pkts_acked = sample->pkts_acked;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 174d4376baa5..2fa55f57ac06 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -441,7 +441,7 @@ void tcp_init_buffer_space(struct sock *sk)
tcp_sndbuf_expand(sk);
tp->rcvq_space.space = tp->rcv_wnd;
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
tp->rcvq_space.time = tp->tcp_mstamp;
tp->rcvq_space.seq = tp->copied_seq;
@@ -463,7 +463,7 @@ void tcp_init_buffer_space(struct sock *sk)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
/* 5. Recalculate window clamp after socket hit its memory bounds. */
@@ -555,11 +555,11 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
u32 delta_us;
- if (tp->rcv_rtt_est.time.v64 == 0)
+ if (tp->rcv_rtt_est.time == 0)
goto new_measure;
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
- delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time);
+ delta_us = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcv_rtt_est.time);
tcp_rcv_rtt_update(tp, delta_us, 1);
new_measure:
@@ -571,13 +571,15 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+
if (tp->rx_opt.rcv_tsecr &&
(TCP_SKB_CB(skb)->end_seq -
- TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
- tcp_rcv_rtt_update(tp,
- jiffies_to_usecs(tcp_time_stamp -
- tp->rx_opt.rcv_tsecr),
- 0);
+ TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+ u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+ u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+ tcp_rcv_rtt_update(tp, delta_us, 0);
+ }
}
/*
@@ -590,7 +592,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
int time;
int copied;
- time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time);
+ time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
return;
@@ -672,7 +674,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
tcp_rcv_rtt_measure(tp);
- now = tcp_time_stamp;
+ now = tcp_jiffies32;
if (!icsk->icsk_ack.ato) {
/* The _first_ data packet received, initialize
@@ -885,6 +887,9 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
struct tcp_sock *tp = tcp_sk(sk);
int mib_idx;
+ if (WARN_ON_ONCE(metric < 0))
+ return;
+
if (metric > tp->reordering) {
tp->reordering = min(sysctl_tcp_max_reordering, metric);
@@ -1134,8 +1139,8 @@ struct tcp_sacktag_state {
* that was SACKed. RTO needs the earliest RTT to stay conservative,
* but congestion control should still get an accurate delay signal.
*/
- struct skb_mstamp first_sackt;
- struct skb_mstamp last_sackt;
+ u64 first_sackt;
+ u64 last_sackt;
struct rate_sample *rate;
int flag;
};
@@ -1200,7 +1205,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
struct tcp_sacktag_state *state, u8 sacked,
u32 start_seq, u32 end_seq,
int dup_sack, int pcount,
- const struct skb_mstamp *xmit_time)
+ u64 xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
int fack_count = state->fack_count;
@@ -1242,9 +1247,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
state->reord);
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
- if (state->first_sackt.v64 == 0)
- state->first_sackt = *xmit_time;
- state->last_sackt = *xmit_time;
+ if (state->first_sackt == 0)
+ state->first_sackt = xmit_time;
+ state->last_sackt = xmit_time;
}
if (sacked & TCPCB_LOST) {
@@ -1304,7 +1309,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
*/
tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
start_seq, end_seq, dup_sack, pcount,
- &skb->skb_mstamp);
+ skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
if (skb == tp->lost_skb_hint)
@@ -1356,8 +1361,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
tcp_advance_highest_sack(sk, skb);
tcp_skb_collapse_tstamp(prev, skb);
- if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64))
- TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0;
+ if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
+ TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
@@ -1587,7 +1592,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
TCP_SKB_CB(skb)->end_seq,
dup_sack,
tcp_skb_pcount(skb),
- &skb->skb_mstamp);
+ skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
if (!before(TCP_SKB_CB(skb)->seq,
@@ -1954,7 +1959,7 @@ void tcp_enter_loss(struct sock *sk)
}
tp->snd_cwnd = 1;
tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->retrans_out = 0;
tp->lost_out = 0;
@@ -2383,7 +2388,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
tcp_ecn_withdraw_cwr(tp);
}
}
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->undo_marker = 0;
}
@@ -2520,7 +2525,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
(tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
tp->snd_cwnd = tp->snd_ssthresh;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}
@@ -2590,7 +2595,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
tcp_mss_to_mtu(sk, tp->mss_cache) /
icsk->icsk_mtup.probe_size;
tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
@@ -2911,7 +2916,7 @@ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
struct tcp_sock *tp = tcp_sk(sk);
u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
- minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp,
+ minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
rtt_us ? : jiffies_to_usecs(1));
}
@@ -2936,9 +2941,12 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* See draft-ietf-tcplw-high-performance-00, section 3.3.
*/
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
- flag & FLAG_ACKED)
- seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp -
- tp->rx_opt.rcv_tsecr);
+ flag & FLAG_ACKED) {
+ u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+ u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+
+ seq_rtt_us = ca_rtt_us = delta_us;
+ }
if (seq_rtt_us < 0)
return false;
@@ -2960,12 +2968,8 @@ void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
{
long rtt_us = -1L;
- if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) {
- struct skb_mstamp now;
-
- skb_mstamp_get(&now);
- rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
- }
+ if (req && !req->num_retrans && tcp_rsk(req)->snt_synack)
+ rtt_us = tcp_stamp_us_delta(tcp_clock_us(), tcp_rsk(req)->snt_synack);
tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us);
}
@@ -2976,7 +2980,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
const struct inet_connection_sock *icsk = inet_csk(sk);
icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
- tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
+ tcp_sk(sk)->snd_cwnd_stamp = tcp_jiffies32;
}
/* Restart timer after forward progress on connection.
@@ -3001,14 +3005,14 @@ void tcp_rearm_rto(struct sock *sk)
if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
struct sk_buff *skb = tcp_write_queue_head(sk);
- const u32 rto_time_stamp =
- tcp_skb_timestamp(skb) + rto;
- s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
- /* delta may not be positive if the socket is locked
+ u64 rto_time_stamp = skb->skb_mstamp +
+ jiffies_to_usecs(rto);
+ s64 delta_us = rto_time_stamp - tp->tcp_mstamp;
+ /* delta_us may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
*/
- if (delta > 0)
- rto = delta;
+ if (delta_us > 0)
+ rto = usecs_to_jiffies(delta_us);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
@@ -3060,9 +3064,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct skb_mstamp first_ackt, last_ackt;
+ u64 first_ackt, last_ackt;
struct tcp_sock *tp = tcp_sk(sk);
- struct skb_mstamp *now = &tp->tcp_mstamp;
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
bool fully_acked = true;
@@ -3075,7 +3078,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
bool rtt_update;
int flag = 0;
- first_ackt.v64 = 0;
+ first_ackt = 0;
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -3106,8 +3109,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
flag |= FLAG_RETRANS_DATA_ACKED;
} else if (!(sacked & TCPCB_SACKED_ACKED)) {
last_ackt = skb->skb_mstamp;
- WARN_ON_ONCE(last_ackt.v64 == 0);
- if (!first_ackt.v64)
+ WARN_ON_ONCE(last_ackt == 0);
+ if (!first_ackt)
first_ackt = last_ackt;
last_in_flight = TCP_SKB_CB(skb)->tx.in_flight;
@@ -3122,7 +3125,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, scb->end_seq,
- &skb->skb_mstamp);
+ skb->skb_mstamp);
}
if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount;
@@ -3165,13 +3168,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
- if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
- seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt);
- ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt);
+ if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
+ seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
+ ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
}
- if (sack->first_sackt.v64) {
- sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt);
- ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt);
+ if (sack->first_sackt) {
+ sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);
+ ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->last_sackt);
}
sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
@@ -3201,7 +3204,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
- sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) {
+ sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
* after when the head was last (re)transmitted. Otherwise the
* timeout may continue to extend in loss recovery.
@@ -3390,7 +3393,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
u32 *last_oow_ack_time)
{
if (*last_oow_ack_time) {
- s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+ s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
NET_INC_STATS(net, mib_idx);
@@ -3398,7 +3401,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
}
}
- *last_oow_ack_time = tcp_time_stamp;
+ *last_oow_ack_time = tcp_jiffies32;
return false; /* not rate-limited: go ahead, send dupack now! */
}
@@ -3553,7 +3556,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
int acked = 0; /* Number of packets newly acked */
int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
- sack_state.first_sackt.v64 = 0;
+ sack_state.first_sackt = 0;
sack_state.rate = &rs;
/* We very likely will need to access write queue head. */
@@ -3636,7 +3639,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
*/
sk->sk_err_soft = 0;
icsk->icsk_probes_out = 0;
- tp->rcv_tstamp = tcp_time_stamp;
+ tp->rcv_tstamp = tcp_jiffies32;
if (!prior_packets)
goto no_queue;
@@ -5019,7 +5022,7 @@ static void tcp_new_space(struct sock *sk)
if (tcp_should_expand_sndbuf(sk)) {
tcp_sndbuf_expand(sk);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
sk->sk_write_space(sk);
@@ -5356,7 +5359,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
if (unlikely(!sk->sk_rx_dst))
inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
@@ -5554,7 +5557,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_set_state(sk, TCP_ESTABLISHED);
- icsk->icsk_ack.lrcvtime = tcp_time_stamp;
+ icsk->icsk_ack.lrcvtime = tcp_jiffies32;
if (skb) {
icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -5571,7 +5574,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
*/
- tp->lsndtime = tcp_time_stamp;
+ tp->lsndtime = tcp_jiffies32;
tcp_init_buffer_space(sk);
@@ -5672,7 +5675,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
- tcp_time_stamp)) {
+ tcp_time_stamp(tp))) {
NET_INC_STATS(sock_net(sk),
LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
@@ -5917,7 +5920,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_SYN_SENT:
tp->rx_opt.saw_tstamp = 0;
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
queued = tcp_rcv_synsent_state_process(sk, skb, th);
if (queued >= 0)
return queued;
@@ -5929,7 +5932,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
return 0;
}
- skb_mstamp_get(&tp->tcp_mstamp);
+ tcp_mstamp_refresh(tp);
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
@@ -6008,7 +6011,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_update_pacing_rate(sk);
/* Prevent spurious tcp_cwnd_restart() on first data packet */
- tp->lsndtime = tcp_time_stamp;
+ tp->lsndtime = tcp_jiffies32;
tcp_initialize_rcv_mss(sk);
tcp_fast_path_on(tp);
@@ -6202,7 +6205,7 @@ static void tcp_openreq_init(struct request_sock *req,
req->cookie_ts = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- skb_mstamp_get(&tcp_rsk(req)->snt_synack);
+ tcp_rsk(req)->snt_synack = tcp_clock_us();
tcp_rsk(req)->last_oow_ack_time = 0;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5ab2aac5ca19..191b2f78b19d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -376,8 +376,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
struct sock *sk;
struct sk_buff *skb;
struct request_sock *fastopen;
- __u32 seq, snd_una;
- __u32 remaining;
+ u32 seq, snd_una;
+ s32 remaining;
+ u32 delta_us;
int err;
struct net *net = dev_net(icmp_skb->dev);
@@ -483,11 +484,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
skb = tcp_write_queue_head(sk);
BUG_ON(!skb);
+ tcp_mstamp_refresh(tp);
+ delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp);
remaining = icsk->icsk_rto -
- min(icsk->icsk_rto,
- tcp_time_stamp - tcp_skb_timestamp(skb));
+ usecs_to_jiffies(delta_us);
- if (remaining) {
+ if (remaining > 0) {
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
remaining, TCP_RTO_MAX);
} else {
@@ -811,7 +813,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v4_send_ack(sk, skb,
tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcp_time_stamp + tcptw->tw_ts_offset,
+ tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw),
@@ -839,7 +841,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
- tcp_time_stamp + tcp_rsk(req)->ts_off,
+ tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index d6fb6c067af4..ae10ed64fe13 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -37,7 +37,7 @@
#include <net/tcp.h>
/* resolution of owd */
-#define LP_RESOL 1000
+#define LP_RESOL TCP_TS_HZ
/**
* enum tcp_lp_state
@@ -147,9 +147,9 @@ static u32 tcp_lp_remote_hz_estimator(struct sock *sk)
tp->rx_opt.rcv_tsecr == lp->local_ref_time)
goto out;
- m = HZ * (tp->rx_opt.rcv_tsval -
- lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr -
- lp->local_ref_time);
+ m = TCP_TS_HZ *
+ (tp->rx_opt.rcv_tsval - lp->remote_ref_time) /
+ (tp->rx_opt.rcv_tsecr - lp->local_ref_time);
if (m < 0)
m = -m;
@@ -194,7 +194,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
if (lp->flag & LP_VALID_RHZ) {
owd =
tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) -
- tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ);
+ tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ);
if (owd < 0)
owd = -owd;
}
@@ -264,18 +264,19 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
+ u32 now = tcp_time_stamp(tp);
u32 delta;
if (sample->rtt_us > 0)
tcp_lp_rtt_sample(sk, sample->rtt_us);
/* calc inference */
- delta = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ delta = now - tp->rx_opt.rcv_tsecr;
if ((s32)delta > 0)
lp->inference = 3 * delta;
/* test if within inference */
- if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference))
+ if (lp->last_drop && (now - lp->last_drop < lp->inference))
lp->flag |= LP_WITHIN_INF;
else
lp->flag &= ~LP_WITHIN_INF;
@@ -312,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);
/* record this drop time */
- lp->last_drop = tcp_time_stamp;
+ lp->last_drop = now;
}
static struct tcp_congestion_ops tcp_lp __read_mostly = {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 653bbd67e3a3..102b2c90bb80 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -524,7 +524,7 @@ reset:
tp->snd_cwnd = 1;
else
tp->snd_cwnd = tcp_init_cwnd(tp, dst);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 717be4de5324..d0642df73044 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -445,9 +445,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->srtt_us = 0;
newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
- minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
+ minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
newicsk->icsk_rto = TCP_TIMEOUT_INIT;
- newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
+ newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
newtp->packets_out = 0;
newtp->retrans_out = 0;
@@ -455,7 +455,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->fackets_out = 0;
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
newtp->tlp_high_seq = 0;
- newtp->lsndtime = treq->snt_synack.stamp_jiffies;
+ newtp->lsndtime = tcp_jiffies32;
newsk->sk_txhash = treq->txhash;
newtp->last_oow_ack_time = 0;
newtp->total_retrans = req->num_retrans;
@@ -526,7 +526,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->fastopen_req = NULL;
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
- newtp->rack.mstamp.v64 = 0;
+ newtp->rack.mstamp = 0;
newtp->rack.advanced = 0;
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 5de82a8d4d87..6d650ed3cb59 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -424,8 +424,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
}
/* Extract info for Tcp socket info provided via netlink */
-size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
- union tcp_cc_info *info)
+static size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
{
const struct tcpnv *ca = inet_csk_ca(sk);
@@ -440,7 +440,6 @@ size_t tcpnv_get_info(struct sock *sk, u32 ext, int *attr,
}
return 0;
}
-EXPORT_SYMBOL_GPL(tcpnv_get_info);
static struct tcp_congestion_ops tcpnv __read_mostly = {
.init = tcpnv_init,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4858e190f6ac..478f75baee31 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -151,7 +151,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
cwnd >>= 1;
tp->snd_cwnd = max(cwnd, restart_cwnd);
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_cwnd_used = 0;
}
@@ -160,7 +160,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- const u32 now = tcp_time_stamp;
+ const u32 now = tcp_jiffies32;
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
@@ -904,6 +904,72 @@ out:
sk_free(sk);
}
+/* Note: Called under hard irq.
+ * We can not call TCP stack right away.
+ */
+enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
+{
+ struct tcp_sock *tp = container_of(timer, struct tcp_sock, pacing_timer);
+ struct sock *sk = (struct sock *)tp;
+ unsigned long nval, oval;
+
+ for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
+ struct tsq_tasklet *tsq;
+ bool empty;
+
+ if (oval & TSQF_QUEUED)
+ break;
+
+ nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+ nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
+ if (nval != oval)
+ continue;
+
+ if (!atomic_inc_not_zero(&sk->sk_wmem_alloc))
+ break;
+ /* queue this socket to tasklet queue */
+ tsq = this_cpu_ptr(&tsq_tasklet);
+ empty = list_empty(&tsq->head);
+ list_add(&tp->tsq_node, &tsq->head);
+ if (empty)
+ tasklet_schedule(&tsq->tasklet);
+ break;
+ }
+ return HRTIMER_NORESTART;
+}
+
+/* BBR congestion control needs pacing.
+ * Same remark for SO_MAX_PACING_RATE.
+ * sch_fq packet scheduler is efficiently handling pacing,
+ * but is not always installed/used.
+ * Return true if TCP stack should pace packets itself.
+ */
+static bool tcp_needs_internal_pacing(const struct sock *sk)
+{
+ return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
+}
+
+static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
+{
+ u64 len_ns;
+ u32 rate;
+
+ if (!tcp_needs_internal_pacing(sk))
+ return;
+ rate = sk->sk_pacing_rate;
+ if (!rate || rate == ~0U)
+ return;
+
+ /* Should account for header sizes as sch_fq does,
+ * but lets make things simple.
+ */
+ len_ns = (u64)skb->len * NSEC_PER_SEC;
+ do_div(len_ns, rate);
+ hrtimer_start(&tcp_sk(sk)->pacing_timer,
+ ktime_add_ns(ktime_get(), len_ns),
+ HRTIMER_MODE_ABS_PINNED);
+}
+
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
@@ -931,8 +997,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
BUG_ON(!skb || !tcp_skb_pcount(skb));
tp = tcp_sk(sk);
+ skb->skb_mstamp = tp->tcp_mstamp;
if (clone_it) {
- skb_mstamp_get(&skb->skb_mstamp);
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
tcp_rate_skb_sent(sk, skb);
@@ -1034,6 +1100,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
tp->data_segs_out += tcp_skb_pcount(skb);
+ tcp_internal_pacing(sk, skb);
}
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
@@ -1408,7 +1475,7 @@ void tcp_mtup_init(struct sock *sk)
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
icsk->icsk_mtup.probe_size = 0;
if (icsk->icsk_mtup.enabled)
- icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+ icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
}
EXPORT_SYMBOL(tcp_mtup_init);
@@ -1509,7 +1576,7 @@ static void tcp_cwnd_application_limited(struct sock *sk)
}
tp->snd_cwnd_used = 0;
}
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
}
static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
@@ -1530,14 +1597,14 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (tcp_is_cwnd_limited(sk)) {
/* Network is feed fully. */
tp->snd_cwnd_used = 0;
- tp->snd_cwnd_stamp = tcp_time_stamp;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
} else {
/* Network starves. */
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
if (sysctl_tcp_slow_start_after_idle &&
- (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
+ (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
@@ -1839,7 +1906,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 age, send_win, cong_win, limit, in_flight;
struct tcp_sock *tp = tcp_sk(sk);
- struct skb_mstamp now;
struct sk_buff *head;
int win_divisor;
@@ -1852,7 +1918,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
/* Avoid bursty behavior by allowing defer
* only if the last write was recent.
*/
- if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
+ if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0)
goto send_now;
in_flight = tcp_packets_in_flight(tp);
@@ -1895,8 +1961,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
}
head = tcp_write_queue_head(sk);
- skb_mstamp_get(&now);
- age = skb_mstamp_us_delta(&now, &head->skb_mstamp);
+
+ age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
/* If next ACK is likely to come too late (half srtt), do not defer */
if (age < (tp->srtt_us >> 4))
goto send_now;
@@ -1921,7 +1987,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
s32 delta;
interval = net->ipv4.sysctl_tcp_probe_interval;
- delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
+ delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
if (unlikely(delta >= interval * HZ)) {
int mss = tcp_current_mss(sk);
@@ -1933,7 +1999,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
/* Update probe time stamp */
- icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+ icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
}
}
@@ -2086,6 +2152,12 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
}
+static bool tcp_pacing_check(const struct sock *sk)
+{
+ return tcp_needs_internal_pacing(sk) &&
+ hrtimer_active(&tcp_sk(sk)->pacing_timer);
+}
+
/* TCP Small Queues :
* Control number of packets in qdisc/devices to two packets / or ~1 ms.
* (These limits are doubled for retransmits)
@@ -2130,7 +2202,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
{
- const u32 now = tcp_time_stamp;
+ const u32 now = tcp_jiffies32;
if (tp->chrono_type > TCP_CHRONO_UNSPEC)
tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
@@ -2207,15 +2279,19 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
}
max_segs = tcp_tso_segs(sk, mss_now);
+ tcp_mstamp_refresh(tp);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
+ if (tcp_pacing_check(sk))
+ break;
+
tso_segs = tcp_init_tso_segs(skb, mss_now);
BUG_ON(!tso_segs);
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp" is used as a start point for the retransmit timer */
- skb_mstamp_get(&skb->skb_mstamp);
+ skb->skb_mstamp = tp->tcp_mstamp;
goto repair; /* Skip network transmission */
}
@@ -2342,10 +2418,10 @@ bool tcp_schedule_loss_probe(struct sock *sk)
timeout = max_t(u32, timeout, msecs_to_jiffies(10));
/* If RTO is shorter, just schedule TLP in its place. */
- tlp_time_stamp = tcp_time_stamp + timeout;
+ tlp_time_stamp = tcp_jiffies32 + timeout;
rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
- s32 delta = rto_time_stamp - tcp_time_stamp;
+ s32 delta = rto_time_stamp - tcp_jiffies32;
if (delta > 0)
timeout = delta;
}
@@ -2803,7 +2879,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb;
- skb_mstamp_get(&skb->skb_mstamp);
+ skb->skb_mstamp = tp->tcp_mstamp;
nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-ENOBUFS;
@@ -2878,6 +2954,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (skb == tcp_send_head(sk))
break;
+
+ if (tcp_pacing_check(sk))
+ break;
+
/* we could do better than to assign each time */
if (!hole)
tp->retransmit_skb_hint = skb;
@@ -3015,7 +3095,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
skb_reserve(skb, MAX_TCP_HEADER);
tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
TCPHDR_ACK | TCPHDR_RST);
- skb_mstamp_get(&skb->skb_mstamp);
+ tcp_mstamp_refresh(tcp_sk(sk));
/* Send it off. */
if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
@@ -3111,10 +3191,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
- skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
+ skb->skb_mstamp = cookie_init_timestamp(req);
else
#endif
- skb_mstamp_get(&skb->skb_mstamp);
+ skb->skb_mstamp = tcp_clock_us();
#ifdef CONFIG_TCP_MD5SIG
rcu_read_lock();
@@ -3244,7 +3324,7 @@ static void tcp_connect_init(struct sock *sk)
if (likely(!tp->repair))
tp->rcv_nxt = 0;
else
- tp->rcv_tstamp = tcp_time_stamp;
+ tp->rcv_tstamp = tcp_jiffies32;
tp->rcv_wup = tp->rcv_nxt;
tp->copied_seq = tp->rcv_nxt;
@@ -3373,7 +3453,8 @@ int tcp_connect(struct sock *sk)
return -ENOBUFS;
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
- tp->retrans_stamp = tcp_time_stamp;
+ tcp_mstamp_refresh(tp);
+ tp->retrans_stamp = tcp_time_stamp(tp);
tcp_connect_queue_skb(sk, buff);
tcp_ecn_send_syn(sk, buff);
@@ -3492,7 +3573,6 @@ void tcp_send_ack(struct sock *sk)
skb_set_tcp_pure_ack(buff);
/* Send it off, this clears delayed acks for us. */
- skb_mstamp_get(&buff->skb_mstamp);
tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
}
EXPORT_SYMBOL_GPL(tcp_send_ack);
@@ -3526,15 +3606,16 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
* send it.
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
- skb_mstamp_get(&skb->skb_mstamp);
NET_INC_STATS(sock_net(sk), mib);
return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0);
}
+/* Called from setsockopt( ... TCP_REPAIR ) */
void tcp_send_window_probe(struct sock *sk)
{
if (sk->sk_state == TCP_ESTABLISHED) {
tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
+ tcp_mstamp_refresh(tcp_sk(sk));
tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
}
}
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index c6a9fa894646..ad99569d4c1e 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -78,7 +78,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
- if (!scb->tx.delivered_mstamp.v64)
+ if (!scb->tx.delivered_mstamp)
return;
if (!rs->prior_delivered ||
@@ -89,9 +89,9 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
/* Find the duration of the "send phase" of this window: */
- rs->interval_us = skb_mstamp_us_delta(
- &skb->skb_mstamp,
- &scb->tx.first_tx_mstamp);
+ rs->interval_us = tcp_stamp_us_delta(
+ skb->skb_mstamp,
+ scb->tx.first_tx_mstamp);
/* Record send time of most recently ACKed packet: */
tp->first_tx_mstamp = skb->skb_mstamp;
@@ -101,7 +101,7 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
* we don't need to reset since it'll be freed soon.
*/
if (scb->sacked & TCPCB_SACKED_ACKED)
- scb->tx.delivered_mstamp.v64 = 0;
+ scb->tx.delivered_mstamp = 0;
}
/* Update the connection delivery information and generate a rate sample. */
@@ -125,7 +125,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
rs->acked_sacked = delivered; /* freshly ACKed or SACKed */
rs->losses = lost; /* freshly marked lost */
/* Return an invalid sample if no timing information is available. */
- if (!rs->prior_mstamp.v64) {
+ if (!rs->prior_mstamp) {
rs->delivered = -1;
rs->interval_us = -1;
return;
@@ -138,8 +138,8 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
* longer phase.
*/
snd_us = rs->interval_us; /* send phase */
- ack_us = skb_mstamp_us_delta(&tp->tcp_mstamp,
- &rs->prior_mstamp); /* ack phase */
+ ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
+ rs->prior_mstamp); /* ack phase */
rs->interval_us = max(snd_us, ack_us);
/* Normally we expect interval_us >= min-rtt.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 362b8c75bfab..fe9a493d0208 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -17,12 +17,9 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
}
}
-static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
- const struct skb_mstamp *t2,
- u32 seq1, u32 seq2)
+static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
{
- return skb_mstamp_after(t1, t2) ||
- (t1->v64 == t2->v64 && after(seq1, seq2));
+ return t1 > t2 || (t1 == t2 && after(seq1, seq2));
}
/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
@@ -72,14 +69,14 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
scb->sacked & TCPCB_SACKED_ACKED)
continue;
- if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp,
+ if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
tp->rack.end_seq, scb->end_seq)) {
/* Step 3 in draft-cheng-tcpm-rack-00.txt:
* A packet is lost if its elapsed time is beyond
* the recent RTT plus the reordering window.
*/
- u32 elapsed = skb_mstamp_us_delta(&tp->tcp_mstamp,
- &skb->skb_mstamp);
+ u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp,
+ skb->skb_mstamp);
s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
if (remaining < 0) {
@@ -127,16 +124,16 @@ void tcp_rack_mark_lost(struct sock *sk)
* draft-cheng-tcpm-rack-00.txt
*/
void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
- const struct skb_mstamp *xmit_time)
+ u64 xmit_time)
{
u32 rtt_us;
- if (tp->rack.mstamp.v64 &&
- !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp,
+ if (tp->rack.mstamp &&
+ !tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
end_seq, tp->rack.end_seq))
return;
- rtt_us = skb_mstamp_us_delta(&tp->tcp_mstamp, xmit_time);
+ rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
if (sacked & TCPCB_RETRANS) {
/* If the sacked packet was retransmitted, it's ambiguous
* whether the retransmission or the original (or the prior
@@ -152,7 +149,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
return;
}
tp->rack.rtt_us = rtt_us;
- tp->rack.mstamp = *xmit_time;
+ tp->rack.mstamp = xmit_time;
tp->rack.end_seq = end_seq;
tp->rack.advanced = 1;
}
@@ -166,7 +163,6 @@ void tcp_rack_reo_timeout(struct sock *sk)
u32 timeout, prior_inflight;
prior_inflight = tcp_packets_in_flight(tp);
- skb_mstamp_get(&tp->tcp_mstamp);
tcp_rack_detect_loss(sk, &timeout);
if (prior_inflight != tcp_packets_in_flight(tp)) {
if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 14672543cf0b..c4a35ba7f8ed 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -63,7 +63,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
/* If peer does not open window for long time, or did not transmit
* anything for long time, penalize it. */
- if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
+ if ((s32)(tcp_jiffies32 - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
shift++;
/* If some dubious ICMP arrived, penalize even more. */
@@ -73,7 +73,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
if (tcp_check_oom(sk, shift)) {
/* Catch exceptional cases, when connection requires reset.
* 1. Last segment was sent recently. */
- if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
+ if ((s32)(tcp_jiffies32 - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
/* 2. Window is closed. */
(!tp->snd_wnd && !tp->packets_out))
do_reset = true;
@@ -115,7 +115,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
if (net->ipv4.sysctl_tcp_mtu_probing) {
if (!icsk->icsk_mtup.enabled) {
icsk->icsk_mtup.enabled = 1;
- icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+ icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else {
struct net *net = sock_net(sk);
@@ -153,8 +153,8 @@ static bool retransmits_timed_out(struct sock *sk,
unsigned int timeout,
bool syn_set)
{
- unsigned int linear_backoff_thresh, start_ts;
unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
+ unsigned int linear_backoff_thresh, start_ts;
if (!inet_csk(sk)->icsk_retransmits)
return false;
@@ -172,7 +172,7 @@ static bool retransmits_timed_out(struct sock *sk,
timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
}
- return (tcp_time_stamp - start_ts) >= timeout;
+ return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= jiffies_to_msecs(timeout);
}
/* A write timeout has occurred. Process the after effects. */
@@ -339,9 +339,10 @@ static void tcp_probe_timer(struct sock *sk)
*/
start_ts = tcp_skb_timestamp(tcp_send_head(sk));
if (!start_ts)
- skb_mstamp_get(&tcp_send_head(sk)->skb_mstamp);
+ tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
else if (icsk->icsk_user_timeout &&
- (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout)
+ (s32)(tcp_time_stamp(tp) - start_ts) >
+ jiffies_to_msecs(icsk->icsk_user_timeout))
goto abort;
max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
@@ -451,7 +452,7 @@ void tcp_retransmit_timer(struct sock *sk)
tp->snd_una, tp->snd_nxt);
}
#endif
- if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
+ if (tcp_jiffies32 - tp->rcv_tstamp > TCP_RTO_MAX) {
tcp_write_err(sk);
goto out;
}
@@ -561,6 +562,7 @@ void tcp_write_timer_handler(struct sock *sk)
goto out;
}
+ tcp_mstamp_refresh(tcp_sk(sk));
event = icsk->icsk_pending;
switch (event) {
@@ -710,4 +712,7 @@ void tcp_init_xmit_timers(struct sock *sk)
{
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
&tcp_keepalive_timer);
+ hrtimer_init(&tcp_sk(sk)->pacing_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS_PINNED);
+ tcp_sk(sk)->pacing_timer.function = tcp_pace_kick;
}
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 9775453b8d17..bec9cafbe3f9 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -68,7 +68,7 @@ static void tcp_westwood_init(struct sock *sk)
w->cumul_ack = 0;
w->reset_rtt_min = 1;
w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT;
- w->rtt_win_sx = tcp_time_stamp;
+ w->rtt_win_sx = tcp_jiffies32;
w->snd_una = tcp_sk(sk)->snd_una;
w->first_ack = 1;
}
@@ -116,7 +116,7 @@ static void tcp_westwood_pkts_acked(struct sock *sk,
static void westwood_update_window(struct sock *sk)
{
struct westwood *w = inet_csk_ca(sk);
- s32 delta = tcp_time_stamp - w->rtt_win_sx;
+ s32 delta = tcp_jiffies32 - w->rtt_win_sx;
/* Initialize w->snd_una with the first acked sequence number in order
* to fix mismatch between tp->snd_una and w->snd_una for the first
@@ -140,7 +140,7 @@ static void westwood_update_window(struct sock *sk)
westwood_filter(w, delta);
w->bk = 0;
- w->rtt_win_sx = tcp_time_stamp;
+ w->rtt_win_sx = tcp_jiffies32;
}
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1d6219bf2d6b..fdcb7437cc15 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1164,22 +1164,32 @@ out:
}
/* fully reclaim rmem/fwd memory allocated for skb */
-static void udp_rmem_release(struct sock *sk, int size, int partial)
+static void udp_rmem_release(struct sock *sk, int size, int partial,
+ bool rx_queue_lock_held)
{
struct udp_sock *up = udp_sk(sk);
+ struct sk_buff_head *sk_queue;
int amt;
if (likely(partial)) {
up->forward_deficit += size;
size = up->forward_deficit;
if (size < (sk->sk_rcvbuf >> 2) &&
- !skb_queue_empty(&sk->sk_receive_queue))
+ !skb_queue_empty(&up->reader_queue))
return;
} else {
size += up->forward_deficit;
}
up->forward_deficit = 0;
+ /* acquire the sk_receive_queue for fwd allocated memory scheduling,
+ * if the called don't held it already
+ */
+ sk_queue = &sk->sk_receive_queue;
+ if (!rx_queue_lock_held)
+ spin_lock(&sk_queue->lock);
+
+
sk->sk_forward_alloc += size;
amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
sk->sk_forward_alloc -= amt;
@@ -1188,19 +1198,31 @@ static void udp_rmem_release(struct sock *sk, int size, int partial)
__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
atomic_sub(size, &sk->sk_rmem_alloc);
+
+ /* this can save us from acquiring the rx queue lock on next receive */
+ skb_queue_splice_tail_init(sk_queue, &up->reader_queue);
+
+ if (!rx_queue_lock_held)
+ spin_unlock(&sk_queue->lock);
}
-/* Note: called with sk_receive_queue.lock held.
+/* Note: called with reader_queue.lock held.
* Instead of using skb->truesize here, find a copy of it in skb->dev_scratch
* This avoids a cache line miss while receive_queue lock is held.
* Look at __udp_enqueue_schedule_skb() to find where this copy is done.
*/
void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
{
- udp_rmem_release(sk, skb->dev_scratch, 1);
+ udp_rmem_release(sk, skb->dev_scratch, 1, false);
}
EXPORT_SYMBOL(udp_skb_destructor);
+/* as above, but the caller held the rx queue lock, too */
+static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb)
+{
+ udp_rmem_release(sk, skb->dev_scratch, 1, true);
+}
+
/* Idea of busylocks is to let producers grab an extra spinlock
* to relieve pressure on the receive_queue spinlock shared by consumer.
* Under flood, this means that only one producer can be in line
@@ -1306,14 +1328,16 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
void udp_destruct_sock(struct sock *sk)
{
/* reclaim completely the forward allocated memory */
+ struct udp_sock *up = udp_sk(sk);
unsigned int total = 0;
struct sk_buff *skb;
- while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+ skb_queue_splice_tail_init(&sk->sk_receive_queue, &up->reader_queue);
+ while ((skb = __skb_dequeue(&up->reader_queue)) != NULL) {
total += skb->truesize;
kfree_skb(skb);
}
- udp_rmem_release(sk, total, 0);
+ udp_rmem_release(sk, total, 0, true);
inet_sock_destruct(sk);
}
@@ -1321,6 +1345,7 @@ EXPORT_SYMBOL_GPL(udp_destruct_sock);
int udp_init_sock(struct sock *sk)
{
+ skb_queue_head_init(&udp_sk(sk)->reader_queue);
sk->sk_destruct = udp_destruct_sock;
return 0;
}
@@ -1338,6 +1363,26 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
}
EXPORT_SYMBOL_GPL(skb_consume_udp);
+static struct sk_buff *__first_packet_length(struct sock *sk,
+ struct sk_buff_head *rcvq,
+ int *total)
+{
+ struct sk_buff *skb;
+
+ while ((skb = skb_peek(rcvq)) != NULL &&
+ udp_lib_checksum_complete(skb)) {
+ __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+ IS_UDPLITE(sk));
+ __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ atomic_inc(&sk->sk_drops);
+ __skb_unlink(skb, rcvq);
+ *total += skb->truesize;
+ kfree_skb(skb);
+ }
+ return skb;
+}
+
/**
* first_packet_length - return length of first packet in receive queue
* @sk: socket
@@ -1347,26 +1392,24 @@ EXPORT_SYMBOL_GPL(skb_consume_udp);
*/
static int first_packet_length(struct sock *sk)
{
- struct sk_buff_head *rcvq = &sk->sk_receive_queue;
+ struct sk_buff_head *rcvq = &udp_sk(sk)->reader_queue;
+ struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
struct sk_buff *skb;
int total = 0;
int res;
spin_lock_bh(&rcvq->lock);
- while ((skb = skb_peek(rcvq)) != NULL &&
- udp_lib_checksum_complete(skb)) {
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
- IS_UDPLITE(sk));
- __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
- IS_UDPLITE(sk));
- atomic_inc(&sk->sk_drops);
- __skb_unlink(skb, rcvq);
- total += skb->truesize;
- kfree_skb(skb);
+ skb = __first_packet_length(sk, rcvq, &total);
+ if (!skb && !skb_queue_empty(sk_queue)) {
+ spin_lock(&sk_queue->lock);
+ skb_queue_splice_tail_init(sk_queue, rcvq);
+ spin_unlock(&sk_queue->lock);
+
+ skb = __first_packet_length(sk, rcvq, &total);
}
res = skb ? skb->len : -1;
if (total)
- udp_rmem_release(sk, total, 1);
+ udp_rmem_release(sk, total, 1, false);
spin_unlock_bh(&rcvq->lock);
return res;
}
@@ -1400,6 +1443,77 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
}
EXPORT_SYMBOL(udp_ioctl);
+struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
+ int noblock, int *peeked, int *off, int *err)
+{
+ struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
+ struct sk_buff_head *queue;
+ struct sk_buff *last;
+ long timeo;
+ int error;
+
+ queue = &udp_sk(sk)->reader_queue;
+ flags |= noblock ? MSG_DONTWAIT : 0;
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ do {
+ struct sk_buff *skb;
+
+ error = sock_error(sk);
+ if (error)
+ break;
+
+ error = -EAGAIN;
+ *peeked = 0;
+ do {
+ spin_lock_bh(&queue->lock);
+ skb = __skb_try_recv_from_queue(sk, queue, flags,
+ udp_skb_destructor,
+ peeked, off, err,
+ &last);
+ if (skb) {
+ spin_unlock_bh(&queue->lock);
+ return skb;
+ }
+
+ if (skb_queue_empty(sk_queue)) {
+ spin_unlock_bh(&queue->lock);
+ goto busy_check;
+ }
+
+ /* refill the reader queue and walk it again
+ * keep both queues locked to avoid re-acquiring
+ * the sk_receive_queue lock if fwd memory scheduling
+ * is needed.
+ */
+ spin_lock(&sk_queue->lock);
+ skb_queue_splice_tail_init(sk_queue, queue);
+
+ skb = __skb_try_recv_from_queue(sk, queue, flags,
+ udp_skb_dtor_locked,
+ peeked, off, err,
+ &last);
+ spin_unlock(&sk_queue->lock);
+ spin_unlock_bh(&queue->lock);
+ if (skb)
+ return skb;
+
+busy_check:
+ if (!sk_can_busy_loop(sk))
+ break;
+
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
+ } while (!skb_queue_empty(sk_queue));
+
+ /* sk_queue is empty, reader_queue may contain peeked packets */
+ } while (timeo &&
+ !__skb_wait_for_more_packets(sk, &error, &timeo,
+ (struct sk_buff *)sk_queue));
+
+ *err = error;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(__skb_recv_udp);
+
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
@@ -1490,7 +1604,8 @@ try_again:
return err;
csum_copy_err:
- if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) {
+ if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
+ udp_skb_destructor)) {
UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
}
@@ -2325,6 +2440,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
unsigned int mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
+ if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
+ mask |= POLLIN | POLLRDNORM;
+
sock_rps_record_flow(sk);
/* Check for false positives due to checksum errors */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6a4fb1e629fb..25443fd946a8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2280,7 +2280,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
cfg.fc_flags |= RTF_NONEXTHOP;
#endif
- ip6_route_add(&cfg);
+ ip6_route_add(&cfg, NULL);
}
@@ -2335,7 +2335,7 @@ static void addrconf_add_mroute(struct net_device *dev)
ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
- ip6_route_add(&cfg);
+ ip6_route_add(&cfg, NULL);
}
static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index 9ea249b9451e..6de3c04b0f30 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -14,6 +14,8 @@
#include <net/udp.h>
#include <net/udp_tunnel.h>
+#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL)
+
static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
struct flowi6 *fl6, u8 *protocol, __be16 sport)
{
@@ -33,8 +35,8 @@ static void fou6_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
*protocol = IPPROTO_UDP;
}
-int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- u8 *protocol, struct flowi6 *fl6)
+static int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi6 *fl6)
{
__be16 sport;
int err;
@@ -49,10 +51,9 @@ int fou6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
return 0;
}
-EXPORT_SYMBOL(fou6_build_header);
-int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- u8 *protocol, struct flowi6 *fl6)
+static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi6 *fl6)
{
__be16 sport;
int err;
@@ -67,9 +68,6 @@ int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
return 0;
}
-EXPORT_SYMBOL(gue6_build_header);
-
-#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL)
static const struct ip6_tnl_encap_ops fou_ip6tun_ops = {
.encap_hlen = fou_encap_hlen,
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index b3df03e3faa0..f4a413aba423 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -91,7 +91,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
drop:
kfree_skb(skb);
- return -EINVAL;
+ return err;
}
static int ila_input(struct sk_buff *skb)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index d4bf2c68a545..deea901746c8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -473,7 +473,8 @@ out:
static struct fib6_node *fib6_add_1(struct fib6_node *root,
struct in6_addr *addr, int plen,
int offset, int allow_create,
- int replace_required, int sernum)
+ int replace_required, int sernum,
+ struct netlink_ext_ack *extack)
{
struct fib6_node *fn, *in, *ln;
struct fib6_node *pn = NULL;
@@ -497,6 +498,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
!ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
if (!allow_create) {
if (replace_required) {
+ NL_SET_ERR_MSG(extack,
+ "Can not replace route - no match found");
pr_warn("Can't replace route, no match found\n");
return ERR_PTR(-ENOENT);
}
@@ -543,6 +546,8 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
* That would keep IPv6 consistent with IPv4
*/
if (replace_required) {
+ NL_SET_ERR_MSG(extack,
+ "Can not replace route - no match found");
pr_warn("Can't replace route, no match found\n");
return ERR_PTR(-ENOENT);
}
@@ -964,7 +969,8 @@ void fib6_force_start_gc(struct net *net)
*/
int fib6_add(struct fib6_node *root, struct rt6_info *rt,
- struct nl_info *info, struct mx6_config *mxc)
+ struct nl_info *info, struct mx6_config *mxc,
+ struct netlink_ext_ack *extack)
{
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
@@ -987,7 +993,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
offsetof(struct rt6_info, rt6i_dst), allow_create,
- replace_required, sernum);
+ replace_required, sernum, extack);
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
fn = NULL;
@@ -1028,7 +1034,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum);
+ allow_create, replace_required, sernum,
+ extack);
if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
@@ -1047,7 +1054,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum);
+ allow_create, replace_required, sernum,
+ extack);
if (IS_ERR(sn)) {
err = PTR_ERR(sn);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index eedee5d108d9..f63b18e05c69 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -220,9 +220,6 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
__be16 fo;
u8 proto;
- if (skb->csum_bad)
- return false;
-
if (skb_csum_unnecessary(skb))
return true;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dc61b0b5e64e..80bda31ffbbe 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -938,14 +938,15 @@ EXPORT_SYMBOL(rt6_lookup);
*/
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
- struct mx6_config *mxc)
+ struct mx6_config *mxc,
+ struct netlink_ext_ack *extack)
{
int err;
struct fib6_table *table;
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
- err = fib6_add(&table->tb6_root, rt, info, mxc);
+ err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
write_unlock_bh(&table->tb6_lock);
return err;
@@ -956,7 +957,7 @@ int ip6_ins_rt(struct rt6_info *rt)
struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
struct mx6_config mxc = { .mx = NULL, };
- return __ip6_ins_rt(rt, &info, &mxc);
+ return __ip6_ins_rt(rt, &info, &mxc, NULL);
}
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
@@ -1844,7 +1845,8 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
return rt;
}
-static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
+static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct net *net = cfg->fc_nlinfo.nl_net;
struct rt6_info *rt = NULL;
@@ -1855,14 +1857,25 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
int err = -EINVAL;
/* RTF_PCPU is an internal flag; can not be set by userspace */
- if (cfg->fc_flags & RTF_PCPU)
+ if (cfg->fc_flags & RTF_PCPU) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
goto out;
+ }
- if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
+ if (cfg->fc_dst_len > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ goto out;
+ }
+ if (cfg->fc_src_len > 128) {
+ NL_SET_ERR_MSG(extack, "Invalid source address length");
goto out;
+ }
#ifndef CONFIG_IPV6_SUBTREES
- if (cfg->fc_src_len)
+ if (cfg->fc_src_len) {
+ NL_SET_ERR_MSG(extack,
+ "Specifying source address requires IPV6_SUBTREES to be enabled");
goto out;
+ }
#endif
if (cfg->fc_ifindex) {
err = -ENODEV;
@@ -2013,9 +2026,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
err = -EINVAL;
if (ipv6_chk_addr_and_flags(net, gw_addr,
gwa_type & IPV6_ADDR_LINKLOCAL ?
- dev : NULL, 0, 0))
+ dev : NULL, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway address");
goto out;
-
+ }
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -2031,8 +2045,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
addressing
*/
if (!(gwa_type & (IPV6_ADDR_UNICAST |
- IPV6_ADDR_MAPPED)))
+ IPV6_ADDR_MAPPED))) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid gateway address");
goto out;
+ }
if (cfg->fc_table) {
grt = ip6_nh_lookup_table(net, cfg, gw_addr);
@@ -2072,8 +2089,14 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
goto out;
}
err = -EINVAL;
- if (!dev || (dev->flags & IFF_LOOPBACK))
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Egress device not specified");
goto out;
+ } else if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack,
+ "Egress device can not be loopback device for this route");
+ goto out;
+ }
}
err = -ENODEV;
@@ -2082,6 +2105,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
+ NL_SET_ERR_MSG(extack, "Invalid source address");
err = -EINVAL;
goto out;
}
@@ -2111,13 +2135,14 @@ out:
return ERR_PTR(err);
}
-int ip6_route_add(struct fib6_config *cfg)
+int ip6_route_add(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct mx6_config mxc = { .mx = NULL, };
struct rt6_info *rt;
int err;
- rt = ip6_route_info_create(cfg);
+ rt = ip6_route_info_create(cfg, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
@@ -2128,7 +2153,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (err)
goto out;
- err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
+ err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
kfree(mxc.mx);
@@ -2222,7 +2247,8 @@ out_put:
return err;
}
-static int ip6_route_del(struct fib6_config *cfg)
+static int ip6_route_del(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct fib6_table *table;
struct fib6_node *fn;
@@ -2230,8 +2256,10 @@ static int ip6_route_del(struct fib6_config *cfg)
int err = -ESRCH;
table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
- if (!table)
+ if (!table) {
+ NL_SET_ERR_MSG(extack, "FIB table does not exist");
return err;
+ }
read_lock_bh(&table->tb6_lock);
@@ -2483,7 +2511,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
if (!prefixlen)
cfg.fc_flags |= RTF_DEFAULT;
- ip6_route_add(&cfg);
+ ip6_route_add(&cfg, NULL);
return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
}
@@ -2529,7 +2557,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
cfg.fc_gateway = *gwaddr;
- if (!ip6_route_add(&cfg)) {
+ if (!ip6_route_add(&cfg, NULL)) {
struct fib6_table *table;
table = fib6_get_table(dev_net(dev), cfg.fc_table);
@@ -2622,10 +2650,10 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
- err = ip6_route_add(&cfg);
+ err = ip6_route_add(&cfg, NULL);
break;
case SIOCDELRT:
- err = ip6_route_del(&cfg);
+ err = ip6_route_del(&cfg, NULL);
break;
default:
err = -EINVAL;
@@ -2903,7 +2931,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct fib6_config *cfg)
+ struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
@@ -3097,7 +3126,8 @@ static void ip6_route_mpath_notify(struct rt6_info *rt,
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
-static int ip6_route_multipath_add(struct fib6_config *cfg)
+static int ip6_route_multipath_add(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct rt6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
@@ -3145,7 +3175,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
r_cfg.fc_encap_type = nla_get_u16(nla);
}
- rt = ip6_route_info_create(&r_cfg);
+ rt = ip6_route_info_create(&r_cfg, extack);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
@@ -3170,7 +3200,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
rt_last = nh->rt6_info;
- err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc);
+ err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
/* save reference to first route for notification */
if (!rt_notif && !err)
rt_notif = nh->rt6_info;
@@ -3212,7 +3242,7 @@ add_errout:
list_for_each_entry(nh, &rt6_nh_list, next) {
if (err_nh == nh)
break;
- ip6_route_del(&nh->r_cfg);
+ ip6_route_del(&nh->r_cfg, extack);
}
cleanup:
@@ -3227,7 +3257,8 @@ cleanup:
return err;
}
-static int ip6_route_multipath_del(struct fib6_config *cfg)
+static int ip6_route_multipath_del(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
@@ -3254,7 +3285,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg)
r_cfg.fc_flags |= RTF_GATEWAY;
}
}
- err = ip6_route_del(&r_cfg);
+ err = ip6_route_del(&r_cfg, extack);
if (err)
last_err = err;
@@ -3270,15 +3301,15 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib6_config cfg;
int err;
- err = rtm_to_fib6_config(skb, nlh, &cfg);
+ err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
if (err < 0)
return err;
if (cfg.fc_mp)
- return ip6_route_multipath_del(&cfg);
+ return ip6_route_multipath_del(&cfg, extack);
else {
cfg.fc_delete_all_nh = 1;
- return ip6_route_del(&cfg);
+ return ip6_route_del(&cfg, extack);
}
}
@@ -3288,14 +3319,14 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib6_config cfg;
int err;
- err = rtm_to_fib6_config(skb, nlh, &cfg);
+ err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
if (err < 0)
return err;
if (cfg.fc_mp)
- return ip6_route_multipath_add(&cfg);
+ return ip6_route_multipath_add(&cfg, extack);
else
- return ip6_route_add(&cfg);
+ return ip6_route_add(&cfg, extack);
}
static size_t rt6_nlmsg_size(struct rt6_info *rt)
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 5f44ffed2576..15fba55e3da8 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -303,13 +303,9 @@ static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
{
struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
- struct net *net = sock_net(skb->sk);
- struct seg6_pernet_data *sdata;
struct seg6_hmac_info *hinfo;
int ret;
- sdata = seg6_pernet(net);
-
ret = rhashtable_walk_start(iter);
if (ret && ret != -EAGAIN)
goto done;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5abc3692b901..971823359f5b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -211,7 +211,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack.v64 = 0;
+ treq->snt_synack = 0;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
treq->ts_off = 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4f4310a36a04..233edfabe1db 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -949,7 +949,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcp_time_stamp + tcptw->tw_ts_offset,
+ tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
@@ -971,7 +971,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
- tcp_time_stamp + tcp_rsk(req)->ts_off,
+ tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
0, 0);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 06ec39b79609..2e9b52bded2d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -455,7 +455,8 @@ try_again:
return err;
csum_copy_err:
- if (!__sk_queue_drop_skb(sk, skb, flags, udp_skb_destructor)) {
+ if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
+ udp_skb_destructor)) {
if (is_udp4) {
UDP_INC_STATS(sock_net(sk),
UDP_MIB_CSUMERRORS, is_udplite);
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index deca20fb2ce2..da49191f7ad0 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1985,7 +1985,7 @@ static int kcm_create(struct net *net, struct socket *sock,
return 0;
}
-static struct net_proto_family kcm_family_ops = {
+static const struct net_proto_family kcm_family_ops = {
.family = PF_KCM,
.create = kcm_create,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index a504e87c6ddf..49bd8bb16b18 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -152,7 +152,7 @@ void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
struct synproxy_options *opts)
{
opts->tsecr = opts->tsval;
- opts->tsval = tcp_time_stamp & ~0x3f;
+ opts->tsval = tcp_time_stamp_raw() & ~0x3f;
if (opts->options & XT_SYNPROXY_OPT_WSCALE) {
opts->tsval |= opts->wscale;
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 54e40fa47822..d3e594eb36d0 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -48,7 +48,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto,
return rc;
}
-static struct net_proto_family nfc_sock_family_ops = {
+static const struct net_proto_family nfc_sock_family_ops = {
.owner = THIS_MODULE,
.family = PF_NFC,
.create = nfc_sock_create,
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 7b17da9a94a0..9ddc9f8412a2 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -453,7 +453,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
/* Complete checksum if needed */
if (skb->ip_summed == CHECKSUM_PARTIAL &&
- (err = skb_checksum_help(skb)))
+ (err = skb_csum_hwoffload_help(skb, 0)))
goto out;
/* Older versions of OVS user space enforce alignment of the last
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index a90e8f355c00..0ecf2a858767 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -28,6 +28,31 @@
#include <net/act_api.h>
#include <net/netlink.h>
+static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
+{
+ u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
+
+ if (!tp)
+ return -EINVAL;
+ a->goto_chain = tcf_chain_get(tp->chain->block, chain_index);
+ if (!a->goto_chain)
+ return -ENOMEM;
+ return 0;
+}
+
+static void tcf_action_goto_chain_fini(struct tc_action *a)
+{
+ tcf_chain_put(a->goto_chain);
+}
+
+static void tcf_action_goto_chain_exec(const struct tc_action *a,
+ struct tcf_result *res)
+{
+ const struct tcf_chain *chain = a->goto_chain;
+
+ res->goto_tp = rcu_dereference_bh(chain->filter_chain);
+}
+
static void free_tcf(struct rcu_head *head)
{
struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu);
@@ -39,6 +64,8 @@ static void free_tcf(struct rcu_head *head)
kfree(p->act_cookie->data);
kfree(p->act_cookie);
}
+ if (p->goto_chain)
+ tcf_action_goto_chain_fini(p);
kfree(p);
}
@@ -465,6 +492,8 @@ repeat:
else /* faulty graph, stop pipeline */
return TC_ACT_OK;
}
+ } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
+ tcf_action_goto_chain_exec(a, res);
}
if (ret != TC_ACT_PIPE)
@@ -570,9 +599,9 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
-struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
- struct nlattr *est, char *name, int ovr,
- int bind)
+struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
+ struct nlattr *nla, struct nlattr *est,
+ char *name, int ovr, int bind)
{
struct tc_action *a;
struct tc_action_ops *a_o;
@@ -657,6 +686,17 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
if (err != ACT_P_CREATED)
module_put(a_o->owner);
+ if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
+ err = tcf_action_goto_chain_init(a, tp);
+ if (err) {
+ LIST_HEAD(actions);
+
+ list_add_tail(&a->list, &actions);
+ tcf_action_destroy(&actions, bind);
+ return ERR_PTR(err);
+ }
+ }
+
return a;
err_mod:
@@ -680,8 +720,9 @@ static void cleanup_a(struct list_head *actions, int ovr)
a->tcfa_refcnt--;
}
-int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind, struct list_head *actions)
+int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
+ struct nlattr *est, char *name, int ovr, int bind,
+ struct list_head *actions)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
@@ -693,7 +734,7 @@ int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_init_1(net, tb[i], est, name, ovr, bind);
+ act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
@@ -1020,7 +1061,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
int ret = 0;
LIST_HEAD(actions);
- ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
+ ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions);
if (ret)
return ret;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index ab6fdbd34db7..3317a2f579da 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -350,6 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
sctph->checksum = sctp_compute_cksum(skb,
skb_network_offset(skb) + ihl);
skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_not_inet = 0;
return 1;
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 22f88b35a546..4020b8d932a1 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -106,13 +106,12 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n,
- struct tcf_proto __rcu **chain, int event)
+ struct tcf_chain *chain, int event)
{
- struct tcf_proto __rcu **it_chain;
struct tcf_proto *tp;
- for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL;
- it_chain = &tp->next)
+ for (tp = rtnl_dereference(chain->filter_chain);
+ tp; tp = rtnl_dereference(tp->next))
tfilter_notify(net, oskb, n, tp, 0, event, false);
}
@@ -125,11 +124,12 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
if (tp)
first = tp->prio - 1;
- return first;
+ return TC_H_MAJ(first);
}
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
- u32 prio, u32 parent, struct Qdisc *q)
+ u32 prio, u32 parent, struct Qdisc *q,
+ struct tcf_chain *chain)
{
struct tcf_proto *tp;
int err;
@@ -165,6 +165,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
tp->prio = prio;
tp->classid = parent;
tp->q = q;
+ tp->chain = chain;
err = tp->ops->init(tp);
if (err) {
@@ -185,16 +186,213 @@ static void tcf_proto_destroy(struct tcf_proto *tp)
kfree_rcu(tp, rcu);
}
-void tcf_destroy_chain(struct tcf_proto __rcu **fl)
+static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
+ u32 chain_index)
+{
+ struct tcf_chain *chain;
+
+ chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+ if (!chain)
+ return NULL;
+ list_add_tail(&chain->list, &block->chain_list);
+ chain->block = block;
+ chain->index = chain_index;
+ chain->refcnt = 1;
+ return chain;
+}
+
+static void tcf_chain_destroy(struct tcf_chain *chain)
{
struct tcf_proto *tp;
- while ((tp = rtnl_dereference(*fl)) != NULL) {
- RCU_INIT_POINTER(*fl, tp->next);
+ list_del(&chain->list);
+ while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
+ RCU_INIT_POINTER(chain->filter_chain, tp->next);
tcf_proto_destroy(tp);
}
+ kfree(chain);
+}
+
+struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index)
+{
+ struct tcf_chain *chain;
+
+ list_for_each_entry(chain, &block->chain_list, list) {
+ if (chain->index == chain_index) {
+ chain->refcnt++;
+ return chain;
+ }
+ }
+ return tcf_chain_create(block, chain_index);
+}
+EXPORT_SYMBOL(tcf_chain_get);
+
+void tcf_chain_put(struct tcf_chain *chain)
+{
+ /* Destroy unused chain, with exception of chain 0, which is the
+ * default one and has to be always present.
+ */
+ if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0)
+ tcf_chain_destroy(chain);
+}
+EXPORT_SYMBOL(tcf_chain_put);
+
+static void
+tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
+ struct tcf_proto __rcu **p_filter_chain)
+{
+ chain->p_filter_chain = p_filter_chain;
+}
+
+int tcf_block_get(struct tcf_block **p_block,
+ struct tcf_proto __rcu **p_filter_chain)
+{
+ struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
+ struct tcf_chain *chain;
+ int err;
+
+ if (!block)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&block->chain_list);
+ /* Create chain 0 by default, it has to be always present. */
+ chain = tcf_chain_create(block, 0);
+ if (!chain) {
+ err = -ENOMEM;
+ goto err_chain_create;
+ }
+ tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
+ *p_block = block;
+ return 0;
+
+err_chain_create:
+ kfree(block);
+ return err;
+}
+EXPORT_SYMBOL(tcf_block_get);
+
+void tcf_block_put(struct tcf_block *block)
+{
+ struct tcf_chain *chain, *tmp;
+
+ if (!block)
+ return;
+
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ tcf_chain_destroy(chain);
+ kfree(block);
+}
+EXPORT_SYMBOL(tcf_block_put);
+
+/* Main classifier routine: scans classifier chain attached
+ * to this qdisc, (optionally) tests for protocol and asks
+ * specific classifiers.
+ */
+int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res, bool compat_mode)
+{
+ __be16 protocol = tc_skb_protocol(skb);
+#ifdef CONFIG_NET_CLS_ACT
+ const int max_reclassify_loop = 4;
+ const struct tcf_proto *old_tp = tp;
+ int limit = 0;
+
+reclassify:
+#endif
+ for (; tp; tp = rcu_dereference_bh(tp->next)) {
+ int err;
+
+ if (tp->protocol != protocol &&
+ tp->protocol != htons(ETH_P_ALL))
+ continue;
+
+ err = tp->classify(skb, tp, res);
+#ifdef CONFIG_NET_CLS_ACT
+ if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
+ goto reset;
+ } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
+ old_tp = res->goto_tp;
+ goto reset;
+ }
+#endif
+ if (err >= 0)
+ return err;
+ }
+
+ return TC_ACT_UNSPEC; /* signal: continue lookup */
+#ifdef CONFIG_NET_CLS_ACT
+reset:
+ if (unlikely(limit++ >= max_reclassify_loop)) {
+ net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
+ tp->q->ops->id, tp->prio & 0xffff,
+ ntohs(tp->protocol));
+ return TC_ACT_SHOT;
+ }
+
+ tp = old_tp;
+ protocol = tc_skb_protocol(skb);
+ goto reclassify;
+#endif
+}
+EXPORT_SYMBOL(tcf_classify);
+
+struct tcf_chain_info {
+ struct tcf_proto __rcu **pprev;
+ struct tcf_proto __rcu *next;
+};
+
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+{
+ return rtnl_dereference(*chain_info->pprev);
+}
+
+static void tcf_chain_tp_insert(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ struct tcf_proto *tp)
+{
+ if (chain->p_filter_chain &&
+ *chain_info->pprev == chain->filter_chain)
+ *chain->p_filter_chain = tp;
+ RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+ rcu_assign_pointer(*chain_info->pprev, tp);
+}
+
+static void tcf_chain_tp_remove(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ struct tcf_proto *tp)
+{
+ struct tcf_proto *next = rtnl_dereference(chain_info->next);
+
+ if (chain->p_filter_chain && tp == chain->filter_chain)
+ *chain->p_filter_chain = next;
+ RCU_INIT_POINTER(*chain_info->pprev, next);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ u32 protocol, u32 prio,
+ bool prio_allocate)
+{
+ struct tcf_proto **pprev;
+ struct tcf_proto *tp;
+
+ /* Check the chain for existence of proto-tcf with this priority */
+ for (pprev = &chain->filter_chain;
+ (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+ if (tp->prio >= prio) {
+ if (tp->prio == prio) {
+ if (prio_allocate ||
+ (tp->protocol != protocol && protocol))
+ return ERR_PTR(-EINVAL);
+ } else {
+ tp = NULL;
+ }
+ break;
+ }
+ }
+ chain_info->pprev = pprev;
+ chain_info->next = tp ? tp->next : NULL;
+ return tp;
}
-EXPORT_SYMBOL(tcf_destroy_chain);
/* Add/change/delete/get a filter node */
@@ -206,13 +404,14 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct tcmsg *t;
u32 protocol;
u32 prio;
- u32 nprio;
+ bool prio_allocate;
u32 parent;
+ u32 chain_index;
struct net_device *dev;
struct Qdisc *q;
- struct tcf_proto __rcu **back;
- struct tcf_proto __rcu **chain;
- struct tcf_proto *next;
+ struct tcf_chain_info chain_info;
+ struct tcf_chain *chain = NULL;
+ struct tcf_block *block;
struct tcf_proto *tp;
const struct Qdisc_class_ops *cops;
unsigned long cl;
@@ -234,7 +433,7 @@ replay:
t = nlmsg_data(n);
protocol = TC_H_MIN(t->tcm_info);
prio = TC_H_MAJ(t->tcm_info);
- nprio = prio;
+ prio_allocate = false;
parent = t->tcm_parent;
cl = 0;
@@ -250,6 +449,7 @@ replay:
*/
if (n->nlmsg_flags & NLM_F_CREATE) {
prio = TC_H_MAKE(0x80000000U, 0U);
+ prio_allocate = true;
break;
}
/* fall-through */
@@ -280,7 +480,7 @@ replay:
if (!cops)
return -EINVAL;
- if (cops->tcf_chain == NULL)
+ if (!cops->tcf_block)
return -EOPNOTSUPP;
/* Do we search for filter, attached to class? */
@@ -291,34 +491,35 @@ replay:
}
/* And the last stroke */
- chain = cops->tcf_chain(q, cl);
- if (chain == NULL) {
+ block = cops->tcf_block(q, cl);
+ if (!block) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ if (chain_index > TC_ACT_EXT_VAL_MASK) {
err = -EINVAL;
goto errout;
}
+ chain = tcf_chain_get(block, chain_index);
+ if (!chain) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
- tcf_destroy_chain(chain);
+ tcf_chain_destroy(chain);
err = 0;
goto errout;
}
- /* Check the chain for existence of proto-tcf with this priority */
- for (back = chain;
- (tp = rtnl_dereference(*back)) != NULL;
- back = &tp->next) {
- if (tp->prio >= prio) {
- if (tp->prio == prio) {
- if (!nprio ||
- (tp->protocol != protocol && protocol)) {
- err = -EINVAL;
- goto errout;
- }
- } else {
- tp = NULL;
- }
- break;
- }
+ tp = tcf_chain_tp_find(chain, &chain_info, protocol,
+ prio, prio_allocate);
+ if (IS_ERR(tp)) {
+ err = PTR_ERR(tp);
+ goto errout;
}
if (tp == NULL) {
@@ -335,11 +536,11 @@ replay:
goto errout;
}
- if (!nprio)
- nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back)));
+ if (prio_allocate)
+ prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
- protocol, nprio, parent, q);
+ protocol, prio, parent, q, chain);
if (IS_ERR(tp)) {
err = PTR_ERR(tp);
goto errout;
@@ -354,8 +555,7 @@ replay:
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
- next = rtnl_dereference(tp->next);
- RCU_INIT_POINTER(*back, next);
+ tcf_chain_tp_remove(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, fh,
RTM_DELTFILTER, false);
tcf_proto_destroy(tp);
@@ -384,11 +584,10 @@ replay:
err = tp->ops->delete(tp, fh, &last);
if (err)
goto errout;
- next = rtnl_dereference(tp->next);
tfilter_notify(net, skb, n, tp, t->tcm_handle,
RTM_DELTFILTER, false);
if (last) {
- RCU_INIT_POINTER(*back, next);
+ tcf_chain_tp_remove(chain, &chain_info, tp);
tcf_proto_destroy(tp);
}
goto errout;
@@ -405,10 +604,8 @@ replay:
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
if (err == 0) {
- if (tp_created) {
- RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
- rcu_assign_pointer(*back, tp);
- }
+ if (tp_created)
+ tcf_chain_tp_insert(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
} else {
if (tp_created)
@@ -416,6 +613,8 @@ replay:
}
errout:
+ if (chain)
+ tcf_chain_put(chain);
if (cl)
cops->put(q, cl);
if (err == -EAGAIN)
@@ -444,6 +643,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
+ goto nla_put_failure;
tcm->tcm_handle = fh;
if (RTM_DELTFILTER != event) {
tcm->tcm_handle = 0;
@@ -500,22 +701,76 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
RTM_NEWTFILTER);
}
+static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ long index_start, long *p_index)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ struct tcf_dump_args arg;
+ struct tcf_proto *tp;
+
+ for (tp = rtnl_dereference(chain->filter_chain);
+ tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
+ if (*p_index < index_start)
+ continue;
+ if (TC_H_MAJ(tcm->tcm_info) &&
+ TC_H_MAJ(tcm->tcm_info) != tp->prio)
+ continue;
+ if (TC_H_MIN(tcm->tcm_info) &&
+ TC_H_MIN(tcm->tcm_info) != tp->protocol)
+ continue;
+ if (*p_index > index_start)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (cb->args[1] == 0) {
+ if (tcf_fill_node(net, skb, tp, 0,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWTFILTER) <= 0)
+ return false;
+
+ cb->args[1] = 1;
+ }
+ if (!tp->ops->walk)
+ continue;
+ arg.w.fn = tcf_node_dump;
+ arg.skb = skb;
+ arg.cb = cb;
+ arg.w.stop = 0;
+ arg.w.skip = cb->args[1] - 1;
+ arg.w.count = 0;
+ tp->ops->walk(tp, &arg.w);
+ cb->args[1] = arg.w.count + 1;
+ if (arg.w.stop)
+ return false;
+ }
+ return true;
+}
+
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- int t;
- int s_t;
+ struct nlattr *tca[TCA_MAX + 1];
struct net_device *dev;
struct Qdisc *q;
- struct tcf_proto *tp, __rcu **chain;
+ struct tcf_block *block;
+ struct tcf_chain *chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
unsigned long cl = 0;
const struct Qdisc_class_ops *cops;
- struct tcf_dump_args arg;
+ long index_start;
+ long index;
+ int err;
if (nlmsg_len(cb->nlh) < sizeof(*tcm))
return skb->len;
+
+ err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+ if (err)
+ return err;
+
dev = __dev_get_by_index(net, tcm->tcm_ifindex);
if (!dev)
return skb->len;
@@ -529,56 +784,29 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
cops = q->ops->cl_ops;
if (!cops)
goto errout;
- if (cops->tcf_chain == NULL)
+ if (!cops->tcf_block)
goto errout;
if (TC_H_MIN(tcm->tcm_parent)) {
cl = cops->get(q, tcm->tcm_parent);
if (cl == 0)
goto errout;
}
- chain = cops->tcf_chain(q, cl);
- if (chain == NULL)
+ block = cops->tcf_block(q, cl);
+ if (!block)
goto errout;
- s_t = cb->args[0];
+ index_start = cb->args[0];
+ index = 0;
- for (tp = rtnl_dereference(*chain), t = 0;
- tp; tp = rtnl_dereference(tp->next), t++) {
- if (t < s_t)
- continue;
- if (TC_H_MAJ(tcm->tcm_info) &&
- TC_H_MAJ(tcm->tcm_info) != tp->prio)
- continue;
- if (TC_H_MIN(tcm->tcm_info) &&
- TC_H_MIN(tcm->tcm_info) != tp->protocol)
+ list_for_each_entry(chain, &block->chain_list, list) {
+ if (tca[TCA_CHAIN] &&
+ nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
- if (t > s_t)
- memset(&cb->args[1], 0,
- sizeof(cb->args)-sizeof(cb->args[0]));
- if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, 0,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER) <= 0)
- break;
-
- cb->args[1] = 1;
- }
- if (tp->ops->walk == NULL)
- continue;
- arg.w.fn = tcf_node_dump;
- arg.skb = skb;
- arg.cb = cb;
- arg.w.stop = 0;
- arg.w.skip = cb->args[1] - 1;
- arg.w.count = 0;
- tp->ops->walk(tp, &arg.w);
- cb->args[1] = arg.w.count + 1;
- if (arg.w.stop)
+ if (!tcf_chain_dump(chain, skb, cb, index_start, &index))
break;
}
- cb->args[0] = t;
+ cb->args[0] = index;
errout:
if (cl)
@@ -608,8 +836,9 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
struct tc_action *act;
if (exts->police && tb[exts->police]) {
- act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
- "police", ovr, TCA_ACT_BIND);
+ act = tcf_action_init_1(net, tp, tb[exts->police],
+ rate_tlv, "police", ovr,
+ TCA_ACT_BIND);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -620,8 +849,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
LIST_HEAD(actions);
int err, i = 0;
- err = tcf_action_init(net, tb[exts->action], rate_tlv,
- NULL, ovr, TCA_ACT_BIND,
+ err = tcf_action_init(net, tp, tb[exts->action],
+ rate_tlv, NULL, ovr, TCA_ACT_BIND,
&actions);
if (err)
return err;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e88342fde1bc..5d95401bbc02 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -163,7 +163,7 @@ int register_qdisc(struct Qdisc_ops *qops)
if (!(cops->get && cops->put && cops->walk && cops->leaf))
goto out_einval;
- if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
+ if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
goto out_einval;
}
@@ -1878,54 +1878,6 @@ done:
return skb->len;
}
-/* Main classifier routine: scans classifier chain attached
- * to this qdisc, (optionally) tests for protocol and asks
- * specific classifiers.
- */
-int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res, bool compat_mode)
-{
- __be16 protocol = tc_skb_protocol(skb);
-#ifdef CONFIG_NET_CLS_ACT
- const int max_reclassify_loop = 4;
- const struct tcf_proto *old_tp = tp;
- int limit = 0;
-
-reclassify:
-#endif
- for (; tp; tp = rcu_dereference_bh(tp->next)) {
- int err;
-
- if (tp->protocol != protocol &&
- tp->protocol != htons(ETH_P_ALL))
- continue;
-
- err = tp->classify(skb, tp, res);
-#ifdef CONFIG_NET_CLS_ACT
- if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
- goto reset;
-#endif
- if (err >= 0)
- return err;
- }
-
- return TC_ACT_UNSPEC; /* signal: continue lookup */
-#ifdef CONFIG_NET_CLS_ACT
-reset:
- if (unlikely(limit++ >= max_reclassify_loop)) {
- net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
- tp->q->ops->id, tp->prio & 0xffff,
- ntohs(tp->protocol));
- return TC_ACT_SHOT;
- }
-
- tp = old_tp;
- protocol = tc_skb_protocol(skb);
- goto reclassify;
-#endif
-}
-EXPORT_SYMBOL(tc_classify);
-
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 40cbceed4de8..f435546c3864 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -43,6 +43,7 @@
struct atm_flow_data {
struct Qdisc *q; /* FIFO, TBF, etc. */
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
void (*old_pop)(struct atm_vcc *vcc,
struct sk_buff *skb); /* chaining */
@@ -143,7 +144,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
list_del_init(&flow->list);
pr_debug("atm_tc_put: qdisc %p\n", flow->q);
qdisc_destroy(flow->q);
- tcf_destroy_chain(&flow->filter_list);
+ tcf_block_put(flow->block);
if (flow->sock) {
pr_debug("atm_tc_put: f_count %ld\n",
file_count(flow->sock->file));
@@ -274,7 +275,13 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
error = -ENOBUFS;
goto err_out;
}
- RCU_INIT_POINTER(flow->filter_list, NULL);
+
+ error = tcf_block_get(&flow->block, &flow->filter_list);
+ if (error) {
+ kfree(flow);
+ goto err_out;
+ }
+
flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
if (!flow->q)
flow->q = &noop_qdisc;
@@ -346,14 +353,13 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)cl;
pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- return flow ? &flow->filter_list : &p->link.filter_list;
+ return flow ? flow->block : p->link.block;
}
/* --------------------------- Qdisc operations ---------------------------- */
@@ -377,7 +383,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
list_for_each_entry(flow, &p->flows, list) {
fl = rcu_dereference_bh(flow->filter_list);
if (fl) {
- result = tc_classify(skb, fl, &res, true);
+ result = tcf_classify(skb, fl, &res, true);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
@@ -524,6 +530,7 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
+ int err;
pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
INIT_LIST_HEAD(&p->flows);
@@ -534,7 +541,11 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- RCU_INIT_POINTER(p->link.filter_list, NULL);
+
+ err = tcf_block_get(&p->link.block, &p->link.filter_list);
+ if (err)
+ return err;
+
p->link.vcc = NULL;
p->link.sock = NULL;
p->link.classid = sch->handle;
@@ -561,7 +572,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
list_for_each_entry(flow, &p->flows, list)
- tcf_destroy_chain(&flow->filter_list);
+ tcf_block_put(flow->block);
list_for_each_entry_safe(flow, tmp, &p->flows, list) {
if (flow->ref > 1)
@@ -646,7 +657,7 @@ static const struct Qdisc_class_ops atm_class_ops = {
.change = atm_tc_change,
.delete = atm_tc_delete,
.walk = atm_tc_walk,
- .tcf_chain = atm_tc_find_tcf,
+ .tcf_block = atm_tc_tcf_block,
.bind_tcf = atm_tc_bind_filter,
.unbind_tcf = atm_tc_put,
.dump = atm_tc_dump_class,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 7415859fd4c3..8dd6d0aca678 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -127,6 +127,7 @@ struct cbq_class {
struct tc_cbq_xstats xstats;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
int refcnt;
int filters;
@@ -233,7 +234,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
/*
* Step 2+n. Apply classifier.
*/
- result = tc_classify(skb, fl, &res, true);
+ result = tcf_classify(skb, fl, &res, true);
if (!fl || result < 0)
goto fallback;
@@ -1405,7 +1406,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
WARN_ON(cl->filters);
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
qdisc_destroy(cl->q);
qdisc_put_rtab(cl->R_tab);
gen_kill_estimator(&cl->rate_est);
@@ -1430,7 +1431,7 @@ static void cbq_destroy(struct Qdisc *sch)
*/
for (h = 0; h < q->clhash.hashsize; h++) {
hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
}
for (h = 0; h < q->clhash.hashsize; h++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
@@ -1585,12 +1586,19 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (cl == NULL)
goto failure;
+ err = tcf_block_get(&cl->block, &cl->filter_list);
+ if (err) {
+ kfree(cl);
+ return err;
+ }
+
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
+ tcf_block_put(cl->block);
kfree(cl);
goto failure;
}
@@ -1688,8 +1696,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
return 0;
}
-static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
- unsigned long arg)
+static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
@@ -1697,7 +1704,7 @@ static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
if (cl == NULL)
cl = &q->link;
- return &cl->filter_list;
+ return cl->block;
}
static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
@@ -1756,7 +1763,7 @@ static const struct Qdisc_class_ops cbq_class_ops = {
.change = cbq_change_class,
.delete = cbq_delete,
.walk = cbq_walk,
- .tcf_chain = cbq_find_tcf,
+ .tcf_block = cbq_tcf_block,
.bind_tcf = cbq_bind_filter,
.unbind_tcf = cbq_unbind_filter,
.dump = cbq_dump_class,
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 58a8c32eab23..5db2a2843c66 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -36,6 +36,7 @@ struct drr_class {
struct drr_sched {
struct list_head active;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
struct Qdisc_class_hash clhash;
};
@@ -190,15 +191,14 @@ static void drr_put_class(struct Qdisc *sch, unsigned long arg)
drr_destroy_class(sch, cl);
}
-static struct tcf_proto __rcu **drr_tcf_chain(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct drr_sched *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
@@ -333,7 +333,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -431,6 +431,9 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
struct drr_sched *q = qdisc_priv(sch);
int err;
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
err = qdisc_class_hash_init(&q->clhash);
if (err < 0)
return err;
@@ -462,7 +465,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch)
struct hlist_node *next;
unsigned int i;
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -477,7 +480,7 @@ static const struct Qdisc_class_ops drr_class_ops = {
.delete = drr_delete_class,
.get = drr_get_class,
.put = drr_put_class,
- .tcf_chain = drr_tcf_chain,
+ .tcf_block = drr_tcf_block,
.bind_tcf = drr_bind_tcf,
.unbind_tcf = drr_unbind_tcf,
.graft = drr_graft_class,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 1c0f877f673a..7ccdd825d34e 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -44,6 +44,7 @@ struct mask_value {
struct dsmark_qdisc_data {
struct Qdisc *q;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
struct mask_value *mv;
u16 indices;
u8 set_tc_index;
@@ -183,11 +184,11 @@ ignore:
}
}
-static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
- return &p->filter_list;
+
+ return p->block;
}
/* --------------------------- Qdisc operations ---------------------------- */
@@ -234,7 +235,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
else {
struct tcf_result res;
struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result = tc_classify(skb, fl, &res, false);
+ int result = tcf_classify(skb, fl, &res, false);
pr_debug("result %d class 0x%04x\n", result, res.classid);
@@ -342,6 +343,10 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
goto errout;
+ err = tcf_block_get(&p->block, &p->filter_list);
+ if (err)
+ return err;
+
err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy, NULL);
if (err < 0)
goto errout;
@@ -400,7 +405,7 @@ static void dsmark_destroy(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
- tcf_destroy_chain(&p->filter_list);
+ tcf_block_put(p->block);
qdisc_destroy(p->q);
if (p->mv != p->embedded)
kfree(p->mv);
@@ -468,7 +473,7 @@ static const struct Qdisc_class_ops dsmark_class_ops = {
.change = dsmark_change,
.delete = dsmark_delete,
.walk = dsmark_walk,
- .tcf_chain = dsmark_find_tcf,
+ .tcf_block = dsmark_tcf_block,
.bind_tcf = dsmark_bind_filter,
.unbind_tcf = dsmark_put,
.dump = dsmark_dump_class,
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index b488721a0059..147fde73a0f5 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -390,9 +390,17 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
q->stat_tcp_retrans++;
qdisc_qstats_backlog_inc(sch, skb);
if (fq_flow_is_detached(f)) {
+ struct sock *sk = skb->sk;
+
fq_flow_add_tail(&q->new_flows, f);
if (time_after(jiffies, f->age + q->flow_refill_delay))
f->credit = max_t(u32, f->credit, q->quantum);
+ if (sk && q->rate_enable) {
+ if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
+ SK_PACING_FQ))
+ smp_store_release(&sk->sk_pacing_status,
+ SK_PACING_FQ);
+ }
q->inactive_flows--;
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 9201abce928c..f201e73947fb 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -55,6 +55,7 @@ struct fq_codel_flow {
struct fq_codel_sched_data {
struct tcf_proto __rcu *filter_list; /* optional external classifier */
+ struct tcf_block *block;
struct fq_codel_flow *flows; /* Flows table [flows_cnt] */
u32 *backlogs; /* backlog table [flows_cnt] */
u32 flows_cnt; /* number of flows */
@@ -96,7 +97,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
return fq_codel_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, filter, &res, false);
+ result = tcf_classify(skb, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -450,7 +451,7 @@ static void fq_codel_destroy(struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
kvfree(q->backlogs);
kvfree(q->flows);
}
@@ -459,6 +460,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
int i;
+ int err;
sch->limit = 10*1024;
q->flows_cnt = 1024;
@@ -478,6 +480,10 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
return err;
}
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
+
if (!q->flows) {
q->flows = kvzalloc(q->flows_cnt *
sizeof(struct fq_codel_flow), GFP_KERNEL);
@@ -589,14 +595,13 @@ static void fq_codel_put(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_proto __rcu **fq_codel_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *fq_codel_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -679,7 +684,7 @@ static const struct Qdisc_class_ops fq_codel_class_ops = {
.leaf = fq_codel_leaf,
.get = fq_codel_get,
.put = fq_codel_put,
- .tcf_chain = fq_codel_find_tcf,
+ .tcf_block = fq_codel_tcf_block,
.bind_tcf = fq_codel_bind,
.unbind_tcf = fq_codel_put,
.dump = fq_codel_dump_class,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5cb82f6c1b06..a324f84b1ccd 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -116,6 +116,7 @@ struct hfsc_class {
struct gnet_stats_queue qstats;
struct net_rate_estimator __rcu *rate_est;
struct tcf_proto __rcu *filter_list; /* filter list */
+ struct tcf_block *block;
unsigned int filter_cnt; /* filter count */
unsigned int level; /* class level in hierarchy */
@@ -1040,12 +1041,19 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
+ err = tcf_block_get(&cl->block, &cl->filter_list);
+ if (err) {
+ kfree(cl);
+ return err;
+ }
+
if (tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
NULL,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE]);
if (err) {
+ tcf_block_put(cl->block);
kfree(cl);
return err;
}
@@ -1091,7 +1099,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
{
struct hfsc_sched *q = qdisc_priv(sch);
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
qdisc_destroy(cl->qdisc);
gen_kill_estimator(&cl->rate_est);
if (cl != &q->root)
@@ -1142,7 +1150,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
head = &q->root;
tcf = rcu_dereference_bh(q->root.filter_list);
- while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
+ while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
@@ -1261,8 +1269,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
cl->filter_cnt--;
}
-static struct tcf_proto __rcu **
-hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
+static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl = (struct hfsc_class *)arg;
@@ -1270,7 +1277,7 @@ hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
if (cl == NULL)
cl = &q->root;
- return &cl->filter_list;
+ return cl->block;
}
static int
@@ -1515,7 +1522,7 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode)
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
}
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1662,7 +1669,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = {
.put = hfsc_put_class,
.bind_tcf = hfsc_bind_tcf,
.unbind_tcf = hfsc_unbind_tcf,
- .tcf_chain = hfsc_tcf_chain,
+ .tcf_block = hfsc_tcf_block,
.dump = hfsc_dump_class,
.dump_stats = hfsc_dump_class_stats,
.walk = hfsc_walk
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 570ef3b0c09b..195bbca9eb0b 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -105,6 +105,7 @@ struct htb_class {
int quantum; /* but stored for parent-to-leaf return */
struct tcf_proto __rcu *filter_list; /* class attached filters */
+ struct tcf_block *block;
int filter_cnt;
int refcnt; /* usage count of this class */
@@ -156,6 +157,7 @@ struct htb_sched {
/* filters for qdisc itself */
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
#define HTB_WARN_TOOMANYEVENTS 0x1
unsigned int warned; /* only one warning */
@@ -231,7 +233,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
+ while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
@@ -1017,6 +1019,10 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
+
err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy, NULL);
if (err < 0)
return err;
@@ -1230,7 +1236,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
qdisc_destroy(cl->un.leaf.q);
}
gen_kill_estimator(&cl->rate_est);
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
kfree(cl);
}
@@ -1248,11 +1254,11 @@ static void htb_destroy(struct Qdisc *sch)
* because filter need its target class alive to be able to call
* unbind_filter on it (without Oops).
*/
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
- tcf_destroy_chain(&cl->filter_list);
+ tcf_block_put(cl->block);
}
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1396,6 +1402,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!cl)
goto failure;
+ err = tcf_block_get(&cl->block, &cl->filter_list);
+ if (err) {
+ kfree(cl);
+ goto failure;
+ }
if (htb_rate_est || tca[TCA_RATE]) {
err = gen_new_estimator(&cl->bstats, NULL,
&cl->rate_est,
@@ -1403,6 +1414,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
qdisc_root_sleeping_running(sch),
tca[TCA_RATE] ? : &est.nla);
if (err) {
+ tcf_block_put(cl->block);
kfree(cl);
goto failure;
}
@@ -1521,14 +1533,12 @@ failure:
return err;
}
-static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch,
- unsigned long arg)
+static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
- struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list;
- return fl;
+ return cl ? cl->block : q->block;
}
static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
@@ -1591,7 +1601,7 @@ static const struct Qdisc_class_ops htb_class_ops = {
.change = htb_change_class,
.delete = htb_delete,
.walk = htb_walk,
- .tcf_chain = htb_find_tcf,
+ .tcf_block = htb_tcf_block,
.bind_tcf = htb_bind_filter,
.unbind_tcf = htb_unbind_filter,
.dump = htb_dump_class,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 3bab5f66c392..d8a9bebcab90 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -18,6 +18,10 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+struct ingress_sched_data {
+ struct tcf_block *block;
+};
+
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
{
return NULL;
@@ -47,16 +51,23 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
}
-static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
{
- struct net_device *dev = qdisc_dev(sch);
+ struct ingress_sched_data *q = qdisc_priv(sch);
- return &dev->ingress_cl_list;
+ return q->block;
}
static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
{
+ struct ingress_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int err;
+
+ err = tcf_block_get(&q->block, &dev->ingress_cl_list);
+ if (err)
+ return err;
+
net_inc_ingress_queue();
sch->flags |= TCQ_F_CPUSTATS;
@@ -65,9 +76,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
static void ingress_destroy(struct Qdisc *sch)
{
- struct net_device *dev = qdisc_dev(sch);
+ struct ingress_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&dev->ingress_cl_list);
+ tcf_block_put(q->block);
net_dec_ingress_queue();
}
@@ -91,7 +102,7 @@ static const struct Qdisc_class_ops ingress_class_ops = {
.get = ingress_get,
.put = ingress_put,
.walk = ingress_walk,
- .tcf_chain = ingress_find_tcf,
+ .tcf_block = ingress_tcf_block,
.tcf_cl_offload = ingress_cl_offload,
.bind_tcf = ingress_bind_filter,
.unbind_tcf = ingress_put,
@@ -100,12 +111,18 @@ static const struct Qdisc_class_ops ingress_class_ops = {
static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.cl_ops = &ingress_class_ops,
.id = "ingress",
+ .priv_size = sizeof(struct ingress_sched_data),
.init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
.owner = THIS_MODULE,
};
+struct clsact_sched_data {
+ struct tcf_block *ingress_block;
+ struct tcf_block *egress_block;
+};
+
static unsigned long clsact_get(struct Qdisc *sch, u32 classid)
{
switch (TC_H_MIN(classid)) {
@@ -128,16 +145,15 @@ static unsigned long clsact_bind_filter(struct Qdisc *sch,
return clsact_get(sch, classid);
}
-static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl)
{
- struct net_device *dev = qdisc_dev(sch);
+ struct clsact_sched_data *q = qdisc_priv(sch);
switch (cl) {
case TC_H_MIN(TC_H_MIN_INGRESS):
- return &dev->ingress_cl_list;
+ return q->ingress_block;
case TC_H_MIN(TC_H_MIN_EGRESS):
- return &dev->egress_cl_list;
+ return q->egress_block;
default:
return NULL;
}
@@ -145,6 +161,18 @@ static struct tcf_proto __rcu **clsact_find_tcf(struct Qdisc *sch,
static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
{
+ struct clsact_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int err;
+
+ err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list);
+ if (err)
+ return err;
+
+ err = tcf_block_get(&q->egress_block, &dev->egress_cl_list);
+ if (err)
+ return err;
+
net_inc_ingress_queue();
net_inc_egress_queue();
@@ -155,10 +183,10 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
static void clsact_destroy(struct Qdisc *sch)
{
- struct net_device *dev = qdisc_dev(sch);
+ struct clsact_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&dev->ingress_cl_list);
- tcf_destroy_chain(&dev->egress_cl_list);
+ tcf_block_put(q->egress_block);
+ tcf_block_put(q->ingress_block);
net_dec_ingress_queue();
net_dec_egress_queue();
@@ -169,7 +197,7 @@ static const struct Qdisc_class_ops clsact_class_ops = {
.get = clsact_get,
.put = ingress_put,
.walk = ingress_walk,
- .tcf_chain = clsact_find_tcf,
+ .tcf_block = clsact_tcf_block,
.tcf_cl_offload = clsact_cl_offload,
.bind_tcf = clsact_bind_filter,
.unbind_tcf = ingress_put,
@@ -178,6 +206,7 @@ static const struct Qdisc_class_ops clsact_class_ops = {
static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
.cl_ops = &clsact_class_ops,
.id = "clsact",
+ .priv_size = sizeof(struct clsact_sched_data),
.init = clsact_init,
.destroy = clsact_destroy,
.dump = ingress_dump,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 43a3a10b3c81..604767482ad0 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -32,6 +32,7 @@ struct multiq_sched_data {
u16 max_bands;
u16 curband;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
struct Qdisc **queues;
};
@@ -46,7 +47,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- err = tc_classify(skb, fl, &res, false);
+ err = tcf_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
@@ -170,7 +171,7 @@ multiq_destroy(struct Qdisc *sch)
int band;
struct multiq_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
for (band = 0; band < q->bands; band++)
qdisc_destroy(q->queues[band]);
@@ -243,6 +244,10 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
+
q->max_bands = qdisc_dev(sch)->num_tx_queues;
q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
@@ -367,14 +372,13 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_proto __rcu **multiq_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct multiq_sched_data *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static const struct Qdisc_class_ops multiq_class_ops = {
@@ -383,7 +387,7 @@ static const struct Qdisc_class_ops multiq_class_ops = {
.get = multiq_get,
.put = multiq_put,
.walk = multiq_walk,
- .tcf_chain = multiq_find_tcf,
+ .tcf_block = multiq_tcf_block,
.bind_tcf = multiq_bind,
.unbind_tcf = multiq_put,
.dump = multiq_dump_class,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 92c2e6d448d7..a2404688dd01 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -25,6 +25,7 @@
struct prio_sched_data {
int bands;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
u8 prio2band[TC_PRIO_MAX+1];
struct Qdisc *queues[TCQ_PRIO_BANDS];
};
@@ -42,7 +43,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
- err = tc_classify(skb, fl, &res, false);
+ err = tcf_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
@@ -145,7 +146,7 @@ prio_destroy(struct Qdisc *sch)
int prio;
struct prio_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -204,9 +205,16 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
static int prio_init(struct Qdisc *sch, struct nlattr *opt)
{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ int err;
+
if (!opt)
return -EINVAL;
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
+
return prio_tune(sch, opt);
}
@@ -317,14 +325,13 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
}
}
-static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct prio_sched_data *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static const struct Qdisc_class_ops prio_class_ops = {
@@ -333,7 +340,7 @@ static const struct Qdisc_class_ops prio_class_ops = {
.get = prio_get,
.put = prio_put,
.walk = prio_walk,
- .tcf_chain = prio_find_tcf,
+ .tcf_block = prio_tcf_block,
.bind_tcf = prio_bind,
.unbind_tcf = prio_put,
.dump = prio_dump_class,
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 041eba3006cc..076ad032befb 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -182,6 +182,7 @@ struct qfq_group {
struct qfq_sched {
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
struct Qdisc_class_hash clhash;
u64 oldV, V; /* Precise virtual times. */
@@ -582,15 +583,14 @@ static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
qfq_destroy_class(sch, cl);
}
-static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct qfq_sched *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
@@ -720,7 +720,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -1438,6 +1438,10 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
int i, j, err;
u32 max_cl_shift, maxbudg_shift, max_classes;
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
+
err = qdisc_class_hash_init(&q->clhash);
if (err < 0)
return err;
@@ -1492,7 +1496,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch)
struct hlist_node *next;
unsigned int i;
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
@@ -1508,7 +1512,7 @@ static const struct Qdisc_class_ops qfq_class_ops = {
.delete = qfq_delete_class,
.get = qfq_get_class,
.put = qfq_put_class,
- .tcf_chain = qfq_tcf_chain,
+ .tcf_block = qfq_tcf_block,
.bind_tcf = qfq_bind_tcf,
.unbind_tcf = qfq_unbind_tcf,
.graft = qfq_graft_class,
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 0f777273ba29..9756b1ccd345 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -56,6 +56,7 @@ struct sfb_bins {
struct sfb_sched_data {
struct Qdisc *qdisc;
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
unsigned long rehash_interval;
unsigned long warmup_time; /* double buffering warmup time in jiffies */
u32 max;
@@ -259,7 +260,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
struct tcf_result res;
int result;
- result = tc_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -465,7 +466,7 @@ static void sfb_destroy(struct Qdisc *sch)
{
struct sfb_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
qdisc_destroy(q->qdisc);
}
@@ -549,6 +550,11 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
{
struct sfb_sched_data *q = qdisc_priv(sch);
+ int err;
+
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
q->qdisc = &noop_qdisc;
return sfb_change(sch, opt);
@@ -657,14 +663,13 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
}
}
-static struct tcf_proto __rcu **sfb_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct sfb_sched_data *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
@@ -682,7 +687,7 @@ static const struct Qdisc_class_ops sfb_class_ops = {
.change = sfb_change_class,
.delete = sfb_delete,
.walk = sfb_walk,
- .tcf_chain = sfb_find_tcf,
+ .tcf_block = sfb_tcf_block,
.bind_tcf = sfb_bind,
.unbind_tcf = sfb_put,
.dump = sfb_dump_class,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 332d94be6e1c..66dfd15b7946 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -126,6 +126,7 @@ struct sfq_sched_data {
u8 flags;
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
sfq_index *ht; /* Hash table ('divisor' slots) */
struct sfq_slot *slots; /* Flows table ('maxflows' entries) */
@@ -180,7 +181,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return sfq_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -697,7 +698,7 @@ static void sfq_destroy(struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(&q->filter_list);
+ tcf_block_put(q->block);
q->perturb_period = 0;
del_timer_sync(&q->perturb_timer);
sfq_free(q->ht);
@@ -709,6 +710,11 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
struct sfq_sched_data *q = qdisc_priv(sch);
int i;
+ int err;
+
+ err = tcf_block_get(&q->block, &q->filter_list);
+ if (err)
+ return err;
setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
(unsigned long)sch);
@@ -815,14 +821,13 @@ static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}
-static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch,
- unsigned long cl)
+static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl)
{
struct sfq_sched_data *q = qdisc_priv(sch);
if (cl)
return NULL;
- return &q->filter_list;
+ return q->block;
}
static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -878,7 +883,7 @@ static const struct Qdisc_class_ops sfq_class_ops = {
.leaf = sfq_leaf,
.get = sfq_get,
.put = sfq_put,
- .tcf_chain = sfq_find_tcf,
+ .tcf_block = sfq_tcf_block,
.bind_tcf = sfq_bind,
.unbind_tcf = sfq_put,
.dump = sfq_dump_class,
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 4f5a2b580aa5..275925b93b29 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -35,6 +35,7 @@
static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
{
skb->ip_summed = CHECKSUM_NONE;
+ skb->csum_not_inet = 0;
return sctp_compute_cksum(skb, skb_transport_offset(skb));
}
@@ -98,6 +99,11 @@ static const struct net_offload sctp6_offload = {
},
};
+static const struct skb_checksum_ops crc32c_csum_ops = {
+ .update = sctp_csum_update,
+ .combine = sctp_csum_combine,
+};
+
int __init sctp_offload_init(void)
{
int ret;
@@ -110,6 +116,7 @@ int __init sctp_offload_init(void)
if (ret)
goto ipv4;
+ crc32c_csum_stub = &crc32c_csum_ops;
return ret;
ipv4:
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1409a875ad8e..e2edf2ebbade 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -538,6 +538,7 @@ merge:
} else {
chksum:
head->ip_summed = CHECKSUM_PARTIAL;
+ head->csum_not_inet = 1;
head->csum_start = skb_transport_header(head) - head->head;
head->csum_offset = offsetof(struct sctphdr, checksum);
}
diff --git a/net/socket.c b/net/socket.c
index c2564eb25c6b..8f9dab330d57 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -461,7 +461,7 @@ EXPORT_SYMBOL(sock_from_file);
* @err: pointer to an error code return
*
* The file handle passed in is locked and the socket it is bound
- * too is returned. If an error occurs the err pointer is overwritten
+ * to is returned. If an error occurs the err pointer is overwritten
* with a negative errno code and NULL is returned. The function checks
* for both invalid handles and passing a handle which is not a socket.
*
@@ -662,6 +662,40 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
return skb->pkt_type == PACKET_OUTGOING;
}
+/* On transmit, software and hardware timestamps are returned independently.
+ * As the two skb clones share the hardware timestamp, which may be updated
+ * before the software timestamp is received, a hardware TX timestamp may be
+ * returned only if there is no software TX timestamp. Ignore false software
+ * timestamps, which may be made in the __sock_recv_timestamp() call when the
+ * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
+ * hardware timestamp.
+ */
+static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
+{
+ return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
+}
+
+static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
+{
+ struct scm_ts_pktinfo ts_pktinfo;
+ struct net_device *orig_dev;
+
+ if (!skb_mac_header_was_set(skb))
+ return;
+
+ memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
+
+ rcu_read_lock();
+ orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
+ if (orig_dev)
+ ts_pktinfo.if_index = orig_dev->ifindex;
+ rcu_read_unlock();
+
+ ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
+ put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
+ sizeof(ts_pktinfo), &ts_pktinfo);
+}
+
/*
* called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
*/
@@ -670,14 +704,16 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
{
int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
struct scm_timestamping tss;
- int empty = 1;
+ int empty = 1, false_tstamp = 0;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
/* Race occurred between timestamp enabling and packet
receiving. Fill in the current time for now. */
- if (need_software_tstamp && skb->tstamp == 0)
+ if (need_software_tstamp && skb->tstamp == 0) {
__net_timestamp(skb);
+ false_tstamp = 1;
+ }
if (need_software_tstamp) {
if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
@@ -699,8 +735,13 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
empty = 0;
if (shhwtstamps &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
- ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
+ !skb_is_swtx_tstamp(skb, false_tstamp) &&
+ ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
empty = 0;
+ if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+ !skb_is_err_queue(skb))
+ put_ts_pktinfo(msg, skb);
+ }
if (!empty) {
put_cmsg(msg, SOL_SOCKET,
SCM_TIMESTAMPING, sizeof(tss), &tss);