summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2021-11-26 22:26:29 +0100
committerJakub Kicinski <kuba@kernel.org>2021-11-26 22:45:19 +0100
commit93d5404e8988882bd33f6acc0d343c4db51eb8b4 (patch)
treedd08a576dab4d61fda56dd005c7b2d0001a04297 /net
parentnfc: fdp: Merge the same judgment (diff)
parentMerge tag 'net-5.16-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/net... (diff)
downloadlinux-93d5404e8988882bd33f6acc0d343c4db51eb8b4.tar.xz
linux-93d5404e8988882bd33f6acc0d343c4db51eb8b4.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
drivers/net/ipa/ipa_main.c 8afc7e471ad3 ("net: ipa: separate disabling setup from modem stop") 76b5fbcd6b47 ("net: ipa: kill ipa_modem_init()") Duplicated include, drop one. Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c3
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/core/neighbour.c1
-rw-r--r--net/ethtool/ioctl.c2
-rw-r--r--net/ipv4/nexthop.c35
-rw-r--r--net/ipv4/tcp_cubic.c5
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/route.c19
-rw-r--r--net/mptcp/options.c32
-rw-r--r--net/mptcp/protocol.c51
-rw-r--r--net/mptcp/protocol.h17
-rw-r--r--net/ncsi/ncsi-cmd.c24
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c8
-rw-r--r--net/netfilter/nf_conntrack_netlink.c6
-rw-r--r--net/netfilter/nf_flow_table_offload.c4
-rw-r--r--net/netfilter/nft_payload.c1
-rw-r--r--net/netfilter/xt_IDLETIMER.c4
-rw-r--r--net/sched/sch_ets.c8
-rw-r--r--net/smc/af_smc.c14
-rw-r--r--net/smc/smc_close.c10
-rw-r--r--net/smc/smc_core.c35
-rw-r--r--net/tls/tls_main.c47
-rw-r--r--net/tls/tls_sw.c40
-rw-r--r--net/unix/af_unix.c3
-rw-r--r--net/vmw_vsock/virtio_transport.c1
26 files changed, 255 insertions, 121 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 0c00200c9cb2..788076b002b3 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -184,9 +184,6 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
if (err)
goto out_unregister_netdev;
- /* Account for reference in struct vlan_dev_priv */
- dev_hold(real_dev);
-
vlan_stacked_transfer_operstate(real_dev, dev, vlan);
linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e9499a7c19fe..866556e041b7 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -615,6 +615,9 @@ static int vlan_dev_init(struct net_device *dev)
if (!vlan->vlan_pcpu_stats)
return -ENOMEM;
+ /* Get vlan's reference to real_dev */
+ dev_hold(real_dev);
+
return 0;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 47931c8be04b..72ba027c34cf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1779,6 +1779,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl)
{
neigh_tables[index] = NULL;
/* It is not clean... Fix it to unload IPv6 module safely */
+ cancel_delayed_work_sync(&tbl->managed_work);
cancel_delayed_work_sync(&tbl->gc_work);
del_timer_sync(&tbl->proxy_timer);
pneigh_queue_purge(&tbl->proxy_queue);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index af2d4e022076..06d3866c69b4 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1719,7 +1719,7 @@ static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
struct ethtool_coalesce coalesce;
int ret;
- if (!dev->ethtool_ops->set_coalesce && !dev->ethtool_ops->get_coalesce)
+ if (!dev->ethtool_ops->set_coalesce || !dev->ethtool_ops->get_coalesce)
return -EOPNOTSUPP;
ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 9e8100728d46..5dbd4b5505eb 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1899,15 +1899,36 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
/* if any FIB entries reference this nexthop, any dst entries
* need to be regenerated
*/
-static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
+static void nh_rt_cache_flush(struct net *net, struct nexthop *nh,
+ struct nexthop *replaced_nh)
{
struct fib6_info *f6i;
+ struct nh_group *nhg;
+ int i;
if (!list_empty(&nh->fi_list))
rt_cache_flush(net);
list_for_each_entry(f6i, &nh->f6i_list, nh_list)
ipv6_stub->fib6_update_sernum(net, f6i);
+
+ /* if an IPv6 group was replaced, we have to release all old
+ * dsts to make sure all refcounts are released
+ */
+ if (!replaced_nh->is_group)
+ return;
+
+ /* new dsts must use only the new nexthop group */
+ synchronize_net();
+
+ nhg = rtnl_dereference(replaced_nh->nh_grp);
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+ struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info);
+
+ if (nhi->family == AF_INET6)
+ ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh);
+ }
}
static int replace_nexthop_grp(struct net *net, struct nexthop *old,
@@ -2247,7 +2268,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old,
err = replace_nexthop_single(net, old, new, extack);
if (!err) {
- nh_rt_cache_flush(net, old);
+ nh_rt_cache_flush(net, old, new);
__remove_nexthop(net, new, NULL);
nexthop_put(new);
@@ -2544,11 +2565,15 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh,
/* sets nh_dev if successful */
err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
extack);
- if (err)
+ if (err) {
+ /* IPv6 is not enabled, don't call fib6_nh_release */
+ if (err == -EAFNOSUPPORT)
+ goto out;
ipv6_stub->fib6_nh_release(fib6_nh);
- else
+ } else {
nh->nh_flags = fib6_nh->fib_nh_flags;
-
+ }
+out:
return err;
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 5e9d9c51164c..e07837e23b3f 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -330,8 +330,6 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
if (tcp_in_slow_start(tp)) {
- if (hystart && after(ack, ca->end_seq))
- bictcp_hystart_reset(sk);
acked = tcp_slow_start(tp, acked);
if (!acked)
return;
@@ -391,6 +389,9 @@ static void hystart_update(struct sock *sk, u32 delay)
struct bictcp *ca = inet_csk_ca(sk);
u32 threshold;
+ if (after(tp->snd_una, ca->end_seq))
+ bictcp_hystart_reset(sk);
+
if (hystart_detect & HYSTART_ACK_TRAIN) {
u32 now = bictcp_clock_us(sk);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1f28c9820c2e..d1636425654e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1023,6 +1023,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
.fib6_nh_init = fib6_nh_init,
.fib6_nh_release = fib6_nh_release,
+ .fib6_nh_release_dsts = fib6_nh_release_dsts,
.fib6_update_sernum = fib6_update_sernum_stub,
.fib6_rt_update = fib6_rt_update,
.ip6_del_rt = ip6_del_rt,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 007e433d4d4d..2995f8d89e7e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -174,7 +174,7 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
if (skb_dst(skb)->xfrm) {
- IPCB(skb)->flags |= IPSKB_REROUTED;
+ IP6CB(skb)->flags |= IP6SKB_REROUTED;
return dst_output(net, sk, skb);
}
#endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ecc6df6592a8..62f1e16eea2b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3678,6 +3678,25 @@ void fib6_nh_release(struct fib6_nh *fib6_nh)
fib_nh_common_release(&fib6_nh->nh_common);
}
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
+{
+ int cpu;
+
+ if (!fib6_nh->rt6i_pcpu)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info *pcpu_rt, **ppcpu_rt;
+
+ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+ pcpu_rt = xchg(ppcpu_rt, NULL);
+ if (pcpu_rt) {
+ dst_dev_put(&pcpu_rt->dst);
+ dst_release(&pcpu_rt->dst);
+ }
+ }
+}
+
static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
gfp_t gfp_flags,
struct netlink_ext_ack *extack)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 7c3420afb1a0..fe98e4f475ba 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -422,28 +422,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
return false;
}
-/* MP_JOIN client subflow must wait for 4th ack before sending any data:
- * TCP can't schedule delack timer before the subflow is fully established.
- * MPTCP uses the delack timer to do 3rd ack retransmissions
- */
-static void schedule_3rdack_retransmission(struct sock *sk)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- unsigned long timeout;
-
- /* reschedule with a timeout above RTT, as we must look only for drop */
- if (tp->srtt_us)
- timeout = tp->srtt_us << 1;
- else
- timeout = TCP_TIMEOUT_INIT;
-
- WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
- icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
- icsk->icsk_ack.timeout = timeout;
- sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
-}
-
static void clear_3rdack_retransmission(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -526,7 +504,15 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
*size = TCPOLEN_MPTCP_MPJ_ACK;
pr_debug("subflow=%p", subflow);
- schedule_3rdack_retransmission(sk);
+ /* we can use the full delegate action helper only from BH context
+ * If we are in process context - sk is flushing the backlog at
+ * socket lock release time - just set the appropriate flag, will
+ * be handled by the release callback
+ */
+ if (sock_owned_by_user(sk))
+ set_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status);
+ else
+ mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_ACK);
return true;
}
return false;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 6db93da59843..b100048e43fe 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1596,7 +1596,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
if (!xmit_ssk)
goto out;
if (xmit_ssk != ssk) {
- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+ mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
+ MPTCP_DELEGATE_SEND);
goto out;
}
@@ -2943,7 +2944,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
if (xmit_ssk == ssk)
__mptcp_subflow_push_pending(sk, ssk);
else if (xmit_ssk)
- mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+ mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
} else {
set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
}
@@ -2993,18 +2994,50 @@ static void mptcp_release_cb(struct sock *sk)
__mptcp_update_rmem(sk);
}
+/* MP_JOIN client subflow must wait for 4th ack before sending any data:
+ * TCP can't schedule delack timer before the subflow is fully established.
+ * MPTCP uses the delack timer to do 3rd ack retransmissions
+ */
+static void schedule_3rdack_retransmission(struct sock *ssk)
+{
+ struct inet_connection_sock *icsk = inet_csk(ssk);
+ struct tcp_sock *tp = tcp_sk(ssk);
+ unsigned long timeout;
+
+ if (mptcp_subflow_ctx(ssk)->fully_established)
+ return;
+
+ /* reschedule with a timeout above RTT, as we must look only for drop */
+ if (tp->srtt_us)
+ timeout = usecs_to_jiffies(tp->srtt_us >> (3 - 1));
+ else
+ timeout = TCP_TIMEOUT_INIT;
+ timeout += jiffies;
+
+ WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
+ icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+ icsk->icsk_ack.timeout = timeout;
+ sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
+}
+
void mptcp_subflow_process_delegated(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk))
- __mptcp_subflow_push_pending(sk, ssk);
- else
- set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
- mptcp_data_unlock(sk);
- mptcp_subflow_delegated_done(subflow);
+ if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ __mptcp_subflow_push_pending(sk, ssk);
+ else
+ set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+ mptcp_data_unlock(sk);
+ mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
+ }
+ if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
+ schedule_3rdack_retransmission(ssk);
+ mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
+ }
}
static int mptcp_hash(struct sock *sk)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 67a61ac48b20..d87cc040352e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -387,6 +387,7 @@ struct mptcp_delegated_action {
DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
#define MPTCP_DELEGATE_SEND 0
+#define MPTCP_DELEGATE_ACK 1
/* MPTCP subflow context */
struct mptcp_subflow_context {
@@ -492,23 +493,23 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
void mptcp_subflow_process_delegated(struct sock *ssk);
-static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow)
+static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
{
struct mptcp_delegated_action *delegated;
bool schedule;
+ /* the caller held the subflow bh socket lock */
+ lockdep_assert_in_softirq();
+
/* The implied barrier pairs with mptcp_subflow_delegated_done(), and
* ensures the below list check sees list updates done prior to status
* bit changes
*/
- if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+ if (!test_and_set_bit(action, &subflow->delegated_status)) {
/* still on delegated list from previous scheduling */
if (!list_empty(&subflow->delegated_node))
return;
- /* the caller held the subflow bh socket lock */
- lockdep_assert_in_softirq();
-
delegated = this_cpu_ptr(&mptcp_delegated_actions);
schedule = list_empty(&delegated->head);
list_add_tail(&subflow->delegated_node, &delegated->head);
@@ -533,16 +534,16 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
{
- return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+ return !!READ_ONCE(subflow->delegated_status);
}
-static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow)
+static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
{
/* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before
* touching the status bit
*/
smp_wmb();
- clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+ clear_bit(action, &subflow->delegated_status);
}
int mptcp_is_enabled(const struct net *net);
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index ba9ae482141b..dda8b76b7798 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -18,6 +18,8 @@
#include "internal.h"
#include "ncsi-pkt.h"
+static const int padding_bytes = 26;
+
u32 ncsi_calculate_checksum(unsigned char *data, int len)
{
u32 checksum = 0;
@@ -213,12 +215,17 @@ static int ncsi_cmd_handler_oem(struct sk_buff *skb,
{
struct ncsi_cmd_oem_pkt *cmd;
unsigned int len;
+ int payload;
+ /* NC-SI spec DSP_0222_1.2.0, section 8.2.2.2
+ * requires payload to be padded with 0 to
+ * 32-bit boundary before the checksum field.
+ * Ensure the padding bytes are accounted for in
+ * skb allocation
+ */
+ payload = ALIGN(nca->payload, 4);
len = sizeof(struct ncsi_cmd_pkt_hdr) + 4;
- if (nca->payload < 26)
- len += 26;
- else
- len += nca->payload;
+ len += max(payload, padding_bytes);
cmd = skb_put_zero(skb, len);
memcpy(&cmd->mfr_id, nca->data, nca->payload);
@@ -272,6 +279,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
struct net_device *dev = nd->dev;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
+ int payload;
int len = hlen + tlen;
struct sk_buff *skb;
struct ncsi_request *nr;
@@ -281,14 +289,14 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
return NULL;
/* NCSI command packet has 16-bytes header, payload, 4 bytes checksum.
+ * Payload needs padding so that the checksum field following payload is
+ * aligned to 32-bit boundary.
* The packet needs padding if its payload is less than 26 bytes to
* meet 64 bytes minimal ethernet frame length.
*/
len += sizeof(struct ncsi_cmd_pkt_hdr) + 4;
- if (nca->payload < 26)
- len += 26;
- else
- len += nca->payload;
+ payload = ALIGN(nca->payload, 4);
+ len += max(payload, padding_bytes);
/* Allocate skb */
skb = alloc_skb(len, GFP_ATOMIC);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e93c937a8bf0..51ad557a525b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1919,7 +1919,6 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, pkts;
- int conn_reuse_mode;
struct sock *sk;
int af = state->pf;
@@ -1997,15 +1996,16 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
ipvs, af, skb, &iph);
- conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
- if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
bool old_ct = false, resched = false;
if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
unlikely(!atomic_read(&cp->dest->weight))) {
resched = true;
old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
- } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+ } else if (conn_reuse_mode &&
+ is_new_conn_expected(cp, conn_reuse_mode)) {
old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
if (!atomic_read(&cp->n_control)) {
resched = true;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3d6c8da3de1f..849fa7f4353c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1011,11 +1011,9 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
CTA_TUPLE_REPLY,
filter->family,
&filter->zone,
- filter->orig_flags);
- if (err < 0) {
- err = -EINVAL;
+ filter->reply_flags);
+ if (err < 0)
goto err_filter;
- }
}
return filter;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index d6bf1b2cd541..b561e0a44a45 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -65,11 +65,11 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
sizeof(struct in6_addr));
if (memcmp(&key->enc_ipv6.src, &in6addr_any,
sizeof(struct in6_addr)))
- memset(&key->enc_ipv6.src, 0xff,
+ memset(&mask->enc_ipv6.src, 0xff,
sizeof(struct in6_addr));
if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
sizeof(struct in6_addr)))
- memset(&key->enc_ipv6.dst, 0xff,
+ memset(&mask->enc_ipv6.dst, 0xff,
sizeof(struct in6_addr));
enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index cbfe4e4a4ad7..bd689938a2e0 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -22,7 +22,6 @@
#include <linux/icmpv6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/ip.h>
#include <net/sctp/checksum.h>
static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 2f7cf5ecebf4..0f8bb0bf558f 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -85,9 +85,9 @@ static ssize_t idletimer_tg_show(struct device *dev,
mutex_unlock(&list_mutex);
if (time_after(expires, jiffies) || ktimespec.tv_sec > 0)
- return snprintf(buf, PAGE_SIZE, "%ld\n", time_diff);
+ return sysfs_emit(buf, "%ld\n", time_diff);
- return snprintf(buf, PAGE_SIZE, "0\n");
+ return sysfs_emit(buf, "0\n");
}
static void idletimer_tg_work(struct work_struct *work)
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 0eae9ff5edf6..e007fc75ef2f 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -665,12 +665,14 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
q->classes[i].deficit = quanta[i];
}
}
+ for (i = q->nbands; i < oldbands; i++) {
+ qdisc_tree_flush_backlog(q->classes[i].qdisc);
+ if (i >= q->nstrict)
+ list_del(&q->classes[i].alist);
+ }
q->nstrict = nstrict;
memcpy(q->prio2band, priomap, sizeof(priomap));
- for (i = q->nbands; i < oldbands; i++)
- qdisc_tree_flush_backlog(q->classes[i].qdisc);
-
for (i = 0; i < q->nbands; i++)
q->classes[i].quantum = quanta[i];
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5e93a5aa347e..67a78bbf305f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -585,7 +585,7 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
* to clcsocket->wq during the fallback.
*/
spin_lock_irqsave(&smc_wait->lock, flags);
- spin_lock(&clc_wait->lock);
+ spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
list_splice_init(&smc_wait->head, &clc_wait->head);
spin_unlock(&clc_wait->lock);
spin_unlock_irqrestore(&smc_wait->lock, flags);
@@ -2134,8 +2134,10 @@ static int smc_listen(struct socket *sock, int backlog)
smc->clcsock->sk->sk_user_data =
(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
rc = kernel_listen(smc->clcsock, backlog);
- if (rc)
+ if (rc) {
+ smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
goto out;
+ }
sk->sk_max_ack_backlog = backlog;
sk->sk_ack_backlog = 0;
sk->sk_state = SMC_LISTEN;
@@ -2368,8 +2370,10 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
static int smc_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
+ bool do_shutdown = true;
struct smc_sock *smc;
int rc = -EINVAL;
+ int old_state;
int rc1 = 0;
smc = smc_sk(sk);
@@ -2396,7 +2400,11 @@ static int smc_shutdown(struct socket *sock, int how)
}
switch (how) {
case SHUT_RDWR: /* shutdown in both directions */
+ old_state = sk->sk_state;
rc = smc_close_active(smc);
+ if (old_state == SMC_ACTIVE &&
+ sk->sk_state == SMC_PEERCLOSEWAIT1)
+ do_shutdown = false;
break;
case SHUT_WR:
rc = smc_close_shutdown_write(smc);
@@ -2406,7 +2414,7 @@ static int smc_shutdown(struct socket *sock, int how)
/* nothing more to do because peer is not involved */
break;
}
- if (smc->clcsock)
+ if (do_shutdown && smc->clcsock)
rc1 = kernel_sock_shutdown(smc->clcsock, how);
/* map sock_shutdown_cmd constants to sk_shutdown value range */
sk->sk_shutdown |= how + 1;
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 0f9ffba07d26..3715d2f5ad55 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -228,6 +228,12 @@ again:
/* send close request */
rc = smc_close_final(conn);
sk->sk_state = SMC_PEERCLOSEWAIT1;
+
+ /* actively shutdown clcsock before peer close it,
+ * prevent peer from entering TIME_WAIT state.
+ */
+ if (smc->clcsock && smc->clcsock->sk)
+ rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
} else {
/* peer event has changed the state */
goto again;
@@ -354,9 +360,9 @@ static void smc_close_passive_work(struct work_struct *work)
if (rxflags->peer_conn_abort) {
/* peer has not received all data */
smc_close_passive_abort_received(smc);
- release_sock(&smc->sk);
+ release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
- lock_sock(&smc->sk);
+ lock_sock(sk);
goto wakeup;
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 25ebd30feecd..bb52c8b5f148 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1672,14 +1672,26 @@ static void smc_link_down_work(struct work_struct *work)
mutex_unlock(&lgr->llc_conf_mutex);
}
-/* Determine vlan of internal TCP socket.
- * @vlan_id: address to store the determined vlan id into
- */
+static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
+ struct netdev_nested_priv *priv)
+{
+ unsigned short *vlan_id = (unsigned short *)priv->data;
+
+ if (is_vlan_dev(lower_dev)) {
+ *vlan_id = vlan_dev_vlan_id(lower_dev);
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Determine vlan of internal TCP socket. */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
struct dst_entry *dst = sk_dst_get(clcsock->sk);
+ struct netdev_nested_priv priv;
struct net_device *ndev;
- int i, nest_lvl, rc = 0;
+ int rc = 0;
ini->vlan_id = 0;
if (!dst) {
@@ -1697,20 +1709,9 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
goto out_rel;
}
+ priv.data = (void *)&ini->vlan_id;
rtnl_lock();
- nest_lvl = ndev->lower_level;
- for (i = 0; i < nest_lvl; i++) {
- struct list_head *lower = &ndev->adj_list.lower;
-
- if (list_empty(lower))
- break;
- lower = lower->next;
- ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
- if (is_vlan_dev(ndev)) {
- ini->vlan_id = vlan_dev_vlan_id(ndev);
- break;
- }
- }
+ netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
rtnl_unlock();
out_rel:
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index acfba9f1ba72..6bc2879ba637 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -61,7 +61,7 @@ static DEFINE_MUTEX(tcpv6_prot_mutex);
static const struct proto *saved_tcpv4_prot;
static DEFINE_MUTEX(tcpv4_prot_mutex);
static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
-static struct proto_ops tls_sw_proto_ops;
+static struct proto_ops tls_proto_ops[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
const struct proto *base);
@@ -71,6 +71,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx)
WRITE_ONCE(sk->sk_prot,
&tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]);
+ WRITE_ONCE(sk->sk_socket->ops,
+ &tls_proto_ops[ip_ver][ctx->tx_conf][ctx->rx_conf]);
}
int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -669,8 +671,6 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
if (tx) {
ctx->sk_write_space = sk->sk_write_space;
sk->sk_write_space = tls_write_space;
- } else {
- sk->sk_socket->ops = &tls_sw_proto_ops;
}
goto out;
@@ -728,6 +728,39 @@ struct tls_context *tls_ctx_create(struct sock *sk)
return ctx;
}
+static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
+ const struct proto_ops *base)
+{
+ ops[TLS_BASE][TLS_BASE] = *base;
+
+ ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
+ ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked;
+
+ ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE];
+ ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read;
+
+ ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE];
+ ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read;
+
+#ifdef CONFIG_TLS_DEVICE
+ ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
+ ops[TLS_HW ][TLS_BASE].sendpage_locked = NULL;
+
+ ops[TLS_HW ][TLS_SW ] = ops[TLS_BASE][TLS_SW ];
+ ops[TLS_HW ][TLS_SW ].sendpage_locked = NULL;
+
+ ops[TLS_BASE][TLS_HW ] = ops[TLS_BASE][TLS_SW ];
+
+ ops[TLS_SW ][TLS_HW ] = ops[TLS_SW ][TLS_SW ];
+
+ ops[TLS_HW ][TLS_HW ] = ops[TLS_HW ][TLS_SW ];
+ ops[TLS_HW ][TLS_HW ].sendpage_locked = NULL;
+#endif
+#ifdef CONFIG_TLS_TOE
+ ops[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
+#endif
+}
+
static void tls_build_proto(struct sock *sk)
{
int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
@@ -739,6 +772,8 @@ static void tls_build_proto(struct sock *sk)
mutex_lock(&tcpv6_prot_mutex);
if (likely(prot != saved_tcpv6_prot)) {
build_protos(tls_prots[TLSV6], prot);
+ build_proto_ops(tls_proto_ops[TLSV6],
+ sk->sk_socket->ops);
smp_store_release(&saved_tcpv6_prot, prot);
}
mutex_unlock(&tcpv6_prot_mutex);
@@ -749,6 +784,8 @@ static void tls_build_proto(struct sock *sk)
mutex_lock(&tcpv4_prot_mutex);
if (likely(prot != saved_tcpv4_prot)) {
build_protos(tls_prots[TLSV4], prot);
+ build_proto_ops(tls_proto_ops[TLSV4],
+ sk->sk_socket->ops);
smp_store_release(&saved_tcpv4_prot, prot);
}
mutex_unlock(&tcpv4_prot_mutex);
@@ -959,10 +996,6 @@ static int __init tls_register(void)
if (err)
return err;
- tls_sw_proto_ops = inet_stream_ops;
- tls_sw_proto_ops.splice_read = tls_sw_splice_read;
- tls_sw_proto_ops.sendpage_locked = tls_sw_sendpage_locked;
-
tls_device_init();
tcp_register_ulp(&tcp_tls_ulp_ops);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index d81564078557..d3e7ff90889e 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2005,6 +2005,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
struct sock *sk = sock->sk;
struct sk_buff *skb;
ssize_t copied = 0;
+ bool from_queue;
int err = 0;
long timeo;
int chunk;
@@ -2014,25 +2015,28 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
- skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err);
- if (!skb)
- goto splice_read_end;
-
- if (!ctx->decrypted) {
- err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
-
- /* splice does not support reading control messages */
- if (ctx->control != TLS_RECORD_TYPE_DATA) {
- err = -EINVAL;
+ from_queue = !skb_queue_empty(&ctx->rx_list);
+ if (from_queue) {
+ skb = __skb_dequeue(&ctx->rx_list);
+ } else {
+ skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo,
+ &err);
+ if (!skb)
goto splice_read_end;
- }
+ err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
if (err < 0) {
tls_err_abort(sk, -EBADMSG);
goto splice_read_end;
}
- ctx->decrypted = 1;
}
+
+ /* splice does not support reading control messages */
+ if (ctx->control != TLS_RECORD_TYPE_DATA) {
+ err = -EINVAL;
+ goto splice_read_end;
+ }
+
rxm = strp_msg(skb);
chunk = min_t(unsigned int, rxm->full_len, len);
@@ -2040,7 +2044,17 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
if (copied < 0)
goto splice_read_end;
- tls_sw_advance_skb(sk, skb, copied);
+ if (!from_queue) {
+ ctx->recv_pkt = NULL;
+ __strp_unpause(&ctx->strp);
+ }
+ if (chunk < rxm->full_len) {
+ __skb_queue_head(&ctx->rx_list, skb);
+ rxm->offset += len;
+ rxm->full_len -= len;
+ } else {
+ consume_skb(skb);
+ }
splice_read_end:
release_sock(sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 54e5553a150e..b9ff1f31f3af 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2878,9 +2878,6 @@ static int unix_shutdown(struct socket *sock, int mode)
unix_state_lock(sk);
sk->sk_shutdown |= mode;
- if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
- mode == SHUTDOWN_MASK)
- sk->sk_state = TCP_CLOSE;
other = unix_peer(sk);
if (other)
sock_hold(other);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 4f7c99dfd16c..3f82b2f1e6dd 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -731,6 +731,7 @@ static unsigned int features[] = {
static struct virtio_driver virtio_vsock_driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
+ .suppress_used_validation = true,
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,