summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-07-20 23:45:10 +0200
committerDavid S. Miller <davem@davemloft.net>2018-07-21 06:17:12 +0200
commitc4c5551df136a7c4edd7c2f433d9a296b39826a2 (patch)
treea33d9f228efd34b0ad18e380385093405ef72d98 /net
parenttipc: make link capability update thread safe (diff)
parentMerge tag 'vfio-v4.18-rc6' of git://github.com/awilliam/linux-vfio (diff)
downloadlinux-c4c5551df136a7c4edd7c2f433d9a296b39826a2.tar.xz
linux-c4c5551df136a7c4edd7c2f433d9a296b39826a2.zip
Merge ra.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux
All conflicts were trivial overlapping changes, so reasonably easy to resolve. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c3
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/batman-adv/bat_v.c4
-rw-r--r--net/batman-adv/debugfs.c40
-rw-r--r--net/batman-adv/debugfs.h11
-rw-r--r--net/batman-adv/hard-interface.c37
-rw-r--r--net/batman-adv/translation-table.c7
-rw-r--r--net/bpf/test_run.c17
-rw-r--r--net/core/filter.c149
-rw-r--r--net/core/gen_stats.c16
-rw-r--r--net/core/skbuff.c1
-rw-r--r--net/dns_resolver/dns_key.c28
-rw-r--r--net/ieee802154/6lowpan/core.c6
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/igmp.c58
-rw-r--r--net/ipv4/inet_fragment.c2
-rw-r--r--net/ipv4/ip_sockglue.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c18
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_dctcp.c31
-rw-r--r--net/ipv4/tcp_ipv4.c23
-rw-r--r--net/ipv4/tcp_output.c4
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/calipso.c9
-rw-r--r--net/ipv6/exthdrs.c111
-rw-r--r--net/ipv6/ip6_fib.c156
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/ipv6_sockglue.c32
-rw-r--r--net/ipv6/mcast.c64
-rw-r--r--net/ipv6/ndisc.c2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c2
-rw-r--r--net/ipv6/netfilter/nf_tproxy_ipv6.c18
-rw-r--r--net/ipv6/route.c10
-rw-r--r--net/ipv6/seg6_iptunnel.c2
-rw-r--r--net/netfilter/Kconfig25
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_tables_set_core.c28
-rw-r--r--net/netfilter/nft_compat.c13
-rw-r--r--net/netfilter/nft_set_bitmap.c19
-rw-r--r--net/netfilter/nft_set_hash.c29
-rw-r--r--net/netfilter/nft_set_rbtree.c19
-rw-r--r--net/netfilter/xt_TPROXY.c8
-rw-r--r--net/nfc/llcp_commands.c9
-rw-r--r--net/nsh/nsh.c2
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/qrtr/qrtr.c13
-rw-r--r--net/sched/act_csum.c6
-rw-r--r--net/sched/act_tunnel_key.c6
-rw-r--r--net/sched/cls_api.c4
-rw-r--r--net/sched/sch_fq_codel.c25
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/smc/af_smc.c40
-rw-r--r--net/smc/smc_clc.c3
-rw-r--r--net/smc/smc_close.c2
-rw-r--r--net/tipc/discover.c18
-rw-r--r--net/tipc/net.c17
-rw-r--r--net/tipc/node.c7
-rw-r--r--net/tls/tls_sw.c7
-rw-r--r--net/xdp/xsk.c30
-rw-r--r--net/xdp/xsk_queue.h9
64 files changed, 764 insertions, 485 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 18c5271910dc..5c1343195292 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -225,7 +225,8 @@ static int parse_opts(char *opts, struct p9_client *clnt)
}
free_and_return:
- v9fs_put_trans(clnt->trans_mod);
+ if (ret)
+ v9fs_put_trans(clnt->trans_mod);
kfree(tmp_options);
return ret;
}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index be09a9883825..73bf6a93a3cf 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2732,7 +2732,7 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
{
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
struct batadv_neigh_node *router;
- struct batadv_gw_node *curr_gw;
+ struct batadv_gw_node *curr_gw = NULL;
int ret = 0;
void *hdr;
@@ -2780,6 +2780,8 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
ret = 0;
out:
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
if (router_ifinfo)
batadv_neigh_ifinfo_put(router_ifinfo);
if (router)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index ec93337ee259..6baec4e68898 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -927,7 +927,7 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
{
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
struct batadv_neigh_node *router;
- struct batadv_gw_node *curr_gw;
+ struct batadv_gw_node *curr_gw = NULL;
int ret = 0;
void *hdr;
@@ -995,6 +995,8 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
ret = 0;
out:
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
if (router_ifinfo)
batadv_neigh_ifinfo_put(router_ifinfo);
if (router)
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 95a94160baf8..3cb82378300b 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -19,6 +19,7 @@
#include "debugfs.h"
#include "main.h"
+#include <linux/dcache.h>
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/errno.h>
@@ -344,6 +345,25 @@ out:
}
/**
+ * batadv_debugfs_rename_hardif() - Fix debugfs path for renamed hardif
+ * @hard_iface: hard interface which was renamed
+ */
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface)
+{
+ const char *name = hard_iface->net_dev->name;
+ struct dentry *dir;
+ struct dentry *d;
+
+ dir = hard_iface->debug_dir;
+ if (!dir)
+ return;
+
+ d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
+ if (!d)
+ pr_err("Can't rename debugfs dir to %s\n", name);
+}
+
+/**
* batadv_debugfs_del_hardif() - delete the base directory for a hard interface
* in debugfs.
* @hard_iface: hard interface which is deleted.
@@ -414,6 +434,26 @@ out:
}
/**
+ * batadv_debugfs_rename_meshif() - Fix debugfs path for renamed softif
+ * @dev: net_device which was renamed
+ */
+void batadv_debugfs_rename_meshif(struct net_device *dev)
+{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+ const char *name = dev->name;
+ struct dentry *dir;
+ struct dentry *d;
+
+ dir = bat_priv->debug_dir;
+ if (!dir)
+ return;
+
+ d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
+ if (!d)
+ pr_err("Can't rename debugfs dir to %s\n", name);
+}
+
+/**
* batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries
* @dev: netdev struct of the soft interface
*/
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 37b069698b04..08a592ffbee5 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -30,8 +30,10 @@ struct net_device;
void batadv_debugfs_init(void);
void batadv_debugfs_destroy(void);
int batadv_debugfs_add_meshif(struct net_device *dev);
+void batadv_debugfs_rename_meshif(struct net_device *dev);
void batadv_debugfs_del_meshif(struct net_device *dev);
int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface);
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface);
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface);
#else
@@ -49,6 +51,10 @@ static inline int batadv_debugfs_add_meshif(struct net_device *dev)
return 0;
}
+static inline void batadv_debugfs_rename_meshif(struct net_device *dev)
+{
+}
+
static inline void batadv_debugfs_del_meshif(struct net_device *dev)
{
}
@@ -60,6 +66,11 @@ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
}
static inline
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface)
+{
+}
+
+static inline
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
{
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c405d15befd6..2f0d42f2f913 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -989,6 +989,32 @@ void batadv_hardif_remove_interfaces(void)
rtnl_unlock();
}
+/**
+ * batadv_hard_if_event_softif() - Handle events for soft interfaces
+ * @event: NETDEV_* event to handle
+ * @net_dev: net_device which generated an event
+ *
+ * Return: NOTIFY_* result
+ */
+static int batadv_hard_if_event_softif(unsigned long event,
+ struct net_device *net_dev)
+{
+ struct batadv_priv *bat_priv;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ batadv_sysfs_add_meshif(net_dev);
+ bat_priv = netdev_priv(net_dev);
+ batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS);
+ break;
+ case NETDEV_CHANGENAME:
+ batadv_debugfs_rename_meshif(net_dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
static int batadv_hard_if_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -997,12 +1023,8 @@ static int batadv_hard_if_event(struct notifier_block *this,
struct batadv_hard_iface *primary_if = NULL;
struct batadv_priv *bat_priv;
- if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) {
- batadv_sysfs_add_meshif(net_dev);
- bat_priv = netdev_priv(net_dev);
- batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS);
- return NOTIFY_DONE;
- }
+ if (batadv_softif_is_valid(net_dev))
+ return batadv_hard_if_event_softif(event, net_dev);
hard_iface = batadv_hardif_get_by_netdev(net_dev);
if (!hard_iface && (event == NETDEV_REGISTER ||
@@ -1051,6 +1073,9 @@ static int batadv_hard_if_event(struct notifier_block *this,
if (batadv_is_wifi_hardif(hard_iface))
hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
break;
+ case NETDEV_CHANGENAME:
+ batadv_debugfs_rename_hardif(hard_iface);
+ break;
default:
break;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 3986551397ca..12a2b7d21376 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1705,7 +1705,9 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
ether_addr_copy(common->addr, tt_addr);
common->vid = vid;
- common->flags = flags;
+ if (!is_multicast_ether_addr(common->addr))
+ common->flags = flags & (~BATADV_TT_SYNC_MASK);
+
tt_global_entry->roam_at = 0;
/* node must store current time in case of roaming. This is
* needed to purge this entry out on timeout (if nobody claims
@@ -1768,7 +1770,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
* TT_CLIENT_TEMP, therefore they have to be copied in the
* client entry
*/
- common->flags |= flags & (~BATADV_TT_SYNC_MASK);
+ if (!is_multicast_ether_addr(common->addr))
+ common->flags |= flags & (~BATADV_TT_SYNC_MASK);
/* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only
* one originator left in the list and we previously received a
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 68c3578343b4..22a78eedf4b1 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -96,6 +96,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
u32 retval, duration;
+ int hh_len = ETH_HLEN;
struct sk_buff *skb;
void *data;
int ret;
@@ -131,12 +132,22 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
skb_reset_network_header(skb);
if (is_l2)
- __skb_push(skb, ETH_HLEN);
+ __skb_push(skb, hh_len);
if (is_direct_pkt_access)
bpf_compute_data_pointers(skb);
retval = bpf_test_run(prog, skb, repeat, &duration);
- if (!is_l2)
- __skb_push(skb, ETH_HLEN);
+ if (!is_l2) {
+ if (skb_headroom(skb) < hh_len) {
+ int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
+
+ if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ }
+ memset(__skb_push(skb, hh_len), 0, hh_len);
+ }
+
size = skb->len;
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
diff --git a/net/core/filter.c b/net/core/filter.c
index b9ec916f4e3a..104d560946da 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -459,11 +459,21 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
(!unaligned_ok && offset >= 0 &&
offset + ip_align >= 0 &&
offset + ip_align % size == 0))) {
+ bool ldx_off_ok = offset <= S16_MAX;
+
*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
- *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, size, 2 + endian);
- *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, BPF_REG_D,
- offset);
+ *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
+ size, 2 + endian + (!ldx_off_ok * 2));
+ if (ldx_off_ok) {
+ *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
+ BPF_REG_D, offset);
+ } else {
+ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
+ *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
+ BPF_REG_TMP, 0);
+ }
if (endian)
*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
*insn++ = BPF_JMP_A(8);
@@ -1762,6 +1772,37 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
.arg2_type = ARG_ANYTHING,
};
+static inline int sk_skb_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ int err = __bpf_try_make_writable(skb, write_len);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return err;
+}
+
+BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+ /* Idea is the following: should the needed direct read/write
+ * test fail during runtime, we can pull in more data and redo
+ * again, since implicitly, we invalidate previous checks here.
+ *
+ * Or, since we know how much we need to make read/writeable,
+ * this can be done once at the program beginning for direct
+ * access case. By this we overcome limitations of only current
+ * headroom being accessible.
+ */
+ return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto sk_skb_pull_data_proto = {
+ .func = sk_skb_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
u64, from, u64, to, u64, flags)
{
@@ -2779,7 +2820,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
static u32 __bpf_skb_max_len(const struct sk_buff *skb)
{
- return skb->dev->mtu + skb->dev->hard_header_len;
+ return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
+ SKB_MAX_ALLOC;
}
static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
@@ -2863,8 +2905,8 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
return __skb_trim_rcsum(skb, new_len);
}
-BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
- u64, flags)
+static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
+ u64 flags)
{
u32 max_len = __bpf_skb_max_len(skb);
u32 min_len = __bpf_skb_min_len(skb);
@@ -2900,6 +2942,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
if (!ret && skb_is_gso(skb))
skb_gso_reset(skb);
}
+ return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_tail(skb, new_len, flags);
bpf_compute_data_pointers(skb);
return ret;
@@ -2914,9 +2963,27 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
.arg3_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
u64, flags)
{
+ int ret = __bpf_skb_change_tail(skb, new_len, flags);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_tail_proto = {
+ .func = sk_skb_change_tail,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
+ u64 flags)
+{
u32 max_len = __bpf_skb_max_len(skb);
u32 new_len = skb->len + head_room;
int ret;
@@ -2941,8 +3008,16 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
skb_reset_mac_header(skb);
}
+ return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_head(skb, head_room, flags);
+
bpf_compute_data_pointers(skb);
- return 0;
+ return ret;
}
static const struct bpf_func_proto bpf_skb_change_head_proto = {
@@ -2954,6 +3029,23 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_head(skb, head_room, flags);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_head_proto = {
+ .func = sk_skb_change_head,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
{
return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3046,12 +3138,16 @@ static int __bpf_tx_xdp(struct net_device *dev,
u32 index)
{
struct xdp_frame *xdpf;
- int sent;
+ int err, sent;
if (!dev->netdev_ops->ndo_xdp_xmit) {
return -EOPNOTSUPP;
}
+ err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+ if (unlikely(err))
+ return err;
+
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
@@ -3285,7 +3381,8 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
goto err;
}
- if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ err = xdp_ok_fwd_dev(fwd, skb->len);
+ if (unlikely(err))
goto err;
skb->dev = fwd;
@@ -4439,10 +4536,10 @@ static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
.arg4_type = ARG_CONST_SIZE
};
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
const void *, from, u32, len)
{
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
void *srh_tlvs, *srh_end, *ptr;
@@ -4468,9 +4565,6 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
memcpy(skb->data + offset, from, len);
return 0;
-#else /* CONFIG_IPV6_SEG6_BPF */
- return -EOPNOTSUPP;
-#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
@@ -4486,7 +4580,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
u32, action, void *, param, u32, param_len)
{
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh;
@@ -4534,9 +4627,6 @@ BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
default:
return -EINVAL;
}
-#else /* CONFIG_IPV6_SEG6_BPF */
- return -EOPNOTSUPP;
-#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
@@ -4552,7 +4642,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
s32, len)
{
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
void *srh_end, *srh_tlvs, *ptr;
@@ -4596,9 +4685,6 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
srh_state->hdrlen += len;
srh_state->valid = 0;
return 0;
-#else /* CONFIG_IPV6_SEG6_BPF */
- return -EOPNOTSUPP;
-#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
@@ -4609,6 +4695,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
+#endif /* CONFIG_IPV6_SEG6_BPF */
bool bpf_helper_changes_pkt_data(void *func)
{
@@ -4617,9 +4704,12 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_skb_store_bytes ||
func == bpf_skb_change_proto ||
func == bpf_skb_change_head ||
+ func == sk_skb_change_head ||
func == bpf_skb_change_tail ||
+ func == sk_skb_change_tail ||
func == bpf_skb_adjust_room ||
func == bpf_skb_pull_data ||
+ func == sk_skb_pull_data ||
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
@@ -4627,11 +4717,12 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_xdp_adjust_meta ||
func == bpf_msg_pull_data ||
func == bpf_xdp_adjust_tail ||
- func == bpf_lwt_push_encap ||
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
func == bpf_lwt_seg6_store_bytes ||
func == bpf_lwt_seg6_adjust_srh ||
- func == bpf_lwt_seg6_action
- )
+ func == bpf_lwt_seg6_action ||
+#endif
+ func == bpf_lwt_push_encap)
return true;
return false;
@@ -4872,11 +4963,11 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
case BPF_FUNC_skb_pull_data:
- return &bpf_skb_pull_data_proto;
+ return &sk_skb_pull_data_proto;
case BPF_FUNC_skb_change_tail:
- return &bpf_skb_change_tail_proto;
+ return &sk_skb_change_tail_proto;
case BPF_FUNC_skb_change_head:
- return &bpf_skb_change_head_proto;
+ return &sk_skb_change_head_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
@@ -4967,12 +5058,14 @@ static const struct bpf_func_proto *
lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
case BPF_FUNC_lwt_seg6_store_bytes:
return &bpf_lwt_seg6_store_bytes_proto;
case BPF_FUNC_lwt_seg6_action:
return &bpf_lwt_seg6_action_proto;
case BPF_FUNC_lwt_seg6_adjust_srh:
return &bpf_lwt_seg6_adjust_srh_proto;
+#endif
default:
return lwt_out_func_proto(func_id, prog);
}
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index b2b2323bdc84..188d693cb251 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -77,8 +77,20 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
d->lock = lock;
spin_lock_bh(lock);
}
- if (d->tail)
- return gnet_stats_copy(d, type, NULL, 0, padattr);
+ if (d->tail) {
+ int ret = gnet_stats_copy(d, type, NULL, 0, padattr);
+
+ /* The initial attribute added in gnet_stats_copy() may be
+ * preceded by a padding attribute, in which case d->tail will
+ * end up pointing at the padding instead of the real attribute.
+ * Fix this so gnet_stats_finish_copy() adjusts the length of
+ * the right attribute.
+ */
+ if (ret == 0 && d->tail->nla_type == padattr)
+ d->tail = (struct nlattr *)((char *)d->tail +
+ NLA_ALIGN(d->tail->nla_len));
+ return ret;
+ }
return 0;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c4e24ac27464..0c1a00672ba9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -858,6 +858,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
n->cloned = 1;
n->nohdr = 0;
n->peeked = 0;
+ C(pfmemalloc);
n->destructor = NULL;
C(tail);
C(end);
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 40c851693f77..0c9478b91fa5 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -86,35 +86,39 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
opt++;
kdebug("options: '%s'", opt);
do {
+ int opt_len, opt_nlen;
const char *eq;
- int opt_len, opt_nlen, opt_vlen, tmp;
+ char optval[128];
next_opt = memchr(opt, '#', end - opt) ?: end;
opt_len = next_opt - opt;
- if (opt_len <= 0 || opt_len > 128) {
+ if (opt_len <= 0 || opt_len > sizeof(optval)) {
pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n",
opt_len);
return -EINVAL;
}
- eq = memchr(opt, '=', opt_len) ?: end;
- opt_nlen = eq - opt;
- eq++;
- opt_vlen = next_opt - eq; /* will be -1 if no value */
+ eq = memchr(opt, '=', opt_len);
+ if (eq) {
+ opt_nlen = eq - opt;
+ eq++;
+ memcpy(optval, eq, next_opt - eq);
+ optval[next_opt - eq] = '\0';
+ } else {
+ opt_nlen = opt_len;
+ optval[0] = '\0';
+ }
- tmp = opt_vlen >= 0 ? opt_vlen : 0;
- kdebug("option '%*.*s' val '%*.*s'",
- opt_nlen, opt_nlen, opt, tmp, tmp, eq);
+ kdebug("option '%*.*s' val '%s'",
+ opt_nlen, opt_nlen, opt, optval);
/* see if it's an error number representing a DNS error
* that's to be recorded as the result in this key */
if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 &&
memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) {
kdebug("dns error number option");
- if (opt_vlen <= 0)
- goto bad_option_value;
- ret = kstrtoul(eq, 10, &derrno);
+ ret = kstrtoul(optval, 10, &derrno);
if (ret < 0)
goto bad_option_value;
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 275449b0d633..3297e7fa9945 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -90,12 +90,18 @@ static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
return 0;
}
+static int lowpan_get_iflink(const struct net_device *dev)
+{
+ return lowpan_802154_dev(dev)->wdev->ifindex;
+}
+
static const struct net_device_ops lowpan_netdev_ops = {
.ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
.ndo_open = lowpan_open,
.ndo_stop = lowpan_stop,
.ndo_neigh_construct = lowpan_neigh_construct,
+ .ndo_get_iflink = lowpan_get_iflink,
};
static void lowpan_setup(struct net_device *ldev)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b21833651394..e46cdd310e5f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -300,6 +300,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
struct flowi4 fl4 = {
.flowi4_iif = LOOPBACK_IFINDEX,
+ .flowi4_oif = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
.flowi4_scope = scope,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 85b617b655bc..b3c899a630a0 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1200,13 +1200,14 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
spin_lock_bh(&im->lock);
if (pmc) {
im->interface = pmc->interface;
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
im->sfmode = pmc->sfmode;
if (pmc->sfmode == MCAST_INCLUDE) {
im->tomb = pmc->tomb;
im->sources = pmc->sources;
for (psf = im->sources; psf; psf = psf->sf_next)
- psf->sf_crcount = im->crcount;
+ psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ } else {
+ im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
}
in_dev_put(pmc->interface);
kfree(pmc);
@@ -1288,7 +1289,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
#endif
}
-static void igmp_group_added(struct ip_mc_list *im)
+static void igmp_group_added(struct ip_mc_list *im, unsigned int mode)
{
struct in_device *in_dev = im->interface;
#ifdef CONFIG_IP_MULTICAST
@@ -1316,7 +1317,13 @@ static void igmp_group_added(struct ip_mc_list *im)
}
/* else, v3 */
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ /* Based on RFC3376 5.1, for newly added INCLUDE SSM, we should
+ * not send filter-mode change record as the mode should be from
+ * IN() to IN(A).
+ */
+ if (mode == MCAST_EXCLUDE)
+ im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+
igmp_ifc_event(in_dev);
#endif
}
@@ -1381,8 +1388,7 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
/*
* A socket has joined a multicast group on device dev.
*/
-
-void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
+void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode)
{
struct ip_mc_list *im;
#ifdef CONFIG_IP_MULTICAST
@@ -1394,7 +1400,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
for_each_pmc_rtnl(in_dev, im) {
if (im->multiaddr == addr) {
im->users++;
- ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
+ ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0);
goto out;
}
}
@@ -1408,8 +1414,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
in_dev_hold(in_dev);
im->multiaddr = addr;
/* initial mode is (EX, empty) */
- im->sfmode = MCAST_EXCLUDE;
- im->sfcount[MCAST_EXCLUDE] = 1;
+ im->sfmode = mode;
+ im->sfcount[mode] = 1;
refcount_set(&im->refcnt, 1);
spin_lock_init(&im->lock);
#ifdef CONFIG_IP_MULTICAST
@@ -1426,12 +1432,17 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, im);
#endif
- igmp_group_added(im);
+ igmp_group_added(im, mode);
if (!in_dev->dead)
ip_rt_multicast_event(in_dev);
out:
return;
}
+
+void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
+{
+ __ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE);
+}
EXPORT_SYMBOL(ip_mc_inc_group);
static int ip_mc_check_iphdr(struct sk_buff *skb)
@@ -1688,7 +1699,7 @@ void ip_mc_remap(struct in_device *in_dev)
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, pmc);
#endif
- igmp_group_added(pmc);
+ igmp_group_added(pmc, pmc->sfmode);
}
}
@@ -1751,7 +1762,7 @@ void ip_mc_up(struct in_device *in_dev)
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, pmc);
#endif
- igmp_group_added(pmc);
+ igmp_group_added(pmc, pmc->sfmode);
}
}
@@ -2130,8 +2141,8 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
/* Join a multicast group
*/
-
-int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
+ unsigned int mode)
{
__be32 addr = imr->imr_multiaddr.s_addr;
struct ip_mc_socklist *iml, *i;
@@ -2172,15 +2183,30 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
memcpy(&iml->multi, imr, sizeof(*imr));
iml->next_rcu = inet->mc_list;
iml->sflist = NULL;
- iml->sfmode = MCAST_EXCLUDE;
+ iml->sfmode = mode;
rcu_assign_pointer(inet->mc_list, iml);
- ip_mc_inc_group(in_dev, addr);
+ __ip_mc_inc_group(in_dev, addr, mode);
err = 0;
done:
return err;
}
+
+/* Join ASM (Any-Source Multicast) group
+ */
+int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+{
+ return __ip_mc_join_group(sk, imr, MCAST_EXCLUDE);
+}
EXPORT_SYMBOL(ip_mc_join_group);
+/* Join SSM (Source-Specific Multicast) group
+ */
+int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr,
+ unsigned int mode)
+{
+ return __ip_mc_join_group(sk, imr, mode);
+}
+
static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
struct in_device *in_dev)
{
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 316518f87294..d3162baca9f1 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -91,7 +91,7 @@ static void inet_frags_free_cb(void *ptr, void *arg)
void inet_frags_exit_net(struct netns_frags *nf)
{
- nf->low_thresh = 0; /* prevent creation of new frags */
+ nf->high_thresh = 0; /* prevent creation of new frags */
rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index fc32fdbeefa6..64c76dcf7386 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -984,7 +984,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
mreq.imr_address.s_addr = mreqs.imr_interface;
mreq.imr_ifindex = 0;
- err = ip_mc_join_group(sk, &mreq);
+ err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
if (err && err != -EADDRINUSE)
break;
omode = MCAST_INCLUDE;
@@ -1061,7 +1061,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
mreq.imr_multiaddr = psin->sin_addr;
mreq.imr_address.s_addr = 0;
mreq.imr_ifindex = greqs.gsr_interface;
- err = ip_mc_join_group(sk, &mreq);
+ err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
if (err && err != -EADDRINUSE)
break;
greqs.gsr_interface = mreq.imr_ifindex;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ca0dad90803a..e77872c93c20 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1898,6 +1898,7 @@ static struct xt_match ipt_builtin_mt[] __read_mostly = {
.checkentry = icmp_checkentry,
.proto = IPPROTO_ICMP,
.family = NFPROTO_IPV4,
+ .me = THIS_MODULE,
},
};
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 805e83ec3ad9..164714104965 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -37,7 +37,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ sk2 = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NF_TPROXY_LOOKUP_LISTENER);
@@ -71,7 +71,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
EXPORT_SYMBOL_GPL(nf_tproxy_laddr4);
struct sock *
-nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
@@ -79,16 +79,21 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
- struct tcphdr *tcph;
switch (protocol) {
- case IPPROTO_TCP:
+ case IPPROTO_TCP: {
+ struct tcphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, ip_hdrlen(skb),
+ sizeof(struct tcphdr), &_hdr);
+ if (hp == NULL)
+ return NULL;
+
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
- tcph = hp;
sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
ip_hdrlen(skb) +
- __tcp_hdrlen(tcph),
+ __tcp_hdrlen(hp),
saddr, sport,
daddr, dport,
in->ifindex, 0);
@@ -110,6 +115,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
BUG();
}
break;
+ }
case IPPROTO_UDP:
sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index af0a857d8352..5fa335fd3852 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -189,8 +189,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
if (write && ret == 0) {
low = make_kgid(user_ns, urange[0]);
high = make_kgid(user_ns, urange[1]);
- if (!gid_valid(low) || !gid_valid(high) ||
- (urange[1] < urange[0]) || gid_lt(high, low)) {
+ if (!gid_valid(low) || !gid_valid(high))
+ return -EINVAL;
+ if (urange[1] < urange[0] || gid_lt(high, low)) {
low = make_kgid(&init_user_ns, 1);
high = make_kgid(&init_user_ns, 0);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e3704a49164b..bce53b1728a6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1994,7 +1994,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
* shouldn't happen.
*/
if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
- "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
+ "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n",
*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
flags))
break;
@@ -2009,7 +2009,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
WARN(!(flags & MSG_PEEK),
- "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
+ "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n",
*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
}
@@ -2555,6 +2555,8 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
+ tp->copied_seq = tp->rcv_nxt;
+ tp->urg_data = 0;
tcp_write_queue_purge(sk);
tcp_fastopen_active_disable_ofo_check(sk);
skb_rbtree_purge(&tp->out_of_order_queue);
@@ -2815,14 +2817,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_REPAIR:
if (!tcp_can_repair_sock(sk))
err = -EPERM;
- else if (val == 1) {
+ else if (val == TCP_REPAIR_ON) {
tp->repair = 1;
sk->sk_reuse = SK_FORCE_REUSE;
tp->repair_queue = TCP_NO_QUEUE;
- } else if (val == 0) {
+ } else if (val == TCP_REPAIR_OFF) {
tp->repair = 0;
sk->sk_reuse = SK_NO_REUSE;
tcp_send_window_probe(sk);
+ } else if (val == TCP_REPAIR_OFF_NO_WP) {
+ tp->repair = 0;
+ sk->sk_reuse = SK_NO_REUSE;
} else
err = -EINVAL;
@@ -3714,8 +3719,7 @@ int tcp_abort(struct sock *sk, int err)
struct request_sock *req = inet_reqsk(sk);
local_bh_disable();
- inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
- req);
+ inet_csk_reqsk_queue_drop(req->rsk_listener, req);
local_bh_enable();
return 0;
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 5f5e5936760e..5869f89ca656 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -55,7 +55,6 @@ struct dctcp {
u32 dctcp_alpha;
u32 next_seq;
u32 ce_state;
- u32 delayed_ack_reserved;
u32 loss_cwnd;
};
@@ -96,7 +95,6 @@ static void dctcp_init(struct sock *sk)
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
- ca->delayed_ack_reserved = 0;
ca->loss_cwnd = 0;
ca->ce_state = 0;
@@ -134,7 +132,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
/* State has changed from CE=0 to CE=1 and delayed
* ACK has not sent yet.
*/
- if (!ca->ce_state && ca->delayed_ack_reserved) {
+ if (!ca->ce_state &&
+ inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
u32 tmp_rcv_nxt;
/* Save current rcv_nxt. */
@@ -164,7 +163,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
/* State has changed from CE=1 to CE=0 and delayed
* ACK has not sent yet.
*/
- if (ca->ce_state && ca->delayed_ack_reserved) {
+ if (ca->ce_state &&
+ inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
u32 tmp_rcv_nxt;
/* Save current rcv_nxt. */
@@ -248,25 +248,6 @@ static void dctcp_state(struct sock *sk, u8 new_state)
}
}
-static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev)
-{
- struct dctcp *ca = inet_csk_ca(sk);
-
- switch (ev) {
- case CA_EVENT_DELAYED_ACK:
- if (!ca->delayed_ack_reserved)
- ca->delayed_ack_reserved = 1;
- break;
- case CA_EVENT_NON_DELAYED_ACK:
- if (ca->delayed_ack_reserved)
- ca->delayed_ack_reserved = 0;
- break;
- default:
- /* Don't care for the rest. */
- break;
- }
-}
-
static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{
switch (ev) {
@@ -276,10 +257,6 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
case CA_EVENT_ECN_NO_CE:
dctcp_ce_state_1_to_0(sk);
break;
- case CA_EVENT_DELAYED_ACK:
- case CA_EVENT_NON_DELAYED_ACK:
- dctcp_update_ack_reserved(sk, ev);
- break;
default:
/* Don't care for the rest. */
break;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dc415c66a33a..9e041fa5c545 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -157,11 +157,24 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
if (tcptw->tw_ts_recent_stamp &&
(!twp || (reuse && time_after32(ktime_get_seconds(),
tcptw->tw_ts_recent_stamp)))) {
- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
- tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ /* In case of repair and re-using TIME-WAIT sockets we still
+ * want to be sure that it is safe as above but honor the
+ * sequence numbers and time stamps set as part of the repair
+ * process.
+ *
+ * Without this check re-using a TIME-WAIT socket with TCP
+ * repair would accumulate a -1 on the repair assigned
+ * sequence number. The first time it is reused the sequence
+ * is -1, the second time -2, etc. This fixes that issue
+ * without appearing to create any others.
+ */
+ if (likely(!tp->repair)) {
+ tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
+ if (tp->write_seq == 0)
+ tp->write_seq = 1;
+ tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
+ tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ }
sock_hold(sktw);
return 1;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f8f6129160dd..6cbab56e7407 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3509,8 +3509,6 @@ void tcp_send_delayed_ack(struct sock *sk)
int ato = icsk->icsk_ack.ato;
unsigned long timeout;
- tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
-
if (ato > TCP_DELACK_MIN) {
const struct tcp_sock *tp = tcp_sk(sk);
int max_ato = HZ / 2;
@@ -3567,8 +3565,6 @@ void tcp_send_ack(struct sock *sk)
if (sk->sk_state == TCP_CLOSE)
return;
- tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
-
/* We are not putting this on the write queue, so
* tcp_transmit_skb() will set the ownership to this
* sock.
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 0eff75525da1..b3885ca22d6f 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -108,6 +108,7 @@ config IPV6_MIP6
config IPV6_ILA
tristate "IPv6: Identifier Locator Addressing (ILA)"
depends on NETFILTER
+ select DST_CACHE
select LWTUNNEL
---help---
Support for IPv6 Identifier Locator Addressing (ILA).
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 1323b9679cf7..1c0bb9fb76e6 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -799,8 +799,7 @@ static int calipso_opt_update(struct sock *sk, struct ipv6_opt_hdr *hop)
{
struct ipv6_txoptions *old = txopt_get(inet6_sk(sk)), *txopts;
- txopts = ipv6_renew_options_kern(sk, old, IPV6_HOPOPTS,
- hop, hop ? ipv6_optlen(hop) : 0);
+ txopts = ipv6_renew_options(sk, old, IPV6_HOPOPTS, hop);
txopt_put(old);
if (IS_ERR(txopts))
return PTR_ERR(txopts);
@@ -1222,8 +1221,7 @@ static int calipso_req_setattr(struct request_sock *req,
if (IS_ERR(new))
return PTR_ERR(new);
- txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS,
- new, new ? ipv6_optlen(new) : 0);
+ txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new);
kfree(new);
@@ -1260,8 +1258,7 @@ static void calipso_req_delattr(struct request_sock *req)
if (calipso_opt_del(req_inet->ipv6_opt->hopopt, &new))
return; /* Nothing to do */
- txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS,
- new, new ? ipv6_optlen(new) : 0);
+ txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new);
if (!IS_ERR(txopts)) {
txopts = xchg(&req_inet->ipv6_opt, txopts);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 5bc2bf3733ab..20291c2036fc 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -1015,29 +1015,21 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
}
EXPORT_SYMBOL_GPL(ipv6_dup_options);
-static int ipv6_renew_option(void *ohdr,
- struct ipv6_opt_hdr __user *newopt, int newoptlen,
- int inherit,
- struct ipv6_opt_hdr **hdr,
- char **p)
+static void ipv6_renew_option(int renewtype,
+ struct ipv6_opt_hdr **dest,
+ struct ipv6_opt_hdr *old,
+ struct ipv6_opt_hdr *new,
+ int newtype, char **p)
{
- if (inherit) {
- if (ohdr) {
- memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
- *hdr = (struct ipv6_opt_hdr *)*p;
- *p += CMSG_ALIGN(ipv6_optlen(*hdr));
- }
- } else {
- if (newopt) {
- if (copy_from_user(*p, newopt, newoptlen))
- return -EFAULT;
- *hdr = (struct ipv6_opt_hdr *)*p;
- if (ipv6_optlen(*hdr) > newoptlen)
- return -EINVAL;
- *p += CMSG_ALIGN(newoptlen);
- }
- }
- return 0;
+ struct ipv6_opt_hdr *src;
+
+ src = (renewtype == newtype ? new : old);
+ if (!src)
+ return;
+
+ memcpy(*p, src, ipv6_optlen(src));
+ *dest = (struct ipv6_opt_hdr *)*p;
+ *p += CMSG_ALIGN(ipv6_optlen(*dest));
}
/**
@@ -1063,13 +1055,11 @@ static int ipv6_renew_option(void *ohdr,
*/
struct ipv6_txoptions *
ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
- int newtype,
- struct ipv6_opt_hdr __user *newopt, int newoptlen)
+ int newtype, struct ipv6_opt_hdr *newopt)
{
int tot_len = 0;
char *p;
struct ipv6_txoptions *opt2;
- int err;
if (opt) {
if (newtype != IPV6_HOPOPTS && opt->hopopt)
@@ -1082,8 +1072,8 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
}
- if (newopt && newoptlen)
- tot_len += CMSG_ALIGN(newoptlen);
+ if (newopt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(newopt));
if (!tot_len)
return NULL;
@@ -1098,29 +1088,19 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);
- err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen,
- newtype != IPV6_HOPOPTS,
- &opt2->hopopt, &p);
- if (err)
- goto out;
-
- err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen,
- newtype != IPV6_RTHDRDSTOPTS,
- &opt2->dst0opt, &p);
- if (err)
- goto out;
-
- err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen,
- newtype != IPV6_RTHDR,
- (struct ipv6_opt_hdr **)&opt2->srcrt, &p);
- if (err)
- goto out;
-
- err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen,
- newtype != IPV6_DSTOPTS,
- &opt2->dst1opt, &p);
- if (err)
- goto out;
+ ipv6_renew_option(IPV6_HOPOPTS, &opt2->hopopt,
+ (opt ? opt->hopopt : NULL),
+ newopt, newtype, &p);
+ ipv6_renew_option(IPV6_RTHDRDSTOPTS, &opt2->dst0opt,
+ (opt ? opt->dst0opt : NULL),
+ newopt, newtype, &p);
+ ipv6_renew_option(IPV6_RTHDR,
+ (struct ipv6_opt_hdr **)&opt2->srcrt,
+ (opt ? (struct ipv6_opt_hdr *)opt->srcrt : NULL),
+ newopt, newtype, &p);
+ ipv6_renew_option(IPV6_DSTOPTS, &opt2->dst1opt,
+ (opt ? opt->dst1opt : NULL),
+ newopt, newtype, &p);
opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) +
(opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) +
@@ -1128,37 +1108,6 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0);
return opt2;
-out:
- sock_kfree_s(sk, opt2, opt2->tot_len);
- return ERR_PTR(err);
-}
-
-/**
- * ipv6_renew_options_kern - replace a specific ext hdr with a new one.
- *
- * @sk: sock from which to allocate memory
- * @opt: original options
- * @newtype: option type to replace in @opt
- * @newopt: new option of type @newtype to replace (kernel-mem)
- * @newoptlen: length of @newopt
- *
- * See ipv6_renew_options(). The difference is that @newopt is
- * kernel memory, rather than user memory.
- */
-struct ipv6_txoptions *
-ipv6_renew_options_kern(struct sock *sk, struct ipv6_txoptions *opt,
- int newtype, struct ipv6_opt_hdr *newopt,
- int newoptlen)
-{
- struct ipv6_txoptions *ret_val;
- const mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- ret_val = ipv6_renew_options(sk, opt, newtype,
- (struct ipv6_opt_hdr __user *)newopt,
- newoptlen);
- set_fs(old_fs);
- return ret_val;
}
struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1fb2f3118d60..d212738e9d10 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -935,20 +935,19 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
{
struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&rt->fib6_table->tb6_lock));
- enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
- struct fib6_info *iter = NULL, *match = NULL;
+ struct fib6_info *iter = NULL;
struct fib6_info __rcu **ins;
+ struct fib6_info __rcu **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
- int append = (info->nlh &&
- (info->nlh->nlmsg_flags & NLM_F_APPEND));
int add = (!info->nlh ||
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
+ bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
u16 nlflags = NLM_F_EXCL;
int err;
- if (append)
+ if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
nlflags |= NLM_F_APPEND;
ins = &fn->leaf;
@@ -970,8 +969,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
nlflags &= ~NLM_F_EXCL;
if (replace) {
- found++;
- break;
+ if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
+ found++;
+ break;
+ }
+ if (rt_can_ecmp)
+ fallback_ins = fallback_ins ?: ins;
+ goto next_iter;
}
if (rt6_duplicate_nexthop(iter, rt)) {
@@ -986,51 +990,71 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
return -EEXIST;
}
-
- /* first route that matches */
- if (!match)
- match = iter;
+ /* If we have the same destination and the same metric,
+ * but not the same gateway, then the route we try to
+ * add is sibling to this route, increment our counter
+ * of siblings, and later we will add our route to the
+ * list.
+ * Only static routes (which don't have flag
+ * RTF_EXPIRES) are used for ECMPv6.
+ *
+ * To avoid long list, we only had siblings if the
+ * route have a gateway.
+ */
+ if (rt_can_ecmp &&
+ rt6_qualify_for_ecmp(iter))
+ rt->fib6_nsiblings++;
}
if (iter->fib6_metric > rt->fib6_metric)
break;
+next_iter:
ins = &iter->fib6_next;
}
+ if (fallback_ins && !found) {
+ /* No ECMP-able route found, replace first non-ECMP one */
+ ins = fallback_ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ found++;
+ }
+
/* Reset round-robin state, if necessary */
if (ins == &fn->leaf)
fn->rr_ptr = NULL;
/* Link this route to others same route. */
- if (append && match) {
+ if (rt->fib6_nsiblings) {
+ unsigned int fib6_nsiblings;
struct fib6_info *sibling, *temp_sibling;
- if (rt->fib6_flags & RTF_REJECT) {
- NL_SET_ERR_MSG(extack,
- "Can not append a REJECT route");
- return -EINVAL;
- } else if (match->fib6_flags & RTF_REJECT) {
- NL_SET_ERR_MSG(extack,
- "Can not append to a REJECT route");
- return -EINVAL;
+ /* Find the first route that have the same metric */
+ sibling = leaf;
+ while (sibling) {
+ if (sibling->fib6_metric == rt->fib6_metric &&
+ rt6_qualify_for_ecmp(sibling)) {
+ list_add_tail(&rt->fib6_siblings,
+ &sibling->fib6_siblings);
+ break;
+ }
+ sibling = rcu_dereference_protected(sibling->fib6_next,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
}
- event = FIB_EVENT_ENTRY_APPEND;
- rt->fib6_nsiblings = match->fib6_nsiblings;
- list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
- match->fib6_nsiblings++;
-
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
* is broken!
*/
+ fib6_nsiblings = 0;
list_for_each_entry_safe(sibling, temp_sibling,
- &match->fib6_siblings, fib6_siblings) {
+ &rt->fib6_siblings, fib6_siblings) {
sibling->fib6_nsiblings++;
- BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings);
+ BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
+ fib6_nsiblings++;
}
-
- rt6_multipath_rebalance(match);
+ BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
+ rt6_multipath_rebalance(temp_sibling);
}
/*
@@ -1043,8 +1067,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
add:
nlflags |= NLM_F_CREATE;
- err = call_fib6_entry_notifiers(info->nl_net, event, rt,
- extack);
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_ADD,
+ rt, extack);
if (err)
return err;
@@ -1062,7 +1087,7 @@ add:
}
} else {
- struct fib6_info *tmp;
+ int nsiblings;
if (!found) {
if (add)
@@ -1077,57 +1102,48 @@ add:
if (err)
return err;
- /* if route being replaced has siblings, set tmp to
- * last one, otherwise tmp is current route. this is
- * used to set fib6_next for new route
- */
- if (iter->fib6_nsiblings)
- tmp = list_last_entry(&iter->fib6_siblings,
- struct fib6_info,
- fib6_siblings);
- else
- tmp = iter;
-
- /* insert new route */
atomic_inc(&rt->fib6_ref);
rcu_assign_pointer(rt->fib6_node, fn);
- rt->fib6_next = tmp->fib6_next;
+ rt->fib6_next = iter->fib6_next;
rcu_assign_pointer(*ins, rt);
-
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO;
}
+ nsiblings = iter->fib6_nsiblings;
+ iter->fib6_node = NULL;
+ fib6_purge_rt(iter, fn, info->nl_net);
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
+ fn->rr_ptr = NULL;
+ fib6_info_release(iter);
- /* delete old route */
- rt = iter;
-
- if (rt->fib6_nsiblings) {
- struct fib6_info *tmp;
-
+ if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
- list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings,
- fib6_siblings) {
- iter->fib6_node = NULL;
- fib6_purge_rt(iter, fn, info->nl_net);
- if (rcu_access_pointer(fn->rr_ptr) == iter)
- fn->rr_ptr = NULL;
- fib6_info_release(iter);
-
- rt->fib6_nsiblings--;
- info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+ ins = &rt->fib6_next;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ while (iter) {
+ if (iter->fib6_metric > rt->fib6_metric)
+ break;
+ if (rt6_qualify_for_ecmp(iter)) {
+ *ins = iter->fib6_next;
+ iter->fib6_node = NULL;
+ fib6_purge_rt(iter, fn, info->nl_net);
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
+ fn->rr_ptr = NULL;
+ fib6_info_release(iter);
+ nsiblings--;
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+ } else {
+ ins = &iter->fib6_next;
+ }
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
}
+ WARN_ON(nsiblings != 0);
}
-
- WARN_ON(rt->fib6_nsiblings != 0);
-
- rt->fib6_node = NULL;
- fib6_purge_rt(rt, fn, info->nl_net);
- if (rcu_access_pointer(fn->rr_ptr) == rt)
- fn->rr_ptr = NULL;
- fib6_info_release(rt);
}
return 0;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 367177786e34..fc7dd3a04360 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -927,7 +927,6 @@ tx_err:
static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ip6_tnl *t = netdev_priv(dev);
struct dst_entry *dst = skb_dst(skb);
struct net_device_stats *stats;
@@ -1012,6 +1011,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
}
} else {
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
switch (skb->protocol) {
case htons(ETH_P_IP):
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index fabe3ba1bddc..c0cac9cc3a28 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -398,6 +398,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
case IPV6_DSTOPTS:
{
struct ipv6_txoptions *opt;
+ struct ipv6_opt_hdr *new = NULL;
+
+ /* hop-by-hop / destination options are privileged option */
+ retv = -EPERM;
+ if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
+ break;
/* remove any sticky options header with a zero option
* length, per RFC3542.
@@ -409,17 +415,22 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
else if (optlen < sizeof(struct ipv6_opt_hdr) ||
optlen & 0x7 || optlen > 8 * 255)
goto e_inval;
-
- /* hop-by-hop / destination options are privileged option */
- retv = -EPERM;
- if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
- break;
+ else {
+ new = memdup_user(optval, optlen);
+ if (IS_ERR(new)) {
+ retv = PTR_ERR(new);
+ break;
+ }
+ if (unlikely(ipv6_optlen(new) > optlen)) {
+ kfree(new);
+ goto e_inval;
+ }
+ }
opt = rcu_dereference_protected(np->opt,
lockdep_sock_is_held(sk));
- opt = ipv6_renew_options(sk, opt, optname,
- (struct ipv6_opt_hdr __user *)optval,
- optlen);
+ opt = ipv6_renew_options(sk, opt, optname, new);
+ kfree(new);
if (IS_ERR(opt)) {
retv = PTR_ERR(opt);
break;
@@ -717,8 +728,9 @@ done:
struct sockaddr_in6 *psin6;
psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
- retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
- &psin6->sin6_addr);
+ retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface,
+ &psin6->sin6_addr,
+ MCAST_INCLUDE);
/* prior join w/ different source is ok */
if (retv && retv != -EADDRINUSE)
break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index c0c74088f2af..2699be7202be 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -95,6 +95,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
int delta);
static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
struct inet6_dev *idev);
+static int __ipv6_dev_mc_inc(struct net_device *dev,
+ const struct in6_addr *addr, unsigned int mode);
#define MLD_QRV_DEFAULT 2
/* RFC3810, 9.2. Query Interval */
@@ -132,7 +134,8 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
return iv > 0 ? iv : 1;
}
-int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
+ const struct in6_addr *addr, unsigned int mode)
{
struct net_device *dev = NULL;
struct ipv6_mc_socklist *mc_lst;
@@ -179,7 +182,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
}
mc_lst->ifindex = dev->ifindex;
- mc_lst->sfmode = MCAST_EXCLUDE;
+ mc_lst->sfmode = mode;
rwlock_init(&mc_lst->sflock);
mc_lst->sflist = NULL;
@@ -187,7 +190,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
* now add/increase the group membership on the device
*/
- err = ipv6_dev_mc_inc(dev, addr);
+ err = __ipv6_dev_mc_inc(dev, addr, mode);
if (err) {
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
@@ -199,8 +202,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return 0;
}
+
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ return __ipv6_sock_mc_join(sk, ifindex, addr, MCAST_EXCLUDE);
+}
EXPORT_SYMBOL(ipv6_sock_mc_join);
+int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
+ const struct in6_addr *addr, unsigned int mode)
+{
+ return __ipv6_sock_mc_join(sk, ifindex, addr, mode);
+}
+
/*
* socket leave on multicast group
*/
@@ -646,7 +660,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
return rv;
}
-static void igmp6_group_added(struct ifmcaddr6 *mc)
+static void igmp6_group_added(struct ifmcaddr6 *mc, unsigned int mode)
{
struct net_device *dev = mc->idev->dev;
char buf[MAX_ADDR_LEN];
@@ -672,7 +686,13 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
}
/* else v2 */
- mc->mca_crcount = mc->idev->mc_qrv;
+ /* Based on RFC3810 6.1, for newly added INCLUDE SSM, we
+ * should not send filter-mode change record as the mode
+ * should be from IN() to IN(A).
+ */
+ if (mode == MCAST_EXCLUDE)
+ mc->mca_crcount = mc->idev->mc_qrv;
+
mld_ifc_event(mc->idev);
}
@@ -770,13 +790,14 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
spin_lock_bh(&im->mca_lock);
if (pmc) {
im->idev = pmc->idev;
- im->mca_crcount = idev->mc_qrv;
im->mca_sfmode = pmc->mca_sfmode;
if (pmc->mca_sfmode == MCAST_INCLUDE) {
im->mca_tomb = pmc->mca_tomb;
im->mca_sources = pmc->mca_sources;
for (psf = im->mca_sources; psf; psf = psf->sf_next)
- psf->sf_crcount = im->mca_crcount;
+ psf->sf_crcount = idev->mc_qrv;
+ } else {
+ im->mca_crcount = idev->mc_qrv;
}
in6_dev_put(pmc->idev);
kfree(pmc);
@@ -831,7 +852,8 @@ static void ma_put(struct ifmcaddr6 *mc)
}
static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
- const struct in6_addr *addr)
+ const struct in6_addr *addr,
+ unsigned int mode)
{
struct ifmcaddr6 *mc;
@@ -849,9 +871,8 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
refcount_set(&mc->mca_refcnt, 1);
spin_lock_init(&mc->mca_lock);
- /* initial mode is (EX, empty) */
- mc->mca_sfmode = MCAST_EXCLUDE;
- mc->mca_sfcount[MCAST_EXCLUDE] = 1;
+ mc->mca_sfmode = mode;
+ mc->mca_sfcount[mode] = 1;
if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
@@ -863,7 +884,8 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
/*
* device multicast group inc (add if not found)
*/
-int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
+static int __ipv6_dev_mc_inc(struct net_device *dev,
+ const struct in6_addr *addr, unsigned int mode)
{
struct ifmcaddr6 *mc;
struct inet6_dev *idev;
@@ -887,14 +909,13 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
if (ipv6_addr_equal(&mc->mca_addr, addr)) {
mc->mca_users++;
write_unlock_bh(&idev->lock);
- ip6_mc_add_src(idev, &mc->mca_addr, MCAST_EXCLUDE, 0,
- NULL, 0);
+ ip6_mc_add_src(idev, &mc->mca_addr, mode, 0, NULL, 0);
in6_dev_put(idev);
return 0;
}
}
- mc = mca_alloc(idev, addr);
+ mc = mca_alloc(idev, addr, mode);
if (!mc) {
write_unlock_bh(&idev->lock);
in6_dev_put(idev);
@@ -911,11 +932,16 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
write_unlock_bh(&idev->lock);
mld_del_delrec(idev, mc);
- igmp6_group_added(mc);
+ igmp6_group_added(mc, mode);
ma_put(mc);
return 0;
}
+int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
+{
+ return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE);
+}
+
/*
* device multicast group del
*/
@@ -1751,7 +1777,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
psf_next = psf->sf_next;
- if (!is_in(pmc, psf, type, gdeleted, sdeleted)) {
+ if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) {
psf_prev = psf;
continue;
}
@@ -2066,7 +2092,7 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
if (pmc->mca_sfcount[MCAST_EXCLUDE])
type = MLD2_CHANGE_TO_EXCLUDE;
else
- type = MLD2_CHANGE_TO_INCLUDE;
+ type = MLD2_ALLOW_NEW_SOURCES;
skb = add_grec(skb, pmc, type, 0, 0, 1);
spin_unlock_bh(&pmc->mca_lock);
}
@@ -2546,7 +2572,7 @@ void ipv6_mc_up(struct inet6_dev *idev)
ipv6_mc_reset(idev);
for (i = idev->mc_list; i; i = i->next) {
mld_del_delrec(idev, i);
- igmp6_group_added(i);
+ igmp6_group_added(i, i->mca_sfmode);
}
read_unlock_bh(&idev->lock);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e640d2f3c55c..0ec273997d1d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -811,7 +811,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
return;
}
}
- if (ndopts.nd_opts_nonce)
+ if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1)
memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);
inc = ipv6_addr_is_multicast(daddr);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7eab959734bc..daf2e9e9193d 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1909,6 +1909,7 @@ static struct xt_match ip6t_builtin_mt[] __read_mostly = {
.checkentry = icmp6_checkentry,
.proto = IPPROTO_ICMPV6,
.family = NFPROTO_IPV6,
+ .me = THIS_MODULE,
},
};
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index a452d99c9f52..e4d9e6976d3c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -585,6 +585,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
fq->q.meat == fq->q.len &&
nf_ct_frag6_reasm(fq, skb, dev))
ret = 0;
+ else
+ skb_dst_drop(skb);
out_unlock:
spin_unlock_bh(&fq->q.lock);
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index bf1d6c421e3b..5dfd33af6451 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -55,7 +55,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, hp, tproto,
+ sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, tproto,
&iph->saddr,
nf_tproxy_laddr6(skb, laddr, &iph->daddr),
hp->source,
@@ -72,7 +72,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait6);
struct sock *
-nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
@@ -80,15 +80,20 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
- struct tcphdr *tcph;
switch (protocol) {
- case IPPROTO_TCP:
+ case IPPROTO_TCP: {
+ struct tcphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, thoff,
+ sizeof(struct tcphdr), &_hdr);
+ if (hp == NULL)
+ return NULL;
+
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
- tcph = hp;
sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
- thoff + __tcp_hdrlen(tcph),
+ thoff + __tcp_hdrlen(hp),
saddr, sport,
daddr, ntohs(dport),
in->ifindex, 0);
@@ -110,6 +115,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
BUG();
}
break;
+ }
case IPPROTO_UDP:
sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 86a0e4333d42..2ce0bd17de4f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3842,7 +3842,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
lockdep_is_held(&rt->fib6_table->tb6_lock));
while (iter) {
if (iter->fib6_metric == rt->fib6_metric &&
- iter->fib6_nsiblings)
+ rt6_qualify_for_ecmp(iter))
return iter;
iter = rcu_dereference_protected(iter->fib6_next,
lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -4388,6 +4388,13 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
rt = NULL;
goto cleanup;
}
+ if (!rt6_qualify_for_ecmp(rt)) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack,
+ "Device only routes can not be added for IPv6 using the multipath API.");
+ fib6_info_release(rt);
+ goto cleanup;
+ }
rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
@@ -4439,7 +4446,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
- cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_APPEND;
nhn++;
}
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 19ccf0dc996c..a8854dd3e9c5 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -101,7 +101,7 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
if (do_flowlabel > 0) {
hash = skb_get_hash(skb);
- rol32(hash, 16);
+ hash = rol32(hash, 16);
flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
flowlabel = ip6_flowlabel(inner_hdr);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index dbd7d1fad277..f0a1c536ef15 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -460,6 +460,13 @@ config NF_TABLES
if NF_TABLES
+config NF_TABLES_SET
+ tristate "Netfilter nf_tables set infrastructure"
+ help
+ This option enables the nf_tables set infrastructure that allows to
+ look up for elements in a set and to build one-way mappings between
+ matchings and actions.
+
config NF_TABLES_INET
depends on IPV6
select NF_TABLES_IPV4
@@ -493,24 +500,6 @@ config NFT_FLOW_OFFLOAD
This option adds the "flow_offload" expression that you can use to
choose what flows are placed into the hardware.
-config NFT_SET_RBTREE
- tristate "Netfilter nf_tables rbtree set module"
- help
- This option adds the "rbtree" set type (Red Black tree) that is used
- to build interval-based sets.
-
-config NFT_SET_HASH
- tristate "Netfilter nf_tables hash set module"
- help
- This option adds the "hash" set type that is used to build one-way
- mappings between matchings and actions.
-
-config NFT_SET_BITMAP
- tristate "Netfilter nf_tables bitmap set module"
- help
- This option adds the "bitmap" set type that is used to build sets
- whose keys are smaller or equal to 16 bits.
-
config NFT_COUNTER
tristate "Netfilter nf_tables counter module"
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 44449389e527..8a76dced974d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -78,7 +78,11 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o
+nf_tables_set-objs := nf_tables_set_core.o \
+ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o
+
obj-$(CONFIG_NF_TABLES) += nf_tables.o
+obj-$(CONFIG_NF_TABLES_SET) += nf_tables_set.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
@@ -91,9 +95,6 @@ obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
-obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o
-obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o
-obj-$(CONFIG_NFT_SET_BITMAP) += nft_set_bitmap.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 85ab2fd6a665..805500197c22 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2078,7 +2078,7 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
return -EOPNOTSUPP;
/* On boot, we can set this without any fancy locking. */
- if (!nf_conntrack_htable_size)
+ if (!nf_conntrack_hash)
return param_set_uint(val, kp);
rc = kstrtouint(val, 0, &hashsize);
diff --git a/net/netfilter/nf_tables_set_core.c b/net/netfilter/nf_tables_set_core.c
new file mode 100644
index 000000000000..814789644bd3
--- /dev/null
+++ b/net/netfilter/nf_tables_set_core.c
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <net/netfilter/nf_tables_core.h>
+
+static int __init nf_tables_set_module_init(void)
+{
+ nft_register_set(&nft_set_hash_fast_type);
+ nft_register_set(&nft_set_hash_type);
+ nft_register_set(&nft_set_rhash_type);
+ nft_register_set(&nft_set_bitmap_type);
+ nft_register_set(&nft_set_rbtree_type);
+
+ return 0;
+}
+
+static void __exit nf_tables_set_module_exit(void)
+{
+ nft_unregister_set(&nft_set_rbtree_type);
+ nft_unregister_set(&nft_set_bitmap_type);
+ nft_unregister_set(&nft_set_rhash_type);
+ nft_unregister_set(&nft_set_hash_type);
+ nft_unregister_set(&nft_set_hash_fast_type);
+}
+
+module_init(nf_tables_set_module_init);
+module_exit(nf_tables_set_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 8d1ff654e5af..32535eea51b2 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -832,10 +832,18 @@ nft_target_select_ops(const struct nft_ctx *ctx,
rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
family = ctx->family;
+ if (strcmp(tg_name, XT_ERROR_TARGET) == 0 ||
+ strcmp(tg_name, XT_STANDARD_TARGET) == 0 ||
+ strcmp(tg_name, "standard") == 0)
+ return ERR_PTR(-EINVAL);
+
/* Re-use the existing target if it's already loaded. */
list_for_each_entry(nft_target, &nft_target_list, head) {
struct xt_target *target = nft_target->ops.data;
+ if (!target->target)
+ continue;
+
if (nft_target_cmp(target, tg_name, rev, family))
return &nft_target->ops;
}
@@ -844,6 +852,11 @@ nft_target_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(target))
return ERR_PTR(-ENOENT);
+ if (!target->target) {
+ err = -EINVAL;
+ goto err;
+ }
+
if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) {
err = -EINVAL;
goto err;
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index d6626e01c7ee..128bc16f52dd 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -296,7 +296,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
-static struct nft_set_type nft_bitmap_type __read_mostly = {
+struct nft_set_type nft_set_bitmap_type __read_mostly = {
.owner = THIS_MODULE,
.ops = {
.privsize = nft_bitmap_privsize,
@@ -314,20 +314,3 @@ static struct nft_set_type nft_bitmap_type __read_mostly = {
.get = nft_bitmap_get,
},
};
-
-static int __init nft_bitmap_module_init(void)
-{
- return nft_register_set(&nft_bitmap_type);
-}
-
-static void __exit nft_bitmap_module_exit(void)
-{
- nft_unregister_set(&nft_bitmap_type);
-}
-
-module_init(nft_bitmap_module_init);
-module_exit(nft_bitmap_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6f9a1365a09f..72ef35b51cac 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -654,7 +654,7 @@ static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features
return true;
}
-static struct nft_set_type nft_rhash_type __read_mostly = {
+struct nft_set_type nft_set_rhash_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT |
NFT_SET_TIMEOUT | NFT_SET_EVAL,
@@ -677,7 +677,7 @@ static struct nft_set_type nft_rhash_type __read_mostly = {
},
};
-static struct nft_set_type nft_hash_type __read_mostly = {
+struct nft_set_type nft_set_hash_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
@@ -697,7 +697,7 @@ static struct nft_set_type nft_hash_type __read_mostly = {
},
};
-static struct nft_set_type nft_hash_fast_type __read_mostly = {
+struct nft_set_type nft_set_hash_fast_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
@@ -716,26 +716,3 @@ static struct nft_set_type nft_hash_fast_type __read_mostly = {
.get = nft_hash_get,
},
};
-
-static int __init nft_hash_module_init(void)
-{
- if (nft_register_set(&nft_hash_fast_type) ||
- nft_register_set(&nft_hash_type) ||
- nft_register_set(&nft_rhash_type))
- return 1;
- return 0;
-}
-
-static void __exit nft_hash_module_exit(void)
-{
- nft_unregister_set(&nft_rhash_type);
- nft_unregister_set(&nft_hash_type);
- nft_unregister_set(&nft_hash_fast_type);
-}
-
-module_init(nft_hash_module_init);
-module_exit(nft_hash_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 7f3a9a211034..1f8f257cb518 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -462,7 +462,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
-static struct nft_set_type nft_rbtree_type __read_mostly = {
+struct nft_set_type nft_set_rbtree_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.ops = {
@@ -481,20 +481,3 @@ static struct nft_set_type nft_rbtree_type __read_mostly = {
.get = nft_rbtree_get,
},
};
-
-static int __init nft_rbtree_module_init(void)
-{
- return nft_register_set(&nft_rbtree_type);
-}
-
-static void __exit nft_rbtree_module_exit(void)
-{
- nft_unregister_set(&nft_rbtree_type);
-}
-
-module_init(nft_rbtree_module_init);
-module_exit(nft_rbtree_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 58fce4e749a9..d76550a8b642 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -61,7 +61,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
@@ -77,7 +77,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NF_TPROXY_LOOKUP_LISTENER);
@@ -150,7 +150,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
+ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED);
@@ -171,7 +171,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
+ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff,
tproto, &iph->saddr, laddr,
hp->source, lport,
xt_in(par), NF_TPROXY_LOOKUP_LISTENER);
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 2ceefa183cee..6a196e438b6c 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -752,11 +752,14 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
pr_debug("Fragment %zd bytes remaining %zd",
frag_len, remaining_len);
- pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, MSG_DONTWAIT,
+ pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, 0,
frag_len + LLCP_HEADER_SIZE, &err);
if (pdu == NULL) {
- pr_err("Could not allocate PDU\n");
- continue;
+ pr_err("Could not allocate PDU (error=%d)\n", err);
+ len -= remaining_len;
+ if (len == 0)
+ len = err;
+ break;
}
pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI);
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 9696ef96b719..1a30e165eeb4 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -104,7 +104,7 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
__skb_pull(skb, nsh_len);
skb_reset_mac_header(skb);
- skb_reset_mac_len(skb);
+ skb->mac_len = proto == htons(ETH_P_TEB) ? ETH_HLEN : 0;
skb->protocol = proto;
features &= NETIF_F_SG;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 00189a3b07f2..e3e00d3a972e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2881,6 +2881,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_free;
} else if (reserve) {
skb_reserve(skb, -reserve);
+ if (len < reserve)
+ skb_reset_network_header(skb);
}
/* Returns -EFAULT on error */
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 2aa07b547b16..86e1e37eb4e8 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -191,8 +191,13 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
hdr->type = cpu_to_le32(type);
hdr->src_node_id = cpu_to_le32(from->sq_node);
hdr->src_port_id = cpu_to_le32(from->sq_port);
- hdr->dst_node_id = cpu_to_le32(to->sq_node);
- hdr->dst_port_id = cpu_to_le32(to->sq_port);
+ if (to->sq_port == QRTR_PORT_CTRL) {
+ hdr->dst_node_id = cpu_to_le32(node->nid);
+ hdr->dst_port_id = cpu_to_le32(QRTR_NODE_BCAST);
+ } else {
+ hdr->dst_node_id = cpu_to_le32(to->sq_node);
+ hdr->dst_port_id = cpu_to_le32(to->sq_port);
+ }
hdr->size = cpu_to_le32(len);
hdr->confirm_rx = 0;
@@ -764,6 +769,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
node = NULL;
if (addr->sq_node == QRTR_NODE_BCAST) {
enqueue_fn = qrtr_bcast_enqueue;
+ if (addr->sq_port != QRTR_PORT_CTRL) {
+ release_sock(sk);
+ return -ENOTCONN;
+ }
} else if (addr->sq_node == ipc->us.sq_node) {
enqueue_fn = qrtr_local_enqueue;
} else {
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index bd232d3bd022..4e8c383f379e 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -97,7 +97,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
}
params_old = rtnl_dereference(p->params);
- params_new->action = parm->action;
+ p->tcf_action = parm->action;
params_new->update_flags = parm->update_flags;
rcu_assign_pointer(p->params, params_new);
if (params_old)
@@ -567,7 +567,7 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&p->tcf_tm);
bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
- action = params->action;
+ action = READ_ONCE(p->tcf_action);
if (unlikely(action == TC_ACT_SHOT))
goto drop_stats;
@@ -605,11 +605,11 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
.index = p->tcf_index,
.refcnt = refcount_read(&p->tcf_refcnt) - ref,
.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
+ .action = p->tcf_action,
};
struct tcf_t t;
params = rtnl_dereference(p->params);
- opt.action = params->action;
opt.update_flags = params->update_flags;
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 22f26e9ea8f1..f811850fd1d0 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -37,7 +37,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&t->tcf_tm);
bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
- action = params->action;
+ action = READ_ONCE(t->tcf_action);
switch (params->tcft_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
@@ -360,7 +360,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
params_old = rtnl_dereference(t->params);
- params_new->action = parm->action;
+ t->tcf_action = parm->action;
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
@@ -489,13 +489,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
.index = t->tcf_index,
.refcnt = refcount_read(&t->tcf_refcnt) - ref,
.bindcnt = atomic_read(&t->tcf_bindcnt) - bind,
+ .action = t->tcf_action,
};
struct tcf_t tm;
params = rtnl_dereference(t->params);
opt.t_action = params->tcft_action;
- opt.action = params->action;
if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index c51b1b12450d..623fe2cfe529 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1098,7 +1098,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
for (tp = rtnl_dereference(chain->filter_chain);
tp; tp = rtnl_dereference(tp->next))
tfilter_notify(net, oskb, n, tp, block,
- q, parent, 0, event, false);
+ q, parent, NULL, event, false);
}
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1489,7 +1489,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, block, q, parent, 0,
+ if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index cd2e0e342fb6..6c0a9d5dbf94 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -479,24 +479,28 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
q->cparams.mtu = psched_mtu(qdisc_dev(sch));
if (opt) {
- int err = fq_codel_change(sch, opt, extack);
+ err = fq_codel_change(sch, opt, extack);
if (err)
- return err;
+ goto init_failure;
}
err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
- return err;
+ goto init_failure;
if (!q->flows) {
q->flows = kvcalloc(q->flows_cnt,
sizeof(struct fq_codel_flow),
GFP_KERNEL);
- if (!q->flows)
- return -ENOMEM;
+ if (!q->flows) {
+ err = -ENOMEM;
+ goto init_failure;
+ }
q->backlogs = kvcalloc(q->flows_cnt, sizeof(u32), GFP_KERNEL);
- if (!q->backlogs)
- return -ENOMEM;
+ if (!q->backlogs) {
+ err = -ENOMEM;
+ goto alloc_failure;
+ }
for (i = 0; i < q->flows_cnt; i++) {
struct fq_codel_flow *flow = q->flows + i;
@@ -509,6 +513,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
else
sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
+
+alloc_failure:
+ kvfree(q->flows);
+ q->flows = NULL;
+init_failure:
+ q->flows_cnt = 0;
+ return err;
}
static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 445b7ef61677..12cac85da994 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -282,7 +282,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
if (dst) {
/* Re-fetch, as under layers may have a higher minimum size */
- pmtu = SCTP_TRUNC4(dst_mtu(dst));
+ pmtu = sctp_dst_mtu(dst);
change = t->pathmtu != pmtu;
}
t->pathmtu = pmtu;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index f3fdf3714f8b..143b2220c0c8 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -149,7 +149,8 @@ static int smc_release(struct socket *sock)
smc->clcsock = NULL;
}
if (smc->use_fallback) {
- sock_put(sk); /* passive closing */
+ if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
+ sock_put(sk); /* passive closing */
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk);
}
@@ -441,12 +442,18 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
int rc;
- if (reason_code < 0) /* error, fallback is not possible */
+ if (reason_code < 0) { /* error, fallback is not possible */
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return reason_code;
+ }
if (reason_code != SMC_CLC_DECL_REPLY) {
rc = smc_clc_send_decline(smc, reason_code);
- if (rc < 0)
+ if (rc < 0) {
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return rc;
+ }
}
return smc_connect_fallback(smc);
}
@@ -459,8 +466,6 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
- if (reason_code < 0 && smc->sk.sk_state == SMC_INIT)
- sock_put(&smc->sk); /* passive closing */
return reason_code;
}
@@ -1527,6 +1532,8 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
if (sk->sk_err)
mask |= EPOLLERR;
} else {
+ if (sk->sk_state != SMC_CLOSED)
+ sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_err)
mask |= EPOLLERR;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
@@ -1552,7 +1559,6 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
}
if (smc->conn.urg_state == SMC_URG_VALID)
mask |= EPOLLPRI;
-
}
return mask;
@@ -1633,7 +1639,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- get_user(val, (int __user *)optval);
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
lock_sock(sk);
switch (optname) {
@@ -1701,10 +1708,13 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
return -EBADF;
return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
}
+ lock_sock(&smc->sk);
switch (cmd) {
case SIOCINQ: /* same as FIONREAD */
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED)
answ = 0;
@@ -1713,8 +1723,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
break;
case SIOCOUTQ:
/* output queue size (not send + not acked) */
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED)
answ = 0;
@@ -1724,8 +1736,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
break;
case SIOCOUTQNSD:
/* output queue size (not send only) */
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED)
answ = 0;
@@ -1733,8 +1747,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
answ = smc_tx_prepared_sends(&smc->conn);
break;
case SIOCATMARK:
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED) {
answ = 0;
@@ -1750,8 +1766,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
}
break;
default:
+ release_sock(&smc->sk);
return -ENOIOCTLCMD;
}
+ release_sock(&smc->sk);
return put_user(answ, (int __user *)arg);
}
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 038d70ef7892..ad39efdb4f1c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -267,6 +267,7 @@ out:
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type)
{
+ long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
struct sock *clc_sk = smc->clcsock->sk;
struct smc_clc_msg_hdr *clcm = buf;
struct msghdr msg = {NULL, 0};
@@ -326,7 +327,6 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
memset(&msg, 0, sizeof(struct msghdr));
iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
krflags = MSG_WAITALL;
- smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
len = sock_recvmsg(smc->clcsock, &msg, krflags);
if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
smc->sk.sk_err = EPROTO;
@@ -342,6 +342,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
out:
+ smc->clcsock->sk->sk_rcvtimeo = rcvtimeo;
return reason_code;
}
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index fa41d9881741..ac961dfb1ea1 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -107,6 +107,8 @@ static void smc_close_active_abort(struct smc_sock *smc)
}
switch (sk->sk_state) {
case SMC_INIT:
+ sk->sk_state = SMC_PEERABORTWAIT;
+ break;
case SMC_ACTIVE:
sk->sk_state = SMC_PEERABORTWAIT;
release_sock(sk);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 9f666e0650e2..2830709957bd 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -133,6 +133,8 @@ static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
}
/* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer
+ * Returns true if message should be dropped by caller, i.e., if it is a
+ * trial message or we are inside trial period. Otherwise false.
*/
static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
struct tipc_media_addr *maddr,
@@ -168,8 +170,9 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
+ /* Accept regular link requests/responses only after trial period */
if (mtyp != DSC_TRIAL_MSG)
- return false;
+ return trial;
sugg_addr = tipc_node_try_addr(net, peer_id, src);
if (sugg_addr)
@@ -284,7 +287,6 @@ static void tipc_disc_timeout(struct timer_list *t)
{
struct tipc_discoverer *d = from_timer(d, t, timer);
struct tipc_net *tn = tipc_net(d->net);
- u32 self = tipc_own_addr(d->net);
struct tipc_media_addr maddr;
struct sk_buff *skb = NULL;
struct net *net = d->net;
@@ -298,12 +300,14 @@ static void tipc_disc_timeout(struct timer_list *t)
goto exit;
}
- /* Did we just leave the address trial period ? */
- if (!self && !time_before(jiffies, tn->addr_trial_end)) {
- self = tn->trial_addr;
- tipc_net_finalize(net, self);
- msg_set_prevnode(buf_msg(d->skb), self);
+ /* Trial period over ? */
+ if (!time_before(jiffies, tn->addr_trial_end)) {
+ /* Did we just leave it ? */
+ if (!tipc_own_addr(net))
+ tipc_net_finalize(net, tn->trial_addr);
+
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ msg_set_prevnode(buf_msg(d->skb), tipc_own_addr(net));
}
/* Adjust timeout interval according to discovery phase */
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 4fbaa0464405..a7f6964c3a4b 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -121,12 +121,17 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
void tipc_net_finalize(struct net *net, u32 addr)
{
- tipc_set_node_addr(net, addr);
- smp_mb();
- tipc_named_reinit(net);
- tipc_sk_reinit(net);
- tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
- TIPC_CLUSTER_SCOPE, 0, addr);
+ struct tipc_net *tn = tipc_net(net);
+
+ spin_lock_bh(&tn->node_list_lock);
+ if (!tipc_own_addr(net)) {
+ tipc_set_node_addr(net, addr);
+ tipc_named_reinit(net);
+ tipc_sk_reinit(net);
+ tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+ TIPC_CLUSTER_SCOPE, 0, addr);
+ }
+ spin_unlock_bh(&tn->node_list_lock);
}
void tipc_net_stop(struct net *net)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3819ab14e073..68014f1b6976 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -862,6 +862,7 @@ static u32 tipc_node_suggest_addr(struct net *net, u32 addr)
}
/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not
+ * Returns suggested address if any, otherwise 0
*/
u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
{
@@ -884,12 +885,14 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
if (n) {
addr = n->addr;
tipc_node_put(n);
+ return addr;
}
- /* Even this node may be in trial phase */
+
+ /* Even this node may be in conflict */
if (tn->trial_addr == addr)
return tipc_node_suggest_addr(net, addr);
- return addr;
+ return 0;
}
void tipc_node_check_dest(struct net *net, u32 addr,
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 7d194c0cd6cf..0c2d029c9d4c 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -425,7 +425,7 @@ alloc_encrypted:
ret = tls_push_record(sk, msg->msg_flags, record_type);
if (!ret)
continue;
- if (ret == -EAGAIN)
+ if (ret < 0)
goto send_end;
copied -= try_to_copy;
@@ -716,6 +716,10 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
nsg = skb_to_sgvec(skb, &sgin[1],
rxm->offset + tls_ctx->rx.prepend_size,
rxm->full_len - tls_ctx->rx.prepend_size);
+ if (nsg < 0) {
+ ret = nsg;
+ goto out;
+ }
tls_make_aad(ctx->rx_aad_ciphertext,
rxm->full_len - tls_ctx->rx.overhead_size,
@@ -727,6 +731,7 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
rxm->full_len - tls_ctx->rx.overhead_size,
skb, sk->sk_allocation);
+out:
if (sgin != &sgin_arr[0])
kfree(sgin);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 59fb7d3c36a3..72335c2e8108 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -199,8 +199,11 @@ static void xsk_destruct_skb(struct sk_buff *skb)
{
u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
struct xdp_sock *xs = xdp_sk(skb->sk);
+ unsigned long flags;
+ spin_lock_irqsave(&xs->tx_completion_lock, flags);
WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+ spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
sock_wfree(skb);
}
@@ -215,9 +218,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
struct sk_buff *skb;
int err = 0;
- if (unlikely(!xs->tx))
- return -ENOBUFS;
-
mutex_lock(&xs->mutex);
while (xskq_peek_desc(xs->tx, &desc)) {
@@ -230,22 +230,13 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
- if (xskq_reserve_addr(xs->umem->cq)) {
- err = -EAGAIN;
- goto out;
- }
-
- len = desc.len;
- if (unlikely(len > xs->dev->mtu)) {
- err = -EMSGSIZE;
+ if (xskq_reserve_addr(xs->umem->cq))
goto out;
- }
- if (xs->queue_id >= xs->dev->real_num_tx_queues) {
- err = -ENXIO;
+ if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
- }
+ len = desc.len;
skb = sock_alloc_send_skb(sk, len, 1, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
@@ -268,15 +259,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
+ xskq_discard_desc(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
- err = -EAGAIN;
- /* SKB consumed by dev_direct_xmit() */
+ /* SKB completed but not sent */
+ err = -EBUSY;
goto out;
}
sent_frame = true;
- xskq_discard_desc(xs->tx);
}
out:
@@ -297,6 +288,8 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
return -ENXIO;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
+ if (unlikely(!xs->tx))
+ return -ENOBUFS;
if (need_wait)
return -EOPNOTSUPP;
@@ -755,6 +748,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs = xdp_sk(sk);
mutex_init(&xs->mutex);
+ spin_lock_init(&xs->tx_completion_lock);
local_bh_disable();
sock_prot_inuse_add(net, &xsk_proto, 1);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index ef6a6f0ec949..52ecaf770642 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -62,14 +62,9 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
return (entries > dcnt) ? dcnt : entries;
}
-static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
-{
- return q->nentries - (producer - q->cons_tail);
-}
-
static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
{
- u32 free_entries = xskq_nb_free_lazy(q, producer);
+ u32 free_entries = q->nentries - (producer - q->cons_tail);
if (free_entries >= dcnt)
return free_entries;
@@ -129,7 +124,7 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+ if (xskq_nb_free(q, q->prod_tail, 1) == 0)
return -ENOSPC;
ring->desc[q->prod_tail++ & q->ring_mask] = addr;