summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--net/bridge/br_netfilter_hooks.c6
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c4
-rw-r--r--net/netfilter/nf_flow_table_offload.c2
-rw-r--r--net/netfilter/nf_tables_api.c147
-rw-r--r--net/netfilter/nfnetlink.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c35
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile1
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter_queue.sh78
8 files changed, 228 insertions, 50 deletions
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 09f6a773a708..8f9c19d992ac 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -622,8 +622,12 @@ static unsigned int br_nf_local_in(void *priv,
if (likely(nf_ct_is_confirmed(ct)))
return NF_ACCEPT;
+ if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) {
+ nf_reset_ct(skb);
+ return NF_ACCEPT;
+ }
+
WARN_ON_ONCE(skb_shared(skb));
- WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
/* We can't call nf_confirm here, it would create a dependency
* on nf_conntrack module.
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6f0844c9315d..4120e67a8ce6 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -154,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
};
struct inet_frag_queue *q;
+ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)))
+ key.iif = 0;
+
q = inet_frag_find(nf_frag->fqdir, &key);
if (!q)
return NULL;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index ff1a4e36c2b5..e06bc36f49fe 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
struct list_head *block_cb_list)
{
struct flow_cls_offload cls_flow = {};
+ struct netlink_ext_ack extack = {};
struct flow_block_cb *block_cb;
- struct netlink_ext_ack extack;
__be16 proto = ETH_P_ALL;
int err, i = 0;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 481ee78e77bc..0a2f79346958 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8020,6 +8020,19 @@ cont:
return skb->len;
}
+static int nf_tables_dumpreset_obj(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
+ int ret;
+
+ mutex_lock(&nft_net->commit_mutex);
+ ret = nf_tables_dump_obj(skb, cb);
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return ret;
+}
+
static int nf_tables_dump_obj_start(struct netlink_callback *cb)
{
struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
@@ -8036,12 +8049,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
if (nla[NFTA_OBJ_TYPE])
ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
- ctx->reset = true;
-
return 0;
}
+static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb)
+{
+ struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
+
+ ctx->reset = true;
+
+ return nf_tables_dump_obj_start(cb);
+}
+
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
@@ -8052,8 +8071,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
}
/* called with rcu_read_lock held */
-static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
- const struct nlattr * const nla[])
+static struct sk_buff *
+nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
+ const struct nlattr * const nla[], bool reset)
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
@@ -8062,72 +8082,109 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
struct net *net = info->net;
struct nft_object *obj;
struct sk_buff *skb2;
- bool reset = false;
u32 objtype;
int err;
- if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
- struct netlink_dump_control c = {
- .start = nf_tables_dump_obj_start,
- .dump = nf_tables_dump_obj,
- .done = nf_tables_dump_obj_done,
- .module = THIS_MODULE,
- .data = (void *)nla,
- };
-
- return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
- }
-
if (!nla[NFTA_OBJ_NAME] ||
!nla[NFTA_OBJ_TYPE])
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
- return PTR_ERR(table);
+ return ERR_CAST(table);
}
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
if (IS_ERR(obj)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
- return PTR_ERR(obj);
+ return ERR_CAST(obj);
}
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
- reset = true;
+ err = nf_tables_fill_obj_info(skb2, net, portid,
+ info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
+ family, table, obj, reset);
+ if (err < 0) {
+ kfree_skb(skb2);
+ return ERR_PTR(err);
+ }
- if (reset) {
- const struct nftables_pernet *nft_net;
- char *buf;
+ return skb2;
+}
- nft_net = nft_pernet(net);
- buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
+static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ u32 portid = NETLINK_CB(skb).portid;
+ struct sk_buff *skb2;
- audit_log_nfcfg(buf,
- family,
- 1,
- AUDIT_NFT_OP_OBJ_RESET,
- GFP_ATOMIC);
- kfree(buf);
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nf_tables_dump_obj_start,
+ .dump = nf_tables_dump_obj,
+ .done = nf_tables_dump_obj_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
- err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
- info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
- family, table, obj, reset);
- if (err < 0)
- goto err_fill_obj_info;
+ skb2 = nf_tables_getobj_single(portid, info, nla, false);
+ if (IS_ERR(skb2))
+ return PTR_ERR(skb2);
- return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
+ return nfnetlink_unicast(skb2, info->net, portid);
+}
-err_fill_obj_info:
- kfree_skb(skb2);
- return err;
+static int nf_tables_getobj_reset(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ u32 portid = NETLINK_CB(skb).portid;
+ struct net *net = info->net;
+ struct sk_buff *skb2;
+ char *buf;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nf_tables_dumpreset_obj_start,
+ .dump = nf_tables_dumpreset_obj,
+ .done = nf_tables_dump_obj_done,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+ rcu_read_unlock();
+ mutex_lock(&nft_net->commit_mutex);
+ skb2 = nf_tables_getobj_single(portid, info, nla, true);
+ mutex_unlock(&nft_net->commit_mutex);
+ rcu_read_lock();
+ module_put(THIS_MODULE);
+
+ if (IS_ERR(skb2))
+ return PTR_ERR(skb2);
+
+ buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
+ nla_len(nla[NFTA_OBJ_TABLE]),
+ (char *)nla_data(nla[NFTA_OBJ_TABLE]),
+ nft_net->base_seq);
+ audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
+ AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
+ kfree(buf);
+
+ return nfnetlink_unicast(skb2, net, portid);
}
static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
@@ -9410,7 +9467,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_obj_policy,
},
[NFT_MSG_GETOBJ_RESET] = {
- .call = nf_tables_getobj,
+ .call = nf_tables_getobj_reset,
.type = NFNL_CB_RCU,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 4abf660c7baf..932b3ddb34f1 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -427,8 +427,10 @@ replay_abort:
nfnl_unlock(subsys_id);
- if (nlh->nlmsg_flags & NLM_F_ACK)
+ if (nlh->nlmsg_flags & NLM_F_ACK) {
+ memset(&extack, 0, sizeof(extack));
nfnl_err_add(&err_list, nlh, 0, &extack);
+ }
while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;
@@ -577,6 +579,7 @@ done:
ss->abort(net, oskb, NFNL_ABORT_NONE);
netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
} else if (nlh->nlmsg_flags & NLM_F_ACK) {
+ memset(&extack, 0, sizeof(extack));
nfnl_err_add(&err_list, nlh, 0, &extack);
}
} else {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 55e28e1da66e..e0716da256bf 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -820,10 +820,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
- const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+ struct nf_conn *ct = (void *)skb_nfct(entry->skb);
+ unsigned long status;
+ unsigned int use;
- if (ct && ((ct->status & flags) == IPS_DYING))
+ if (!ct)
+ return false;
+
+ status = READ_ONCE(ct->status);
+ if ((status & flags) == IPS_DYING)
return true;
+
+ if (status & IPS_CONFIRMED)
+ return false;
+
+ /* in some cases skb_clone() can occur after initial conntrack
+ * pickup, but conntrack assumes exclusive skb->_nfct ownership for
+ * unconfirmed entries.
+ *
+ * This happens for br_netfilter and with ip multicast routing.
+ * We can't be solved with serialization here because one clone could
+ * have been queued for local delivery.
+ */
+ use = refcount_read(&ct->ct_general.use);
+ if (likely(use == 1))
+ return false;
+
+ /* Can't decrement further? Exclusive ownership. */
+ if (!refcount_dec_not_one(&ct->ct_general.use))
+ return false;
+
+ skb_set_nfct(entry->skb, 0);
+ /* No nf_ct_put(): we already decremented .use and it cannot
+ * drop down to 0.
+ */
+ return true;
#endif
return false;
}
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index 47945b2b3f92..d13fb5ea3e89 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -7,6 +7,7 @@ MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
TEST_PROGS := br_netfilter.sh bridge_brouter.sh
+TEST_PROGS += br_netfilter_queue.sh
TEST_PROGS += conntrack_icmp_related.sh
TEST_PROGS += conntrack_ipip_mtu.sh
TEST_PROGS += conntrack_tcp_unreplied.sh
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
new file mode 100755
index 000000000000..6a764d70ab06
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+
+cleanup() {
+ cleanup_all_ns
+}
+
+setup_ns c1 c2 c3 sender
+
+trap cleanup EXIT
+
+nf_queue_wait()
+{
+ grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue"
+}
+
+port_add() {
+ ns="$1"
+ dev="$2"
+ a="$3"
+
+ ip link add name "$dev" type veth peer name "$dev" netns "$ns"
+
+ ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev"
+ ip -net "$ns" link set "$dev" up
+
+ ip link set "$dev" master br0
+ ip link set "$dev" up
+}
+
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+ip link add br0 type bridge
+ip addr add 192.168.1.254/24 dev br0
+
+port_add "$c1" "c1" 1
+port_add "$c2" "c2" 2
+port_add "$c3" "c3" 3
+port_add "$sender" "sender" 253
+
+ip link set br0 up
+
+modprobe -q br_netfilter
+
+sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1
+
+ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1
+ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2
+ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3
+
+nft -f /dev/stdin <<EOF
+table ip filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ ct state new counter
+ ip protocol icmp counter queue num 0 bypass
+ }
+}
+EOF
+./nf_queue -t 5 > /dev/null &
+
+busywait 5000 nf_queue_wait
+
+for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done &
+ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255
+
+read t < /proc/sys/kernel/tainted
+if [ "$t" -eq 0 ];then
+ echo PASS: kernel not tainted
+else
+ echo ERROR: kernel is tainted
+ exit 1
+fi
+
+exit 0