summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/appletalk/ddp.c4
-rw-r--r--net/bpf/test_run.c107
-rw-r--r--net/bridge/br.c9
-rw-r--r--net/bridge/br_device.c14
-rw-r--r--net/bridge/br_fdb.c4
-rw-r--r--net/bridge/br_forward.c7
-rw-r--r--net/bridge/br_if.c11
-rw-r--r--net/bridge/br_input.c17
-rw-r--r--net/bridge/br_mdb.c109
-rw-r--r--net/bridge/br_multicast.c1670
-rw-r--r--net/bridge/br_multicast_eht.c92
-rw-r--r--net/bridge/br_netlink.c41
-rw-r--r--net/bridge/br_private.h418
-rw-r--r--net/bridge/br_private_mcast_eht.h3
-rw-r--r--net/bridge/br_switchdev.c227
-rw-r--r--net/bridge/br_sysfs_br.c38
-rw-r--r--net/bridge/br_sysfs_if.c2
-rw-r--r--net/bridge/br_vlan.c89
-rw-r--r--net/bridge/br_vlan_options.c216
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c109
-rw-r--r--net/core/dev_ioctl.c153
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/filter.c10
-rw-r--r--net/core/lwtunnel.c2
-rw-r--r--net/core/rtnetlink.c9
-rw-r--r--net/core/scm.c4
-rw-r--r--net/core/selftests.c12
-rw-r--r--net/core/sock_map.c22
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/dsa/Kconfig12
-rw-r--r--net/dsa/Makefile3
-rw-r--r--net/dsa/dsa_priv.h47
-rw-r--r--net/dsa/port.c118
-rw-r--r--net/dsa/slave.c209
-rw-r--r--net/dsa/switch.c30
-rw-r--r--net/dsa/tag_8021q.c569
-rw-r--r--net/dsa/tag_ocelot_8021q.c4
-rw-r--r--net/dsa/tag_sja1105.c28
-rw-r--r--net/ethtool/ioctl.c136
-rw-r--r--net/ieee802154/socket.c4
-rw-r--r--net/ipv4/af_inet.c6
-rw-r--r--net/ipv4/devinet.c16
-rw-r--r--net/ipv4/fib_trie.c4
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/route.c21
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_fastopen.c17
-rw-r--r--net/ipv4/tcp_input.c40
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/udp_bpf.c1
-rw-r--r--net/ipv6/Kconfig11
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c44
-rw-r--r--net/ipv6/af_inet6.c10
-rw-r--r--net/ipv6/exthdrs.c61
-rw-r--r--net/ipv6/ioam6.c910
-rw-r--r--net/ipv6/ioam6_iptunnel.c274
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/route.c22
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/sysctl_net_ipv6.c19
-rw-r--r--net/netfilter/nf_flow_table_core.c2
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/openvswitch/actions.c6
-rw-r--r--net/openvswitch/datapath.c72
-rw-r--r--net/openvswitch/datapath.h20
-rw-r--r--net/qrtr/qrtr.c4
-rw-r--r--net/sched/act_api.c12
-rw-r--r--net/sched/cls_api.c15
-rw-r--r--net/sched/sch_api.c10
-rw-r--r--net/socket.c292
-rw-r--r--net/switchdev/switchdev.c260
-rw-r--r--net/tipc/socket.c36
-rw-r--r--net/unix/Makefile1
-rw-r--r--net/unix/af_unix.c85
-rw-r--r--net/unix/unix_bpf.c122
79 files changed, 5058 insertions, 1929 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 4cdf8416869d..55275ef9a31a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -67,7 +67,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
return 0;
size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN;
- array = kzalloc(size, GFP_KERNEL);
+ array = kzalloc(size, GFP_KERNEL_ACCOUNT);
if (array == NULL)
return -ENOBUFS;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 8ade5a4ceaf5..bf5736c1d458 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -666,7 +666,7 @@ static int atif_ioctl(int cmd, void __user *arg)
struct rtentry rtdef;
int add_route;
- if (copy_from_user(&atreq, arg, sizeof(atreq)))
+ if (get_user_ifreq(&atreq, NULL, arg))
return -EFAULT;
dev = __dev_get_by_name(&init_net, atreq.ifr_name);
@@ -865,7 +865,7 @@ static int atif_ioctl(int cmd, void __user *arg)
return 0;
}
- return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0;
+ return put_user_ifreq(&atreq, arg);
}
static int atrtr_ioctl_addrt(struct rtentry *rt)
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 1cc75c811e24..b488e2779718 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -15,6 +15,7 @@
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
+#include <net/xdp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
@@ -687,6 +688,64 @@ out:
return ret;
}
+static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp)
+{
+ unsigned int ingress_ifindex, rx_queue_index;
+ struct netdev_rx_queue *rxqueue;
+ struct net_device *device;
+
+ if (!xdp_md)
+ return 0;
+
+ if (xdp_md->egress_ifindex != 0)
+ return -EINVAL;
+
+ ingress_ifindex = xdp_md->ingress_ifindex;
+ rx_queue_index = xdp_md->rx_queue_index;
+
+ if (!ingress_ifindex && rx_queue_index)
+ return -EINVAL;
+
+ if (ingress_ifindex) {
+ device = dev_get_by_index(current->nsproxy->net_ns,
+ ingress_ifindex);
+ if (!device)
+ return -ENODEV;
+
+ if (rx_queue_index >= device->real_num_rx_queues)
+ goto free_dev;
+
+ rxqueue = __netif_get_rx_queue(device, rx_queue_index);
+
+ if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq))
+ goto free_dev;
+
+ xdp->rxq = &rxqueue->xdp_rxq;
+ /* The device is now tracked in the xdp->rxq for later
+ * dev_put()
+ */
+ }
+
+ xdp->data = xdp->data_meta + xdp_md->data;
+ return 0;
+
+free_dev:
+ dev_put(device);
+ return -EINVAL;
+}
+
+static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
+{
+ if (!xdp_md)
+ return;
+
+ xdp_md->data = xdp->data - xdp->data_meta;
+ xdp_md->data_end = xdp->data_end - xdp->data_meta;
+
+ if (xdp_md->ingress_ifindex)
+ dev_put(xdp->rxq->dev);
+}
+
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
@@ -697,38 +756,74 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
struct netdev_rx_queue *rxqueue;
struct xdp_buff xdp = {};
u32 retval, duration;
+ struct xdp_md *ctx;
u32 max_data_sz;
void *data;
- int ret;
+ int ret = -EINVAL;
if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
prog->expected_attach_type == BPF_XDP_CPUMAP)
return -EINVAL;
if (kattr->test.ctx_in || kattr->test.ctx_out)
return -EINVAL;
+ ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ if (ctx) {
+ /* There can't be user provided data before the meta data */
+ if (ctx->data_meta || ctx->data_end != size ||
+ ctx->data > ctx->data_end ||
+ unlikely(xdp_metalen_invalid(ctx->data)))
+ goto free_ctx;
+ /* Meta data is allocated from the headroom */
+ headroom -= ctx->data;
+ }
/* XDP have extra tailroom as (most) drivers use full page */
max_data_sz = 4096 - headroom - tailroom;
data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
- if (IS_ERR(data))
- return PTR_ERR(data);
+ if (IS_ERR(data)) {
+ ret = PTR_ERR(data);
+ goto free_ctx;
+ }
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
&rxqueue->xdp_rxq);
xdp_prepare_buff(&xdp, data, headroom, size, true);
+ ret = xdp_convert_md_to_buff(ctx, &xdp);
+ if (ret)
+ goto free_data;
+
bpf_prog_change_xdp(NULL, prog);
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
+ /* We convert the xdp_buff back to an xdp_md before checking the return
+ * code so the reference count of any held netdevice will be decremented
+ * even if the test run failed.
+ */
+ xdp_convert_buff_to_md(&xdp, ctx);
if (ret)
goto out;
- if (xdp.data != data + headroom || xdp.data_end != xdp.data + size)
- size = xdp.data_end - xdp.data;
- ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
+
+ if (xdp.data_meta != data + headroom ||
+ xdp.data_end != xdp.data_meta + size)
+ size = xdp.data_end - xdp.data_meta;
+
+ ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
+ duration);
+ if (!ret)
+ ret = bpf_ctx_finish(kattr, uattr, ctx,
+ sizeof(struct xdp_md));
+
out:
bpf_prog_change_xdp(prog, NULL);
+free_data:
kfree(data);
+free_ctx:
+ kfree(ctx);
return ret;
}
diff --git a/net/bridge/br.c b/net/bridge/br.c
index ef743f94254d..51f2e25c4cd6 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -214,17 +214,22 @@ static struct notifier_block br_switchdev_notifier = {
int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
struct netlink_ext_ack *extack)
{
+ int err = 0;
+
switch (opt) {
case BR_BOOLOPT_NO_LL_LEARN:
br_opt_toggle(br, BROPT_NO_LL_LEARN, on);
break;
+ case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
+ err = br_multicast_toggle_vlan_snooping(br, on, extack);
+ break;
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
break;
}
- return 0;
+ return err;
}
int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
@@ -232,6 +237,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
switch (opt) {
case BR_BOOLOPT_NO_LL_LEARN:
return br_opt_get(br, BROPT_NO_LL_LEARN);
+ case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
+ return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED);
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index e8b626cc6bfd..00daf35f54d5 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -27,11 +27,14 @@ EXPORT_SYMBOL_GPL(nf_br_ops);
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ struct net_bridge_mcast_port *pmctx_null = NULL;
struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_mcast *brmctx = &br->multicast_ctx;
struct net_bridge_fdb_entry *dst;
struct net_bridge_mdb_entry *mdst;
const struct nf_br_ops *nf_ops;
u8 state = BR_STATE_FORWARDING;
+ struct net_bridge_vlan *vlan;
const unsigned char *dest;
u16 vid = 0;
@@ -53,7 +56,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
- if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state))
+ if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid,
+ &state, &vlan))
goto out;
if (IS_ENABLED(CONFIG_INET) &&
@@ -82,15 +86,15 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
br_flood(br, skb, BR_PKT_MULTICAST, false, true);
goto out;
}
- if (br_multicast_rcv(br, NULL, skb, vid)) {
+ if (br_multicast_rcv(&brmctx, &pmctx_null, vlan, skb, vid)) {
kfree_skb(skb);
goto out;
}
- mdst = br_mdb_get(br, skb, vid);
+ mdst = br_mdb_get(brmctx, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
- br_multicast_querier_exists(br, eth_hdr(skb), mdst))
- br_multicast_flood(mdst, skb, false, true);
+ br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst))
+ br_multicast_flood(mdst, skb, brmctx, false, true);
else
br_flood(br, skb, BR_PKT_MULTICAST, false, true);
} else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) {
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index a16191dcaed1..5b345bb72078 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -760,6 +760,9 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
unsigned long action;
int err = 0;
+ if (!nb)
+ return 0;
+
if (!netif_is_bridge_master(br_dev))
return -EINVAL;
@@ -792,7 +795,6 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
return err;
}
-EXPORT_SYMBOL_GPL(br_fdb_replay);
static void fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb, int type,
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 07856362538f..bfdbaf3015b9 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -267,20 +267,19 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
/* called with rcu_read_lock */
void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb,
+ struct net_bridge_mcast *brmctx,
bool local_rcv, bool local_orig)
{
- struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
- struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
bool allow_mode_include = true;
struct hlist_node *rp;
- rp = br_multicast_get_first_rport_node(br, skb);
+ rp = br_multicast_get_first_rport_node(brmctx, skb);
if (mdst) {
p = rcu_dereference(mdst->ports);
- if (br_multicast_should_handle_mode(br, mdst->addr.proto) &&
+ if (br_multicast_should_handle_mode(brmctx, mdst->addr.proto) &&
br_multicast_is_star_g(&mdst->addr))
allow_mode_include = false;
} else {
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 6e4a32354a13..86f6d7e93ea8 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -643,10 +643,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
if (err)
goto err5;
- err = nbp_switchdev_mark_set(p);
- if (err)
- goto err6;
-
dev_disable_lro(dev);
list_add_rcu(&p->list, &br->port_list);
@@ -684,13 +680,13 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
*/
err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack);
if (err)
- goto err7;
+ goto err6;
}
err = nbp_vlan_init(p, extack);
if (err) {
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
- goto err7;
+ goto err6;
}
spin_lock_bh(&br->lock);
@@ -713,13 +709,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
return 0;
-err7:
+err6:
if (fdb_synced)
br_fdb_unsync_static(br, p);
list_del_rcu(&p->list);
br_fdb_delete_by_port(br, p, 0, 1);
nbp_update_port_count(br);
-err6:
netdev_upper_dev_unlink(dev, br->dev);
err5:
dev->priv_flags &= ~IFF_BRIDGE_PORT;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 1f506309efa8..8a0c0cc55cb4 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -69,8 +69,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
enum br_pkt_type pkt_type = BR_PKT_UNICAST;
struct net_bridge_fdb_entry *dst = NULL;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_mdb_entry *mdst;
bool local_rcv, mcast_hit = false;
+ struct net_bridge_mcast *brmctx;
+ struct net_bridge_vlan *vlan;
struct net_bridge *br;
u16 vid = 0;
u8 state;
@@ -78,9 +81,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (!p || p->state == BR_STATE_DISABLED)
goto drop;
+ brmctx = &p->br->multicast_ctx;
+ pmctx = &p->multicast_ctx;
state = p->state;
if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid,
- &state))
+ &state, &vlan))
goto out;
nbp_switchdev_frame_mark(p, skb);
@@ -98,7 +103,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
local_rcv = true;
} else {
pkt_type = BR_PKT_MULTICAST;
- if (br_multicast_rcv(br, p, skb, vid))
+ if (br_multicast_rcv(&brmctx, &pmctx, vlan, skb, vid))
goto drop;
}
}
@@ -128,11 +133,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
switch (pkt_type) {
case BR_PKT_MULTICAST:
- mdst = br_mdb_get(br, skb, vid);
+ mdst = br_mdb_get(brmctx, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
- br_multicast_querier_exists(br, eth_hdr(skb), mdst)) {
+ br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) {
if ((mdst && mdst->host_joined) ||
- br_multicast_is_router(br, skb)) {
+ br_multicast_is_router(brmctx, skb)) {
local_rcv = true;
br->dev->stats.multicast++;
}
@@ -162,7 +167,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (!mcast_hit)
br_flood(br, skb, pkt_type, local_rcv, false);
else
- br_multicast_flood(mdst, skb, local_rcv, false);
+ br_multicast_flood(mdst, skb, brmctx, local_rcv, false);
}
if (local_rcv)
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 17a720b4473f..73a8915b0148 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -16,29 +16,29 @@
#include "br_private.h"
-static bool br_rports_have_mc_router(struct net_bridge *br)
+static bool br_rports_have_mc_router(struct net_bridge_mcast *brmctx)
{
#if IS_ENABLED(CONFIG_IPV6)
- return !hlist_empty(&br->ip4_mc_router_list) ||
- !hlist_empty(&br->ip6_mc_router_list);
+ return !hlist_empty(&brmctx->ip4_mc_router_list) ||
+ !hlist_empty(&brmctx->ip6_mc_router_list);
#else
- return !hlist_empty(&br->ip4_mc_router_list);
+ return !hlist_empty(&brmctx->ip4_mc_router_list);
#endif
}
static bool
br_ip4_rports_get_timer(struct net_bridge_port *port, unsigned long *timer)
{
- *timer = br_timer_value(&port->ip4_mc_router_timer);
- return !hlist_unhashed(&port->ip4_rlist);
+ *timer = br_timer_value(&port->multicast_ctx.ip4_mc_router_timer);
+ return !hlist_unhashed(&port->multicast_ctx.ip4_rlist);
}
static bool
br_ip6_rports_get_timer(struct net_bridge_port *port, unsigned long *timer)
{
#if IS_ENABLED(CONFIG_IPV6)
- *timer = br_timer_value(&port->ip6_mc_router_timer);
- return !hlist_unhashed(&port->ip6_rlist);
+ *timer = br_timer_value(&port->multicast_ctx.ip6_mc_router_timer);
+ return !hlist_unhashed(&port->multicast_ctx.ip6_rlist);
#else
*timer = 0;
return false;
@@ -54,10 +54,10 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct nlattr *nest, *port_nest;
struct net_bridge_port *p;
- if (!br->multicast_router)
+ if (!br->multicast_ctx.multicast_router)
return 0;
- if (!br_rports_have_mc_router(br))
+ if (!br_rports_have_mc_router(&br->multicast_ctx))
return 0;
nest = nla_nest_start_noflag(skb, MDBA_ROUTER);
@@ -79,7 +79,7 @@ static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER,
max(ip4_timer, ip6_timer)) ||
nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE,
- p->multicast_router) ||
+ p->multicast_ctx.multicast_router) ||
(have_ip4_mc_rtr &&
nla_put_u32(skb, MDBA_ROUTER_PATTR_INET_TIMER,
ip4_timer)) ||
@@ -240,7 +240,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
switch (mp->addr.proto) {
case htons(ETH_P_IP):
- dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3);
+ dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_igmp_version == 3);
if (mp->addr.src.ip4) {
if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE,
mp->addr.src.ip4))
@@ -250,7 +250,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- dump_srcs_mode = !!(mp->br->multicast_mld_version == 2);
+ dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_mld_version == 2);
if (!ipv6_addr_any(&mp->addr.src.ip6)) {
if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE,
&mp->addr.src.ip6))
@@ -483,7 +483,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
/* MDBA_MDB_EATTR_SOURCE */
if (pg->key.addr.src.ip4)
nlmsg_size += nla_total_size(sizeof(__be32));
- if (pg->key.port->br->multicast_igmp_version == 2)
+ if (pg->key.port->br->multicast_ctx.multicast_igmp_version == 2)
goto out;
addr_size = sizeof(__be32);
break;
@@ -492,7 +492,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
/* MDBA_MDB_EATTR_SOURCE */
if (!ipv6_addr_any(&pg->key.addr.src.ip6))
nlmsg_size += nla_total_size(sizeof(struct in6_addr));
- if (pg->key.port->br->multicast_mld_version == 1)
+ if (pg->key.port->br->multicast_ctx.multicast_mld_version == 1)
goto out;
addr_size = sizeof(struct in6_addr);
break;
@@ -617,6 +617,9 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
ASSERT_RTNL();
+ if (!nb)
+ return 0;
+
if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
return -EINVAL;
@@ -686,7 +689,6 @@ out_free_mdb:
return err;
}
-EXPORT_SYMBOL_GPL(br_mdb_replay);
static void br_mdb_switchdev_host_port(struct net_device *dev,
struct net_device *lower_dev,
@@ -781,12 +783,12 @@ errout:
static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
struct net_device *dev,
- int ifindex, u32 pid,
+ int ifindex, u16 vid, u32 pid,
u32 seq, int type, unsigned int flags)
{
+ struct nlattr *nest, *port_nest;
struct br_port_msg *bpm;
struct nlmsghdr *nlh;
- struct nlattr *nest;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
if (!nlh)
@@ -800,8 +802,18 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
if (!nest)
goto cancel;
- if (nla_put_u32(skb, MDBA_ROUTER_PORT, ifindex))
+ port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT);
+ if (!port_nest)
+ goto end;
+ if (nla_put_nohdr(skb, sizeof(u32), &ifindex)) {
+ nla_nest_cancel(skb, port_nest);
goto end;
+ }
+ if (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid)) {
+ nla_nest_cancel(skb, port_nest);
+ goto end;
+ }
+ nla_nest_end(skb, port_nest);
nla_nest_end(skb, nest);
nlmsg_end(skb, nlh);
@@ -817,23 +829,28 @@ cancel:
static inline size_t rtnl_rtr_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct br_port_msg))
- + nla_total_size(sizeof(__u32));
+ + nla_total_size(sizeof(__u32))
+ + nla_total_size(sizeof(u16));
}
-void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
+void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx,
int type)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
int ifindex;
+ u16 vid;
- ifindex = port ? port->dev->ifindex : 0;
+ ifindex = pmctx ? pmctx->port->dev->ifindex : 0;
+ vid = pmctx && br_multicast_port_ctx_is_vlan(pmctx) ? pmctx->vlan->vid :
+ 0;
skb = nlmsg_new(rtnl_rtr_nlmsg_size(), GFP_ATOMIC);
if (!skb)
goto errout;
- err = nlmsg_populate_rtr_fill(skb, dev, ifindex, 0, 0, type, NTF_SELF);
+ err = nlmsg_populate_rtr_fill(skb, dev, ifindex, vid, 0, 0, type,
+ NTF_SELF);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -1004,14 +1021,47 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+static struct net_bridge_mcast *
+__br_mdb_choose_context(struct net_bridge *br,
+ const struct br_mdb_entry *entry,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_mcast *brmctx = NULL;
+ struct net_bridge_vlan *v;
+
+ if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ brmctx = &br->multicast_ctx;
+ goto out;
+ }
+
+ if (!entry->vid) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot add an entry without a vlan when vlan snooping is enabled");
+ goto out;
+ }
+
+ v = br_vlan_find(br_vlan_group(br), entry->vid);
+ if (!v) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan is not configured");
+ goto out;
+ }
+ if (br_multicast_ctx_vlan_global_disabled(&v->br_mcast_ctx)) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan's multicast processing is disabled");
+ goto out;
+ }
+ brmctx = &v->br_mcast_ctx;
+out:
+ return brmctx;
+}
+
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
struct br_mdb_entry *entry,
struct nlattr **mdb_attrs,
struct netlink_ext_ack *extack)
{
struct net_bridge_mdb_entry *mp, *star_mp;
- struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+ struct net_bridge_mcast *brmctx;
struct br_ip group, star_group;
unsigned long now = jiffies;
unsigned char flags = 0;
@@ -1020,6 +1070,10 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
__mdb_entry_to_br_ip(entry, &group, mdb_attrs);
+ brmctx = __br_mdb_choose_context(br, entry, extack);
+ if (!brmctx)
+ return -EINVAL;
+
/* host join errors which can happen before creating the group */
if (!port) {
/* don't allow any flags for host-joined groups */
@@ -1053,7 +1107,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return -EEXIST;
}
- br_multicast_host_join(mp, false);
+ br_multicast_host_join(brmctx, mp, false);
br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB);
return 0;
@@ -1084,14 +1138,15 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
}
rcu_assign_pointer(*pp, p);
if (entry->state == MDB_TEMPORARY)
- mod_timer(&p->timer, now + br->multicast_membership_interval);
+ mod_timer(&p->timer,
+ now + brmctx->multicast_membership_interval);
br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
/* if we are adding a new EXCLUDE port group (*,G) it needs to be also
* added to all S,G entries for proper replication, if we are adding
* a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be
* added to it for proper replication
*/
- if (br_multicast_should_handle_mode(br, group.proto)) {
+ if (br_multicast_should_handle_mode(brmctx, group.proto)) {
switch (filter_mode) {
case MCAST_EXCLUDE:
br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d0434dc8c03b..470f1ec3b579 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -49,30 +49,30 @@ static const struct rhashtable_params br_sg_port_rht_params = {
.automatic_shrinking = true,
};
-static void br_multicast_start_querier(struct net_bridge *br,
+static void br_multicast_start_querier(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query);
-static void br_ip4_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port);
-static void br_ip4_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx);
+static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
__be32 group,
__u16 vid,
const unsigned char *src);
static void br_multicast_port_group_rexmit(struct timer_list *t);
static void
-br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted);
-static void br_ip6_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port);
+br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted);
+static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx);
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct in6_addr *group,
__u16 vid, const unsigned char *src);
#endif
static struct net_bridge_port_group *
-__br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+__br_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
const unsigned char *src,
u8 filter_mode,
@@ -80,6 +80,7 @@ __br_multicast_add_group(struct net_bridge *br,
bool blocked);
static void br_multicast_find_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg);
+static void __br_multicast_stop(struct net_bridge_mcast *brmctx);
static struct net_bridge_port_group *
br_sg_port_find(struct net_bridge *br,
@@ -140,12 +141,14 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
}
#endif
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
struct sk_buff *skb, u16 vid)
{
+ struct net_bridge *br = brmctx->br;
struct br_ip ip;
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+ br_multicast_ctx_vlan_global_disabled(brmctx))
return NULL;
if (BR_INPUT_SKB_CB(skb)->igmp)
@@ -158,7 +161,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
switch (skb->protocol) {
case htons(ETH_P_IP):
ip.dst.ip4 = ip_hdr(skb)->daddr;
- if (br->multicast_igmp_version == 3) {
+ if (brmctx->multicast_igmp_version == 3) {
struct net_bridge_mdb_entry *mdb;
ip.src.ip4 = ip_hdr(skb)->saddr;
@@ -171,7 +174,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
ip.dst.ip6 = ipv6_hdr(skb)->daddr;
- if (br->multicast_mld_version == 2) {
+ if (brmctx->multicast_mld_version == 2) {
struct net_bridge_mdb_entry *mdb;
ip.src.ip6 = ipv6_hdr(skb)->saddr;
@@ -190,6 +193,62 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
return br_mdb_ip_get_rcu(br, &ip);
}
+/* IMPORTANT: this function must be used only when the contexts cannot be
+ * passed down (e.g. timer) and must be used for read-only purposes because
+ * the vlan snooping option can change, so it can return any context
+ * (non-vlan or vlan). Its initial intended purpose is to read timer values
+ * from the *current* context based on the option. At worst that could lead
+ * to inconsistent timers when the contexts are changed, i.e. src timer
+ * which needs to re-arm with a specific delay taken from the old context
+ */
+static struct net_bridge_mcast_port *
+br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg)
+{
+ struct net_bridge_mcast_port *pmctx = &pg->key.port->multicast_ctx;
+ struct net_bridge_vlan *vlan;
+
+ lockdep_assert_held_once(&pg->key.port->br->multicast_lock);
+
+ /* if vlan snooping is disabled use the port's multicast context */
+ if (!pg->key.addr.vid ||
+ !br_opt_get(pg->key.port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ goto out;
+
+ /* locking is tricky here, due to different rules for multicast and
+ * vlans we need to take rcu to find the vlan and make sure it has
+ * the BR_VLFLAG_MCAST_ENABLED flag set, it can only change under
+ * multicast_lock which must be already held here, so the vlan's pmctx
+ * can safely be used on return
+ */
+ rcu_read_lock();
+ vlan = br_vlan_find(nbp_vlan_group(pg->key.port), pg->key.addr.vid);
+ if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+ pmctx = &vlan->port_mcast_ctx;
+ else
+ pmctx = NULL;
+ rcu_read_unlock();
+out:
+ return pmctx;
+}
+
+/* when snooping we need to check if the contexts should be used
+ * in the following order:
+ * - if pmctx is non-NULL (port), check if it should be used
+ * - if pmctx is NULL (bridge), check if brmctx should be used
+ */
+static bool
+br_multicast_ctx_should_use(const struct net_bridge_mcast *brmctx,
+ const struct net_bridge_mcast_port *pmctx)
+{
+ if (!netif_running(brmctx->br->dev))
+ return false;
+
+ if (pmctx)
+ return !br_multicast_port_ctx_state_disabled(pmctx);
+ else
+ return !br_multicast_ctx_vlan_disabled(brmctx);
+}
+
static bool br_port_group_equal(struct net_bridge_port_group *p,
struct net_bridge_port *port,
const unsigned char *src)
@@ -203,20 +262,23 @@ static bool br_port_group_equal(struct net_bridge_port_group *p,
return ether_addr_equal(src, p->eth_addr);
}
-static void __fwd_add_star_excl(struct net_bridge_port_group *pg,
+static void __fwd_add_star_excl(struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg,
struct br_ip *sg_ip)
{
struct net_bridge_port_group_sg_key sg_key;
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_port_group *src_pg;
+ struct net_bridge_mcast *brmctx;
memset(&sg_key, 0, sizeof(sg_key));
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
sg_key.port = pg->key.port;
sg_key.addr = *sg_ip;
- if (br_sg_port_find(br, &sg_key))
+ if (br_sg_port_find(brmctx->br, &sg_key))
return;
- src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr,
+ src_pg = __br_multicast_add_group(brmctx, pmctx,
+ sg_ip, pg->eth_addr,
MCAST_INCLUDE, false, false);
if (IS_ERR_OR_NULL(src_pg) ||
src_pg->rt_protocol != RTPROT_KERNEL)
@@ -256,6 +318,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
{
struct net_bridge *br = pg->key.port->br;
struct net_bridge_port_group *pg_lst;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_mdb_entry *mp;
struct br_ip sg_ip;
@@ -265,9 +328,13 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
mp = br_mdb_ip_get(br, &pg->key.addr);
if (!mp)
return;
+ pmctx = br_multicast_pg_to_port_ctx(pg);
+ if (!pmctx)
+ return;
memset(&sg_ip, 0, sizeof(sg_ip));
sg_ip = pg->key.addr;
+
for (pg_lst = mlock_dereference(mp->ports, br);
pg_lst;
pg_lst = mlock_dereference(pg_lst->next, br)) {
@@ -284,7 +351,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
__fwd_del_star_excl(pg, &sg_ip);
break;
case MCAST_EXCLUDE:
- __fwd_add_star_excl(pg, &sg_ip);
+ __fwd_add_star_excl(pmctx, pg, &sg_ip);
break;
}
}
@@ -377,7 +444,9 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
{
struct net_bridge_port_group_sg_key sg_key;
struct net_bridge *br = star_mp->br;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_port_group *pg;
+ struct net_bridge_mcast *brmctx;
if (WARN_ON(br_multicast_is_star_g(&sg->key.addr)))
return;
@@ -400,7 +469,12 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
if (br_sg_port_find(br, &sg_key))
continue;
- src_pg = __br_multicast_add_group(br, pg->key.port,
+ pmctx = br_multicast_pg_to_port_ctx(pg);
+ if (!pmctx)
+ continue;
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+
+ src_pg = __br_multicast_add_group(brmctx, pmctx,
&sg->key.addr,
sg->eth_addr,
MCAST_INCLUDE, false, false);
@@ -414,16 +488,23 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
{
struct net_bridge_mdb_entry *star_mp;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_port_group *sg;
+ struct net_bridge_mcast *brmctx;
struct br_ip sg_ip;
if (src->flags & BR_SGRP_F_INSTALLED)
return;
memset(&sg_ip, 0, sizeof(sg_ip));
+ pmctx = br_multicast_pg_to_port_ctx(src->pg);
+ if (!pmctx)
+ return;
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
sg_ip = src->pg->key.addr;
sg_ip.src = src->addr.src;
- sg = __br_multicast_add_group(src->br, src->pg->key.port, &sg_ip,
+
+ sg = __br_multicast_add_group(brmctx, pmctx, &sg_ip,
src->pg->eth_addr, MCAST_INCLUDE, false,
!timer_pending(&src->timer));
if (IS_ERR_OR_NULL(sg))
@@ -692,7 +773,28 @@ static void br_multicast_gc(struct hlist_head *head)
}
}
-static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
+static void __br_multicast_query_handle_vlan(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct sk_buff *skb)
+{
+ struct net_bridge_vlan *vlan = NULL;
+
+ if (pmctx && br_multicast_port_ctx_is_vlan(pmctx))
+ vlan = pmctx->vlan;
+ else if (br_multicast_ctx_is_vlan(brmctx))
+ vlan = brmctx->vlan;
+
+ if (vlan && !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED)) {
+ u16 vlan_proto;
+
+ if (br_vlan_get_proto(brmctx->br->dev, &vlan_proto) != 0)
+ return;
+ __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan->vid);
+ }
+}
+
+static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
__be32 ip_dst, __be32 group,
bool with_srcs, bool over_lmqt,
@@ -714,11 +816,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
u16 lmqt_srcs = 0;
igmp_hdr_size = sizeof(*ih);
- if (br->multicast_igmp_version == 3) {
+ if (brmctx->multicast_igmp_version == 3) {
igmp_hdr_size = sizeof(*ihv3);
if (pg && with_srcs) {
- lmqt = now + (br->multicast_last_member_interval *
- br->multicast_last_member_count);
+ lmqt = now + (brmctx->multicast_last_member_interval *
+ brmctx->multicast_last_member_count);
hlist_for_each_entry(ent, &pg->src_list, node) {
if (over_lmqt == time_after(ent->timer.expires,
lmqt) &&
@@ -734,19 +836,20 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size;
if ((p && pkt_size > p->dev->mtu) ||
- pkt_size > br->dev->mtu)
+ pkt_size > brmctx->br->dev->mtu)
return NULL;
- skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
+ skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size);
if (!skb)
goto out;
+ __br_multicast_query_handle_vlan(brmctx, pmctx, skb);
skb->protocol = htons(ETH_P_IP);
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- ether_addr_copy(eth->h_source, br->dev->dev_addr);
+ ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr);
ip_eth_mc_map(ip_dst, eth->h_dest);
eth->h_proto = htons(ETH_P_IP);
skb_put(skb, sizeof(*eth));
@@ -762,8 +865,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
iph->frag_off = htons(IP_DF);
iph->ttl = 1;
iph->protocol = IPPROTO_IGMP;
- iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
- inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
+ iph->saddr = br_opt_get(brmctx->br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
+ inet_select_addr(brmctx->br->dev, 0, RT_SCOPE_LINK) : 0;
iph->daddr = ip_dst;
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
@@ -775,12 +878,12 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
skb_set_transport_header(skb, skb->len);
*igmp_type = IGMP_HOST_MEMBERSHIP_QUERY;
- switch (br->multicast_igmp_version) {
+ switch (brmctx->multicast_igmp_version) {
case 2:
ih = igmp_hdr(skb);
ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
- ih->code = (group ? br->multicast_last_member_interval :
- br->multicast_query_response_interval) /
+ ih->code = (group ? brmctx->multicast_last_member_interval :
+ brmctx->multicast_query_response_interval) /
(HZ / IGMP_TIMER_SCALE);
ih->group = group;
ih->csum = 0;
@@ -790,11 +893,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
case 3:
ihv3 = igmpv3_query_hdr(skb);
ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
- ihv3->code = (group ? br->multicast_last_member_interval :
- br->multicast_query_response_interval) /
+ ihv3->code = (group ? brmctx->multicast_last_member_interval :
+ brmctx->multicast_query_response_interval) /
(HZ / IGMP_TIMER_SCALE);
ihv3->group = group;
- ihv3->qqic = br->multicast_query_interval / HZ;
+ ihv3->qqic = brmctx->multicast_query_interval / HZ;
ihv3->nsrcs = htons(lmqt_srcs);
ihv3->resv = 0;
ihv3->suppress = sflag;
@@ -837,7 +940,8 @@ out:
}
#if IS_ENABLED(CONFIG_IPV6)
-static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
+static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
const struct in6_addr *ip6_dst,
const struct in6_addr *group,
@@ -862,11 +966,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
u8 *hopopt;
mld_hdr_size = sizeof(*mldq);
- if (br->multicast_mld_version == 2) {
+ if (brmctx->multicast_mld_version == 2) {
mld_hdr_size = sizeof(*mld2q);
if (pg && with_srcs) {
- llqt = now + (br->multicast_last_member_interval *
- br->multicast_last_member_count);
+ llqt = now + (brmctx->multicast_last_member_interval *
+ brmctx->multicast_last_member_count);
hlist_for_each_entry(ent, &pg->src_list, node) {
if (over_llqt == time_after(ent->timer.expires,
llqt) &&
@@ -882,20 +986,21 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size;
if ((p && pkt_size > p->dev->mtu) ||
- pkt_size > br->dev->mtu)
+ pkt_size > brmctx->br->dev->mtu)
return NULL;
- skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
+ skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size);
if (!skb)
goto out;
+ __br_multicast_query_handle_vlan(brmctx, pmctx, skb);
skb->protocol = htons(ETH_P_IPV6);
/* Ethernet header */
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- ether_addr_copy(eth->h_source, br->dev->dev_addr);
+ ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr);
eth->h_proto = htons(ETH_P_IPV6);
skb_put(skb, sizeof(*eth));
@@ -908,14 +1013,14 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
ip6h->nexthdr = IPPROTO_HOPOPTS;
ip6h->hop_limit = 1;
ip6h->daddr = *ip6_dst;
- if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
- &ip6h->saddr)) {
+ if (ipv6_dev_get_saddr(dev_net(brmctx->br->dev), brmctx->br->dev,
+ &ip6h->daddr, 0, &ip6h->saddr)) {
kfree_skb(skb);
- br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, false);
+ br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, false);
return NULL;
}
- br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
+ br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, true);
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
hopopt = (u8 *)(ip6h + 1);
@@ -933,10 +1038,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
/* ICMPv6 */
skb_set_transport_header(skb, skb->len);
interval = ipv6_addr_any(group) ?
- br->multicast_query_response_interval :
- br->multicast_last_member_interval;
+ brmctx->multicast_query_response_interval :
+ brmctx->multicast_last_member_interval;
*igmp_type = ICMPV6_MGM_QUERY;
- switch (br->multicast_mld_version) {
+ switch (brmctx->multicast_mld_version) {
case 1:
mldq = (struct mld_msg *)icmp6_hdr(skb);
mldq->mld_type = ICMPV6_MGM_QUERY;
@@ -959,7 +1064,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
mld2q->mld2q_suppress = sflag;
mld2q->mld2q_qrv = 2;
mld2q->mld2q_nsrcs = htons(llqt_srcs);
- mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
+ mld2q->mld2q_qqic = brmctx->multicast_query_interval / HZ;
mld2q->mld2q_mca = *group;
csum = &mld2q->mld2q_cksum;
csum_start = (void *)mld2q;
@@ -1000,7 +1105,8 @@ out:
}
#endif
-static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
+static struct sk_buff *br_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
struct br_ip *ip_dst,
struct br_ip *group,
@@ -1013,7 +1119,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
switch (group->proto) {
case htons(ETH_P_IP):
ip4_dst = ip_dst ? ip_dst->dst.ip4 : htonl(INADDR_ALLHOSTS_GROUP);
- return br_ip4_multicast_alloc_query(br, pg,
+ return br_ip4_multicast_alloc_query(brmctx, pmctx, pg,
ip4_dst, group->dst.ip4,
with_srcs, over_lmqt,
sflag, igmp_type,
@@ -1028,7 +1134,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0,
htonl(1));
- return br_ip6_multicast_alloc_query(br, pg,
+ return br_ip6_multicast_alloc_query(brmctx, pmctx, pg,
&ip6_dst, &group->dst.ip6,
with_srcs, over_lmqt,
sflag, igmp_type,
@@ -1206,7 +1312,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
return p;
}
-void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
+void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_mdb_entry *mp, bool notify)
{
if (!mp->host_joined) {
mp->host_joined = true;
@@ -1219,7 +1326,7 @@ void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
if (br_group_is_l2(&mp->addr))
return;
- mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
+ mod_timer(&mp->timer, jiffies + brmctx->multicast_membership_interval);
}
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
@@ -1235,8 +1342,8 @@ void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
}
static struct net_bridge_port_group *
-__br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+__br_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
const unsigned char *src,
u8 filter_mode,
@@ -1248,29 +1355,28 @@ __br_multicast_add_group(struct net_bridge *br,
struct net_bridge_mdb_entry *mp;
unsigned long now = jiffies;
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
- mp = br_multicast_new_group(br, group);
+ mp = br_multicast_new_group(brmctx->br, group);
if (IS_ERR(mp))
return ERR_CAST(mp);
- if (!port) {
- br_multicast_host_join(mp, true);
+ if (!pmctx) {
+ br_multicast_host_join(brmctx, mp, true);
goto out;
}
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
- if (br_port_group_equal(p, port, src))
+ if (br_port_group_equal(p, pmctx->port, src))
goto found;
- if ((unsigned long)p->key.port < (unsigned long)port)
+ if ((unsigned long)p->key.port < (unsigned long)pmctx->port)
break;
}
- p = br_multicast_new_port_group(port, group, *pp, 0, src,
+ p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src,
filter_mode, RTPROT_KERNEL);
if (unlikely(!p)) {
p = ERR_PTR(-ENOMEM);
@@ -1279,18 +1385,19 @@ __br_multicast_add_group(struct net_bridge *br,
rcu_assign_pointer(*pp, p);
if (blocked)
p->flags |= MDB_PG_FLAGS_BLOCKED;
- br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
+ br_mdb_notify(brmctx->br->dev, mp, p, RTM_NEWMDB);
found:
if (igmpv2_mldv1)
- mod_timer(&p->timer, now + br->multicast_membership_interval);
+ mod_timer(&p->timer,
+ now + brmctx->multicast_membership_interval);
out:
return p;
}
-static int br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
const unsigned char *src,
u8 filter_mode,
@@ -1299,18 +1406,18 @@ static int br_multicast_add_group(struct net_bridge *br,
struct net_bridge_port_group *pg;
int err;
- spin_lock(&br->multicast_lock);
- pg = __br_multicast_add_group(br, port, group, src, filter_mode,
+ spin_lock(&brmctx->br->multicast_lock);
+ pg = __br_multicast_add_group(brmctx, pmctx, group, src, filter_mode,
igmpv2_mldv1, false);
/* NULL is considered valid for host joined groups */
err = PTR_ERR_OR_ZERO(pg);
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
return err;
}
-static int br_ip4_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip4_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
__be32 group,
__u16 vid,
const unsigned char *src,
@@ -1328,13 +1435,13 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
br_group.vid = vid;
filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE;
- return br_multicast_add_group(br, port, &br_group, src, filter_mode,
- igmpv2);
+ return br_multicast_add_group(brmctx, pmctx, &br_group, src,
+ filter_mode, igmpv2);
}
#if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip6_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct in6_addr *group,
__u16 vid,
const unsigned char *src,
@@ -1352,8 +1459,8 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
br_group.vid = vid;
filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE;
- return br_multicast_add_group(br, port, &br_group, src, filter_mode,
- mldv1);
+ return br_multicast_add_group(brmctx, pmctx, &br_group, src,
+ filter_mode, mldv1);
}
#endif
@@ -1366,52 +1473,54 @@ static bool br_multicast_rport_del(struct hlist_node *rlist)
return true;
}
-static bool br_ip4_multicast_rport_del(struct net_bridge_port *p)
+static bool br_ip4_multicast_rport_del(struct net_bridge_mcast_port *pmctx)
{
- return br_multicast_rport_del(&p->ip4_rlist);
+ return br_multicast_rport_del(&pmctx->ip4_rlist);
}
-static bool br_ip6_multicast_rport_del(struct net_bridge_port *p)
+static bool br_ip6_multicast_rport_del(struct net_bridge_mcast_port *pmctx)
{
#if IS_ENABLED(CONFIG_IPV6)
- return br_multicast_rport_del(&p->ip6_rlist);
+ return br_multicast_rport_del(&pmctx->ip6_rlist);
#else
return false;
#endif
}
-static void br_multicast_router_expired(struct net_bridge_port *port,
+static void br_multicast_router_expired(struct net_bridge_mcast_port *pmctx,
struct timer_list *t,
struct hlist_node *rlist)
{
- struct net_bridge *br = port->br;
+ struct net_bridge *br = pmctx->port->br;
bool del;
spin_lock(&br->multicast_lock);
- if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
- port->multicast_router == MDB_RTR_TYPE_PERM ||
+ if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ pmctx->multicast_router == MDB_RTR_TYPE_PERM ||
timer_pending(t))
goto out;
del = br_multicast_rport_del(rlist);
- br_multicast_rport_del_notify(port, del);
+ br_multicast_rport_del_notify(pmctx, del);
out:
spin_unlock(&br->multicast_lock);
}
static void br_ip4_multicast_router_expired(struct timer_list *t)
{
- struct net_bridge_port *port = from_timer(port, t, ip4_mc_router_timer);
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip4_mc_router_timer);
- br_multicast_router_expired(port, t, &port->ip4_rlist);
+ br_multicast_router_expired(pmctx, t, &pmctx->ip4_rlist);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_router_expired(struct timer_list *t)
{
- struct net_bridge_port *port = from_timer(port, t, ip6_mc_router_timer);
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip6_mc_router_timer);
- br_multicast_router_expired(port, t, &port->ip6_rlist);
+ br_multicast_router_expired(pmctx, t, &pmctx->ip6_rlist);
}
#endif
@@ -1428,80 +1537,86 @@ static void br_mc_router_state_change(struct net_bridge *p,
switchdev_port_attr_set(p->dev, &attr, NULL);
}
-static void br_multicast_local_router_expired(struct net_bridge *br,
+static void br_multicast_local_router_expired(struct net_bridge_mcast *brmctx,
struct timer_list *timer)
{
- spin_lock(&br->multicast_lock);
- if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
- br->multicast_router == MDB_RTR_TYPE_PERM ||
- br_ip4_multicast_is_router(br) ||
- br_ip6_multicast_is_router(br))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (brmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ brmctx->multicast_router == MDB_RTR_TYPE_PERM ||
+ br_ip4_multicast_is_router(brmctx) ||
+ br_ip6_multicast_is_router(brmctx))
goto out;
- br_mc_router_state_change(br, false);
+ br_mc_router_state_change(brmctx->br, false);
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
static void br_ip4_multicast_local_router_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip4_mc_router_timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip4_mc_router_timer);
- br_multicast_local_router_expired(br, t);
+ br_multicast_local_router_expired(brmctx, t);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_local_router_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip6_mc_router_timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip6_mc_router_timer);
- br_multicast_local_router_expired(br, t);
+ br_multicast_local_router_expired(brmctx, t);
}
#endif
-static void br_multicast_querier_expired(struct net_bridge *br,
+static void br_multicast_querier_expired(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query)
{
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!netif_running(brmctx->br->dev) ||
+ br_multicast_ctx_vlan_global_disabled(brmctx) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
goto out;
- br_multicast_start_querier(br, query);
+ br_multicast_start_querier(brmctx, query);
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
static void br_ip4_multicast_querier_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip4_other_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip4_other_query.timer);
- br_multicast_querier_expired(br, &br->ip4_own_query);
+ br_multicast_querier_expired(brmctx, &brmctx->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_querier_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip6_other_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip6_other_query.timer);
- br_multicast_querier_expired(br, &br->ip6_own_query);
+ br_multicast_querier_expired(brmctx, &brmctx->ip6_own_query);
}
#endif
-static void br_multicast_select_own_querier(struct net_bridge *br,
+static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx,
struct br_ip *ip,
struct sk_buff *skb)
{
if (ip->proto == htons(ETH_P_IP))
- br->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr;
+ brmctx->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr;
#if IS_ENABLED(CONFIG_IPV6)
else
- br->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr;
+ brmctx->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr;
#endif
}
-static void __br_multicast_send_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static void __br_multicast_send_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
struct br_ip *ip_dst,
struct br_ip *group,
@@ -1513,19 +1628,22 @@ static void __br_multicast_send_query(struct net_bridge *br,
struct sk_buff *skb;
u8 igmp_type;
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
+ return;
+
again_under_lmqt:
- skb = br_multicast_alloc_query(br, pg, ip_dst, group, with_srcs,
- over_lmqt, sflag, &igmp_type,
+ skb = br_multicast_alloc_query(brmctx, pmctx, pg, ip_dst, group,
+ with_srcs, over_lmqt, sflag, &igmp_type,
need_rexmit);
if (!skb)
return;
- if (port) {
- skb->dev = port->dev;
- br_multicast_count(br, port, skb, igmp_type,
+ if (pmctx) {
+ skb->dev = pmctx->port->dev;
+ br_multicast_count(brmctx->br, pmctx->port, skb, igmp_type,
BR_MCAST_DIR_TX);
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
- dev_net(port->dev), NULL, skb, NULL, skb->dev,
+ dev_net(pmctx->port->dev), NULL, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
if (over_lmqt && with_srcs && sflag) {
@@ -1533,35 +1651,35 @@ again_under_lmqt:
goto again_under_lmqt;
}
} else {
- br_multicast_select_own_querier(br, group, skb);
- br_multicast_count(br, port, skb, igmp_type,
+ br_multicast_select_own_querier(brmctx, group, skb);
+ br_multicast_count(brmctx->br, NULL, skb, igmp_type,
BR_MCAST_DIR_RX);
netif_rx(skb);
}
}
-static void br_multicast_send_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_multicast_send_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct bridge_mcast_own_query *own_query)
{
struct bridge_mcast_other_query *other_query = NULL;
struct br_ip br_group;
unsigned long time;
- if (!netif_running(br->dev) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
- !br_opt_get(br, BROPT_MULTICAST_QUERIER))
+ if (!br_multicast_ctx_should_use(brmctx, pmctx) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER))
return;
memset(&br_group.dst, 0, sizeof(br_group.dst));
- if (port ? (own_query == &port->ip4_own_query) :
- (own_query == &br->ip4_own_query)) {
- other_query = &br->ip4_other_query;
+ if (pmctx ? (own_query == &pmctx->ip4_own_query) :
+ (own_query == &brmctx->ip4_own_query)) {
+ other_query = &brmctx->ip4_other_query;
br_group.proto = htons(ETH_P_IP);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- other_query = &br->ip6_other_query;
+ other_query = &brmctx->ip6_other_query;
br_group.proto = htons(ETH_P_IPV6);
#endif
}
@@ -1569,31 +1687,32 @@ static void br_multicast_send_query(struct net_bridge *br,
if (!other_query || timer_pending(&other_query->timer))
return;
- __br_multicast_send_query(br, port, NULL, NULL, &br_group, false, 0,
- NULL);
+ __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &br_group, false,
+ 0, NULL);
time = jiffies;
- time += own_query->startup_sent < br->multicast_startup_query_count ?
- br->multicast_startup_query_interval :
- br->multicast_query_interval;
+ time += own_query->startup_sent < brmctx->multicast_startup_query_count ?
+ brmctx->multicast_startup_query_interval :
+ brmctx->multicast_query_interval;
mod_timer(&own_query->timer, time);
}
static void
-br_multicast_port_query_expired(struct net_bridge_port *port,
+br_multicast_port_query_expired(struct net_bridge_mcast_port *pmctx,
struct bridge_mcast_own_query *query)
{
- struct net_bridge *br = port->br;
+ struct net_bridge *br = pmctx->port->br;
+ struct net_bridge_mcast *brmctx;
spin_lock(&br->multicast_lock);
- if (port->state == BR_STATE_DISABLED ||
- port->state == BR_STATE_BLOCKING)
+ if (br_multicast_port_ctx_state_stopped(pmctx))
goto out;
- if (query->startup_sent < br->multicast_startup_query_count)
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+ if (query->startup_sent < brmctx->multicast_startup_query_count)
query->startup_sent++;
- br_multicast_send_query(port->br, port, query);
+ br_multicast_send_query(brmctx, pmctx, query);
out:
spin_unlock(&br->multicast_lock);
@@ -1601,17 +1720,19 @@ out:
static void br_ip4_multicast_port_query_expired(struct timer_list *t)
{
- struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer);
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip4_own_query.timer);
- br_multicast_port_query_expired(port, &port->ip4_own_query);
+ br_multicast_port_query_expired(pmctx, &pmctx->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_port_query_expired(struct timer_list *t)
{
- struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer);
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip6_own_query.timer);
- br_multicast_port_query_expired(port, &port->ip6_own_query);
+ br_multicast_port_query_expired(pmctx, &pmctx->ip6_own_query);
}
#endif
@@ -1620,6 +1741,8 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer);
struct bridge_mcast_other_query *other_query = NULL;
struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_mcast_port *pmctx;
+ struct net_bridge_mcast *brmctx;
bool need_rexmit = false;
spin_lock(&br->multicast_lock);
@@ -1628,11 +1751,15 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
!br_opt_get(br, BROPT_MULTICAST_QUERIER))
goto out;
+ pmctx = br_multicast_pg_to_port_ctx(pg);
+ if (!pmctx)
+ goto out;
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
if (pg->key.addr.proto == htons(ETH_P_IP))
- other_query = &br->ip4_other_query;
+ other_query = &brmctx->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
- other_query = &br->ip6_other_query;
+ other_query = &brmctx->ip6_other_query;
#endif
if (!other_query || timer_pending(&other_query->timer))
@@ -1640,15 +1767,15 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
if (pg->grp_query_rexmit_cnt) {
pg->grp_query_rexmit_cnt--;
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, false, 1, NULL);
}
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, true, 0, &need_rexmit);
if (pg->grp_query_rexmit_cnt || need_rexmit)
mod_timer(&pg->rexmit_timer, jiffies +
- br->multicast_last_member_interval);
+ brmctx->multicast_last_member_interval);
out:
spin_unlock(&br->multicast_lock);
}
@@ -1666,23 +1793,40 @@ static int br_mc_disabled_update(struct net_device *dev, bool value,
return switchdev_port_attr_set(dev, &attr, extack);
}
-int br_multicast_add_port(struct net_bridge_port *port)
+void br_multicast_port_ctx_init(struct net_bridge_port *port,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast_port *pmctx)
{
- int err;
-
- port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
-
- timer_setup(&port->ip4_mc_router_timer,
+ pmctx->port = port;
+ pmctx->vlan = vlan;
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ timer_setup(&pmctx->ip4_mc_router_timer,
br_ip4_multicast_router_expired, 0);
- timer_setup(&port->ip4_own_query.timer,
+ timer_setup(&pmctx->ip4_own_query.timer,
br_ip4_multicast_port_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
- timer_setup(&port->ip6_mc_router_timer,
+ timer_setup(&pmctx->ip6_mc_router_timer,
br_ip6_multicast_router_expired, 0);
- timer_setup(&port->ip6_own_query.timer,
+ timer_setup(&pmctx->ip6_own_query.timer,
br_ip6_multicast_port_query_expired, 0);
#endif
+}
+
+void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ del_timer_sync(&pmctx->ip6_mc_router_timer);
+#endif
+ del_timer_sync(&pmctx->ip4_mc_router_timer);
+}
+
+int br_multicast_add_port(struct net_bridge_port *port)
+{
+ int err;
+
+ port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
+ br_multicast_port_ctx_init(port, NULL, &port->multicast_ctx);
+
err = br_mc_disabled_update(port->dev,
br_opt_get(port->br,
BROPT_MULTICAST_ENABLED),
@@ -1711,10 +1855,7 @@ void br_multicast_del_port(struct net_bridge_port *port)
hlist_move_list(&br->mcast_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
br_multicast_gc(&deleted_head);
- del_timer_sync(&port->ip4_mc_router_timer);
-#if IS_ENABLED(CONFIG_IPV6)
- del_timer_sync(&port->ip6_mc_router_timer);
-#endif
+ br_multicast_port_ctx_deinit(&port->multicast_ctx);
free_percpu(port->mcast_stats);
}
@@ -1727,20 +1868,23 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query)
mod_timer(&query->timer, jiffies);
}
-static void __br_multicast_enable_port(struct net_bridge_port *port)
+static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
{
- struct net_bridge *br = port->br;
+ struct net_bridge *br = pmctx->port->br;
+ struct net_bridge_mcast *brmctx;
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev))
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+ !netif_running(br->dev))
return;
- br_multicast_enable(&port->ip4_own_query);
+ br_multicast_enable(&pmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- br_multicast_enable(&port->ip6_own_query);
+ br_multicast_enable(&pmctx->ip6_own_query);
#endif
- if (port->multicast_router == MDB_RTR_TYPE_PERM) {
- br_ip4_multicast_add_router(br, port);
- br_ip6_multicast_add_router(br, port);
+ if (pmctx->multicast_router == MDB_RTR_TYPE_PERM) {
+ br_ip4_multicast_add_router(brmctx, pmctx);
+ br_ip6_multicast_add_router(brmctx, pmctx);
}
}
@@ -1748,33 +1892,39 @@ void br_multicast_enable_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
- spin_lock(&br->multicast_lock);
- __br_multicast_enable_port(port);
- spin_unlock(&br->multicast_lock);
+ spin_lock_bh(&br->multicast_lock);
+ __br_multicast_enable_port_ctx(&port->multicast_ctx);
+ spin_unlock_bh(&br->multicast_lock);
}
-void br_multicast_disable_port(struct net_bridge_port *port)
+static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx)
{
- struct net_bridge *br = port->br;
struct net_bridge_port_group *pg;
struct hlist_node *n;
bool del = false;
- spin_lock(&br->multicast_lock);
- hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
- if (!(pg->flags & MDB_PG_FLAGS_PERMANENT))
- br_multicast_find_del_pg(br, pg);
+ hlist_for_each_entry_safe(pg, n, &pmctx->port->mglist, mglist)
+ if (!(pg->flags & MDB_PG_FLAGS_PERMANENT) &&
+ (!br_multicast_port_ctx_is_vlan(pmctx) ||
+ pg->key.addr.vid == pmctx->vlan->vid))
+ br_multicast_find_del_pg(pmctx->port->br, pg);
- del |= br_ip4_multicast_rport_del(port);
- del_timer(&port->ip4_mc_router_timer);
- del_timer(&port->ip4_own_query.timer);
- del |= br_ip6_multicast_rport_del(port);
+ del |= br_ip4_multicast_rport_del(pmctx);
+ del_timer(&pmctx->ip4_mc_router_timer);
+ del_timer(&pmctx->ip4_own_query.timer);
+ del |= br_ip6_multicast_rport_del(pmctx);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&port->ip6_mc_router_timer);
- del_timer(&port->ip6_own_query.timer);
+ del_timer(&pmctx->ip6_mc_router_timer);
+ del_timer(&pmctx->ip6_own_query.timer);
#endif
- br_multicast_rport_del_notify(port, del);
- spin_unlock(&br->multicast_lock);
+ br_multicast_rport_del_notify(pmctx, del);
+}
+
+void br_multicast_disable_port(struct net_bridge_port *port)
+{
+ spin_lock_bh(&port->br->multicast_lock);
+ __br_multicast_disable_port_ctx(&port->multicast_ctx);
+ spin_unlock_bh(&port->br->multicast_lock);
}
static int __grp_src_delete_marked(struct net_bridge_port_group *pg)
@@ -1799,31 +1949,33 @@ static void __grp_src_mod_timer(struct net_bridge_group_src *src,
br_multicast_fwd_src_handle(src);
}
-static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
+static void __grp_src_query_marked_and_rexmit(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg)
{
struct bridge_mcast_other_query *other_query = NULL;
- struct net_bridge *br = pg->key.port->br;
- u32 lmqc = br->multicast_last_member_count;
+ u32 lmqc = brmctx->multicast_last_member_count;
unsigned long lmqt, lmi, now = jiffies;
struct net_bridge_group_src *ent;
- if (!netif_running(br->dev) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!netif_running(brmctx->br->dev) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
return;
if (pg->key.addr.proto == htons(ETH_P_IP))
- other_query = &br->ip4_other_query;
+ other_query = &brmctx->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
- other_query = &br->ip6_other_query;
+ other_query = &brmctx->ip6_other_query;
#endif
- lmqt = now + br_multicast_lmqt(br);
+ lmqt = now + br_multicast_lmqt(brmctx);
hlist_for_each_entry(ent, &pg->src_list, node) {
if (ent->flags & BR_SGRP_F_SEND) {
ent->flags &= ~BR_SGRP_F_SEND;
if (ent->timer.expires > lmqt) {
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER) &&
+ if (br_opt_get(brmctx->br,
+ BROPT_MULTICAST_QUERIER) &&
other_query &&
!timer_pending(&other_query->timer))
ent->src_query_rexmit_cnt = lmqc;
@@ -1832,41 +1984,42 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
}
}
- if (!br_opt_get(br, BROPT_MULTICAST_QUERIER) ||
+ if (!br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER) ||
!other_query || timer_pending(&other_query->timer))
return;
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, true, 1, NULL);
- lmi = now + br->multicast_last_member_interval;
+ lmi = now + brmctx->multicast_last_member_interval;
if (!timer_pending(&pg->rexmit_timer) ||
time_after(pg->rexmit_timer.expires, lmi))
mod_timer(&pg->rexmit_timer, lmi);
}
-static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
+static void __grp_send_query_and_rexmit(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg)
{
struct bridge_mcast_other_query *other_query = NULL;
- struct net_bridge *br = pg->key.port->br;
unsigned long now = jiffies, lmi;
- if (!netif_running(br->dev) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!netif_running(brmctx->br->dev) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
return;
if (pg->key.addr.proto == htons(ETH_P_IP))
- other_query = &br->ip4_other_query;
+ other_query = &brmctx->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
- other_query = &br->ip6_other_query;
+ other_query = &brmctx->ip6_other_query;
#endif
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER) &&
+ if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER) &&
other_query && !timer_pending(&other_query->timer)) {
- lmi = now + br->multicast_last_member_interval;
- pg->grp_query_rexmit_cnt = br->multicast_last_member_count - 1;
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ lmi = now + brmctx->multicast_last_member_interval;
+ pg->grp_query_rexmit_cnt = brmctx->multicast_last_member_count - 1;
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, false, 0, NULL);
if (!timer_pending(&pg->rexmit_timer) ||
time_after(pg->rexmit_timer.expires, lmi))
@@ -1875,8 +2028,8 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
if (pg->filter_mode == MCAST_EXCLUDE &&
(!timer_pending(&pg->timer) ||
- time_after(pg->timer.expires, now + br_multicast_lmqt(br))))
- mod_timer(&pg->timer, now + br_multicast_lmqt(br));
+ time_after(pg->timer.expires, now + br_multicast_lmqt(brmctx))))
+ mod_timer(&pg->timer, now + br_multicast_lmqt(brmctx));
}
/* State Msg type New state Actions
@@ -1884,11 +2037,11 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
* INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI
* EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI
*/
-static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_isinc_allow(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
bool changed = false;
@@ -1907,10 +2060,11 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a
}
if (ent)
- __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
return changed;
@@ -1921,7 +2075,8 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a
* Delete (A-B)
* Group Timer=GMI
*/
-static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
+static void __grp_src_isexc_incl(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -1945,7 +2100,8 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
br_multicast_fwd_src_handle(ent);
}
- br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+ br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
__grp_src_delete_marked(pg);
}
@@ -1956,11 +2112,11 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
* Delete (Y-A)
* Group Timer=GMI
*/
-static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_isexc_excl(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
bool changed = false;
@@ -1981,13 +2137,14 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
ent = br_multicast_new_group_src(pg, &src_ip);
if (ent) {
__grp_src_mod_timer(ent,
- now + br_multicast_gmi(br));
+ now + br_multicast_gmi(brmctx));
changed = true;
}
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (__grp_src_delete_marked(pg))
@@ -1996,28 +2153,28 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
return changed;
}
-static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_isexc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size,
+ __grp_src_isexc_incl(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
grec_type);
br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
changed = true;
break;
case MCAST_EXCLUDE:
- changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_isexc_excl(brmctx, pg, h_addr, srcs, nsrcs,
+ addr_size, grec_type);
break;
}
pg->filter_mode = MCAST_EXCLUDE;
- mod_timer(&pg->timer, jiffies + br_multicast_gmi(br));
+ mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx));
return changed;
}
@@ -2026,11 +2183,12 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr,
* INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI
* Send Q(G,A-B)
*/
-static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toin_incl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
u32 src_idx, to_send = pg->src_ents;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
@@ -2054,14 +2212,15 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
changed = true;
}
if (ent)
- __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
@@ -2071,11 +2230,12 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
* Send Q(G,X-A)
* Send Q(G)
*/
-static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toin_excl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
u32 src_idx, to_send = pg->src_ents;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
@@ -2102,21 +2262,24 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr,
changed = true;
}
if (ent)
- __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
- __grp_send_query_and_rexmit(pg);
+ __grp_send_query_and_rexmit(brmctx, pmctx, pg);
return changed;
}
-static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_toin(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -2124,12 +2287,12 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_toin_incl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
case MCAST_EXCLUDE:
- changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_toin_excl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
}
@@ -2151,7 +2314,9 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
* Send Q(G,A*B)
* Group Timer=GMI
*/
-static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
+static void __grp_src_toex_incl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -2178,11 +2343,12 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
br_multicast_fwd_src_handle(ent);
}
- br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+ br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
__grp_src_delete_marked(pg);
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
}
/* State Msg type New state Actions
@@ -2192,7 +2358,9 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
* Send Q(G,A-Y)
* Group Timer=GMI
*/
-static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toex_excl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -2224,39 +2392,41 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr,
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (__grp_src_delete_marked(pg))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
-static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_toex(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ __grp_src_toex_incl(brmctx, pmctx, pg, h_addr, srcs, nsrcs,
+ addr_size, grec_type);
br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
changed = true;
break;
case MCAST_EXCLUDE:
- changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_toex_excl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
}
pg->filter_mode = MCAST_EXCLUDE;
- mod_timer(&pg->timer, jiffies + br_multicast_gmi(br));
+ mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx));
return changed;
}
@@ -2264,7 +2434,9 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr,
/* State Msg type New state Actions
* INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B)
*/
-static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_block_incl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
struct net_bridge_group_src *ent;
@@ -2286,11 +2458,12 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
@@ -2299,7 +2472,9 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
* EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer
* Send Q(G,A-Y)
*/
-static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_block_excl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
struct net_bridge_group_src *ent;
@@ -2328,28 +2503,31 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr,
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
-static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_block(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_block_incl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
case MCAST_EXCLUDE:
- changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_block_excl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
}
@@ -2384,12 +2562,12 @@ br_multicast_find_port(struct net_bridge_mdb_entry *mp,
return NULL;
}
-static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip4_multicast_igmp3_report(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
- bool igmpv2 = br->multicast_igmp_version == 2;
+ bool igmpv2 = brmctx->multicast_igmp_version == 2;
struct net_bridge_mdb_entry *mdst;
struct net_bridge_port_group *pg;
const unsigned char *src;
@@ -2436,25 +2614,29 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
if (nsrcs == 0 &&
(type == IGMPV3_CHANGE_TO_INCLUDE ||
type == IGMPV3_MODE_IS_INCLUDE)) {
- if (!port || igmpv2) {
- br_ip4_multicast_leave_group(br, port, group, vid, src);
+ if (!pmctx || igmpv2) {
+ br_ip4_multicast_leave_group(brmctx, pmctx,
+ group, vid, src);
continue;
}
} else {
- err = br_ip4_multicast_add_group(br, port, group, vid,
- src, igmpv2);
+ err = br_ip4_multicast_add_group(brmctx, pmctx, group,
+ vid, src, igmpv2);
if (err)
break;
}
- if (!port || igmpv2)
+ if (!pmctx || igmpv2)
continue;
- spin_lock_bh(&br->multicast_lock);
- mdst = br_mdb_ip4_get(br, group, vid);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
+ goto unlock_continue;
+
+ mdst = br_mdb_ip4_get(brmctx->br, group, vid);
if (!mdst)
goto unlock_continue;
- pg = br_multicast_find_port(mdst, port, src);
+ pg = br_multicast_find_port(mdst, pmctx->port, src);
if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
goto unlock_continue;
/* reload grec and host addr */
@@ -2462,46 +2644,52 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
h_addr = &ip_hdr(skb)->saddr;
switch (type) {
case IGMPV3_ALLOW_NEW_SOURCES:
- changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_MODE_IS_INCLUDE:
- changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_MODE_IS_EXCLUDE:
- changed = br_multicast_isexc(pg, h_addr, grec->grec_src,
+ changed = br_multicast_isexc(brmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_CHANGE_TO_INCLUDE:
- changed = br_multicast_toin(pg, h_addr, grec->grec_src,
+ changed = br_multicast_toin(brmctx, pmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_CHANGE_TO_EXCLUDE:
- changed = br_multicast_toex(pg, h_addr, grec->grec_src,
+ changed = br_multicast_toex(brmctx, pmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_BLOCK_OLD_SOURCES:
- changed = br_multicast_block(pg, h_addr, grec->grec_src,
+ changed = br_multicast_block(brmctx, pmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
}
if (changed)
- br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB);
+ br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB);
unlock_continue:
- spin_unlock_bh(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
}
return err;
}
#if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_mld2_report(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip6_multicast_mld2_report(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
- bool mldv1 = br->multicast_mld_version == 1;
+ bool mldv1 = brmctx->multicast_mld_version == 1;
struct net_bridge_mdb_entry *mdst;
struct net_bridge_port_group *pg;
unsigned int nsrcs_offset;
@@ -2562,137 +2750,144 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE ||
grec->grec_type == MLD2_MODE_IS_INCLUDE) &&
nsrcs == 0) {
- if (!port || mldv1) {
- br_ip6_multicast_leave_group(br, port,
+ if (!pmctx || mldv1) {
+ br_ip6_multicast_leave_group(brmctx, pmctx,
&grec->grec_mca,
vid, src);
continue;
}
} else {
- err = br_ip6_multicast_add_group(br, port,
+ err = br_ip6_multicast_add_group(brmctx, pmctx,
&grec->grec_mca, vid,
src, mldv1);
if (err)
break;
}
- if (!port || mldv1)
+ if (!pmctx || mldv1)
continue;
- spin_lock_bh(&br->multicast_lock);
- mdst = br_mdb_ip6_get(br, &grec->grec_mca, vid);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
+ goto unlock_continue;
+
+ mdst = br_mdb_ip6_get(brmctx->br, &grec->grec_mca, vid);
if (!mdst)
goto unlock_continue;
- pg = br_multicast_find_port(mdst, port, src);
+ pg = br_multicast_find_port(mdst, pmctx->port, src);
if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
goto unlock_continue;
h_addr = &ipv6_hdr(skb)->saddr;
switch (grec->grec_type) {
case MLD2_ALLOW_NEW_SOURCES:
- changed = br_multicast_isinc_allow(pg, h_addr,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_MODE_IS_INCLUDE:
- changed = br_multicast_isinc_allow(pg, h_addr,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_MODE_IS_EXCLUDE:
- changed = br_multicast_isexc(pg, h_addr,
+ changed = br_multicast_isexc(brmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_CHANGE_TO_INCLUDE:
- changed = br_multicast_toin(pg, h_addr,
+ changed = br_multicast_toin(brmctx, pmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_CHANGE_TO_EXCLUDE:
- changed = br_multicast_toex(pg, h_addr,
+ changed = br_multicast_toex(brmctx, pmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_BLOCK_OLD_SOURCES:
- changed = br_multicast_block(pg, h_addr,
+ changed = br_multicast_block(brmctx, pmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
}
if (changed)
- br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB);
+ br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB);
unlock_continue:
- spin_unlock_bh(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
}
return err;
}
#endif
-static bool br_ip4_multicast_select_querier(struct net_bridge *br,
- struct net_bridge_port *port,
+static bool br_ip4_multicast_select_querier(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
__be32 saddr)
{
- if (!timer_pending(&br->ip4_own_query.timer) &&
- !timer_pending(&br->ip4_other_query.timer))
+ struct net_bridge_port *port = pmctx ? pmctx->port : NULL;
+
+ if (!timer_pending(&brmctx->ip4_own_query.timer) &&
+ !timer_pending(&brmctx->ip4_other_query.timer))
goto update;
- if (!br->ip4_querier.addr.src.ip4)
+ if (!brmctx->ip4_querier.addr.src.ip4)
goto update;
- if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.src.ip4))
+ if (ntohl(saddr) <= ntohl(brmctx->ip4_querier.addr.src.ip4))
goto update;
return false;
update:
- br->ip4_querier.addr.src.ip4 = saddr;
+ brmctx->ip4_querier.addr.src.ip4 = saddr;
/* update protected by general multicast_lock by caller */
- rcu_assign_pointer(br->ip4_querier.port, port);
+ rcu_assign_pointer(brmctx->ip4_querier.port, port);
return true;
}
#if IS_ENABLED(CONFIG_IPV6)
-static bool br_ip6_multicast_select_querier(struct net_bridge *br,
- struct net_bridge_port *port,
+static bool br_ip6_multicast_select_querier(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct in6_addr *saddr)
{
- if (!timer_pending(&br->ip6_own_query.timer) &&
- !timer_pending(&br->ip6_other_query.timer))
+ struct net_bridge_port *port = pmctx ? pmctx->port : NULL;
+
+ if (!timer_pending(&brmctx->ip6_own_query.timer) &&
+ !timer_pending(&brmctx->ip6_other_query.timer))
goto update;
- if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.src.ip6) <= 0)
+ if (ipv6_addr_cmp(saddr, &brmctx->ip6_querier.addr.src.ip6) <= 0)
goto update;
return false;
update:
- br->ip6_querier.addr.src.ip6 = *saddr;
+ brmctx->ip6_querier.addr.src.ip6 = *saddr;
/* update protected by general multicast_lock by caller */
- rcu_assign_pointer(br->ip6_querier.port, port);
+ rcu_assign_pointer(brmctx->ip6_querier.port, port);
return true;
}
#endif
static void
-br_multicast_update_query_timer(struct net_bridge *br,
+br_multicast_update_query_timer(struct net_bridge_mcast *brmctx,
struct bridge_mcast_other_query *query,
unsigned long max_delay)
{
if (!timer_pending(&query->timer))
query->delay_time = jiffies + max_delay;
- mod_timer(&query->timer, jiffies + br->multicast_querier_interval);
+ mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval);
}
static void br_port_mc_router_state_change(struct net_bridge_port *p,
@@ -2709,19 +2904,26 @@ static void br_port_mc_router_state_change(struct net_bridge_port *p,
}
static struct net_bridge_port *
-br_multicast_rport_from_node(struct net_bridge *br,
+br_multicast_rport_from_node(struct net_bridge_mcast *brmctx,
struct hlist_head *mc_router_list,
struct hlist_node *rlist)
{
+ struct net_bridge_mcast_port *pmctx;
+
#if IS_ENABLED(CONFIG_IPV6)
- if (mc_router_list == &br->ip6_mc_router_list)
- return hlist_entry(rlist, struct net_bridge_port, ip6_rlist);
+ if (mc_router_list == &brmctx->ip6_mc_router_list)
+ pmctx = hlist_entry(rlist, struct net_bridge_mcast_port,
+ ip6_rlist);
+ else
#endif
- return hlist_entry(rlist, struct net_bridge_port, ip4_rlist);
+ pmctx = hlist_entry(rlist, struct net_bridge_mcast_port,
+ ip4_rlist);
+
+ return pmctx->port;
}
static struct hlist_node *
-br_multicast_get_rport_slot(struct net_bridge *br,
+br_multicast_get_rport_slot(struct net_bridge_mcast *brmctx,
struct net_bridge_port *port,
struct hlist_head *mc_router_list)
@@ -2731,7 +2933,7 @@ br_multicast_get_rport_slot(struct net_bridge *br,
struct hlist_node *rlist;
hlist_for_each(rlist, mc_router_list) {
- p = br_multicast_rport_from_node(br, mc_router_list, rlist);
+ p = br_multicast_rport_from_node(brmctx, mc_router_list, rlist);
if ((unsigned long)port >= (unsigned long)p)
break;
@@ -2742,14 +2944,14 @@ br_multicast_get_rport_slot(struct net_bridge *br,
return slot;
}
-static bool br_multicast_no_router_otherpf(struct net_bridge_port *port,
+static bool br_multicast_no_router_otherpf(struct net_bridge_mcast_port *pmctx,
struct hlist_node *rnode)
{
#if IS_ENABLED(CONFIG_IPV6)
- if (rnode != &port->ip6_rlist)
- return hlist_unhashed(&port->ip6_rlist);
+ if (rnode != &pmctx->ip6_rlist)
+ return hlist_unhashed(&pmctx->ip6_rlist);
else
- return hlist_unhashed(&port->ip4_rlist);
+ return hlist_unhashed(&pmctx->ip4_rlist);
#else
return true;
#endif
@@ -2759,8 +2961,8 @@ static bool br_multicast_no_router_otherpf(struct net_bridge_port *port,
* list is maintained ordered by pointer value
* and locked by br->multicast_lock and RCU
*/
-static void br_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct hlist_node *rlist,
struct hlist_head *mc_router_list)
{
@@ -2769,7 +2971,7 @@ static void br_multicast_add_router(struct net_bridge *br,
if (!hlist_unhashed(rlist))
return;
- slot = br_multicast_get_rport_slot(br, port, mc_router_list);
+ slot = br_multicast_get_rport_slot(brmctx, pmctx->port, mc_router_list);
if (slot)
hlist_add_behind_rcu(rlist, slot);
@@ -2780,9 +2982,9 @@ static void br_multicast_add_router(struct net_bridge *br,
* switched from no IPv4/IPv6 multicast router to a new
* IPv4 or IPv6 multicast router.
*/
- if (br_multicast_no_router_otherpf(port, rlist)) {
- br_rtr_notify(br->dev, port, RTM_NEWMDB);
- br_port_mc_router_state_change(port, true);
+ if (br_multicast_no_router_otherpf(pmctx, rlist)) {
+ br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_NEWMDB);
+ br_port_mc_router_state_change(pmctx->port, true);
}
}
@@ -2790,116 +2992,119 @@ static void br_multicast_add_router(struct net_bridge *br,
* list is maintained ordered by pointer value
* and locked by br->multicast_lock and RCU
*/
-static void br_ip4_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port)
+static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
{
- br_multicast_add_router(br, port, &port->ip4_rlist,
- &br->ip4_mc_router_list);
+ br_multicast_add_router(brmctx, pmctx, &pmctx->ip4_rlist,
+ &brmctx->ip4_mc_router_list);
}
/* Add port to router_list
* list is maintained ordered by pointer value
* and locked by br->multicast_lock and RCU
*/
-static void br_ip6_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port)
+static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
{
#if IS_ENABLED(CONFIG_IPV6)
- br_multicast_add_router(br, port, &port->ip6_rlist,
- &br->ip6_mc_router_list);
+ br_multicast_add_router(brmctx, pmctx, &pmctx->ip6_rlist,
+ &brmctx->ip6_mc_router_list);
#endif
}
-static void br_multicast_mark_router(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_multicast_mark_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct timer_list *timer,
struct hlist_node *rlist,
struct hlist_head *mc_router_list)
{
unsigned long now = jiffies;
- if (!port) {
- if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
- if (!br_ip4_multicast_is_router(br) &&
- !br_ip6_multicast_is_router(br))
- br_mc_router_state_change(br, true);
- mod_timer(timer, now + br->multicast_querier_interval);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
+ return;
+
+ if (!pmctx) {
+ if (brmctx->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
+ if (!br_ip4_multicast_is_router(brmctx) &&
+ !br_ip6_multicast_is_router(brmctx))
+ br_mc_router_state_change(brmctx->br, true);
+ mod_timer(timer, now + brmctx->multicast_querier_interval);
}
return;
}
- if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
- port->multicast_router == MDB_RTR_TYPE_PERM)
+ if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ pmctx->multicast_router == MDB_RTR_TYPE_PERM)
return;
- br_multicast_add_router(br, port, rlist, mc_router_list);
- mod_timer(timer, now + br->multicast_querier_interval);
+ br_multicast_add_router(brmctx, pmctx, rlist, mc_router_list);
+ mod_timer(timer, now + brmctx->multicast_querier_interval);
}
-static void br_ip4_multicast_mark_router(struct net_bridge *br,
- struct net_bridge_port *port)
+static void br_ip4_multicast_mark_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
{
- struct timer_list *timer = &br->ip4_mc_router_timer;
+ struct timer_list *timer = &brmctx->ip4_mc_router_timer;
struct hlist_node *rlist = NULL;
- if (port) {
- timer = &port->ip4_mc_router_timer;
- rlist = &port->ip4_rlist;
+ if (pmctx) {
+ timer = &pmctx->ip4_mc_router_timer;
+ rlist = &pmctx->ip4_rlist;
}
- br_multicast_mark_router(br, port, timer, rlist,
- &br->ip4_mc_router_list);
+ br_multicast_mark_router(brmctx, pmctx, timer, rlist,
+ &brmctx->ip4_mc_router_list);
}
-static void br_ip6_multicast_mark_router(struct net_bridge *br,
- struct net_bridge_port *port)
+static void br_ip6_multicast_mark_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
{
#if IS_ENABLED(CONFIG_IPV6)
- struct timer_list *timer = &br->ip6_mc_router_timer;
+ struct timer_list *timer = &brmctx->ip6_mc_router_timer;
struct hlist_node *rlist = NULL;
- if (port) {
- timer = &port->ip6_mc_router_timer;
- rlist = &port->ip6_rlist;
+ if (pmctx) {
+ timer = &pmctx->ip6_mc_router_timer;
+ rlist = &pmctx->ip6_rlist;
}
- br_multicast_mark_router(br, port, timer, rlist,
- &br->ip6_mc_router_list);
+ br_multicast_mark_router(brmctx, pmctx, timer, rlist,
+ &brmctx->ip6_mc_router_list);
#endif
}
static void
-br_ip4_multicast_query_received(struct net_bridge *br,
- struct net_bridge_port *port,
+br_ip4_multicast_query_received(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct bridge_mcast_other_query *query,
struct br_ip *saddr,
unsigned long max_delay)
{
- if (!br_ip4_multicast_select_querier(br, port, saddr->src.ip4))
+ if (!br_ip4_multicast_select_querier(brmctx, pmctx, saddr->src.ip4))
return;
- br_multicast_update_query_timer(br, query, max_delay);
- br_ip4_multicast_mark_router(br, port);
+ br_multicast_update_query_timer(brmctx, query, max_delay);
+ br_ip4_multicast_mark_router(brmctx, pmctx);
}
#if IS_ENABLED(CONFIG_IPV6)
static void
-br_ip6_multicast_query_received(struct net_bridge *br,
- struct net_bridge_port *port,
+br_ip6_multicast_query_received(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct bridge_mcast_other_query *query,
struct br_ip *saddr,
unsigned long max_delay)
{
- if (!br_ip6_multicast_select_querier(br, port, &saddr->src.ip6))
+ if (!br_ip6_multicast_select_querier(brmctx, pmctx, &saddr->src.ip6))
return;
- br_multicast_update_query_timer(br, query, max_delay);
- br_ip6_multicast_mark_router(br, port);
+ br_multicast_update_query_timer(brmctx, query, max_delay);
+ br_ip6_multicast_mark_router(brmctx, pmctx);
}
#endif
-static void br_ip4_multicast_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip4_multicast_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
@@ -2915,9 +3120,8 @@ static void br_ip4_multicast_query(struct net_bridge *br,
unsigned long now = jiffies;
__be32 group;
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
group = ih->group;
@@ -2932,7 +3136,8 @@ static void br_ip4_multicast_query(struct net_bridge *br,
} else if (transport_len >= sizeof(*ih3)) {
ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs ||
- (br->multicast_igmp_version == 3 && group && ih3->suppress))
+ (brmctx->multicast_igmp_version == 3 && group &&
+ ih3->suppress))
goto out;
max_delay = ih3->code ?
@@ -2945,16 +3150,17 @@ static void br_ip4_multicast_query(struct net_bridge *br,
saddr.proto = htons(ETH_P_IP);
saddr.src.ip4 = iph->saddr;
- br_ip4_multicast_query_received(br, port, &br->ip4_other_query,
+ br_ip4_multicast_query_received(brmctx, pmctx,
+ &brmctx->ip4_other_query,
&saddr, max_delay);
goto out;
}
- mp = br_mdb_ip4_get(br, group, vid);
+ mp = br_mdb_ip4_get(brmctx->br, group, vid);
if (!mp)
goto out;
- max_delay *= br->multicast_last_member_count;
+ max_delay *= brmctx->multicast_last_member_count;
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
@@ -2963,23 +3169,23 @@ static void br_ip4_multicast_query(struct net_bridge *br,
mod_timer(&mp->timer, now + max_delay);
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0 &&
- (br->multicast_igmp_version == 2 ||
+ (brmctx->multicast_igmp_version == 2 ||
p->filter_mode == MCAST_EXCLUDE))
mod_timer(&p->timer, now + max_delay);
}
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
#if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
@@ -2997,9 +3203,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
bool is_general_query;
int err = 0;
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
if (transport_len == sizeof(*mld)) {
@@ -3019,7 +3224,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
mld2q = (struct mld2_query *)icmp6_hdr(skb);
if (!mld2q->mld2q_nsrcs)
group = &mld2q->mld2q_mca;
- if (br->multicast_mld_version == 2 &&
+ if (brmctx->multicast_mld_version == 2 &&
!ipv6_addr_any(&mld2q->mld2q_mca) &&
mld2q->mld2q_suppress)
goto out;
@@ -3033,18 +3238,19 @@ static int br_ip6_multicast_query(struct net_bridge *br,
saddr.proto = htons(ETH_P_IPV6);
saddr.src.ip6 = ipv6_hdr(skb)->saddr;
- br_ip6_multicast_query_received(br, port, &br->ip6_other_query,
+ br_ip6_multicast_query_received(brmctx, pmctx,
+ &brmctx->ip6_other_query,
&saddr, max_delay);
goto out;
} else if (!group) {
goto out;
}
- mp = br_mdb_ip6_get(br, group, vid);
+ mp = br_mdb_ip6_get(brmctx->br, group, vid);
if (!mp)
goto out;
- max_delay *= br->multicast_last_member_count;
+ max_delay *= brmctx->multicast_last_member_count;
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
@@ -3052,25 +3258,25 @@ static int br_ip6_multicast_query(struct net_bridge *br,
mod_timer(&mp->timer, now + max_delay);
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0 &&
- (br->multicast_mld_version == 1 ||
+ (brmctx->multicast_mld_version == 1 ||
p->filter_mode == MCAST_EXCLUDE))
mod_timer(&p->timer, now + max_delay);
}
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
return err;
}
#endif
static void
-br_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+br_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
struct bridge_mcast_other_query *other_query,
struct bridge_mcast_own_query *own_query,
@@ -3081,22 +3287,21 @@ br_multicast_leave_group(struct net_bridge *br,
unsigned long now;
unsigned long time;
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
- mp = br_mdb_ip_get(br, group);
+ mp = br_mdb_ip_get(brmctx->br, group);
if (!mp)
goto out;
- if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
+ if (pmctx && (pmctx->port->flags & BR_MULTICAST_FAST_LEAVE)) {
struct net_bridge_port_group __rcu **pp;
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
- if (!br_port_group_equal(p, port, src))
+ if (!br_port_group_equal(p, pmctx->port, src))
continue;
if (p->flags & MDB_PG_FLAGS_PERMANENT)
@@ -3111,19 +3316,19 @@ br_multicast_leave_group(struct net_bridge *br,
if (timer_pending(&other_query->timer))
goto out;
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
- __br_multicast_send_query(br, port, NULL, NULL, &mp->addr,
+ if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) {
+ __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &mp->addr,
false, 0, NULL);
- time = jiffies + br->multicast_last_member_count *
- br->multicast_last_member_interval;
+ time = jiffies + brmctx->multicast_last_member_count *
+ brmctx->multicast_last_member_interval;
mod_timer(&own_query->timer, time);
- for (p = mlock_dereference(mp->ports, br);
- p != NULL;
- p = mlock_dereference(p->next, br)) {
- if (!br_port_group_equal(p, port, src))
+ for (p = mlock_dereference(mp->ports, brmctx->br);
+ p != NULL && pmctx != NULL;
+ p = mlock_dereference(p->next, brmctx->br)) {
+ if (!br_port_group_equal(p, pmctx->port, src))
continue;
if (!hlist_unhashed(&p->mglist) &&
@@ -3138,10 +3343,10 @@ br_multicast_leave_group(struct net_bridge *br,
}
now = jiffies;
- time = now + br->multicast_last_member_count *
- br->multicast_last_member_interval;
+ time = now + brmctx->multicast_last_member_count *
+ brmctx->multicast_last_member_interval;
- if (!port) {
+ if (!pmctx) {
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
@@ -3152,10 +3357,10 @@ br_multicast_leave_group(struct net_bridge *br,
goto out;
}
- for (p = mlock_dereference(mp->ports, br);
+ for (p = mlock_dereference(mp->ports, brmctx->br);
p != NULL;
- p = mlock_dereference(p->next, br)) {
- if (p->key.port != port)
+ p = mlock_dereference(p->next, brmctx->br)) {
+ if (p->key.port != pmctx->port)
continue;
if (!hlist_unhashed(&p->mglist) &&
@@ -3168,11 +3373,11 @@ br_multicast_leave_group(struct net_bridge *br,
break;
}
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
-static void br_ip4_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
__be32 group,
__u16 vid,
const unsigned char *src)
@@ -3183,20 +3388,21 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
if (ipv4_is_local_multicast(group))
return;
- own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
+ own_query = pmctx ? &pmctx->ip4_own_query : &brmctx->ip4_own_query;
memset(&br_group, 0, sizeof(br_group));
br_group.dst.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
- br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query,
+ br_multicast_leave_group(brmctx, pmctx, &br_group,
+ &brmctx->ip4_other_query,
own_query, src);
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct in6_addr *group,
__u16 vid,
const unsigned char *src)
@@ -3207,14 +3413,15 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
if (ipv6_addr_is_ll_all_nodes(group))
return;
- own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
+ own_query = pmctx ? &pmctx->ip6_own_query : &brmctx->ip6_own_query;
memset(&br_group, 0, sizeof(br_group));
br_group.dst.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
- br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query,
+ br_multicast_leave_group(brmctx, pmctx, &br_group,
+ &brmctx->ip6_other_query,
own_query, src);
}
#endif
@@ -3252,8 +3459,8 @@ static void br_multicast_err_count(const struct net_bridge *br,
u64_stats_update_end(&pstats->syncp);
}
-static void br_multicast_pim(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_multicast_pim(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct sk_buff *skb)
{
unsigned int offset = skb_transport_offset(skb);
@@ -3264,31 +3471,32 @@ static void br_multicast_pim(struct net_bridge *br,
pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
return;
- spin_lock(&br->multicast_lock);
- br_ip4_multicast_mark_router(br, port);
- spin_unlock(&br->multicast_lock);
+ spin_lock(&brmctx->br->multicast_lock);
+ br_ip4_multicast_mark_router(brmctx, pmctx);
+ spin_unlock(&brmctx->br->multicast_lock);
}
-static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip4_multicast_mrd_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb)
{
if (ip_hdr(skb)->protocol != IPPROTO_IGMP ||
igmp_hdr(skb)->type != IGMP_MRDISC_ADV)
return -ENOMSG;
- spin_lock(&br->multicast_lock);
- br_ip4_multicast_mark_router(br, port);
- spin_unlock(&br->multicast_lock);
+ spin_lock(&brmctx->br->multicast_lock);
+ br_ip4_multicast_mark_router(brmctx, pmctx);
+ spin_unlock(&brmctx->br->multicast_lock);
return 0;
}
-static int br_multicast_ipv4_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_multicast_ipv4_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
+ struct net_bridge_port *p = pmctx ? pmctx->port : NULL;
const unsigned char *src;
struct igmphdr *ih;
int err;
@@ -3300,14 +3508,14 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
} else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) {
if (ip_hdr(skb)->protocol == IPPROTO_PIM)
- br_multicast_pim(br, port, skb);
+ br_multicast_pim(brmctx, pmctx, skb);
} else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) {
- br_ip4_multicast_mrd_rcv(br, port, skb);
+ br_ip4_multicast_mrd_rcv(brmctx, pmctx, skb);
}
return 0;
} else if (err < 0) {
- br_multicast_err_count(br, port, skb->protocol);
+ br_multicast_err_count(brmctx->br, p, skb->protocol);
return err;
}
@@ -3319,44 +3527,45 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
case IGMP_HOST_MEMBERSHIP_REPORT:
case IGMPV2_HOST_MEMBERSHIP_REPORT:
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
- err = br_ip4_multicast_add_group(br, port, ih->group, vid, src,
- true);
+ err = br_ip4_multicast_add_group(brmctx, pmctx, ih->group, vid,
+ src, true);
break;
case IGMPV3_HOST_MEMBERSHIP_REPORT:
- err = br_ip4_multicast_igmp3_report(br, port, skb, vid);
+ err = br_ip4_multicast_igmp3_report(brmctx, pmctx, skb, vid);
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
- br_ip4_multicast_query(br, port, skb, vid);
+ br_ip4_multicast_query(brmctx, pmctx, skb, vid);
break;
case IGMP_HOST_LEAVE_MESSAGE:
- br_ip4_multicast_leave_group(br, port, ih->group, vid, src);
+ br_ip4_multicast_leave_group(brmctx, pmctx, ih->group, vid, src);
break;
}
- br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_mrd_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip6_multicast_mrd_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb)
{
if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
return;
- spin_lock(&br->multicast_lock);
- br_ip6_multicast_mark_router(br, port);
- spin_unlock(&br->multicast_lock);
+ spin_lock(&brmctx->br->multicast_lock);
+ br_ip6_multicast_mark_router(brmctx, pmctx);
+ spin_unlock(&brmctx->br->multicast_lock);
}
-static int br_multicast_ipv6_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_multicast_ipv6_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
+ struct net_bridge_port *p = pmctx ? pmctx->port : NULL;
const unsigned char *src;
struct mld_msg *mld;
int err;
@@ -3368,11 +3577,11 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
if (err == -ENODATA &&
ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr))
- br_ip6_multicast_mrd_rcv(br, port, skb);
+ br_ip6_multicast_mrd_rcv(brmctx, pmctx, skb);
return 0;
} else if (err < 0) {
- br_multicast_err_count(br, port, skb->protocol);
+ br_multicast_err_count(brmctx->br, p, skb->protocol);
return err;
}
@@ -3383,29 +3592,32 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
case ICMPV6_MGM_REPORT:
src = eth_hdr(skb)->h_source;
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
- err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid,
- src, true);
+ err = br_ip6_multicast_add_group(brmctx, pmctx, &mld->mld_mca,
+ vid, src, true);
break;
case ICMPV6_MLD2_REPORT:
- err = br_ip6_multicast_mld2_report(br, port, skb, vid);
+ err = br_ip6_multicast_mld2_report(brmctx, pmctx, skb, vid);
break;
case ICMPV6_MGM_QUERY:
- err = br_ip6_multicast_query(br, port, skb, vid);
+ err = br_ip6_multicast_query(brmctx, pmctx, skb, vid);
break;
case ICMPV6_MGM_REDUCTION:
src = eth_hdr(skb)->h_source;
- br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid, src);
+ br_ip6_multicast_leave_group(brmctx, pmctx, &mld->mld_mca, vid,
+ src);
break;
}
- br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
}
#endif
-int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
+int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+ struct net_bridge_mcast_port **pmctx,
+ struct net_bridge_vlan *vlan,
struct sk_buff *skb, u16 vid)
{
int ret = 0;
@@ -3413,16 +3625,36 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
BR_INPUT_SKB_CB(skb)->igmp = 0;
BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!br_opt_get((*brmctx)->br, BROPT_MULTICAST_ENABLED))
return 0;
+ if (br_opt_get((*brmctx)->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && vlan) {
+ const struct net_bridge_vlan *masterv;
+
+ /* the vlan has the master flag set only when transmitting
+ * through the bridge device
+ */
+ if (br_vlan_is_master(vlan)) {
+ masterv = vlan;
+ *brmctx = &vlan->br_mcast_ctx;
+ *pmctx = NULL;
+ } else {
+ masterv = vlan->brvlan;
+ *brmctx = &vlan->brvlan->br_mcast_ctx;
+ *pmctx = &vlan->port_mcast_ctx;
+ }
+
+ if (!(masterv->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))
+ return 0;
+ }
+
switch (skb->protocol) {
case htons(ETH_P_IP):
- ret = br_multicast_ipv4_rcv(br, port, skb, vid);
+ ret = br_multicast_ipv4_rcv(*brmctx, *pmctx, skb, vid);
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- ret = br_multicast_ipv6_rcv(br, port, skb, vid);
+ ret = br_multicast_ipv6_rcv(*brmctx, *pmctx, skb, vid);
break;
#endif
}
@@ -3430,32 +3662,40 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
return ret;
}
-static void br_multicast_query_expired(struct net_bridge *br,
+static void br_multicast_query_expired(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query,
struct bridge_mcast_querier *querier)
{
- spin_lock(&br->multicast_lock);
- if (query->startup_sent < br->multicast_startup_query_count)
+ spin_lock(&brmctx->br->multicast_lock);
+ if (br_multicast_ctx_vlan_disabled(brmctx))
+ goto out;
+
+ if (query->startup_sent < brmctx->multicast_startup_query_count)
query->startup_sent++;
RCU_INIT_POINTER(querier->port, NULL);
- br_multicast_send_query(br, NULL, query);
- spin_unlock(&br->multicast_lock);
+ br_multicast_send_query(brmctx, NULL, query);
+out:
+ spin_unlock(&brmctx->br->multicast_lock);
}
static void br_ip4_multicast_query_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip4_own_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip4_own_query.timer);
- br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
+ br_multicast_query_expired(brmctx, &brmctx->ip4_own_query,
+ &brmctx->ip4_querier);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_query_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip6_own_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip6_own_query.timer);
- br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
+ br_multicast_query_expired(brmctx, &brmctx->ip6_own_query,
+ &brmctx->ip6_querier);
}
#endif
@@ -3472,47 +3712,63 @@ static void br_multicast_gc_work(struct work_struct *work)
br_multicast_gc(&deleted_head);
}
-void br_multicast_init(struct net_bridge *br)
+void br_multicast_ctx_init(struct net_bridge *br,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast *brmctx)
{
- br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
-
- br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- br->multicast_last_member_count = 2;
- br->multicast_startup_query_count = 2;
+ brmctx->br = br;
+ brmctx->vlan = vlan;
+ brmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ brmctx->multicast_last_member_count = 2;
+ brmctx->multicast_startup_query_count = 2;
- br->multicast_last_member_interval = HZ;
- br->multicast_query_response_interval = 10 * HZ;
- br->multicast_startup_query_interval = 125 * HZ / 4;
- br->multicast_query_interval = 125 * HZ;
- br->multicast_querier_interval = 255 * HZ;
- br->multicast_membership_interval = 260 * HZ;
+ brmctx->multicast_last_member_interval = HZ;
+ brmctx->multicast_query_response_interval = 10 * HZ;
+ brmctx->multicast_startup_query_interval = 125 * HZ / 4;
+ brmctx->multicast_query_interval = 125 * HZ;
+ brmctx->multicast_querier_interval = 255 * HZ;
+ brmctx->multicast_membership_interval = 260 * HZ;
- br->ip4_other_query.delay_time = 0;
- br->ip4_querier.port = NULL;
- br->multicast_igmp_version = 2;
+ brmctx->ip4_other_query.delay_time = 0;
+ brmctx->ip4_querier.port = NULL;
+ brmctx->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
- br->multicast_mld_version = 1;
- br->ip6_other_query.delay_time = 0;
- br->ip6_querier.port = NULL;
+ brmctx->multicast_mld_version = 1;
+ brmctx->ip6_other_query.delay_time = 0;
+ brmctx->ip6_querier.port = NULL;
#endif
- br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
- br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
- spin_lock_init(&br->multicast_lock);
- timer_setup(&br->ip4_mc_router_timer,
+ timer_setup(&brmctx->ip4_mc_router_timer,
br_ip4_multicast_local_router_expired, 0);
- timer_setup(&br->ip4_other_query.timer,
+ timer_setup(&brmctx->ip4_other_query.timer,
br_ip4_multicast_querier_expired, 0);
- timer_setup(&br->ip4_own_query.timer,
+ timer_setup(&brmctx->ip4_own_query.timer,
br_ip4_multicast_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
- timer_setup(&br->ip6_mc_router_timer,
+ timer_setup(&brmctx->ip6_mc_router_timer,
br_ip6_multicast_local_router_expired, 0);
- timer_setup(&br->ip6_other_query.timer,
+ timer_setup(&brmctx->ip6_other_query.timer,
br_ip6_multicast_querier_expired, 0);
- timer_setup(&br->ip6_own_query.timer,
+ timer_setup(&brmctx->ip6_own_query.timer,
br_ip6_multicast_query_expired, 0);
#endif
+}
+
+void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx)
+{
+ __br_multicast_stop(brmctx);
+}
+
+void br_multicast_init(struct net_bridge *br)
+{
+ br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
+
+ br_multicast_ctx_init(br, NULL, &br->multicast_ctx);
+
+ br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
+ br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
+
+ spin_lock_init(&br->multicast_lock);
INIT_HLIST_HEAD(&br->mdb_list);
INIT_HLIST_HEAD(&br->mcast_gc_list);
INIT_WORK(&br->mcast_gc_work, br_multicast_gc_work);
@@ -3580,8 +3836,8 @@ void br_multicast_leave_snoopers(struct net_bridge *br)
br_ip6_multicast_leave_snoopers(br);
}
-static void __br_multicast_open(struct net_bridge *br,
- struct bridge_mcast_own_query *query)
+static void __br_multicast_open_query(struct net_bridge *br,
+ struct bridge_mcast_own_query *query)
{
query->startup_sent = 0;
@@ -3591,26 +3847,191 @@ static void __br_multicast_open(struct net_bridge *br,
mod_timer(&query->timer, jiffies);
}
-void br_multicast_open(struct net_bridge *br)
+static void __br_multicast_open(struct net_bridge_mcast *brmctx)
{
- __br_multicast_open(br, &br->ip4_own_query);
+ __br_multicast_open_query(brmctx->br, &brmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- __br_multicast_open(br, &br->ip6_own_query);
+ __br_multicast_open_query(brmctx->br, &brmctx->ip6_own_query);
#endif
}
-void br_multicast_stop(struct net_bridge *br)
+void br_multicast_open(struct net_bridge *br)
+{
+ ASSERT_RTNL();
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+
+ vg = br_vlan_group(br);
+ if (vg) {
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ struct net_bridge_mcast *brmctx;
+
+ brmctx = &vlan->br_mcast_ctx;
+ if (br_vlan_is_brentry(vlan) &&
+ !br_multicast_ctx_vlan_disabled(brmctx))
+ __br_multicast_open(&vlan->br_mcast_ctx);
+ }
+ }
+ }
+
+ __br_multicast_open(&br->multicast_ctx);
+}
+
+static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
{
- del_timer_sync(&br->ip4_mc_router_timer);
- del_timer_sync(&br->ip4_other_query.timer);
- del_timer_sync(&br->ip4_own_query.timer);
+ del_timer_sync(&brmctx->ip4_mc_router_timer);
+ del_timer_sync(&brmctx->ip4_other_query.timer);
+ del_timer_sync(&brmctx->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer_sync(&br->ip6_mc_router_timer);
- del_timer_sync(&br->ip6_other_query.timer);
- del_timer_sync(&br->ip6_own_query.timer);
+ del_timer_sync(&brmctx->ip6_mc_router_timer);
+ del_timer_sync(&brmctx->ip6_other_query.timer);
+ del_timer_sync(&brmctx->ip6_own_query.timer);
#endif
}
+void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+ struct net_bridge *br;
+
+ /* it's okay to check for the flag without the multicast lock because it
+ * can only change under RTNL -> multicast_lock, we need the latter to
+ * sync with timers and packets
+ */
+ if (on == !!(vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED))
+ return;
+
+ if (br_vlan_is_master(vlan)) {
+ br = vlan->br;
+
+ if (!br_vlan_is_brentry(vlan) ||
+ (on &&
+ br_multicast_ctx_vlan_global_disabled(&vlan->br_mcast_ctx)))
+ return;
+
+ spin_lock_bh(&br->multicast_lock);
+ vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED;
+ spin_unlock_bh(&br->multicast_lock);
+
+ if (on)
+ __br_multicast_open(&vlan->br_mcast_ctx);
+ else
+ __br_multicast_stop(&vlan->br_mcast_ctx);
+ } else {
+ struct net_bridge_mcast *brmctx;
+
+ brmctx = br_multicast_port_ctx_get_global(&vlan->port_mcast_ctx);
+ if (on && br_multicast_ctx_vlan_global_disabled(brmctx))
+ return;
+
+ br = vlan->port->br;
+ spin_lock_bh(&br->multicast_lock);
+ vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED;
+ if (on)
+ __br_multicast_enable_port_ctx(&vlan->port_mcast_ctx);
+ else
+ __br_multicast_disable_port_ctx(&vlan->port_mcast_ctx);
+ spin_unlock_bh(&br->multicast_lock);
+ }
+}
+
+void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+ struct net_bridge_port *p;
+
+ if (WARN_ON_ONCE(!br_vlan_is_master(vlan)))
+ return;
+
+ list_for_each_entry(p, &vlan->br->port_list, list) {
+ struct net_bridge_vlan *vport;
+
+ vport = br_vlan_find(nbp_vlan_group(p), vlan->vid);
+ if (!vport)
+ continue;
+ br_multicast_toggle_one_vlan(vport, on);
+ }
+}
+
+int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+ struct net_bridge_port *p;
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) == on)
+ return 0;
+
+ if (on && !br_opt_get(br, BROPT_VLAN_ENABLED)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot enable multicast vlan snooping with vlan filtering disabled");
+ return -EINVAL;
+ }
+
+ vg = br_vlan_group(br);
+ if (!vg)
+ return 0;
+
+ br_opt_toggle(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED, on);
+
+ /* disable/enable non-vlan mcast contexts based on vlan snooping */
+ if (on)
+ __br_multicast_stop(&br->multicast_ctx);
+ else
+ __br_multicast_open(&br->multicast_ctx);
+ list_for_each_entry(p, &br->port_list, list) {
+ if (on)
+ br_multicast_disable_port(p);
+ else
+ br_multicast_enable_port(p);
+ }
+
+ list_for_each_entry(vlan, &vg->vlan_list, vlist)
+ br_multicast_toggle_vlan(vlan, on);
+
+ return 0;
+}
+
+bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+ ASSERT_RTNL();
+
+ /* BR_VLFLAG_GLOBAL_MCAST_ENABLED relies on eventual consistency and
+ * requires only RTNL to change
+ */
+ if (on == !!(vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))
+ return false;
+
+ vlan->priv_flags ^= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
+ br_multicast_toggle_vlan(vlan, on);
+
+ return true;
+}
+
+void br_multicast_stop(struct net_bridge *br)
+{
+ ASSERT_RTNL();
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+
+ vg = br_vlan_group(br);
+ if (vg) {
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ struct net_bridge_mcast *brmctx;
+
+ brmctx = &vlan->br_mcast_ctx;
+ if (br_vlan_is_brentry(vlan) &&
+ !br_multicast_ctx_vlan_disabled(brmctx))
+ __br_multicast_stop(&vlan->br_mcast_ctx);
+ }
+ }
+ }
+
+ __br_multicast_stop(&br->multicast_ctx);
+}
+
void br_multicast_dev_del(struct net_bridge *br)
{
struct net_bridge_mdb_entry *mp;
@@ -3623,6 +4044,7 @@ void br_multicast_dev_del(struct net_bridge *br)
hlist_move_list(&br->mcast_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
+ br_multicast_ctx_deinit(&br->multicast_ctx);
br_multicast_gc(&deleted_head);
cancel_work_sync(&br->mcast_gc_work);
@@ -3631,6 +4053,7 @@ void br_multicast_dev_del(struct net_bridge *br)
int br_multicast_set_router(struct net_bridge *br, unsigned long val)
{
+ struct net_bridge_mcast *brmctx = &br->multicast_ctx;
int err = -EINVAL;
spin_lock_bh(&br->multicast_lock);
@@ -3639,17 +4062,17 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
case MDB_RTR_TYPE_DISABLED:
case MDB_RTR_TYPE_PERM:
br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
- del_timer(&br->ip4_mc_router_timer);
+ del_timer(&brmctx->ip4_mc_router_timer);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&br->ip6_mc_router_timer);
+ del_timer(&brmctx->ip6_mc_router_timer);
#endif
- br->multicast_router = val;
+ brmctx->multicast_router = val;
err = 0;
break;
case MDB_RTR_TYPE_TEMP_QUERY:
- if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
+ if (brmctx->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
br_mc_router_state_change(br, false);
- br->multicast_router = val;
+ brmctx->multicast_router = val;
err = 0;
break;
}
@@ -3660,7 +4083,7 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
}
static void
-br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted)
+br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted)
{
if (!deleted)
return;
@@ -3668,37 +4091,38 @@ br_multicast_rport_del_notify(struct net_bridge_port *p, bool deleted)
/* For backwards compatibility for now, only notify if there is
* no multicast router anymore for both IPv4 and IPv6.
*/
- if (!hlist_unhashed(&p->ip4_rlist))
+ if (!hlist_unhashed(&pmctx->ip4_rlist))
return;
#if IS_ENABLED(CONFIG_IPV6)
- if (!hlist_unhashed(&p->ip6_rlist))
+ if (!hlist_unhashed(&pmctx->ip6_rlist))
return;
#endif
- br_rtr_notify(p->br->dev, p, RTM_DELMDB);
- br_port_mc_router_state_change(p, false);
+ br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_DELMDB);
+ br_port_mc_router_state_change(pmctx->port, false);
/* don't allow timer refresh */
- if (p->multicast_router == MDB_RTR_TYPE_TEMP)
- p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP)
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
}
int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
{
- struct net_bridge *br = p->br;
+ struct net_bridge_mcast *brmctx = &p->br->multicast_ctx;
+ struct net_bridge_mcast_port *pmctx = &p->multicast_ctx;
unsigned long now = jiffies;
int err = -EINVAL;
bool del = false;
- spin_lock(&br->multicast_lock);
- if (p->multicast_router == val) {
+ spin_lock(&p->br->multicast_lock);
+ if (pmctx->multicast_router == val) {
/* Refresh the temp router port timer */
- if (p->multicast_router == MDB_RTR_TYPE_TEMP) {
- mod_timer(&p->ip4_mc_router_timer,
- now + br->multicast_querier_interval);
+ if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) {
+ mod_timer(&pmctx->ip4_mc_router_timer,
+ now + brmctx->multicast_querier_interval);
#if IS_ENABLED(CONFIG_IPV6)
- mod_timer(&p->ip6_mc_router_timer,
- now + br->multicast_querier_interval);
+ mod_timer(&pmctx->ip6_mc_router_timer,
+ now + brmctx->multicast_querier_interval);
#endif
}
err = 0;
@@ -3706,63 +4130,86 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
}
switch (val) {
case MDB_RTR_TYPE_DISABLED:
- p->multicast_router = MDB_RTR_TYPE_DISABLED;
- del |= br_ip4_multicast_rport_del(p);
- del_timer(&p->ip4_mc_router_timer);
- del |= br_ip6_multicast_rport_del(p);
+ pmctx->multicast_router = MDB_RTR_TYPE_DISABLED;
+ del |= br_ip4_multicast_rport_del(pmctx);
+ del_timer(&pmctx->ip4_mc_router_timer);
+ del |= br_ip6_multicast_rport_del(pmctx);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&p->ip6_mc_router_timer);
+ del_timer(&pmctx->ip6_mc_router_timer);
#endif
- br_multicast_rport_del_notify(p, del);
+ br_multicast_rport_del_notify(pmctx, del);
break;
case MDB_RTR_TYPE_TEMP_QUERY:
- p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- del |= br_ip4_multicast_rport_del(p);
- del |= br_ip6_multicast_rport_del(p);
- br_multicast_rport_del_notify(p, del);
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ del |= br_ip4_multicast_rport_del(pmctx);
+ del |= br_ip6_multicast_rport_del(pmctx);
+ br_multicast_rport_del_notify(pmctx, del);
break;
case MDB_RTR_TYPE_PERM:
- p->multicast_router = MDB_RTR_TYPE_PERM;
- del_timer(&p->ip4_mc_router_timer);
- br_ip4_multicast_add_router(br, p);
+ pmctx->multicast_router = MDB_RTR_TYPE_PERM;
+ del_timer(&pmctx->ip4_mc_router_timer);
+ br_ip4_multicast_add_router(brmctx, pmctx);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&p->ip6_mc_router_timer);
+ del_timer(&pmctx->ip6_mc_router_timer);
#endif
- br_ip6_multicast_add_router(br, p);
+ br_ip6_multicast_add_router(brmctx, pmctx);
break;
case MDB_RTR_TYPE_TEMP:
- p->multicast_router = MDB_RTR_TYPE_TEMP;
- br_ip4_multicast_mark_router(br, p);
- br_ip6_multicast_mark_router(br, p);
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP;
+ br_ip4_multicast_mark_router(brmctx, pmctx);
+ br_ip6_multicast_mark_router(brmctx, pmctx);
break;
default:
goto unlock;
}
err = 0;
unlock:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&p->br->multicast_lock);
return err;
}
-static void br_multicast_start_querier(struct net_bridge *br,
+static void br_multicast_start_querier(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query)
{
struct net_bridge_port *port;
- __br_multicast_open(br, query);
+ __br_multicast_open_query(brmctx->br, query);
rcu_read_lock();
- list_for_each_entry_rcu(port, &br->port_list, list) {
- if (port->state == BR_STATE_DISABLED ||
- port->state == BR_STATE_BLOCKING)
+ list_for_each_entry_rcu(port, &brmctx->br->port_list, list) {
+ struct bridge_mcast_own_query *ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct bridge_mcast_own_query *ip6_own_query;
+#endif
+
+ if (br_multicast_port_ctx_state_stopped(&port->multicast_ctx))
continue;
- if (query == &br->ip4_own_query)
- br_multicast_enable(&port->ip4_own_query);
+ if (br_multicast_ctx_is_vlan(brmctx)) {
+ struct net_bridge_vlan *vlan;
+
+ vlan = br_vlan_find(nbp_vlan_group(port), brmctx->vlan->vid);
+ if (!vlan ||
+ br_multicast_port_ctx_state_stopped(&vlan->port_mcast_ctx))
+ continue;
+
+ ip4_own_query = &vlan->port_mcast_ctx.ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ ip6_own_query = &vlan->port_mcast_ctx.ip6_own_query;
+#endif
+ } else {
+ ip4_own_query = &port->multicast_ctx.ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ ip6_own_query = &port->multicast_ctx.ip6_own_query;
+#endif
+ }
+
+ if (query == &brmctx->ip4_own_query)
+ br_multicast_enable(ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
else
- br_multicast_enable(&port->ip6_own_query);
+ br_multicast_enable(ip6_own_query);
#endif
}
rcu_read_unlock();
@@ -3796,7 +4243,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val,
br_multicast_open(br);
list_for_each_entry(port, &br->port_list, list)
- __br_multicast_enable_port(port);
+ __br_multicast_enable_port_ctx(&port->multicast_ctx);
change_snoopers = true;
@@ -3839,7 +4286,7 @@ bool br_multicast_router(const struct net_device *dev)
bool is_router;
spin_lock_bh(&br->multicast_lock);
- is_router = br_multicast_is_router(br, NULL);
+ is_router = br_multicast_is_router(&br->multicast_ctx, NULL);
spin_unlock_bh(&br->multicast_lock);
return is_router;
}
@@ -3847,6 +4294,7 @@ EXPORT_SYMBOL_GPL(br_multicast_router);
int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
{
+ struct net_bridge_mcast *brmctx = &br->multicast_ctx;
unsigned long max_delay;
val = !!val;
@@ -3859,18 +4307,18 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
if (!val)
goto unlock;
- max_delay = br->multicast_query_response_interval;
+ max_delay = brmctx->multicast_query_response_interval;
- if (!timer_pending(&br->ip4_other_query.timer))
- br->ip4_other_query.delay_time = jiffies + max_delay;
+ if (!timer_pending(&brmctx->ip4_other_query.timer))
+ brmctx->ip4_other_query.delay_time = jiffies + max_delay;
- br_multicast_start_querier(br, &br->ip4_own_query);
+ br_multicast_start_querier(brmctx, &brmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- if (!timer_pending(&br->ip6_other_query.timer))
- br->ip6_other_query.delay_time = jiffies + max_delay;
+ if (!timer_pending(&brmctx->ip6_other_query.timer))
+ brmctx->ip6_other_query.delay_time = jiffies + max_delay;
- br_multicast_start_querier(br, &br->ip6_own_query);
+ br_multicast_start_querier(brmctx, &brmctx->ip6_own_query);
#endif
unlock:
@@ -3891,7 +4339,7 @@ int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
}
spin_lock_bh(&br->multicast_lock);
- br->multicast_igmp_version = val;
+ br->multicast_ctx.multicast_igmp_version = val;
spin_unlock_bh(&br->multicast_lock);
return 0;
@@ -3910,7 +4358,7 @@ int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
}
spin_lock_bh(&br->multicast_lock);
- br->multicast_mld_version = val;
+ br->multicast_ctx.multicast_mld_version = val;
spin_unlock_bh(&br->multicast_lock);
return 0;
@@ -4003,7 +4451,7 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto)
memset(&eth, 0, sizeof(eth));
eth.h_proto = htons(proto);
- ret = br_multicast_querier_exists(br, &eth, NULL);
+ ret = br_multicast_querier_exists(&br->multicast_ctx, &eth, NULL);
unlock:
rcu_read_unlock();
@@ -4022,6 +4470,7 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere);
*/
bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
{
+ struct net_bridge_mcast *brmctx;
struct net_bridge *br;
struct net_bridge_port *port;
bool ret = false;
@@ -4035,17 +4484,18 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
goto unlock;
br = port->br;
+ brmctx = &br->multicast_ctx;
switch (proto) {
case ETH_P_IP:
- if (!timer_pending(&br->ip4_other_query.timer) ||
- rcu_dereference(br->ip4_querier.port) == port)
+ if (!timer_pending(&brmctx->ip4_other_query.timer) ||
+ rcu_dereference(brmctx->ip4_querier.port) == port)
goto unlock;
break;
#if IS_ENABLED(CONFIG_IPV6)
case ETH_P_IPV6:
- if (!timer_pending(&br->ip6_other_query.timer) ||
- rcu_dereference(br->ip6_querier.port) == port)
+ if (!timer_pending(&brmctx->ip6_other_query.timer) ||
+ rcu_dereference(brmctx->ip6_querier.port) == port)
goto unlock;
break;
#endif
@@ -4071,7 +4521,9 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
*/
bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
{
- struct net_bridge_port *port, *p;
+ struct net_bridge_mcast_port *pmctx;
+ struct net_bridge_mcast *brmctx;
+ struct net_bridge_port *port;
bool ret = false;
rcu_read_lock();
@@ -4079,11 +4531,12 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
if (!port)
goto unlock;
+ brmctx = &port->br->multicast_ctx;
switch (proto) {
case ETH_P_IP:
- hlist_for_each_entry_rcu(p, &port->br->ip4_mc_router_list,
+ hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list,
ip4_rlist) {
- if (p == port)
+ if (pmctx->port == port)
continue;
ret = true;
@@ -4092,9 +4545,9 @@ bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
break;
#if IS_ENABLED(CONFIG_IPV6)
case ETH_P_IPV6:
- hlist_for_each_entry_rcu(p, &port->br->ip6_mc_router_list,
+ hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list,
ip6_rlist) {
- if (p == port)
+ if (pmctx->port == port)
continue;
ret = true;
@@ -4186,7 +4639,8 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
u64_stats_update_end(&pstats->syncp);
}
-void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+void br_multicast_count(struct net_bridge *br,
+ const struct net_bridge_port *p,
const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats __percpu *stats;
diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c
index 13290a749d09..f91c071d1608 100644
--- a/net/bridge/br_multicast_eht.c
+++ b/net/bridge/br_multicast_eht.c
@@ -33,7 +33,8 @@
static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg,
union net_bridge_eht_addr *src_addr,
union net_bridge_eht_addr *h_addr);
-static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *src_addr,
union net_bridge_eht_addr *h_addr,
int filter_mode,
@@ -388,7 +389,8 @@ static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src,
}
}
-static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg,
+static void br_eht_convert_host_filter_mode(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
int filter_mode)
{
@@ -405,14 +407,15 @@ static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg,
br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr);
break;
case MCAST_EXCLUDE:
- br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr,
- MCAST_EXCLUDE,
+ br_multicast_create_eht_set_entry(brmctx, pg, &zero_addr,
+ h_addr, MCAST_EXCLUDE,
true);
break;
}
}
-static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *src_addr,
union net_bridge_eht_addr *h_addr,
int filter_mode,
@@ -441,8 +444,8 @@ static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
if (!set_h)
goto fail_set_entry;
- mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br));
- mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br));
+ mod_timer(&set_h->timer, jiffies + br_multicast_gmi(brmctx));
+ mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(brmctx));
return;
@@ -499,7 +502,8 @@ static void br_multicast_del_eht_host(struct net_bridge_port_group *pg,
}
/* create new set entries from reports */
-static void __eht_create_set_entries(struct net_bridge_port_group *pg,
+static void __eht_create_set_entries(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -512,8 +516,8 @@ static void __eht_create_set_entries(struct net_bridge_port_group *pg,
memset(&eht_src_addr, 0, sizeof(eht_src_addr));
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
- br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
- filter_mode,
+ br_multicast_create_eht_set_entry(brmctx, pg, &eht_src_addr,
+ h_addr, filter_mode,
false);
}
}
@@ -549,7 +553,8 @@ static bool __eht_del_set_entries(struct net_bridge_port_group *pg,
return changed;
}
-static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_allow(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -559,8 +564,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
case MCAST_INCLUDE:
- __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
- MCAST_INCLUDE);
+ __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs,
+ addr_size, MCAST_INCLUDE);
break;
case MCAST_EXCLUDE:
changed = __eht_del_set_entries(pg, h_addr, srcs, nsrcs,
@@ -571,7 +576,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
return changed;
}
-static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_block(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -585,7 +591,7 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
addr_size);
break;
case MCAST_EXCLUDE:
- __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+ __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
MCAST_EXCLUDE);
break;
}
@@ -594,7 +600,8 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
}
/* flush_entries is true when changing mode */
-static bool __eht_inc_exc(struct net_bridge_port_group *pg,
+static bool __eht_inc_exc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -612,11 +619,10 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
/* if we're changing mode del host and its entries */
if (flush_entries)
br_multicast_del_eht_host(pg, h_addr);
- __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+ __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
filter_mode);
/* we can be missing sets only if we've deleted some entries */
if (flush_entries) {
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_eht_set *eht_set;
struct net_bridge_group_src *src_ent;
struct hlist_node *tmp;
@@ -647,14 +653,15 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
&eht_src_addr);
if (!eht_set)
continue;
- mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br));
+ mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(brmctx));
}
}
return changed;
}
-static bool br_multicast_eht_inc(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_inc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -663,14 +670,15 @@ static bool br_multicast_eht_inc(struct net_bridge_port_group *pg,
{
bool changed;
- changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+ changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
MCAST_INCLUDE, to_report);
- br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE);
+ br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_INCLUDE);
return changed;
}
-static bool br_multicast_eht_exc(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_exc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -679,14 +687,15 @@ static bool br_multicast_eht_exc(struct net_bridge_port_group *pg,
{
bool changed;
- changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+ changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
MCAST_EXCLUDE, to_report);
- br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE);
+ br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_EXCLUDE);
return changed;
}
-static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
+static bool __eht_ip4_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -696,24 +705,25 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
switch (grec_type) {
case IGMPV3_ALLOW_NEW_SOURCES:
- br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32));
+ br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs,
+ sizeof(__be32));
break;
case IGMPV3_BLOCK_OLD_SOURCES:
- changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(__be32));
break;
case IGMPV3_CHANGE_TO_INCLUDE:
to_report = true;
fallthrough;
case IGMPV3_MODE_IS_INCLUDE:
- changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(__be32), to_report);
break;
case IGMPV3_CHANGE_TO_EXCLUDE:
to_report = true;
fallthrough;
case IGMPV3_MODE_IS_EXCLUDE:
- changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(__be32), to_report);
break;
}
@@ -722,7 +732,8 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
}
#if IS_ENABLED(CONFIG_IPV6)
-static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
+static bool __eht_ip6_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -732,18 +743,18 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
switch (grec_type) {
case MLD2_ALLOW_NEW_SOURCES:
- br_multicast_eht_allow(pg, h_addr, srcs, nsrcs,
+ br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr));
break;
case MLD2_BLOCK_OLD_SOURCES:
- changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr));
break;
case MLD2_CHANGE_TO_INCLUDE:
to_report = true;
fallthrough;
case MLD2_MODE_IS_INCLUDE:
- changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr),
to_report);
break;
@@ -751,7 +762,7 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
to_report = true;
fallthrough;
case MLD2_MODE_IS_EXCLUDE:
- changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr),
to_report);
break;
@@ -762,7 +773,8 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
#endif
/* true means an entry was deleted */
-bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
void *h_addr,
void *srcs,
u32 nsrcs,
@@ -779,12 +791,12 @@ bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
memset(&eht_host_addr, 0, sizeof(eht_host_addr));
memcpy(&eht_host_addr, h_addr, addr_size);
if (addr_size == sizeof(__be32))
- changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs,
- grec_type);
+ changed = __eht_ip4_handle(brmctx, pg, &eht_host_addr, srcs,
+ nsrcs, grec_type);
#if IS_ENABLED(CONFIG_IPV6)
else
- changed = __eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs,
- grec_type);
+ changed = __eht_ip6_handle(brmctx, pg, &eht_host_addr, srcs,
+ nsrcs, grec_type);
#endif
out:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 8642e56059fb..616a1b6dec3c 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -287,7 +287,7 @@ static int br_port_fill_attrs(struct sk_buff *skb,
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER,
- p->multicast_router) ||
+ p->multicast_ctx.multicast_router) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
p->multicast_eht_hosts_limit) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
@@ -1324,49 +1324,49 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) {
u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]);
- br->multicast_last_member_count = val;
+ br->multicast_ctx.multicast_last_member_count = val;
}
if (data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]) {
u32 val = nla_get_u32(data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]);
- br->multicast_startup_query_count = val;
+ br->multicast_ctx.multicast_startup_query_count = val;
}
if (data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]);
- br->multicast_last_member_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]);
- br->multicast_membership_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_QUERIER_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERIER_INTVL]);
- br->multicast_querier_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_QUERY_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]);
- br->multicast_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]);
- br->multicast_query_response_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]);
- br->multicast_startup_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_STATS_ENABLED]) {
@@ -1566,7 +1566,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
return -EMSGSIZE;
#endif
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) ||
+ if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER,
+ br->multicast_ctx.multicast_router) ||
nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING,
br_opt_get(br, BROPT_MULTICAST_ENABLED)) ||
nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
@@ -1578,38 +1579,38 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
- br->multicast_last_member_count) ||
+ br->multicast_ctx.multicast_last_member_count) ||
nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT,
- br->multicast_startup_query_count) ||
+ br->multicast_ctx.multicast_startup_query_count) ||
nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION,
- br->multicast_igmp_version))
+ br->multicast_ctx.multicast_igmp_version))
return -EMSGSIZE;
#if IS_ENABLED(CONFIG_IPV6)
if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION,
- br->multicast_mld_version))
+ br->multicast_ctx.multicast_mld_version))
return -EMSGSIZE;
#endif
- clockval = jiffies_to_clock_t(br->multicast_last_member_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_membership_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_querier_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_query_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_query_response_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_startup_query_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2b48b204205e..2f32d330b648 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -29,6 +29,8 @@
#define BR_MULTICAST_DEFAULT_HASH_MAX 4096
+#define BR_HWDOM_MAX BITS_PER_LONG
+
#define BR_VERSION "2.3"
/* Control of forwarding link local multicast */
@@ -89,6 +91,59 @@ struct bridge_mcast_stats {
};
#endif
+/* net_bridge_mcast_port must be always defined due to forwarding stubs */
+struct net_bridge_mcast_port {
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ struct net_bridge_port *port;
+ struct net_bridge_vlan *vlan;
+
+ struct bridge_mcast_own_query ip4_own_query;
+ struct timer_list ip4_mc_router_timer;
+ struct hlist_node ip4_rlist;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct bridge_mcast_own_query ip6_own_query;
+ struct timer_list ip6_mc_router_timer;
+ struct hlist_node ip6_rlist;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+ unsigned char multicast_router;
+#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
+};
+
+/* net_bridge_mcast must be always defined due to forwarding stubs */
+struct net_bridge_mcast {
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ struct net_bridge *br;
+ struct net_bridge_vlan *vlan;
+
+ u32 multicast_last_member_count;
+ u32 multicast_startup_query_count;
+
+ u8 multicast_igmp_version;
+ u8 multicast_router;
+#if IS_ENABLED(CONFIG_IPV6)
+ u8 multicast_mld_version;
+#endif
+ unsigned long multicast_last_member_interval;
+ unsigned long multicast_membership_interval;
+ unsigned long multicast_querier_interval;
+ unsigned long multicast_query_interval;
+ unsigned long multicast_query_response_interval;
+ unsigned long multicast_startup_query_interval;
+ struct hlist_head ip4_mc_router_list;
+ struct timer_list ip4_mc_router_timer;
+ struct bridge_mcast_other_query ip4_other_query;
+ struct bridge_mcast_own_query ip4_own_query;
+ struct bridge_mcast_querier ip4_querier;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct hlist_head ip6_mc_router_list;
+ struct timer_list ip6_mc_router_timer;
+ struct bridge_mcast_other_query ip6_other_query;
+ struct bridge_mcast_own_query ip6_own_query;
+ struct bridge_mcast_querier ip6_querier;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
+};
+
struct br_tunnel_info {
__be64 tunnel_id;
struct metadata_dst __rcu *tunnel_dst;
@@ -98,6 +153,8 @@ struct br_tunnel_info {
enum {
BR_VLFLAG_PER_PORT_STATS = BIT(0),
BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1),
+ BR_VLFLAG_MCAST_ENABLED = BIT(2),
+ BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3),
};
/**
@@ -114,6 +171,9 @@ enum {
* @refcnt: if MASTER flag set, this is bumped for each port referencing it
* @brvlan: if MASTER flag unset, this points to the global per-VLAN context
* for this VLAN entry
+ * @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context
+ * @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast
+ * context
* @vlist: sorted list of VLAN entries
* @rcu: used for entry destruction
*
@@ -141,6 +201,11 @@ struct net_bridge_vlan {
struct br_tunnel_info tinfo;
+ union {
+ struct net_bridge_mcast br_mcast_ctx;
+ struct net_bridge_mcast_port port_mcast_ctx;
+ };
+
struct list_head vlist;
struct rcu_head rcu;
@@ -305,19 +370,13 @@ struct net_bridge_port {
struct kobject kobj;
struct rcu_head rcu;
+ struct net_bridge_mcast_port multicast_ctx;
+
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- struct bridge_mcast_own_query ip4_own_query;
- struct timer_list ip4_mc_router_timer;
- struct hlist_node ip4_rlist;
-#if IS_ENABLED(CONFIG_IPV6)
- struct bridge_mcast_own_query ip6_own_query;
- struct timer_list ip6_mc_router_timer;
- struct hlist_node ip6_rlist;
-#endif /* IS_ENABLED(CONFIG_IPV6) */
+ struct bridge_mcast_stats __percpu *mcast_stats;
+
u32 multicast_eht_hosts_limit;
u32 multicast_eht_hosts_cnt;
- unsigned char multicast_router;
- struct bridge_mcast_stats __percpu *mcast_stats;
struct hlist_head mglist;
#endif
@@ -329,7 +388,12 @@ struct net_bridge_port {
struct netpoll *np;
#endif
#ifdef CONFIG_NET_SWITCHDEV
- int offload_fwd_mark;
+ /* Identifier used to group ports that share the same switchdev
+ * hardware domain.
+ */
+ int hwdom;
+ int offload_count;
+ struct netdev_phys_item_id ppid;
#endif
u16 group_fwd_mask;
u16 backup_redirected_cnt;
@@ -376,6 +440,7 @@ enum net_bridge_opts {
BROPT_VLAN_STATS_PER_PORT,
BROPT_NO_LL_LEARN,
BROPT_VLAN_BRIDGE_BINDING,
+ BROPT_MCAST_VLAN_SNOOPING_ENABLED,
};
struct net_bridge {
@@ -426,25 +491,14 @@ struct net_bridge {
BR_USER_STP, /* new RSTP in userspace */
} stp_enabled;
+ struct net_bridge_mcast multicast_ctx;
+
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ struct bridge_mcast_stats __percpu *mcast_stats;
u32 hash_max;
- u32 multicast_last_member_count;
- u32 multicast_startup_query_count;
-
- u8 multicast_igmp_version;
- u8 multicast_router;
-#if IS_ENABLED(CONFIG_IPV6)
- u8 multicast_mld_version;
-#endif
spinlock_t multicast_lock;
- unsigned long multicast_last_member_interval;
- unsigned long multicast_membership_interval;
- unsigned long multicast_querier_interval;
- unsigned long multicast_query_interval;
- unsigned long multicast_query_response_interval;
- unsigned long multicast_startup_query_interval;
struct rhashtable mdb_hash_tbl;
struct rhashtable sg_port_tbl;
@@ -452,19 +506,6 @@ struct net_bridge {
struct hlist_head mcast_gc_list;
struct hlist_head mdb_list;
- struct hlist_head ip4_mc_router_list;
- struct timer_list ip4_mc_router_timer;
- struct bridge_mcast_other_query ip4_other_query;
- struct bridge_mcast_own_query ip4_own_query;
- struct bridge_mcast_querier ip4_querier;
- struct bridge_mcast_stats __percpu *mcast_stats;
-#if IS_ENABLED(CONFIG_IPV6)
- struct hlist_head ip6_mc_router_list;
- struct timer_list ip6_mc_router_timer;
- struct bridge_mcast_other_query ip6_other_query;
- struct bridge_mcast_own_query ip6_own_query;
- struct bridge_mcast_querier ip6_querier;
-#endif /* IS_ENABLED(CONFIG_IPV6) */
struct work_struct mcast_gc_work;
#endif
@@ -476,7 +517,12 @@ struct net_bridge {
u32 auto_cnt;
#ifdef CONFIG_NET_SWITCHDEV
- int offload_fwd_mark;
+ /* Counter used to make sure that hardware domains get unique
+ * identifiers in case a bridge spans multiple switchdev instances.
+ */
+ int last_hwdom;
+ /* Bit mask of hardware domain numbers in use */
+ unsigned long busy_hwdoms;
#endif
struct hlist_head fdb_list;
@@ -506,7 +552,12 @@ struct br_input_skb_cb {
#endif
#ifdef CONFIG_NET_SWITCHDEV
- int offload_fwd_mark;
+ /* The switchdev hardware domain from which this packet was received.
+ * If skb->offload_fwd_mark was set, then this packet was already
+ * forwarded by hardware to the other ports in the source hardware
+ * domain, otherwise it wasn't.
+ */
+ int src_hwdom;
#endif
};
@@ -718,6 +769,8 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
bool swdev_notify);
void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid, bool offloaded);
+int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev,
+ const void *ctx, bool adding, struct notifier_block *nb);
/* br_forward.c */
enum br_pkt_type {
@@ -796,9 +849,11 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd,
/* br_multicast.c */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
+int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+ struct net_bridge_mcast_port **pmctx,
+ struct net_bridge_vlan *vlan,
struct sk_buff *skb, u16 vid);
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
struct sk_buff *skb, u16 vid);
int br_multicast_add_port(struct net_bridge_port *port);
void br_multicast_del_port(struct net_bridge_port *port);
@@ -810,8 +865,9 @@ void br_multicast_leave_snoopers(struct net_bridge *br);
void br_multicast_open(struct net_bridge *br);
void br_multicast_stop(struct net_bridge *br);
void br_multicast_dev_del(struct net_bridge *br);
-void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb, bool local_rcv, bool local_orig);
+void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb,
+ struct net_bridge_mcast *brmctx,
+ bool local_rcv, bool local_orig);
int br_multicast_set_router(struct net_bridge *br, unsigned long val);
int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val);
int br_multicast_toggle(struct net_bridge *br, unsigned long val,
@@ -835,12 +891,13 @@ int br_mdb_hash_init(struct net_bridge *br);
void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg, int type);
-void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
+void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx,
int type);
void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg,
struct net_bridge_port_group __rcu **pp);
-void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+void br_multicast_count(struct net_bridge *br,
+ const struct net_bridge_port *p,
const struct sk_buff *skb, u8 type, u8 dir);
int br_multicast_init_stats(struct net_bridge *br);
void br_multicast_uninit_stats(struct net_bridge *br);
@@ -849,7 +906,8 @@ void br_multicast_get_stats(const struct net_bridge *br,
struct br_mcast_stats *dest);
void br_mdb_init(void);
void br_mdb_uninit(void);
-void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify);
+void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_mdb_entry *mp, bool notify);
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
u8 filter_mode);
@@ -859,6 +917,23 @@ struct net_bridge_group_src *
br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip);
void br_multicast_del_group_src(struct net_bridge_group_src *src,
bool fastleave);
+void br_multicast_ctx_init(struct net_bridge *br,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast *brmctx);
+void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx);
+void br_multicast_port_ctx_init(struct net_bridge_port *port,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast_port *pmctx);
+void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx);
+void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on);
+void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on);
+int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack);
+bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on);
+
+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack);
static inline bool br_group_is_l2(const struct br_ip *group)
{
@@ -869,52 +944,65 @@ static inline bool br_group_is_l2(const struct br_ip *group)
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
static inline struct hlist_node *
-br_multicast_get_first_rport_node(struct net_bridge *b, struct sk_buff *skb) {
+br_multicast_get_first_rport_node(struct net_bridge_mcast *brmctx,
+ struct sk_buff *skb)
+{
#if IS_ENABLED(CONFIG_IPV6)
if (skb->protocol == htons(ETH_P_IPV6))
- return rcu_dereference(hlist_first_rcu(&b->ip6_mc_router_list));
+ return rcu_dereference(hlist_first_rcu(&brmctx->ip6_mc_router_list));
#endif
- return rcu_dereference(hlist_first_rcu(&b->ip4_mc_router_list));
+ return rcu_dereference(hlist_first_rcu(&brmctx->ip4_mc_router_list));
}
static inline struct net_bridge_port *
-br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) {
+br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb)
+{
+ struct net_bridge_mcast_port *mctx;
+
#if IS_ENABLED(CONFIG_IPV6)
if (skb->protocol == htons(ETH_P_IPV6))
- return hlist_entry_safe(rp, struct net_bridge_port, ip6_rlist);
+ mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port,
+ ip6_rlist);
+ else
#endif
- return hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist);
+ mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port,
+ ip4_rlist);
+
+ if (mctx)
+ return mctx->port;
+ else
+ return NULL;
}
-static inline bool br_ip4_multicast_is_router(struct net_bridge *br)
+static inline bool br_ip4_multicast_is_router(struct net_bridge_mcast *brmctx)
{
- return timer_pending(&br->ip4_mc_router_timer);
+ return timer_pending(&brmctx->ip4_mc_router_timer);
}
-static inline bool br_ip6_multicast_is_router(struct net_bridge *br)
+static inline bool br_ip6_multicast_is_router(struct net_bridge_mcast *brmctx)
{
#if IS_ENABLED(CONFIG_IPV6)
- return timer_pending(&br->ip6_mc_router_timer);
+ return timer_pending(&brmctx->ip6_mc_router_timer);
#else
return false;
#endif
}
static inline bool
-br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb)
+br_multicast_is_router(struct net_bridge_mcast *brmctx, struct sk_buff *skb)
{
- switch (br->multicast_router) {
+ switch (brmctx->multicast_router) {
case MDB_RTR_TYPE_PERM:
return true;
case MDB_RTR_TYPE_TEMP_QUERY:
if (skb) {
if (skb->protocol == htons(ETH_P_IP))
- return br_ip4_multicast_is_router(br);
+ return br_ip4_multicast_is_router(brmctx);
else if (skb->protocol == htons(ETH_P_IPV6))
- return br_ip6_multicast_is_router(br);
+ return br_ip6_multicast_is_router(brmctx);
} else {
- return br_ip4_multicast_is_router(br) ||
- br_ip6_multicast_is_router(br);
+ return br_ip4_multicast_is_router(brmctx) ||
+ br_ip6_multicast_is_router(brmctx);
}
fallthrough;
default:
@@ -923,14 +1011,14 @@ br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb)
}
static inline bool
-__br_multicast_querier_exists(struct net_bridge *br,
- struct bridge_mcast_other_query *querier,
- const bool is_ipv6)
+__br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
+ struct bridge_mcast_other_query *querier,
+ const bool is_ipv6)
{
bool own_querier_enabled;
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
- if (is_ipv6 && !br_opt_get(br, BROPT_HAS_IPV6_ADDR))
+ if (br_opt_get(brmctx->br, BROPT_MULTICAST_QUERIER)) {
+ if (is_ipv6 && !br_opt_get(brmctx->br, BROPT_HAS_IPV6_ADDR))
own_querier_enabled = false;
else
own_querier_enabled = true;
@@ -942,18 +1030,18 @@ __br_multicast_querier_exists(struct net_bridge *br,
(own_querier_enabled || timer_pending(&querier->timer));
}
-static inline bool br_multicast_querier_exists(struct net_bridge *br,
+static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
struct ethhdr *eth,
const struct net_bridge_mdb_entry *mdb)
{
switch (eth->h_proto) {
case (htons(ETH_P_IP)):
- return __br_multicast_querier_exists(br,
- &br->ip4_other_query, false);
+ return __br_multicast_querier_exists(brmctx,
+ &brmctx->ip4_other_query, false);
#if IS_ENABLED(CONFIG_IPV6)
case (htons(ETH_P_IPV6)):
- return __br_multicast_querier_exists(br,
- &br->ip6_other_query, true);
+ return __br_multicast_querier_exists(brmctx,
+ &brmctx->ip6_other_query, true);
#endif
default:
return !!mdb && br_group_is_l2(&mdb->addr);
@@ -974,15 +1062,16 @@ static inline bool br_multicast_is_star_g(const struct br_ip *ip)
}
}
-static inline bool br_multicast_should_handle_mode(const struct net_bridge *br,
- __be16 proto)
+static inline bool
+br_multicast_should_handle_mode(const struct net_bridge_mcast *brmctx,
+ __be16 proto)
{
switch (proto) {
case htons(ETH_P_IP):
- return !!(br->multicast_igmp_version == 3);
+ return !!(brmctx->multicast_igmp_version == 3);
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- return !!(br->multicast_mld_version == 2);
+ return !!(brmctx->multicast_mld_version == 2);
#endif
default:
return false;
@@ -994,28 +1083,90 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb)
return BR_INPUT_SKB_CB(skb)->igmp;
}
-static inline unsigned long br_multicast_lmqt(const struct net_bridge *br)
+static inline unsigned long br_multicast_lmqt(const struct net_bridge_mcast *brmctx)
{
- return br->multicast_last_member_interval *
- br->multicast_last_member_count;
+ return brmctx->multicast_last_member_interval *
+ brmctx->multicast_last_member_count;
}
-static inline unsigned long br_multicast_gmi(const struct net_bridge *br)
+static inline unsigned long br_multicast_gmi(const struct net_bridge_mcast *brmctx)
{
/* use the RFC default of 2 for QRV */
- return 2 * br->multicast_query_interval +
- br->multicast_query_response_interval;
+ return 2 * brmctx->multicast_query_interval +
+ brmctx->multicast_query_response_interval;
+}
+
+static inline bool
+br_multicast_ctx_is_vlan(const struct net_bridge_mcast *brmctx)
+{
+ return !!brmctx->vlan;
+}
+
+static inline bool
+br_multicast_port_ctx_is_vlan(const struct net_bridge_mcast_port *pmctx)
+{
+ return !!pmctx->vlan;
+}
+
+static inline struct net_bridge_mcast *
+br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx)
+{
+ if (!br_multicast_port_ctx_is_vlan(pmctx))
+ return &pmctx->port->br->multicast_ctx;
+ else
+ return &pmctx->vlan->brvlan->br_mcast_ctx;
+}
+
+static inline bool
+br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx)
+{
+ return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
+ br_multicast_ctx_is_vlan(brmctx) &&
+ !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_ctx_vlan_disabled(const struct net_bridge_mcast *brmctx)
+{
+ return br_multicast_ctx_is_vlan(brmctx) &&
+ !(brmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_port_ctx_vlan_disabled(const struct net_bridge_mcast_port *pmctx)
+{
+ return br_multicast_port_ctx_is_vlan(pmctx) &&
+ !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_port_ctx_state_disabled(const struct net_bridge_mcast_port *pmctx)
+{
+ return pmctx->port->state == BR_STATE_DISABLED ||
+ (br_multicast_port_ctx_is_vlan(pmctx) &&
+ (br_multicast_port_ctx_vlan_disabled(pmctx) ||
+ pmctx->vlan->state == BR_STATE_DISABLED));
+}
+
+static inline bool
+br_multicast_port_ctx_state_stopped(const struct net_bridge_mcast_port *pmctx)
+{
+ return br_multicast_port_ctx_state_disabled(pmctx) ||
+ pmctx->port->state == BR_STATE_BLOCKING ||
+ (br_multicast_port_ctx_is_vlan(pmctx) &&
+ pmctx->vlan->state == BR_STATE_BLOCKING);
}
#else
-static inline int br_multicast_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+ struct net_bridge_mcast_port **pmctx,
+ struct net_bridge_vlan *vlan,
struct sk_buff *skb,
u16 vid)
{
return 0;
}
-static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
struct sk_buff *skb, u16 vid)
{
return NULL;
@@ -1064,17 +1215,18 @@ static inline void br_multicast_dev_del(struct net_bridge *br)
static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb,
+ struct net_bridge_mcast *brmctx,
bool local_rcv, bool local_orig)
{
}
-static inline bool br_multicast_is_router(struct net_bridge *br,
+static inline bool br_multicast_is_router(struct net_bridge_mcast *brmctx,
struct sk_buff *skb)
{
return false;
}
-static inline bool br_multicast_querier_exists(struct net_bridge *br,
+static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
struct ethhdr *eth,
const struct net_bridge_mdb_entry *mdb)
{
@@ -1118,13 +1270,65 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb)
{
return 0;
}
+
+static inline void br_multicast_ctx_init(struct net_bridge *br,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast *brmctx)
+{
+}
+
+static inline void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx)
+{
+}
+
+static inline void br_multicast_port_ctx_init(struct net_bridge_port *port,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast_port *pmctx)
+{
+}
+
+static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
+{
+}
+
+static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan,
+ bool on)
+{
+}
+
+static inline void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan,
+ bool on)
+{
+}
+
+static inline int br_multicast_toggle_vlan_snooping(struct net_bridge *br,
+ bool on,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan,
+ bool on)
+{
+ return false;
+}
+
+static inline int br_mdb_replay(struct net_device *br_dev,
+ struct net_device *dev, const void *ctx,
+ bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
#endif
/* br_vlan.c */
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg, struct sk_buff *skb,
- u16 *vid, u8 *state);
+ u16 *vid, u8 *state,
+ struct net_bridge_vlan **vlan);
bool br_allowed_egress(struct net_bridge_vlan_group *vg,
const struct sk_buff *skb);
bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid);
@@ -1168,6 +1372,9 @@ void br_vlan_notify(const struct net_bridge *br,
const struct net_bridge_port *p,
u16 vid, u16 vid_range,
int cmd);
+int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack);
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end);
@@ -1236,8 +1443,11 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
static inline bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb,
- u16 *vid, u8 *state)
+ u16 *vid, u8 *state,
+ struct net_bridge_vlan **vlan)
+
{
+ *vlan = NULL;
return true;
}
@@ -1410,6 +1620,14 @@ static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
{
return true;
}
+
+static inline int br_vlan_replay(struct net_device *br_dev,
+ struct net_device *dev, const void *ctx,
+ bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
#endif
/* br_vlan_options.c */
@@ -1424,6 +1642,14 @@ int br_vlan_process_options(const struct net_bridge *br,
struct net_bridge_vlan *range_end,
struct nlattr **tb,
struct netlink_ext_ack *extack);
+int br_vlan_rtm_process_global_options(struct net_device *dev,
+ const struct nlattr *attr,
+ int cmd,
+ struct netlink_ext_ack *extack);
+bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
+ const struct net_bridge_vlan *r_end);
+bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
+ const struct net_bridge_vlan *v_opts);
/* vlan state manipulation helpers using *_ONCE to annotate lock-free access */
static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v)
@@ -1645,7 +1871,6 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; }
/* br_switchdev.c */
#ifdef CONFIG_NET_SWITCHDEV
-int nbp_switchdev_mark_set(struct net_bridge_port *p);
void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
struct sk_buff *skb);
bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
@@ -1659,17 +1884,13 @@ void br_switchdev_fdb_notify(struct net_bridge *br,
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
struct netlink_ext_ack *extack);
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
+void br_switchdev_init(struct net_bridge *br);
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
{
skb->offload_fwd_mark = 0;
}
#else
-static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
-{
- return 0;
-}
-
static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
struct sk_buff *skb)
{
@@ -1710,6 +1931,11 @@ br_switchdev_fdb_notify(struct net_bridge *br,
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
{
}
+
+static inline void br_switchdev_init(struct net_bridge *br)
+{
+}
+
#endif /* CONFIG_NET_SWITCHDEV */
/* br_arp_nd_proxy.c */
diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h
index f89049f4892c..adf82a05515a 100644
--- a/net/bridge/br_private_mcast_eht.h
+++ b/net/bridge/br_private_mcast_eht.h
@@ -51,7 +51,8 @@ struct net_bridge_group_eht_set {
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg);
-bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
void *h_addr,
void *srcs,
u32 nsrcs,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index d3adee0f91f9..6bfff28ede23 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -8,50 +8,18 @@
#include "br_private.h"
-static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
-{
- struct net_bridge_port *p;
-
- /* dev is yet to be added to the port list. */
- list_for_each_entry(p, &br->port_list, list) {
- if (netdev_port_same_parent_id(dev, p->dev))
- return p->offload_fwd_mark;
- }
-
- return ++br->offload_fwd_mark;
-}
-
-int nbp_switchdev_mark_set(struct net_bridge_port *p)
-{
- struct netdev_phys_item_id ppid = { };
- int err;
-
- ASSERT_RTNL();
-
- err = dev_get_port_parent_id(p->dev, &ppid, true);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
-
- p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev);
-
- return 0;
-}
-
void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
struct sk_buff *skb)
{
- if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark))
- BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark;
+ if (p->hwdom)
+ BR_INPUT_SKB_CB(skb)->src_hwdom = p->hwdom;
}
bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
const struct sk_buff *skb)
{
return !skb->offload_fwd_mark ||
- BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark;
+ BR_INPUT_SKB_CB(skb)->src_hwdom != p->hwdom;
}
/* Flags that can be offloaded to hardware */
@@ -156,3 +124,192 @@ int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
return switchdev_port_obj_del(dev, &v.obj);
}
+
+static int nbp_switchdev_hwdom_set(struct net_bridge_port *joining)
+{
+ struct net_bridge *br = joining->br;
+ struct net_bridge_port *p;
+ int hwdom;
+
+ /* joining is yet to be added to the port list. */
+ list_for_each_entry(p, &br->port_list, list) {
+ if (netdev_phys_item_id_same(&joining->ppid, &p->ppid)) {
+ joining->hwdom = p->hwdom;
+ return 0;
+ }
+ }
+
+ hwdom = find_next_zero_bit(&br->busy_hwdoms, BR_HWDOM_MAX, 1);
+ if (hwdom >= BR_HWDOM_MAX)
+ return -EBUSY;
+
+ set_bit(hwdom, &br->busy_hwdoms);
+ joining->hwdom = hwdom;
+ return 0;
+}
+
+static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving)
+{
+ struct net_bridge *br = leaving->br;
+ struct net_bridge_port *p;
+
+ /* leaving is no longer in the port list. */
+ list_for_each_entry(p, &br->port_list, list) {
+ if (p->hwdom == leaving->hwdom)
+ return;
+ }
+
+ clear_bit(leaving->hwdom, &br->busy_hwdoms);
+}
+
+static int nbp_switchdev_add(struct net_bridge_port *p,
+ struct netdev_phys_item_id ppid,
+ struct netlink_ext_ack *extack)
+{
+ if (p->offload_count) {
+ /* Prevent unsupported configurations such as a bridge port
+ * which is a bonding interface, and the member ports are from
+ * different hardware switches.
+ */
+ if (!netdev_phys_item_id_same(&p->ppid, &ppid)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Same bridge port cannot be offloaded by two physical switches");
+ return -EBUSY;
+ }
+
+ /* Tolerate drivers that call switchdev_bridge_port_offload()
+ * more than once for the same bridge port, such as when the
+ * bridge port is an offloaded bonding/team interface.
+ */
+ p->offload_count++;
+
+ return 0;
+ }
+
+ p->ppid = ppid;
+ p->offload_count = 1;
+
+ return nbp_switchdev_hwdom_set(p);
+}
+
+static void nbp_switchdev_del(struct net_bridge_port *p)
+{
+ if (WARN_ON(!p->offload_count))
+ return;
+
+ p->offload_count--;
+
+ if (p->offload_count)
+ return;
+
+ if (p->hwdom)
+ nbp_switchdev_hwdom_put(p);
+}
+
+static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *br_dev = p->br->dev;
+ struct net_device *dev = p->dev;
+ int err;
+
+ err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ /* Forwarding and termination FDB entries on the port */
+ err = br_fdb_replay(br_dev, dev, ctx, true, atomic_nb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ /* Termination FDB entries on the bridge itself */
+ err = br_fdb_replay(br_dev, br_dev, ctx, true, atomic_nb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
+
+static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ struct net_device *br_dev = p->br->dev;
+ struct net_device *dev = p->dev;
+
+ br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+
+ br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+
+ /* Forwarding and termination FDB entries on the port */
+ br_fdb_replay(br_dev, dev, ctx, false, atomic_nb);
+
+ /* Termination FDB entries on the bridge itself */
+ br_fdb_replay(br_dev, br_dev, ctx, false, atomic_nb);
+}
+
+/* Let the bridge know that this port is offloaded, so that it can assign a
+ * switchdev hardware domain to it.
+ */
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_phys_item_id ppid;
+ struct net_bridge_port *p;
+ int err;
+
+ ASSERT_RTNL();
+
+ p = br_port_get_rtnl(brport_dev);
+ if (!p)
+ return -ENODEV;
+
+ err = dev_get_port_parent_id(dev, &ppid, false);
+ if (err)
+ return err;
+
+ err = nbp_switchdev_add(p, ppid, extack);
+ if (err)
+ return err;
+
+ err = nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
+ if (err)
+ goto out_switchdev_del;
+
+ return 0;
+
+out_switchdev_del:
+ nbp_switchdev_del(p);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
+
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ struct net_bridge_port *p;
+
+ ASSERT_RTNL();
+
+ p = br_port_get_rtnl(brport_dev);
+ if (!p)
+ return;
+
+ nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb);
+
+ nbp_switchdev_del(p);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 381467b691d5..953d544663d5 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -384,7 +384,7 @@ static ssize_t multicast_router_show(struct device *d,
struct device_attribute *attr, char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%d\n", br->multicast_router);
+ return sprintf(buf, "%d\n", br->multicast_ctx.multicast_router);
}
static int set_multicast_router(struct net_bridge *br, unsigned long val,
@@ -514,7 +514,7 @@ static ssize_t multicast_igmp_version_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_igmp_version);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_igmp_version);
}
static int set_multicast_igmp_version(struct net_bridge *br, unsigned long val,
@@ -536,13 +536,13 @@ static ssize_t multicast_last_member_count_show(struct device *d,
char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_last_member_count);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_last_member_count);
}
static int set_last_member_count(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_last_member_count = val;
+ br->multicast_ctx.multicast_last_member_count = val;
return 0;
}
@@ -558,13 +558,13 @@ static ssize_t multicast_startup_query_count_show(
struct device *d, struct device_attribute *attr, char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_startup_query_count);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_startup_query_count);
}
static int set_startup_query_count(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_startup_query_count = val;
+ br->multicast_ctx.multicast_startup_query_count = val;
return 0;
}
@@ -581,13 +581,13 @@ static ssize_t multicast_last_member_interval_show(
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_last_member_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval));
}
static int set_last_member_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_last_member_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -604,13 +604,13 @@ static ssize_t multicast_membership_interval_show(
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_membership_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval));
}
static int set_membership_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_membership_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -628,13 +628,13 @@ static ssize_t multicast_querier_interval_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_querier_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval));
}
static int set_querier_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_querier_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -652,13 +652,13 @@ static ssize_t multicast_query_interval_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_query_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval));
}
static int set_query_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -676,13 +676,13 @@ static ssize_t multicast_query_response_interval_show(
struct net_bridge *br = to_bridge(d);
return sprintf(
buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_query_response_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval));
}
static int set_query_response_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_query_response_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -700,13 +700,13 @@ static ssize_t multicast_startup_query_interval_show(
struct net_bridge *br = to_bridge(d);
return sprintf(
buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_startup_query_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval));
}
static int set_startup_query_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_startup_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -751,7 +751,7 @@ static ssize_t multicast_mld_version_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_mld_version);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_mld_version);
}
static int set_multicast_mld_version(struct net_bridge *br, unsigned long val,
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 72e92376eef1..e9e3aedd3178 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -244,7 +244,7 @@ BRPORT_ATTR_FLAG(isolated, BR_ISOLATED);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
{
- return sprintf(buf, "%d\n", p->multicast_router);
+ return sprintf(buf, "%d\n", p->multicast_ctx.multicast_router);
}
static int store_multicast_router(struct net_bridge_port *p,
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index a08e9f193009..382ab992badf 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -190,6 +190,8 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv)
rhashtable_remove_fast(&vg->vlan_hash,
&masterv->vnode, br_vlan_rht_params);
__vlan_del_list(masterv);
+ br_multicast_toggle_one_vlan(masterv, false);
+ br_multicast_ctx_deinit(&masterv->br_mcast_ctx);
call_rcu(&masterv->rcu, br_master_vlan_rcu_free);
}
}
@@ -280,10 +282,13 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
} else {
v->stats = masterv->stats;
}
+ br_multicast_port_ctx_init(p, v, &v->port_mcast_ctx);
} else {
err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
if (err && err != -EOPNOTSUPP)
goto out;
+ br_multicast_ctx_init(br, v, &v->br_mcast_ctx);
+ v->priv_flags |= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
}
/* Add the dev mac and count the vlan only if it's usable */
@@ -306,6 +311,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
__vlan_add_list(v);
__vlan_add_flags(v, flags);
+ br_multicast_toggle_one_vlan(v, true);
if (p)
nbp_vlan_set_vlan_dev_state(p, v->vid);
@@ -374,6 +380,8 @@ static int __vlan_del(struct net_bridge_vlan *v)
br_vlan_rht_params);
__vlan_del_list(v);
nbp_vlan_set_vlan_dev_state(p, v->vid);
+ br_multicast_toggle_one_vlan(v, false);
+ br_multicast_port_ctx_deinit(&v->port_mcast_ctx);
call_rcu(&v->rcu, nbp_vlan_rcu_free);
}
@@ -473,7 +481,8 @@ out:
static bool __allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb, u16 *vid,
- u8 *state)
+ u8 *state,
+ struct net_bridge_vlan **vlan)
{
struct pcpu_sw_netstats *stats;
struct net_bridge_vlan *v;
@@ -538,8 +547,9 @@ static bool __allowed_ingress(const struct net_bridge *br,
*/
skb->vlan_tci |= pvid;
- /* if stats are disabled we can avoid the lookup */
- if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
+ /* if snooping and stats are disabled we can avoid the lookup */
+ if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
+ !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
if (*state == BR_STATE_FORWARDING) {
*state = br_vlan_get_pvid_state(vg);
return br_vlan_state_allowed(*state, true);
@@ -566,6 +576,8 @@ static bool __allowed_ingress(const struct net_bridge *br,
u64_stats_update_end(&stats->syncp);
}
+ *vlan = v;
+
return true;
drop:
@@ -575,17 +587,19 @@ drop:
bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg, struct sk_buff *skb,
- u16 *vid, u8 *state)
+ u16 *vid, u8 *state,
+ struct net_bridge_vlan **vlan)
{
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
*/
+ *vlan = NULL;
if (!br_opt_get(br, BROPT_VLAN_ENABLED)) {
BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
return true;
}
- return __allowed_ingress(br, vg, skb, vid, state);
+ return __allowed_ingress(br, vg, skb, vid, state, vlan);
}
/* Called under RCU. */
@@ -826,6 +840,10 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val,
br_manage_promisc(br);
recalculate_group_addr(br);
br_recalculate_fwd_mask(br);
+ if (!val && br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ br_info(br, "vlan filtering disabled, automatically disabling multicast vlan snooping\n");
+ br_multicast_toggle_vlan_snooping(br, false, NULL);
+ }
return 0;
}
@@ -1838,6 +1856,9 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
ASSERT_RTNL();
+ if (!nb)
+ return 0;
+
if (!netif_is_bridge_master(br_dev))
return -EINVAL;
@@ -1884,7 +1905,6 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
return err;
}
-EXPORT_SYMBOL_GPL(br_vlan_replay);
/* check if v_curr can enter a range ending in range_end */
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
@@ -1901,6 +1921,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
u32 dump_flags)
{
struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL;
+ bool dump_global = !!(dump_flags & BRIDGE_VLANDB_DUMPF_GLOBAL);
bool dump_stats = !!(dump_flags & BRIDGE_VLANDB_DUMPF_STATS);
struct net_bridge_vlan_group *vg;
int idx = 0, s_idx = cb->args[1];
@@ -1919,6 +1940,10 @@ static int br_vlan_dump_dev(const struct net_device *dev,
vg = br_vlan_group_rcu(br);
p = NULL;
} else {
+ /* global options are dumped only for bridge devices */
+ if (dump_global)
+ return 0;
+
p = br_port_get_rcu(dev);
if (WARN_ON(!p))
return -EINVAL;
@@ -1941,7 +1966,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
/* idx must stay at range's beginning until it is filled in */
list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
- if (!br_vlan_should_use(v))
+ if (!dump_global && !br_vlan_should_use(v))
continue;
if (idx < s_idx) {
idx++;
@@ -1954,8 +1979,21 @@ static int br_vlan_dump_dev(const struct net_device *dev,
continue;
}
- if (dump_stats || v->vid == pvid ||
- !br_vlan_can_enter_range(v, range_end)) {
+ if (dump_global) {
+ if (br_vlan_global_opts_can_enter_range(v, range_end))
+ continue;
+ if (!br_vlan_global_opts_fill(skb, range_start->vid,
+ range_end->vid,
+ range_start)) {
+ err = -EMSGSIZE;
+ break;
+ }
+ /* advance number of filled vlans */
+ idx += range_end->vid - range_start->vid + 1;
+
+ range_start = v;
+ } else if (dump_stats || v->vid == pvid ||
+ !br_vlan_can_enter_range(v, range_end)) {
u16 vlan_flags = br_vlan_flags(range_start, pvid);
if (!br_vlan_fill_vids(skb, range_start->vid,
@@ -1977,11 +2015,18 @@ static int br_vlan_dump_dev(const struct net_device *dev,
* - last vlan (range_start == range_end, not in range)
* - last vlan range (range_start != range_end, in range)
*/
- if (!err && range_start &&
- !br_vlan_fill_vids(skb, range_start->vid, range_end->vid,
- range_start, br_vlan_flags(range_start, pvid),
- dump_stats))
- err = -EMSGSIZE;
+ if (!err && range_start) {
+ if (dump_global &&
+ !br_vlan_global_opts_fill(skb, range_start->vid,
+ range_end->vid, range_start))
+ err = -EMSGSIZE;
+ else if (!dump_global &&
+ !br_vlan_fill_vids(skb, range_start->vid,
+ range_end->vid, range_start,
+ br_vlan_flags(range_start, pvid),
+ dump_stats))
+ err = -EMSGSIZE;
+ }
cb->args[1] = err ? idx : 0;
@@ -2185,12 +2230,22 @@ static int br_vlan_rtm_process(struct sk_buff *skb, struct nlmsghdr *nlh,
}
nlmsg_for_each_attr(attr, nlh, sizeof(*bvm), rem) {
- if (nla_type(attr) != BRIDGE_VLANDB_ENTRY)
+ switch (nla_type(attr)) {
+ case BRIDGE_VLANDB_ENTRY:
+ err = br_vlan_rtm_process_one(dev, attr,
+ nlh->nlmsg_type,
+ extack);
+ break;
+ case BRIDGE_VLANDB_GLOBAL_OPTIONS:
+ err = br_vlan_rtm_process_global_options(dev, attr,
+ nlh->nlmsg_type,
+ extack);
+ break;
+ default:
continue;
+ }
vlans++;
- err = br_vlan_rtm_process_one(dev, attr, nlh->nlmsg_type,
- extack);
if (err)
break;
}
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index b4add9ea8964..4ef975b20185 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -258,3 +258,219 @@ int br_vlan_process_options(const struct net_bridge *br,
return err;
}
+
+bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
+ const struct net_bridge_vlan *r_end)
+{
+ return v_curr->vid - r_end->vid == 1 &&
+ ((v_curr->priv_flags ^ r_end->priv_flags) &
+ BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0;
+}
+
+bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
+ const struct net_bridge_vlan *v_opts)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, BRIDGE_VLANDB_GLOBAL_OPTIONS);
+ if (!nest)
+ return false;
+
+ if (nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_ID, vid))
+ goto out_err;
+
+ if (vid_range && vid < vid_range &&
+ nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_RANGE, vid_range))
+ goto out_err;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING,
+ !!(v_opts->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)))
+ goto out_err;
+#endif
+
+ nla_nest_end(skb, nest);
+
+ return true;
+
+out_err:
+ nla_nest_cancel(skb, nest);
+ return false;
+}
+
+static size_t rtnl_vlan_global_opts_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct br_vlan_msg))
+ + nla_total_size(0) /* BRIDGE_VLANDB_GLOBAL_OPTIONS */
+ + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_GOPTS_ID */
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING */
+#endif
+ + nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */
+}
+
+static void br_vlan_global_opts_notify(const struct net_bridge *br,
+ u16 vid, u16 vid_range)
+{
+ struct net_bridge_vlan *v;
+ struct br_vlan_msg *bvm;
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ /* right now notifications are done only with rtnl held */
+ ASSERT_RTNL();
+
+ skb = nlmsg_new(rtnl_vlan_global_opts_nlmsg_size(), GFP_KERNEL);
+ if (!skb)
+ goto out_err;
+
+ err = -EMSGSIZE;
+ nlh = nlmsg_put(skb, 0, 0, RTM_NEWVLAN, sizeof(*bvm), 0);
+ if (!nlh)
+ goto out_err;
+ bvm = nlmsg_data(nlh);
+ memset(bvm, 0, sizeof(*bvm));
+ bvm->family = AF_BRIDGE;
+ bvm->ifindex = br->dev->ifindex;
+
+ /* need to find the vlan due to flags/options */
+ v = br_vlan_find(br_vlan_group(br), vid);
+ if (!v)
+ goto out_kfree;
+
+ if (!br_vlan_global_opts_fill(skb, vid, vid_range, v))
+ goto out_err;
+
+ nlmsg_end(skb, nlh);
+ rtnl_notify(skb, dev_net(br->dev), 0, RTNLGRP_BRVLAN, NULL, GFP_KERNEL);
+ return;
+
+out_err:
+ rtnl_set_sk_err(dev_net(br->dev), RTNLGRP_BRVLAN, err);
+out_kfree:
+ kfree_skb(skb);
+}
+
+static int br_vlan_process_global_one_opts(const struct net_bridge *br,
+ struct net_bridge_vlan_group *vg,
+ struct net_bridge_vlan *v,
+ struct nlattr **tb,
+ bool *changed,
+ struct netlink_ext_ack *extack)
+{
+ *changed = false;
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]) {
+ u8 mc_snooping;
+
+ mc_snooping = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]);
+ if (br_multicast_toggle_global_vlan(v, !!mc_snooping))
+ *changed = true;
+ }
+#endif
+
+ return 0;
+}
+
+static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = {
+ [BRIDGE_VLANDB_GOPTS_ID] = { .type = NLA_U16 },
+ [BRIDGE_VLANDB_GOPTS_RANGE] = { .type = NLA_U16 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING] = { .type = NLA_U8 },
+};
+
+int br_vlan_rtm_process_global_options(struct net_device *dev,
+ const struct nlattr *attr,
+ int cmd,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_vlan *v, *curr_start = NULL, *curr_end = NULL;
+ struct nlattr *tb[BRIDGE_VLANDB_GOPTS_MAX + 1];
+ struct net_bridge_vlan_group *vg;
+ u16 vid, vid_range = 0;
+ struct net_bridge *br;
+ int err = 0;
+
+ if (cmd != RTM_NEWVLAN) {
+ NL_SET_ERR_MSG_MOD(extack, "Global vlan options support only set operation");
+ return -EINVAL;
+ }
+ if (!netif_is_bridge_master(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Global vlan options can only be set on bridge device");
+ return -EINVAL;
+ }
+ br = netdev_priv(dev);
+ vg = br_vlan_group(br);
+ if (WARN_ON(!vg))
+ return -ENODEV;
+
+ err = nla_parse_nested(tb, BRIDGE_VLANDB_GOPTS_MAX, attr,
+ br_vlan_db_gpol, extack);
+ if (err)
+ return err;
+
+ if (!tb[BRIDGE_VLANDB_GOPTS_ID]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing vlan entry id");
+ return -EINVAL;
+ }
+ vid = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_ID]);
+ if (!br_vlan_valid_id(vid, extack))
+ return -EINVAL;
+
+ if (tb[BRIDGE_VLANDB_GOPTS_RANGE]) {
+ vid_range = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_RANGE]);
+ if (!br_vlan_valid_id(vid_range, extack))
+ return -EINVAL;
+ if (vid >= vid_range) {
+ NL_SET_ERR_MSG_MOD(extack, "End vlan id is less than or equal to start vlan id");
+ return -EINVAL;
+ }
+ } else {
+ vid_range = vid;
+ }
+
+ for (; vid <= vid_range; vid++) {
+ bool changed = false;
+
+ v = br_vlan_find(vg, vid);
+ if (!v) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan in range doesn't exist, can't process global options");
+ err = -ENOENT;
+ break;
+ }
+
+ err = br_vlan_process_global_one_opts(br, vg, v, tb, &changed,
+ extack);
+ if (err)
+ break;
+
+ if (changed) {
+ /* vlan options changed, check for range */
+ if (!curr_start) {
+ curr_start = v;
+ curr_end = v;
+ continue;
+ }
+
+ if (!br_vlan_global_opts_can_enter_range(v, curr_end)) {
+ br_vlan_global_opts_notify(br, curr_start->vid,
+ curr_end->vid);
+ curr_start = v;
+ }
+ curr_end = v;
+ } else {
+ /* nothing changed and nothing to notify yet */
+ if (!curr_start)
+ continue;
+
+ br_vlan_global_opts_notify(br, curr_start->vid,
+ curr_end->vid);
+ curr_start = NULL;
+ curr_end = NULL;
+ }
+ }
+ if (curr_start)
+ br_vlan_global_opts_notify(br, curr_start->vid, curr_end->vid);
+
+ return err;
+}
diff --git a/net/core/Makefile b/net/core/Makefile
index f7f16650fe9e..35ced6201814 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -33,8 +33,6 @@ obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
obj-$(CONFIG_FAILOVER) += failover.o
-ifeq ($(CONFIG_INET),y)
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
-endif
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 8f1a47ad6781..fb5d12a3d52d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4756,45 +4756,18 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
return rxqueue;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
- struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
{
void *orig_data, *orig_data_end, *hard_start;
struct netdev_rx_queue *rxqueue;
- u32 metalen, act = XDP_DROP;
bool orig_bcast, orig_host;
u32 mac_len, frame_sz;
__be16 orig_eth_type;
struct ethhdr *eth;
+ u32 metalen, act;
int off;
- /* Reinjected packets coming from act_mirred or similar should
- * not get XDP generic processing.
- */
- if (skb_is_redirected(skb))
- return XDP_PASS;
-
- /* XDP packets must be linear and must have sufficient headroom
- * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
- * native XDP provides, thus we need to do it here as well.
- */
- if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
- skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
- int troom = skb->tail + skb->data_len - skb->end;
-
- /* In case we have to go down the path and also linearize,
- * then lets do the pskb_expand_head() work just once here.
- */
- if (pskb_expand_head(skb,
- hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
- troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
- goto do_drop;
- if (skb_linearize(skb))
- goto do_drop;
- }
-
/* The XDP program wants to see the packet starting at the MAC
* header.
*/
@@ -4849,6 +4822,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
skb->protocol = eth_type_trans(skb, skb->dev);
}
+ /* Redirect/Tx gives L2 packet, code that will reuse skb must __skb_pull
+ * before calling us again on redirect path. We do not call do_redirect
+ * as we leave that up to the caller.
+ *
+ * Caller is responsible for managing lifetime of skb (i.e. calling
+ * kfree_skb in response to actions it cannot handle/XDP_DROP).
+ */
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
@@ -4859,6 +4839,49 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (metalen)
skb_metadata_set(skb, metalen);
break;
+ }
+
+ return act;
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+ struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ u32 act = XDP_DROP;
+
+ /* Reinjected packets coming from act_mirred or similar should
+ * not get XDP generic processing.
+ */
+ if (skb_is_redirected(skb))
+ return XDP_PASS;
+
+ /* XDP packets must be linear and must have sufficient headroom
+ * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+ * native XDP provides, thus we need to do it here as well.
+ */
+ if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ int troom = skb->tail + skb->data_len - skb->end;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ if (pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+ goto do_drop;
+ if (skb_linearize(skb))
+ goto do_drop;
+ }
+
+ act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+ switch (act) {
+ case XDP_REDIRECT:
+ case XDP_TX:
+ case XDP_PASS:
+ break;
default:
bpf_warn_invalid_xdp_action(act);
fallthrough;
@@ -5324,7 +5347,6 @@ another_round:
ret = NET_RX_DROP;
goto out;
}
- skb_reset_mac_len(skb);
}
if (eth_type_vlan(skb->protocol)) {
@@ -5650,25 +5672,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
struct bpf_prog *new = xdp->prog;
int ret = 0;
- if (new) {
- u32 i;
-
- mutex_lock(&new->aux->used_maps_mutex);
-
- /* generic XDP does not work with DEVMAPs that can
- * have a bpf_prog installed on an entry
- */
- for (i = 0; i < new->aux->used_map_cnt; i++) {
- if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
- cpu_map_prog_allowed(new->aux->used_maps[i])) {
- mutex_unlock(&new->aux->used_maps_mutex);
- return -EINVAL;
- }
- }
-
- mutex_unlock(&new->aux->used_maps_mutex);
- }
-
switch (xdp->command) {
case XDP_SETUP_PROG:
rcu_assign_pointer(dev->xdp_prog, new);
@@ -10134,7 +10137,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
BUG_ON(count < 1);
- rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ rx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!rx)
return -ENOMEM;
@@ -10201,7 +10204,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
if (count < 1 || count > 0xffff)
return -EINVAL;
- tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ tx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!tx)
return -ENOMEM;
@@ -10841,7 +10844,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
- p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!p)
return NULL;
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 478d032f34ac..950e2fe5d56a 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kmod.h>
#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/net_tstamp.h>
@@ -25,79 +26,108 @@ static int dev_ifname(struct net *net, struct ifreq *ifr)
return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
}
-static gifconf_func_t *gifconf_list[NPROTO];
-
-/**
- * register_gifconf - register a SIOCGIF handler
- * @family: Address family
- * @gifconf: Function handler
- *
- * Register protocol dependent address dumping routines. The handler
- * that is passed must not be freed or reused until it has been replaced
- * by another handler.
- */
-int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
-{
- if (family >= NPROTO)
- return -EINVAL;
- gifconf_list[family] = gifconf;
- return 0;
-}
-EXPORT_SYMBOL(register_gifconf);
-
/*
* Perform a SIOCGIFCONF call. This structure will change
* size eventually, and there is nothing I can do about it.
* Thus we will need a 'compatibility mode'.
*/
-
-int dev_ifconf(struct net *net, struct ifconf *ifc, int size)
+int dev_ifconf(struct net *net, struct ifconf __user *uifc)
{
struct net_device *dev;
- char __user *pos;
- int len;
- int total;
- int i;
+ void __user *pos;
+ size_t size;
+ int len, total = 0, done;
- /*
- * Fetch the caller's info block.
- */
+ /* both the ifconf and the ifreq structures are slightly different */
+ if (in_compat_syscall()) {
+ struct compat_ifconf ifc32;
- pos = ifc->ifc_buf;
- len = ifc->ifc_len;
+ if (copy_from_user(&ifc32, uifc, sizeof(struct compat_ifconf)))
+ return -EFAULT;
- /*
- * Loop over the interfaces, and write an info block for each.
- */
+ pos = compat_ptr(ifc32.ifcbuf);
+ len = ifc32.ifc_len;
+ size = sizeof(struct compat_ifreq);
+ } else {
+ struct ifconf ifc;
+
+ if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
+ return -EFAULT;
+
+ pos = ifc.ifc_buf;
+ len = ifc.ifc_len;
+ size = sizeof(struct ifreq);
+ }
- total = 0;
+ /* Loop over the interfaces, and write an info block for each. */
+ rtnl_lock();
for_each_netdev(net, dev) {
- for (i = 0; i < NPROTO; i++) {
- if (gifconf_list[i]) {
- int done;
- if (!pos)
- done = gifconf_list[i](dev, NULL, 0, size);
- else
- done = gifconf_list[i](dev, pos + total,
- len - total, size);
- if (done < 0)
- return -EFAULT;
- total += done;
- }
+ if (!pos)
+ done = inet_gifconf(dev, NULL, 0, size);
+ else
+ done = inet_gifconf(dev, pos + total,
+ len - total, size);
+ if (done < 0) {
+ rtnl_unlock();
+ return -EFAULT;
}
+ total += done;
}
+ rtnl_unlock();
- /*
- * All done. Write the updated control block back to the caller.
- */
- ifc->ifc_len = total;
+ return put_user(total, &uifc->ifc_len);
+}
+
+static int dev_getifmap(struct net_device *dev, struct ifreq *ifr)
+{
+ struct ifmap *ifmap = &ifr->ifr_map;
+
+ if (in_compat_syscall()) {
+ struct compat_ifmap *cifmap = (struct compat_ifmap *)ifmap;
+
+ cifmap->mem_start = dev->mem_start;
+ cifmap->mem_end = dev->mem_end;
+ cifmap->base_addr = dev->base_addr;
+ cifmap->irq = dev->irq;
+ cifmap->dma = dev->dma;
+ cifmap->port = dev->if_port;
+
+ return 0;
+ }
+
+ ifmap->mem_start = dev->mem_start;
+ ifmap->mem_end = dev->mem_end;
+ ifmap->base_addr = dev->base_addr;
+ ifmap->irq = dev->irq;
+ ifmap->dma = dev->dma;
+ ifmap->port = dev->if_port;
- /*
- * Both BSD and Solaris return 0 here, so we do too.
- */
return 0;
}
+static int dev_setifmap(struct net_device *dev, struct ifreq *ifr)
+{
+ struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map;
+
+ if (!dev->netdev_ops->ndo_set_config)
+ return -EOPNOTSUPP;
+
+ if (in_compat_syscall()) {
+ struct ifmap ifmap = {
+ .mem_start = cifmap->mem_start,
+ .mem_end = cifmap->mem_end,
+ .base_addr = cifmap->base_addr,
+ .irq = cifmap->irq,
+ .dma = cifmap->dma,
+ .port = cifmap->port,
+ };
+
+ return dev->netdev_ops->ndo_set_config(dev, &ifmap);
+ }
+
+ return dev->netdev_ops->ndo_set_config(dev, &ifr->ifr_map);
+}
+
/*
* Perform the SIOCxIFxxx calls, inside rcu_read_lock()
*/
@@ -128,13 +158,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
break;
case SIOCGIFMAP:
- ifr->ifr_map.mem_start = dev->mem_start;
- ifr->ifr_map.mem_end = dev->mem_end;
- ifr->ifr_map.base_addr = dev->base_addr;
- ifr->ifr_map.irq = dev->irq;
- ifr->ifr_map.dma = dev->dma;
- ifr->ifr_map.port = dev->if_port;
- return 0;
+ return dev_getifmap(dev, ifr);
case SIOCGIFINDEX:
ifr->ifr_ifindex = dev->ifindex;
@@ -275,12 +299,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return 0;
case SIOCSIFMAP:
- if (ops->ndo_set_config) {
- if (!netif_device_present(dev))
- return -ENODEV;
- return ops->ndo_set_config(dev, &ifr->ifr_map);
- }
- return -EOPNOTSUPP;
+ return dev_setifmap(dev, ifr);
case SIOCADDMULTI:
if (!ops->ndo_set_rx_mode ||
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a9f937975080..79df7cd9dbc1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -57,7 +57,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
{
struct fib_rule *r;
- r = kzalloc(ops->rule_size, GFP_KERNEL);
+ r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
if (r == NULL)
return -ENOMEM;
@@ -541,7 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
+ nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
if (!nlrule) {
err = -ENOMEM;
goto errout;
diff --git a/net/core/filter.c b/net/core/filter.c
index d70187ce851b..3b4986e96e9c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -77,6 +77,7 @@
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
#include <net/tls.h>
+#include <net/xdp.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -3880,8 +3881,7 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
if (unlikely(meta < xdp_frame_end ||
meta > xdp->data))
return -EINVAL;
- if (unlikely((metalen & (sizeof(__u32) - 1)) ||
- (metalen > 32)))
+ if (unlikely(xdp_metalen_invalid(metalen)))
return -EACCES;
xdp->data_meta = meta;
@@ -4040,8 +4040,12 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
goto err;
consume_skb(skb);
break;
+ case BPF_MAP_TYPE_CPUMAP:
+ err = cpu_map_generic_redirect(fwd, skb);
+ if (unlikely(err))
+ goto err;
+ break;
default:
- /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
err = -EBADRQC;
goto err;
}
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 8ec7d13d2860..d0ae987d2de9 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -43,6 +43,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
return "SEG6LOCAL";
case LWTUNNEL_ENCAP_RPL:
return "RPL";
+ case LWTUNNEL_ENCAP_IOAM6:
+ return "IOAM6";
case LWTUNNEL_ENCAP_IP6:
case LWTUNNEL_ENCAP_IP:
case LWTUNNEL_ENCAP_NONE:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f6af3e74fc44..670d74ab91ae 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -710,15 +710,8 @@ out:
int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
{
struct sock *rtnl = net->rtnl;
- int err = 0;
- NETLINK_CB(skb).dst_group = group;
- if (echo)
- refcount_inc(&skb->users);
- netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
- if (echo)
- err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
- return err;
+ return nlmsg_notify(rtnl, skb, pid, group, echo, GFP_KERNEL);
}
int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
diff --git a/net/core/scm.c b/net/core/scm.c
index ae3085d9aae8..5c356f0dee30 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -79,7 +79,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
if (!fpl)
{
- fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+ fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT);
if (!fpl)
return -ENOMEM;
*fplp = fpl;
@@ -355,7 +355,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
return NULL;
new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
diff --git a/net/core/selftests.c b/net/core/selftests.c
index ba7b0171974c..9077fa969892 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -318,6 +318,15 @@ static int net_test_phy_loopback_udp(struct net_device *ndev)
return __net_test_loopback(ndev, &attr);
}
+static int net_test_phy_loopback_udp_mtu(struct net_device *ndev)
+{
+ struct net_packet_attrs attr = { };
+
+ attr.dst = ndev->dev_addr;
+ attr.max_size = ndev->mtu;
+ return __net_test_loopback(ndev, &attr);
+}
+
static int net_test_phy_loopback_tcp(struct net_device *ndev)
{
struct net_packet_attrs attr = { };
@@ -345,6 +354,9 @@ static const struct net_test {
.name = "PHY internal loopback, UDP ",
.fn = net_test_phy_loopback_udp,
}, {
+ .name = "PHY internal loopback, MTU ",
+ .fn = net_test_phy_loopback_udp_mtu,
+ }, {
.name = "PHY internal loopback, TCP ",
.fn = net_test_phy_loopback_tcp,
}, {
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 60decd6420ca..ae5fa4338d9c 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -211,8 +211,6 @@ out:
return psock;
}
-static bool sock_map_redirect_allowed(const struct sock *sk);
-
static int sock_map_link(struct bpf_map *map, struct sock *sk)
{
struct sk_psock_progs *progs = sock_map_progs(map);
@@ -223,13 +221,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
struct sk_psock *psock;
int ret;
- /* Only sockets we can redirect into/from in BPF need to hold
- * refs to parser/verdict progs and have their sk_data_ready
- * and sk_write_space callbacks overridden.
- */
- if (!sock_map_redirect_allowed(sk))
- goto no_progs;
-
stream_verdict = READ_ONCE(progs->stream_verdict);
if (stream_verdict) {
stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
@@ -264,7 +255,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
}
}
-no_progs:
psock = sock_map_psock_get_checked(sk);
if (IS_ERR(psock)) {
ret = PTR_ERR(psock);
@@ -527,12 +517,6 @@ static bool sk_is_tcp(const struct sock *sk)
sk->sk_protocol == IPPROTO_TCP;
}
-static bool sk_is_udp(const struct sock *sk)
-{
- return sk->sk_type == SOCK_DGRAM &&
- sk->sk_protocol == IPPROTO_UDP;
-}
-
static bool sock_map_redirect_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
@@ -550,10 +534,7 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
- else if (sk_is_udp(sk))
- return sk_hashed(sk);
-
- return false;
+ return true;
}
static int sock_hash_update_common(struct bpf_map *map, void *key,
@@ -1536,6 +1517,7 @@ void sock_map_close(struct sock *sk, long timeout)
release_sock(sk);
saved_close(sk, timeout);
}
+EXPORT_SYMBOL_GPL(sock_map_close);
static int sock_map_iter_attach_target(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7eb0fb231940..abb5c596a817 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1126,7 +1126,7 @@ static int __init dccp_init(void)
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 00bb89b2d86f..bca1b5d66df2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -18,16 +18,6 @@ if NET_DSA
# Drivers must select the appropriate tagging format(s)
-config NET_DSA_TAG_8021Q
- tristate
- select VLAN_8021Q
- help
- Unlike the other tagging protocols, the 802.1Q config option simply
- provides helpers for other tagging implementations that might rely on
- VLAN in one way or another. It is not a complete solution.
-
- Drivers which use these helpers should select this as dependency.
-
config NET_DSA_TAG_AR9331
tristate "Tag driver for Atheros AR9331 SoC with built-in switch"
help
@@ -126,7 +116,6 @@ config NET_DSA_TAG_OCELOT_8021Q
tristate "Tag driver for Ocelot family of switches, using VLAN"
depends on MSCC_OCELOT_SWITCH_LIB || \
(MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
- select NET_DSA_TAG_8021Q
help
Say Y or M if you want to enable support for tagging frames with a
custom VLAN-based header. Frames that require timestamping, such as
@@ -149,7 +138,6 @@ config NET_DSA_TAG_LAN9303
config NET_DSA_TAG_SJA1105
tristate "Tag driver for NXP SJA1105 switches"
- select NET_DSA_TAG_8021Q
select PACKING
help
Say Y or M if you want to enable support for tagging frames with the
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 44bc79952b8b..67ea009f242c 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,10 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
+dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o tag_8021q.o
# tagging formats
-obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o
obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o
obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index f201c33980bf..78c70f5bdab5 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -39,6 +39,8 @@ enum {
DSA_NOTIFIER_MRP_DEL,
DSA_NOTIFIER_MRP_ADD_RING_ROLE,
DSA_NOTIFIER_MRP_DEL_RING_ROLE,
+ DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
+ DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
};
/* DSA_NOTIFIER_AGEING_TIME */
@@ -113,6 +115,14 @@ struct dsa_notifier_mrp_ring_role_info {
int port;
};
+/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */
+struct dsa_notifier_tag_8021q_vlan_info {
+ int tree_index;
+ int sw_index;
+ int port;
+ u16 vid;
+};
+
struct dsa_switchdev_event_work {
struct dsa_switch *ds;
int port;
@@ -194,16 +204,14 @@ void dsa_port_disable_rt(struct dsa_port *dp);
void dsa_port_disable(struct dsa_port *dp);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
struct netlink_ext_ack *extack);
-int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br,
- struct netlink_ext_ack *extack);
+void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
int dsa_port_lag_change(struct dsa_port *dp,
struct netdev_lag_lower_state_info *linfo);
int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
struct netdev_lag_upper_info *uinfo,
struct netlink_ext_ack *extack);
-int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev,
- struct netlink_ext_ack *extack);
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct netlink_ext_ack *extack);
@@ -253,16 +261,18 @@ int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
+int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid);
+void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid);
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
- struct net_device *dev)
+ const struct net_device *dev)
{
return dsa_port_to_bridge_port(dp) == dev;
}
static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
- struct net_device *bridge_dev)
+ const struct net_device *bridge_dev)
{
/* DSA ports connected to a bridge, and event was emitted
* for the bridge.
@@ -272,7 +282,7 @@ static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
/* Returns true if any port of this tree offloads the given net_device */
static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
- struct net_device *dev)
+ const struct net_device *dev)
{
struct dsa_port *dp;
@@ -283,6 +293,19 @@ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
return false;
}
+/* Returns true if any port of this tree offloads the given bridge */
+static inline bool dsa_tree_offloads_bridge(struct dsa_switch_tree *dst,
+ const struct net_device *bridge_dev)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_offloads_bridge(dp, bridge_dev))
+ return true;
+
+ return false;
+}
+
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
extern struct notifier_block dsa_slave_switchdev_notifier;
@@ -386,6 +409,16 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops);
+/* tag_8021q.c */
+int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info);
+int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info);
+int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info);
+int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info);
+
extern struct list_head dsa_tree_list;
#endif
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 28b45b7e66df..d81c283b7358 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -167,8 +167,8 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp)
}
}
-static int dsa_port_switchdev_sync(struct dsa_port *dp,
- struct netlink_ext_ack *extack)
+static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
+ struct netlink_ext_ack *extack)
{
struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
struct net_device *br = dp->bridge_dev;
@@ -194,59 +194,6 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
if (err && err != -EOPNOTSUPP)
return err;
- err = br_mdb_replay(br, brport_dev, dp, true,
- &dsa_slave_switchdev_blocking_notifier, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- /* Forwarding and termination FDB entries on the port */
- err = br_fdb_replay(br, brport_dev, dp, true,
- &dsa_slave_switchdev_notifier);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- /* Termination FDB entries on the bridge itself */
- err = br_fdb_replay(br, br, dp, true, &dsa_slave_switchdev_notifier);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- err = br_vlan_replay(br, brport_dev, dp, true,
- &dsa_slave_switchdev_blocking_notifier, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- return 0;
-}
-
-static int dsa_port_switchdev_unsync_objs(struct dsa_port *dp,
- struct net_device *br,
- struct netlink_ext_ack *extack)
-{
- struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
- int err;
-
- /* Delete the switchdev objects left on this port */
- err = br_mdb_replay(br, brport_dev, dp, false,
- &dsa_slave_switchdev_blocking_notifier, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- /* Forwarding and termination FDB entries on the port */
- err = br_fdb_replay(br, brport_dev, dp, false,
- &dsa_slave_switchdev_notifier);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- /* Termination FDB entries on the bridge itself */
- err = br_fdb_replay(br, br, dp, false, &dsa_slave_switchdev_notifier);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- err = br_vlan_replay(br, brport_dev, dp, false,
- &dsa_slave_switchdev_blocking_notifier, extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
return 0;
}
@@ -292,6 +239,8 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
.port = dp->index,
.br = br,
};
+ struct net_device *dev = dp->slave;
+ struct net_device *brport_dev;
int err;
/* Here the interface is already bridged. Reflect the current
@@ -299,16 +248,29 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
*/
dp->bridge_dev = br;
+ brport_dev = dsa_port_to_bridge_port(dp);
+
err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info);
if (err)
goto out_rollback;
- err = dsa_port_switchdev_sync(dp, extack);
+ err = switchdev_bridge_port_offload(brport_dev, dev, dp,
+ &dsa_slave_switchdev_notifier,
+ &dsa_slave_switchdev_blocking_notifier,
+ extack);
if (err)
goto out_rollback_unbridge;
+ err = dsa_port_switchdev_sync_attrs(dp, extack);
+ if (err)
+ goto out_rollback_unoffload;
+
return 0;
+out_rollback_unoffload:
+ switchdev_bridge_port_unoffload(brport_dev, dp,
+ &dsa_slave_switchdev_notifier,
+ &dsa_slave_switchdev_blocking_notifier);
out_rollback_unbridge:
dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
out_rollback:
@@ -316,10 +278,13 @@ out_rollback:
return err;
}
-int dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br,
- struct netlink_ext_ack *extack)
+void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br)
{
- return dsa_port_switchdev_unsync_objs(dp, br, extack);
+ struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+
+ switchdev_bridge_port_unoffload(brport_dev, dp,
+ &dsa_slave_switchdev_notifier,
+ &dsa_slave_switchdev_blocking_notifier);
}
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
@@ -409,13 +374,10 @@ err_lag_join:
return err;
}
-int dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag,
- struct netlink_ext_ack *extack)
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag)
{
if (dp->bridge_dev)
- return dsa_port_pre_bridge_leave(dp, dp->bridge_dev, extack);
-
- return 0;
+ dsa_port_pre_bridge_leave(dp, dp->bridge_dev);
}
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
@@ -1217,3 +1179,31 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
if (err)
pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n");
}
+
+int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid)
+{
+ struct dsa_notifier_tag_8021q_vlan_info info = {
+ .tree_index = dp->ds->dst->index,
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .vid = vid,
+ };
+
+ return dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info);
+}
+
+void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid)
+{
+ struct dsa_notifier_tag_8021q_vlan_info info = {
+ .tree_index = dp->ds->dst->index,
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .vid = vid,
+ };
+ int err;
+
+ err = dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info);
+ if (err)
+ pr_err("DSA: failed to notify tag_8021q VLAN deletion: %pe\n",
+ ERR_PTR(err));
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 532085da8d8f..8c112d7d5b0a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -2056,20 +2056,16 @@ static int dsa_slave_prechangeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
- struct netlink_ext_ack *extack;
- int err = 0;
-
- extack = netdev_notifier_info_to_extack(&info->info);
if (netif_is_bridge_master(info->upper_dev) && !info->linking)
- err = dsa_port_pre_bridge_leave(dp, info->upper_dev, extack);
+ dsa_port_pre_bridge_leave(dp, info->upper_dev);
else if (netif_is_lag_master(info->upper_dev) && !info->linking)
- err = dsa_port_pre_lag_leave(dp, info->upper_dev, extack);
+ dsa_port_pre_lag_leave(dp, info->upper_dev);
/* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be
* meaningfully enslaved to a bridge yet
*/
- return notifier_from_errno(err);
+ return NOTIFY_DONE;
}
static int
@@ -2357,26 +2353,98 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
kfree(switchdev_work);
}
-static int dsa_lower_dev_walk(struct net_device *lower_dev,
- struct netdev_nested_priv *priv)
+static bool dsa_foreign_dev_check(const struct net_device *dev,
+ const struct net_device *foreign_dev)
{
- if (dsa_slave_dev_check(lower_dev)) {
- priv->data = (void *)netdev_priv(lower_dev);
- return 1;
- }
+ const struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch_tree *dst = dp->ds->dst;
- return 0;
+ if (netif_is_bridge_master(foreign_dev))
+ return !dsa_tree_offloads_bridge(dst, foreign_dev);
+
+ if (netif_is_bridge_port(foreign_dev))
+ return !dsa_tree_offloads_bridge_port(dst, foreign_dev);
+
+ /* Everything else is foreign */
+ return true;
}
-static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev)
+static int dsa_slave_fdb_event(struct net_device *dev,
+ const struct net_device *orig_dev,
+ const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ unsigned long event)
{
- struct netdev_nested_priv priv = {
- .data = NULL,
- };
+ struct dsa_switchdev_event_work *switchdev_work;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ bool host_addr = fdb_info->is_local;
+ struct dsa_switch *ds = dp->ds;
- netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, &priv);
+ if (ctx && ctx != dp)
+ return 0;
+
+ if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del)
+ return -EOPNOTSUPP;
+
+ if (dsa_slave_dev_check(orig_dev) &&
+ switchdev_fdb_is_dynamically_learned(fdb_info))
+ return 0;
+
+ /* FDB entries learned by the software bridge should be installed as
+ * host addresses only if the driver requests assisted learning.
+ */
+ if (switchdev_fdb_is_dynamically_learned(fdb_info) &&
+ !ds->assisted_learning_on_cpu_port)
+ return 0;
+
+ /* Also treat FDB entries on foreign interfaces bridged with us as host
+ * addresses.
+ */
+ if (dsa_foreign_dev_check(dev, orig_dev))
+ host_addr = true;
+
+ switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+ if (!switchdev_work)
+ return -ENOMEM;
- return (struct dsa_slave_priv *)priv.data;
+ netdev_dbg(dev, "%s FDB entry towards %s, addr %pM vid %d%s\n",
+ event == SWITCHDEV_FDB_ADD_TO_DEVICE ? "Adding" : "Deleting",
+ orig_dev->name, fdb_info->addr, fdb_info->vid,
+ host_addr ? " as host address" : "");
+
+ INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work);
+ switchdev_work->ds = ds;
+ switchdev_work->port = dp->index;
+ switchdev_work->event = event;
+ switchdev_work->dev = dev;
+
+ ether_addr_copy(switchdev_work->addr, fdb_info->addr);
+ switchdev_work->vid = fdb_info->vid;
+ switchdev_work->host_addr = host_addr;
+
+ /* Hold a reference for dsa_fdb_offload_notify */
+ dev_hold(dev);
+ dsa_schedule_work(&switchdev_work->work);
+
+ return 0;
+}
+
+static int
+dsa_slave_fdb_add_to_device(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info)
+{
+ return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
+ SWITCHDEV_FDB_ADD_TO_DEVICE);
+}
+
+static int
+dsa_slave_fdb_del_to_device(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info)
+{
+ return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
+ SWITCHDEV_FDB_DEL_TO_DEVICE);
}
/* Called under rcu_read_lock() */
@@ -2384,10 +2452,6 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
- const struct switchdev_notifier_fdb_info *fdb_info;
- struct dsa_switchdev_event_work *switchdev_work;
- bool host_addr = false;
- struct dsa_port *dp;
int err;
switch (event) {
@@ -2397,92 +2461,19 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
dsa_slave_port_attr_set);
return notifier_from_errno(err);
case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ err = switchdev_handle_fdb_add_to_device(dev, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_fdb_add_to_device,
+ NULL);
+ return notifier_from_errno(err);
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- fdb_info = ptr;
-
- if (dsa_slave_dev_check(dev)) {
- dp = dsa_slave_to_port(dev);
-
- if (fdb_info->is_local)
- host_addr = true;
- else if (!fdb_info->added_by_user)
- return NOTIFY_OK;
- } else {
- /* Snoop addresses added to foreign interfaces
- * bridged with us, or the bridge
- * itself. Dynamically learned addresses can
- * also be added for switches that don't
- * automatically learn SA from CPU-injected
- * traffic.
- */
- struct net_device *br_dev;
- struct dsa_slave_priv *p;
-
- if (netif_is_bridge_master(dev))
- br_dev = dev;
- else
- br_dev = netdev_master_upper_dev_get_rcu(dev);
-
- if (!br_dev)
- return NOTIFY_DONE;
-
- if (!netif_is_bridge_master(br_dev))
- return NOTIFY_DONE;
-
- p = dsa_slave_dev_lower_find(br_dev);
- if (!p)
- return NOTIFY_DONE;
-
- dp = p->dp;
- host_addr = fdb_info->is_local;
-
- /* FDB entries learned by the software bridge should
- * be installed as host addresses only if the driver
- * requests assisted learning.
- * On the other hand, FDB entries for local termination
- * should always be installed.
- */
- if (!fdb_info->added_by_user && !fdb_info->is_local &&
- !dp->ds->assisted_learning_on_cpu_port)
- return NOTIFY_DONE;
-
- /* When the bridge learns an address on an offloaded
- * LAG we don't want to send traffic to the CPU, the
- * other ports bridged with the LAG should be able to
- * autonomously forward towards it.
- * On the other hand, if the address is local
- * (therefore not learned) then we want to trap it to
- * the CPU regardless of whether the interface it
- * belongs to is offloaded or not.
- */
- if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev) &&
- !fdb_info->is_local)
- return NOTIFY_DONE;
- }
-
- if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del)
- return NOTIFY_DONE;
-
- switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
- if (!switchdev_work)
- return NOTIFY_BAD;
-
- INIT_WORK(&switchdev_work->work,
- dsa_slave_switchdev_event_work);
- switchdev_work->ds = dp->ds;
- switchdev_work->port = dp->index;
- switchdev_work->event = event;
- switchdev_work->dev = dev;
-
- ether_addr_copy(switchdev_work->addr,
- fdb_info->addr);
- switchdev_work->vid = fdb_info->vid;
- switchdev_work->host_addr = host_addr;
-
- /* Hold a reference for dsa_fdb_offload_notify */
- dev_hold(dev);
- dsa_schedule_work(&switchdev_work->work);
- break;
+ err = switchdev_handle_fdb_del_to_device(dev, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_fdb_del_to_device,
+ NULL);
+ return notifier_from_errno(err);
default:
return NOTIFY_DONE;
}
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 5ece05dfd8f2..fd1a1c6bf9cf 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -90,18 +90,25 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
{
struct dsa_switch_tree *dst = ds->dst;
+ int err;
if (dst->index == info->tree_index && ds->index == info->sw_index &&
- ds->ops->port_bridge_join)
- return ds->ops->port_bridge_join(ds, info->port, info->br);
+ ds->ops->port_bridge_join) {
+ err = ds->ops->port_bridge_join(ds, info->port, info->br);
+ if (err)
+ return err;
+ }
if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_join)
- return ds->ops->crosschip_bridge_join(ds, info->tree_index,
- info->sw_index,
- info->port, info->br);
+ ds->ops->crosschip_bridge_join) {
+ err = ds->ops->crosschip_bridge_join(ds, info->tree_index,
+ info->sw_index,
+ info->port, info->br);
+ if (err)
+ return err;
+ }
- return 0;
+ return dsa_tag_8021q_bridge_join(ds, info);
}
static int dsa_switch_bridge_leave(struct dsa_switch *ds,
@@ -151,7 +158,8 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
if (err && err != EOPNOTSUPP)
return err;
}
- return 0;
+
+ return dsa_tag_8021q_bridge_leave(ds, info);
}
/* Matches for all upstream-facing ports (the CPU port and all upstream-facing
@@ -726,6 +734,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_MRP_DEL_RING_ROLE:
err = dsa_switch_mrp_del_ring_role(ds, info);
break;
+ case DSA_NOTIFIER_TAG_8021Q_VLAN_ADD:
+ err = dsa_switch_tag_8021q_vlan_add(ds, info);
+ break;
+ case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL:
+ err = dsa_switch_tag_8021q_vlan_del(ds, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 4aa29f90ecea..51dcde7db26b 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -17,7 +17,7 @@
*
* | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
* +-----------+-----+-----------------+-----------+-----------------------+
- * | DIR | SVL | SWITCH_ID | SUBVLAN | PORT |
+ * | DIR | RSV | SWITCH_ID | RSV | PORT |
* +-----------+-----+-----------------+-----------+-----------------------+
*
* DIR - VID[11:10]:
@@ -27,24 +27,13 @@
* These values make the special VIDs of 0, 1 and 4095 to be left
* unused by this coding scheme.
*
- * SVL/SUBVLAN - { VID[9], VID[5:4] }:
- * Sub-VLAN encoding. Valid only when DIR indicates an RX VLAN.
- * * 0 (0b000): Field does not encode a sub-VLAN, either because
- * received traffic is untagged, PVID-tagged or because a second
- * VLAN tag is present after this tag and not inside of it.
- * * 1 (0b001): Received traffic is tagged with a VID value private
- * to the host. This field encodes the index in the host's lookup
- * table through which the value of the ingress VLAN ID can be
- * recovered.
- * * 2 (0b010): Field encodes a sub-VLAN.
- * ...
- * * 7 (0b111): Field encodes a sub-VLAN.
- * When DIR indicates a TX VLAN, SUBVLAN must be transmitted as zero
- * (by the host) and ignored on receive (by the switch).
- *
* SWITCH_ID - VID[8:6]:
* Index of switch within DSA tree. Must be between 0 and 7.
*
+ * RSV - VID[5:4]:
+ * To be used for further expansion of PORT or for other purposes.
+ * Must be transmitted as zero and ignored on receive.
+ *
* PORT - VID[3:0]:
* Index of switch port. Must be between 0 and 15.
*/
@@ -61,18 +50,6 @@
#define DSA_8021Q_SWITCH_ID(x) (((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \
DSA_8021Q_SWITCH_ID_MASK)
-#define DSA_8021Q_SUBVLAN_HI_SHIFT 9
-#define DSA_8021Q_SUBVLAN_HI_MASK GENMASK(9, 9)
-#define DSA_8021Q_SUBVLAN_LO_SHIFT 4
-#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(5, 4)
-#define DSA_8021Q_SUBVLAN_HI(x) (((x) & GENMASK(2, 2)) >> 2)
-#define DSA_8021Q_SUBVLAN_LO(x) ((x) & GENMASK(1, 0))
-#define DSA_8021Q_SUBVLAN(x) \
- (((DSA_8021Q_SUBVLAN_LO(x) << DSA_8021Q_SUBVLAN_LO_SHIFT) & \
- DSA_8021Q_SUBVLAN_LO_MASK) | \
- ((DSA_8021Q_SUBVLAN_HI(x) << DSA_8021Q_SUBVLAN_HI_SHIFT) & \
- DSA_8021Q_SUBVLAN_HI_MASK))
-
#define DSA_8021Q_PORT_SHIFT 0
#define DSA_8021Q_PORT_MASK GENMASK(3, 0)
#define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \
@@ -98,13 +75,6 @@ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
}
EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid);
-u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan)
-{
- return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) |
- DSA_8021Q_PORT(port) | DSA_8021Q_SUBVLAN(subvlan);
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid_subvlan);
-
/* Returns the decoded switch ID from the RX VID. */
int dsa_8021q_rx_switch_id(u16 vid)
{
@@ -119,20 +89,6 @@ int dsa_8021q_rx_source_port(u16 vid)
}
EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
-/* Returns the decoded subvlan from the RX VID. */
-u16 dsa_8021q_rx_subvlan(u16 vid)
-{
- u16 svl_hi, svl_lo;
-
- svl_hi = (vid & DSA_8021Q_SUBVLAN_HI_MASK) >>
- DSA_8021Q_SUBVLAN_HI_SHIFT;
- svl_lo = (vid & DSA_8021Q_SUBVLAN_LO_MASK) >>
- DSA_8021Q_SUBVLAN_LO_SHIFT;
-
- return (svl_hi << 2) | svl_lo;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_rx_subvlan);
-
bool vid_is_dsa_8021q_rxvlan(u16 vid)
{
return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX;
@@ -151,21 +107,152 @@ bool vid_is_dsa_8021q(u16 vid)
}
EXPORT_SYMBOL_GPL(vid_is_dsa_8021q);
-/* If @enabled is true, installs @vid with @flags into the switch port's HW
- * filter.
- * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the
- * user explicitly configured this @vid through the bridge core, then the @vid
- * is installed again, but this time with the flags from the bridge layer.
- */
-static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
- u16 flags, bool enabled)
+static struct dsa_tag_8021q_vlan *
+dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid)
+{
+ struct dsa_tag_8021q_vlan *v;
+
+ list_for_each_entry(v, &ctx->vlans, list)
+ if (v->vid == vid && v->port == port)
+ return v;
+
+ return NULL;
+}
+
+static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port,
+ u16 vid, u16 flags)
+{
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_tag_8021q_vlan *v;
+ int err;
+
+ /* No need to bother with refcounting for user ports */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->tag_8021q_vlan_add(ds, port, vid, flags);
+
+ v = dsa_tag_8021q_vlan_find(ctx, port, vid);
+ if (v) {
+ refcount_inc(&v->refcount);
+ return 0;
+ }
+
+ v = kzalloc(sizeof(*v), GFP_KERNEL);
+ if (!v)
+ return -ENOMEM;
+
+ err = ds->ops->tag_8021q_vlan_add(ds, port, vid, flags);
+ if (err) {
+ kfree(v);
+ return err;
+ }
+
+ v->vid = vid;
+ v->port = port;
+ refcount_set(&v->refcount, 1);
+ list_add_tail(&v->list, &ctx->vlans);
+
+ return 0;
+}
+
+static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port,
+ u16 vid)
+{
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_tag_8021q_vlan *v;
+ int err;
+
+ /* No need to bother with refcounting for user ports */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->tag_8021q_vlan_del(ds, port, vid);
+
+ v = dsa_tag_8021q_vlan_find(ctx, port, vid);
+ if (!v)
+ return -ENOENT;
+
+ if (!refcount_dec_and_test(&v->refcount))
+ return 0;
+
+ err = ds->ops->tag_8021q_vlan_del(ds, port, vid);
+ if (err) {
+ refcount_inc(&v->refcount);
+ return err;
+ }
+
+ list_del(&v->list);
+ kfree(v);
+
+ return 0;
+}
+
+static bool
+dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port,
+ struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+ if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+ return true;
+
+ if (ds->dst->index == info->tree_index && ds->index == info->sw_index)
+ return port == info->port;
+
+ return false;
+}
+
+int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+ int port, err;
+
+ /* Since we use dsa_broadcast(), there might be other switches in other
+ * trees which don't support tag_8021q, so don't return an error.
+ * Or they might even support tag_8021q but have not registered yet to
+ * use it (maybe they use another tagger currently).
+ */
+ if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx)
+ return 0;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) {
+ u16 flags = 0;
+
+ if (dsa_is_user_port(ds, port))
+ flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+
+ if (vid_is_dsa_8021q_rxvlan(info->vid) &&
+ dsa_8021q_rx_switch_id(info->vid) == ds->index &&
+ dsa_8021q_rx_source_port(info->vid) == port)
+ flags |= BRIDGE_VLAN_INFO_PVID;
+
+ err = dsa_switch_do_tag_8021q_vlan_add(ds, port,
+ info->vid,
+ flags);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info)
{
- struct dsa_port *dp = dsa_to_port(ctx->ds, port);
+ int port, err;
- if (enabled)
- return ctx->ops->vlan_add(ctx->ds, dp->index, vid, flags);
+ if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx)
+ return 0;
- return ctx->ops->vlan_del(ctx->ds, dp->index, vid);
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) {
+ err = dsa_switch_do_tag_8021q_vlan_del(ds, port,
+ info->vid);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
}
/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single
@@ -181,12 +268,6 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
* force all switched traffic to pass through the CPU. So we must also make
* the other front-panel ports members of this VID we're adding, albeit
* we're not making it their PVID (they'll still have their own).
- * By the way - just because we're installing the same VID in multiple
- * switch ports doesn't mean that they'll start to talk to one another, even
- * while not bridged: the final forwarding decision is still an AND between
- * the L2 forwarding information (which is limiting forwarding in this case)
- * and the VLAN-based restrictions (of which there are none in this case,
- * since all ports are members).
* - On TX (ingress from CPU and towards network) we are faced with a problem.
* If we were to tag traffic (from within DSA) with the port's pvid, all
* would be well, assuming the switch ports were standalone. Frames would
@@ -200,9 +281,10 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
* a member of the VID we're tagging the traffic with - the desired one.
*
* So at the end, each front-panel port will have one RX VID (also the PVID),
- * the RX VID of all other front-panel ports, and one TX VID. Whereas the CPU
- * port will have the RX and TX VIDs of all front-panel ports, and on top of
- * that, is also tagged-input and tagged-output (VLAN trunk).
+ * the RX VID of all other front-panel ports that are in the same bridge, and
+ * one TX VID. Whereas the CPU port will have the RX and TX VIDs of all
+ * front-panel ports, and on top of that, is also tagged-input and
+ * tagged-output (VLAN trunk).
*
* CPU port CPU port
* +-------------+-----+-------------+ +-------------+-----+-------------+
@@ -220,246 +302,225 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
* +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+
* swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3
*/
-static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port,
- bool enabled)
+static bool dsa_tag_8021q_bridge_match(struct dsa_switch *ds, int port,
+ struct dsa_notifier_bridge_info *info)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+
+ /* Don't match on self */
+ if (ds->dst->index == info->tree_index &&
+ ds->index == info->sw_index &&
+ port == info->port)
+ return false;
+
+ if (dsa_port_is_user(dp))
+ return dp->bridge_dev == info->br;
+
+ return false;
+}
+
+int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info)
{
- int upstream = dsa_upstream_port(ctx->ds, port);
- u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port);
- u16 tx_vid = dsa_8021q_tx_vid(ctx->ds, port);
+ struct dsa_switch *targeted_ds;
+ struct dsa_port *targeted_dp;
+ u16 targeted_rx_vid;
+ int err, port;
+
+ if (!ds->tag_8021q_ctx)
+ return 0;
+
+ targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
+ targeted_dp = dsa_to_port(targeted_ds, info->port);
+ targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+
+ for (port = 0; port < ds->num_ports; port++) {
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+
+ if (!dsa_tag_8021q_bridge_match(ds, port, info))
+ continue;
+
+ /* Install the RX VID of the targeted port in our VLAN table */
+ err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid);
+ if (err)
+ return err;
+
+ /* Install our RX VID into the targeted port's VLAN table */
+ err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info)
+{
+ struct dsa_switch *targeted_ds;
+ struct dsa_port *targeted_dp;
+ u16 targeted_rx_vid;
+ int port;
+
+ if (!ds->tag_8021q_ctx)
+ return 0;
+
+ targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
+ targeted_dp = dsa_to_port(targeted_ds, info->port);
+ targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+
+ for (port = 0; port < ds->num_ports; port++) {
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+
+ if (!dsa_tag_8021q_bridge_match(ds, port, info))
+ continue;
+
+ /* Remove the RX VID of the targeted port from our VLAN table */
+ dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid);
+
+ /* Remove our RX VID from the targeted port's VLAN table */
+ dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid);
+ }
+
+ return 0;
+}
+
+/* Set up a port's tag_8021q RX and TX VLAN for standalone mode operation */
+static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
+{
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+ u16 tx_vid = dsa_8021q_tx_vid(ds, port);
struct net_device *master;
- int i, err, subvlan;
+ int err;
/* The CPU port is implicitly configured by
* configuring the front-panel ports
*/
- if (!dsa_is_user_port(ctx->ds, port))
+ if (!dsa_port_is_user(dp))
return 0;
- master = dsa_to_port(ctx->ds, port)->cpu_dp->master;
+ master = dp->cpu_dp->master;
/* Add this user port's RX VID to the membership list of all others
* (including itself). This is so that bridging will not be hindered.
* L2 forwarding rules still take precedence when there are no VLAN
* restrictions, so there are no concerns about leaking traffic.
*/
- for (i = 0; i < ctx->ds->num_ports; i++) {
- u16 flags;
-
- if (i == upstream)
- continue;
- else if (i == port)
- /* The RX VID is pvid on this port */
- flags = BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_PVID;
- else
- /* The RX VID is a regular VLAN on all others */
- flags = BRIDGE_VLAN_INFO_UNTAGGED;
-
- err = dsa_8021q_vid_apply(ctx, i, rx_vid, flags, enabled);
- if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply RX VID %d to port %d: %d\n",
- rx_vid, port, err);
- return err;
- }
- }
-
- /* CPU port needs to see this port's RX VID
- * as tagged egress.
- */
- err = dsa_8021q_vid_apply(ctx, upstream, rx_vid, 0, enabled);
+ err = dsa_port_tag_8021q_vlan_add(dp, rx_vid);
if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply RX VID %d to port %d: %d\n",
- rx_vid, port, err);
+ dev_err(ds->dev,
+ "Failed to apply RX VID %d to port %d: %pe\n",
+ rx_vid, port, ERR_PTR(err));
return err;
}
- /* Add to the master's RX filter not only @rx_vid, but in fact
- * the entire subvlan range, just in case this DSA switch might
- * want to use sub-VLANs.
- */
- for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) {
- u16 vid = dsa_8021q_rx_vid_subvlan(ctx->ds, port, subvlan);
-
- if (enabled)
- vlan_vid_add(master, ctx->proto, vid);
- else
- vlan_vid_del(master, ctx->proto, vid);
- }
+ /* Add @rx_vid to the master's RX filter. */
+ vlan_vid_add(master, ctx->proto, rx_vid);
/* Finally apply the TX VID on this port and on the CPU port */
- err = dsa_8021q_vid_apply(ctx, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED,
- enabled);
- if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply TX VID %d on port %d: %d\n",
- tx_vid, port, err);
- return err;
- }
- err = dsa_8021q_vid_apply(ctx, upstream, tx_vid, 0, enabled);
+ err = dsa_port_tag_8021q_vlan_add(dp, tx_vid);
if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply TX VID %d on port %d: %d\n",
- tx_vid, upstream, err);
+ dev_err(ds->dev,
+ "Failed to apply TX VID %d on port %d: %pe\n",
+ tx_vid, port, ERR_PTR(err));
return err;
}
return err;
}
-int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled)
+static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
{
- int rc, port;
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+ u16 tx_vid = dsa_8021q_tx_vid(ds, port);
+ struct net_device *master;
- ASSERT_RTNL();
+ /* The CPU port is implicitly configured by
+ * configuring the front-panel ports
+ */
+ if (!dsa_port_is_user(dp))
+ return;
- for (port = 0; port < ctx->ds->num_ports; port++) {
- rc = dsa_8021q_setup_port(ctx, port, enabled);
- if (rc < 0) {
- dev_err(ctx->ds->dev,
- "Failed to setup VLAN tagging for port %d: %d\n",
- port, rc);
- return rc;
- }
- }
+ master = dp->cpu_dp->master;
- return 0;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_setup);
+ dsa_port_tag_8021q_vlan_del(dp, rx_vid);
-static int dsa_8021q_crosschip_link_apply(struct dsa_8021q_context *ctx,
- int port,
- struct dsa_8021q_context *other_ctx,
- int other_port, bool enabled)
-{
- u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port);
+ vlan_vid_del(master, ctx->proto, rx_vid);
- /* @rx_vid of local @ds port @port goes to @other_port of
- * @other_ds
- */
- return dsa_8021q_vid_apply(other_ctx, other_port, rx_vid,
- BRIDGE_VLAN_INFO_UNTAGGED, enabled);
+ dsa_port_tag_8021q_vlan_del(dp, tx_vid);
}
-static int dsa_8021q_crosschip_link_add(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port)
+static int dsa_tag_8021q_setup(struct dsa_switch *ds)
{
- struct dsa_8021q_crosschip_link *c;
+ int err, port;
+
+ ASSERT_RTNL();
- list_for_each_entry(c, &ctx->crosschip_links, list) {
- if (c->port == port && c->other_ctx == other_ctx &&
- c->other_port == other_port) {
- refcount_inc(&c->refcount);
- return 0;
+ for (port = 0; port < ds->num_ports; port++) {
+ err = dsa_tag_8021q_port_setup(ds, port);
+ if (err < 0) {
+ dev_err(ds->dev,
+ "Failed to setup VLAN tagging for port %d: %pe\n",
+ port, ERR_PTR(err));
+ return err;
}
}
- dev_dbg(ctx->ds->dev,
- "adding crosschip link from port %d to %s port %d\n",
- port, dev_name(other_ctx->ds->dev), other_port);
-
- c = kzalloc(sizeof(*c), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
-
- c->port = port;
- c->other_ctx = other_ctx;
- c->other_port = other_port;
- refcount_set(&c->refcount, 1);
-
- list_add(&c->list, &ctx->crosschip_links);
-
return 0;
}
-static void dsa_8021q_crosschip_link_del(struct dsa_8021q_context *ctx,
- struct dsa_8021q_crosschip_link *c,
- bool *keep)
+static void dsa_tag_8021q_teardown(struct dsa_switch *ds)
{
- *keep = !refcount_dec_and_test(&c->refcount);
-
- if (*keep)
- return;
+ int port;
- dev_dbg(ctx->ds->dev,
- "deleting crosschip link from port %d to %s port %d\n",
- c->port, dev_name(c->other_ctx->ds->dev), c->other_port);
+ ASSERT_RTNL();
- list_del(&c->list);
- kfree(c);
+ for (port = 0; port < ds->num_ports; port++)
+ dsa_tag_8021q_port_teardown(ds, port);
}
-/* Make traffic from local port @port be received by remote port @other_port.
- * This means that our @rx_vid needs to be installed on @other_ds's upstream
- * and user ports. The user ports should be egress-untagged so that they can
- * pop the dsa_8021q VLAN. But the @other_upstream can be either egress-tagged
- * or untagged: it doesn't matter, since it should never egress a frame having
- * our @rx_vid.
- */
-int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port)
+int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto)
{
- /* @other_upstream is how @other_ds reaches us. If we are part
- * of disjoint trees, then we are probably connected through
- * our CPU ports. If we're part of the same tree though, we should
- * probably use dsa_towards_port.
- */
- int other_upstream = dsa_upstream_port(other_ctx->ds, other_port);
- int rc;
+ struct dsa_8021q_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
- rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_port);
- if (rc)
- return rc;
+ ctx->proto = proto;
+ ctx->ds = ds;
- rc = dsa_8021q_crosschip_link_apply(ctx, port, other_ctx,
- other_port, true);
- if (rc)
- return rc;
+ INIT_LIST_HEAD(&ctx->vlans);
- rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_upstream);
- if (rc)
- return rc;
+ ds->tag_8021q_ctx = ctx;
- return dsa_8021q_crosschip_link_apply(ctx, port, other_ctx,
- other_upstream, true);
+ return dsa_tag_8021q_setup(ds);
}
-EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_join);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_register);
-int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port)
+void dsa_tag_8021q_unregister(struct dsa_switch *ds)
{
- int other_upstream = dsa_upstream_port(other_ctx->ds, other_port);
- struct dsa_8021q_crosschip_link *c, *n;
-
- list_for_each_entry_safe(c, n, &ctx->crosschip_links, list) {
- if (c->port == port && c->other_ctx == other_ctx &&
- (c->other_port == other_port ||
- c->other_port == other_upstream)) {
- struct dsa_8021q_context *other_ctx = c->other_ctx;
- int other_port = c->other_port;
- bool keep;
- int rc;
-
- dsa_8021q_crosschip_link_del(ctx, c, &keep);
- if (keep)
- continue;
-
- rc = dsa_8021q_crosschip_link_apply(ctx, port,
- other_ctx,
- other_port,
- false);
- if (rc)
- return rc;
- }
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_tag_8021q_vlan *v, *n;
+
+ dsa_tag_8021q_teardown(ds);
+
+ list_for_each_entry_safe(v, n, &ctx->vlans, list) {
+ list_del(&v->list);
+ kfree(v);
}
- return 0;
+ ds->tag_8021q_ctx = NULL;
+
+ kfree(ctx);
}
-EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_leave);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_unregister);
struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
u16 tpid, u16 tci)
@@ -471,8 +532,7 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
- int *subvlan)
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id)
{
u16 vid, tci;
@@ -489,9 +549,6 @@ void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id,
*source_port = dsa_8021q_rx_source_port(vid);
*switch_id = dsa_8021q_rx_switch_id(vid);
- *subvlan = dsa_8021q_rx_subvlan(vid);
skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
EXPORT_SYMBOL_GPL(dsa_8021q_rcv);
-
-MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 85ac85c3af8c..d0781b058610 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -41,9 +41,9 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
struct net_device *netdev,
struct packet_type *pt)
{
- int src_port, switch_id, subvlan;
+ int src_port, switch_id;
- dsa_8021q_rcv(skb, &src_port, &switch_id, &subvlan);
+ dsa_8021q_rcv(skb, &src_port, &switch_id);
skb->dev = dsa_master_find_slave(netdev, switch_id, src_port);
if (!skb->dev)
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 9c2df9ece01b..7c92c329a092 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -358,20 +358,6 @@ static struct sk_buff
return skb;
}
-static void sja1105_decode_subvlan(struct sk_buff *skb, u16 subvlan)
-{
- struct dsa_port *dp = dsa_slave_to_port(skb->dev);
- struct sja1105_port *sp = dp->priv;
- u16 vid = sp->subvlan_map[subvlan];
- u16 vlan_tci;
-
- if (vid == VLAN_N_VID)
- return;
-
- vlan_tci = (skb->priority << VLAN_PRIO_SHIFT) | vid;
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
-}
-
static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb)
{
u16 tpid = ntohs(eth_hdr(skb)->h_proto);
@@ -389,8 +375,8 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
struct net_device *netdev,
struct packet_type *pt)
{
- int source_port, switch_id, subvlan = 0;
struct sja1105_meta meta = {0};
+ int source_port, switch_id;
struct ethhdr *hdr;
bool is_link_local;
bool is_meta;
@@ -403,7 +389,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
if (sja1105_skb_has_tag_8021q(skb)) {
/* Normal traffic path. */
- dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan);
+ dsa_8021q_rcv(skb, &source_port, &switch_id);
} else if (is_link_local) {
/* Management traffic path. Switch embeds the switch ID and
* port ID into bytes of the destination MAC, courtesy of
@@ -428,9 +414,6 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
return NULL;
}
- if (subvlan)
- sja1105_decode_subvlan(skb, subvlan);
-
return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
is_meta);
}
@@ -538,7 +521,7 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
struct net_device *netdev,
struct packet_type *pt)
{
- int source_port = -1, switch_id = -1, subvlan = 0;
+ int source_port = -1, switch_id = -1;
skb->offload_fwd_mark = 1;
@@ -551,7 +534,7 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
/* Packets with in-band control extensions might still have RX VLANs */
if (likely(sja1105_skb_has_tag_8021q(skb)))
- dsa_8021q_rcv(skb, &source_port, &switch_id, &subvlan);
+ dsa_8021q_rcv(skb, &source_port, &switch_id);
skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
if (!skb->dev) {
@@ -561,9 +544,6 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
return NULL;
}
- if (subvlan)
- sja1105_decode_subvlan(skb, subvlan);
-
return skb;
}
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index baa5d10043cb..6134b180f59f 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -7,6 +7,7 @@
* the information ethtool needs.
*/
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/capability.h>
@@ -807,6 +808,120 @@ out:
return ret;
}
+static noinline_for_stack int
+ethtool_rxnfc_copy_from_compat(struct ethtool_rxnfc *rxnfc,
+ const struct compat_ethtool_rxnfc __user *useraddr,
+ size_t size)
+{
+ struct compat_ethtool_rxnfc crxnfc = {};
+
+ /* We expect there to be holes between fs.m_ext and
+ * fs.ring_cookie and at the end of fs, but nowhere else.
+ * On non-x86, no conversion should be needed.
+ */
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_X86_64) &&
+ sizeof(struct compat_ethtool_rxnfc) !=
+ sizeof(struct ethtool_rxnfc));
+ BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
+ sizeof(useraddr->fs.m_ext) !=
+ offsetof(struct ethtool_rxnfc, fs.m_ext) +
+ sizeof(rxnfc->fs.m_ext));
+ BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.location) -
+ offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
+ offsetof(struct ethtool_rxnfc, fs.location) -
+ offsetof(struct ethtool_rxnfc, fs.ring_cookie));
+
+ if (copy_from_user(&crxnfc, useraddr, min(size, sizeof(crxnfc))))
+ return -EFAULT;
+
+ *rxnfc = (struct ethtool_rxnfc) {
+ .cmd = crxnfc.cmd,
+ .flow_type = crxnfc.flow_type,
+ .data = crxnfc.data,
+ .fs = {
+ .flow_type = crxnfc.fs.flow_type,
+ .h_u = crxnfc.fs.h_u,
+ .h_ext = crxnfc.fs.h_ext,
+ .m_u = crxnfc.fs.m_u,
+ .m_ext = crxnfc.fs.m_ext,
+ .ring_cookie = crxnfc.fs.ring_cookie,
+ .location = crxnfc.fs.location,
+ },
+ .rule_cnt = crxnfc.rule_cnt,
+ };
+
+ return 0;
+}
+
+static int ethtool_rxnfc_copy_from_user(struct ethtool_rxnfc *rxnfc,
+ const void __user *useraddr,
+ size_t size)
+{
+ if (compat_need_64bit_alignment_fixup())
+ return ethtool_rxnfc_copy_from_compat(rxnfc, useraddr, size);
+
+ if (copy_from_user(rxnfc, useraddr, size))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int ethtool_rxnfc_copy_to_compat(void __user *useraddr,
+ const struct ethtool_rxnfc *rxnfc,
+ size_t size, const u32 *rule_buf)
+{
+ struct compat_ethtool_rxnfc crxnfc;
+
+ memset(&crxnfc, 0, sizeof(crxnfc));
+ crxnfc = (struct compat_ethtool_rxnfc) {
+ .cmd = rxnfc->cmd,
+ .flow_type = rxnfc->flow_type,
+ .data = rxnfc->data,
+ .fs = {
+ .flow_type = rxnfc->fs.flow_type,
+ .h_u = rxnfc->fs.h_u,
+ .h_ext = rxnfc->fs.h_ext,
+ .m_u = rxnfc->fs.m_u,
+ .m_ext = rxnfc->fs.m_ext,
+ .ring_cookie = rxnfc->fs.ring_cookie,
+ .location = rxnfc->fs.location,
+ },
+ .rule_cnt = rxnfc->rule_cnt,
+ };
+
+ if (copy_to_user(useraddr, &crxnfc, min(size, sizeof(crxnfc))))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int ethtool_rxnfc_copy_to_user(void __user *useraddr,
+ const struct ethtool_rxnfc *rxnfc,
+ size_t size, const u32 *rule_buf)
+{
+ int ret;
+
+ if (compat_need_64bit_alignment_fixup()) {
+ ret = ethtool_rxnfc_copy_to_compat(useraddr, rxnfc, size,
+ rule_buf);
+ useraddr += offsetof(struct compat_ethtool_rxnfc, rule_locs);
+ } else {
+ ret = copy_to_user(useraddr, &rxnfc, size);
+ useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
+ }
+
+ if (ret)
+ return -EFAULT;
+
+ if (rule_buf) {
+ if (copy_to_user(useraddr, rule_buf,
+ rxnfc->rule_cnt * sizeof(u32)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
u32 cmd, void __user *useraddr)
{
@@ -825,7 +940,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
info_size = (offsetof(struct ethtool_rxnfc, data) +
sizeof(info.data));
- if (copy_from_user(&info, useraddr, info_size))
+ if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
return -EFAULT;
rc = dev->ethtool_ops->set_rxnfc(dev, &info);
@@ -833,7 +948,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
return rc;
if (cmd == ETHTOOL_SRXCLSRLINS &&
- copy_to_user(useraddr, &info, info_size))
+ ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, NULL))
return -EFAULT;
return 0;
@@ -859,7 +974,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
info_size = (offsetof(struct ethtool_rxnfc, data) +
sizeof(info.data));
- if (copy_from_user(&info, useraddr, info_size))
+ if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
return -EFAULT;
/* If FLOW_RSS was requested then user-space must be using the
@@ -867,7 +982,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
*/
if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
info_size = sizeof(info);
- if (copy_from_user(&info, useraddr, info_size))
+ if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
return -EFAULT;
/* Since malicious users may modify the original data,
* we need to check whether FLOW_RSS is still requested.
@@ -893,18 +1008,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (ret < 0)
goto err_out;
- ret = -EFAULT;
- if (copy_to_user(useraddr, &info, info_size))
- goto err_out;
-
- if (rule_buf) {
- useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
- if (copy_to_user(useraddr, rule_buf,
- info.rule_cnt * sizeof(u32)))
- goto err_out;
- }
- ret = 0;
-
+ ret = ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, rule_buf);
err_out:
kfree(rule_buf);
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a45a0401adc5..f5077de3619e 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -129,7 +129,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
int ret = -ENOIOCTLCMD;
struct net_device *dev;
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, NULL, arg))
return -EFAULT;
ifr.ifr_name[IFNAMSIZ-1] = 0;
@@ -143,7 +143,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl)
ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd);
- if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ if (!ret && put_user_ifreq(&ifr, arg))
ret = -EFAULT;
dev_put(dev);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 54648181dd56..0e4d758c2585 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -953,10 +953,10 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGIFNETMASK:
case SIOCGIFDSTADDR:
case SIOCGIFPFLAGS:
- if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, NULL, p))
return -EFAULT;
err = devinet_ioctl(net, cmd, &ifr);
- if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq)))
+ if (!err && put_user_ifreq(&ifr, p))
err = -EFAULT;
break;
@@ -966,7 +966,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFDSTADDR:
case SIOCSIFPFLAGS:
case SIOCSIFFLAGS:
- if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, NULL, p))
return -EFAULT;
err = devinet_ioctl(net, cmd, &ifr);
break;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 73721a4448bd..c82aded8da7d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -215,7 +215,7 @@ static void devinet_sysctl_unregister(struct in_device *idev)
static struct in_ifaddr *inet_alloc_ifa(void)
{
- return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
+ return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
}
static void inet_rcu_free_ifa(struct rcu_head *head)
@@ -1243,7 +1243,7 @@ out:
return ret;
}
-static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
+int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
const struct in_ifaddr *ifa;
@@ -2424,11 +2424,15 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
int *valp = ctl->data;
int val = *valp;
loff_t pos = *ppos;
- int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ struct net *net = ctl->extra2;
+ int ret;
- if (write && *valp != val) {
- struct net *net = ctl->extra2;
+ if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ if (write && *valp != val) {
if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
if (!rtnl_trylock()) {
/* Restore the original values before restarting */
@@ -2762,8 +2766,6 @@ void __init devinet_init(void)
INIT_HLIST_HEAD(&inet_addr_lst[i]);
register_pernet_subsys(&devinet_ops);
-
- register_gifconf(PF_INET, inet_gifconf);
register_netdevice_notifier(&ip_netdev_notifier);
queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 25cf387cca5b..8060524f4256 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2380,11 +2380,11 @@ void __init fib_trie_init(void)
{
fn_alias_kmem = kmem_cache_create("ip_fib_alias",
sizeof(struct fib_alias),
- 0, SLAB_PANIC, NULL);
+ 0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
LEAF_SIZE,
- 0, SLAB_PANIC, NULL);
+ 0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
}
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6b3c558a4f23..03589a04f9aa 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2713,6 +2713,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
rv = 1;
} else if (im) {
if (src_addr) {
+ spin_lock_bh(&im->lock);
for (psf = im->sources; psf; psf = psf->sf_next) {
if (psf->sf_inaddr == src_addr)
break;
@@ -2723,6 +2724,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
im->sfcount[MCAST_EXCLUDE];
else
rv = im->sfcount[MCAST_EXCLUDE] != 0;
+ spin_unlock_bh(&im->lock);
} else
rv = 1; /* unspecified source; tentatively allow */
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 99c06944501a..04754d55b3c1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1299,26 +1299,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
{
- const struct rtable *rt = (const struct rtable *)dst;
- unsigned int mtu = rt->rt_pmtu;
-
- if (!mtu || time_after_eq(jiffies, rt->dst.expires))
- mtu = dst_metric_raw(dst, RTAX_MTU);
-
- if (mtu)
- goto out;
-
- mtu = READ_ONCE(dst->dev->mtu);
-
- if (unlikely(ip_mtu_locked(dst))) {
- if (rt->rt_uses_gateway && mtu > 576)
- mtu = 576;
- }
-
-out:
- mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
-
- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+ return ip_dst_mtu_maybe_forward(dst, false);
}
EXPORT_INDIRECT_CALLABLE(ipv4_mtu);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8cb44040ec68..f931def6302e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4512,7 +4512,9 @@ void __init tcp_init(void)
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+ SLAB_ACCOUNT,
+ NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 25fa4c01a17f..62ba8d0f2c60 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -55,12 +55,7 @@ void tcp_fastopen_ctx_destroy(struct net *net)
{
struct tcp_fastopen_context *ctxt;
- spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
-
- ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
- lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
- rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
- spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+ ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL);
if (ctxt)
call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
@@ -89,18 +84,12 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
ctx->num = 1;
}
- spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
if (sk) {
q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
- octx = rcu_dereference_protected(q->ctx,
- lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
- rcu_assign_pointer(q->ctx, ctx);
+ octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx);
} else {
- octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
- lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
- rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+ octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx);
}
- spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 149ceb5c94ff..501d8d4d4ba4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -454,11 +454,12 @@ static void tcp_sndbuf_expand(struct sock *sk)
*/
/* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+ unsigned int skbtruesize)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
- int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
@@ -471,7 +472,27 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
return 0;
}
-static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
+/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing
+ * can play nice with us, as sk_buff and skb->head might be either
+ * freed or shared with up to MAX_SKB_FRAGS segments.
+ * Only give a boost to drivers using page frag(s) to hold the frame(s),
+ * and if no payload was pulled in skb->head before reaching us.
+ */
+static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
+{
+ u32 truesize = skb->truesize;
+
+ if (adjust && !skb_headlen(skb)) {
+ truesize -= SKB_TRUESIZE(skb_end_offset(skb));
+ /* paranoid check, some drivers might be buggy */
+ if (unlikely((int)truesize < (int)skb->len))
+ truesize = skb->truesize;
+ }
+ return truesize;
+}
+
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
+ bool adjust)
{
struct tcp_sock *tp = tcp_sk(sk);
int room;
@@ -480,15 +501,16 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #1 */
if (room > 0 && !tcp_under_memory_pressure(sk)) {
+ unsigned int truesize = truesize_adjust(adjust, skb);
int incr;
/* Check #2. Increase window, if skb with such overhead
* will fit to rcvbuf in future.
*/
- if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
+ if (tcp_win_from_space(sk, truesize) <= skb->len)
incr = 2 * tp->advmss;
else
- incr = __tcp_grow_window(sk, skb);
+ incr = __tcp_grow_window(sk, skb, truesize);
if (incr) {
incr = max_t(int, incr, 2 * skb->len);
@@ -782,7 +804,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
tcp_ecn_check_ce(sk, skb);
if (skb->len >= 128)
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, true);
}
/* Called to compute a smoothed rtt estimate. The data fed to this
@@ -4769,7 +4791,7 @@ coalesce_done:
* and trigger fast retransmit.
*/
if (tcp_is_sack(tp))
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, true);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
goto add_sack;
@@ -4857,7 +4879,7 @@ end:
* and trigger fast retransmit.
*/
if (tcp_is_sack(tp))
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, false);
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
@@ -5383,7 +5405,7 @@ static void tcp_new_space(struct sock *sk)
tp->snd_cwnd_stamp = tcp_jiffies32;
}
- sk->sk_write_space(sk);
+ INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
}
static void tcp_check_space(struct sock *sk)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a692626c19e4..84db1c9ee92a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2964,7 +2964,6 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
- spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 9f5a5cdc38e6..7a1d5f473878 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -112,7 +112,6 @@ static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
*prot = *base;
- prot->unhash = sock_map_unhash;
prot->close = sock_map_close;
prot->recvmsg = udp_bpf_recvmsg;
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 747f56e0c636..e504204bca92 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL
If unsure, say N.
+config IPV6_IOAM6_LWTUNNEL
+ bool "IPv6: IOAM Pre-allocated Trace insertion support"
+ depends on IPV6
+ select LWTUNNEL
+ help
+ Support for the inline insertion of IOAM Pre-allocated
+ Trace Header (only on locally generated packets), using
+ the lightweight tunnels mechanism.
+
+ If unsure, say N.
+
endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index cf7b47bdb9b3..1bc7e143217b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o seg6.o fib6_notifier.o rpl.o
+ udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
@@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o
ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o
+ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3bf685fe64b9..db0a89810f28 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -89,6 +89,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/export.h>
+#include <linux/ioam6.h>
#define INFINITY_LIFE_TIME 0xFFFFFFFF
@@ -237,6 +238,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
.disable_policy = 0,
.rpl_seg_enabled = 0,
+ .ioam6_enabled = 0,
+ .ioam6_id = IOAM6_DEFAULT_IF_ID,
+ .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -293,6 +297,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
.disable_policy = 0,
.rpl_seg_enabled = 0,
+ .ioam6_enabled = 0,
+ .ioam6_id = IOAM6_DEFAULT_IF_ID,
+ .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -1080,7 +1087,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- ifa = kzalloc(sizeof(*ifa), gfp_flags);
+ ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT);
if (!ifa) {
err = -ENOBUFS;
goto out;
@@ -5211,8 +5218,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
.netnsid = -1,
.type = type,
};
- struct net *net = sock_net(skb->sk);
- struct net *tgt_net = net;
+ struct net *tgt_net = sock_net(skb->sk);
int idx, s_idx, s_ip_idx;
int h, s_h;
struct net_device *dev;
@@ -5351,7 +5357,7 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(in_skb->sk);
+ struct net *tgt_net = sock_net(in_skb->sk);
struct inet6_fill_args fillargs = {
.portid = NETLINK_CB(in_skb).portid,
.seq = nlh->nlmsg_seq,
@@ -5359,7 +5365,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
.flags = 0,
.netnsid = -1,
};
- struct net *tgt_net = net;
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *addr = NULL, *peer;
@@ -5526,6 +5531,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
+ array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
+ array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
+ array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
}
static inline size_t inet6_ifla6_size(void)
@@ -6540,6 +6548,7 @@ static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
static int minus_one = -1;
static const int two_five_five = 255;
+static u32 ioam6_if_id_max = U16_MAX;
static const struct ctl_table addrconf_sysctl[] = {
{
@@ -6933,6 +6942,31 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "ioam6_enabled",
+ .data = &ipv6_devconf.ioam6_enabled,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)SYSCTL_ONE,
+ },
+ {
+ .procname = "ioam6_id",
+ .data = &ipv6_devconf.ioam6_id,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)&ioam6_if_id_max,
+ },
+ {
+ .procname = "ioam6_id_wide",
+ .data = &ipv6_devconf.ioam6_id_wide,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec,
+ },
+ {
/* sentinel */
}
};
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2389ff702f51..d92c90d97763 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -62,6 +62,7 @@
#include <net/rpl.h>
#include <net/compat.h>
#include <net/xfrm.h>
+#include <net/ioam6.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.fib_notify_on_flag_change = 0;
atomic_set(&net->ipv6.fib6_sernum, 1);
+ net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID;
+ net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE;
+
err = ipv6_init_mibs(net);
if (err)
return err;
@@ -1191,6 +1195,10 @@ static int __init inet6_init(void)
if (err)
goto rpl_fail;
+ err = ioam6_init();
+ if (err)
+ goto ioam6_fail;
+
err = igmp6_late_init();
if (err)
goto igmp6_late_err;
@@ -1213,6 +1221,8 @@ sysctl_fail:
igmp6_late_cleanup();
#endif
igmp6_late_err:
+ ioam6_exit();
+ioam6_fail:
rpl_exit();
rpl_fail:
seg6_exit();
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 26882e165c9e..d897faa4e9e6 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -49,6 +49,9 @@
#include <net/seg6_hmac.h>
#endif
#include <net/rpl.h>
+#include <linux/ioam6.h>
+#include <net/ioam6.h>
+#include <net/dst_metadata.h>
#include <linux/uaccess.h>
@@ -928,6 +931,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
return false;
}
+/* IOAM */
+
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
+{
+ struct ioam6_trace_hdr *trace;
+ struct ioam6_namespace *ns;
+ struct ioam6_hdr *hdr;
+
+ /* Bad alignment (must be 4n-aligned) */
+ if (optoff & 3)
+ goto drop;
+
+ /* Ignore if IOAM is not enabled on ingress */
+ if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled)
+ goto ignore;
+
+ /* Truncated Option header */
+ hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff);
+ if (hdr->opt_len < 2)
+ goto drop;
+
+ switch (hdr->type) {
+ case IOAM6_TYPE_PREALLOC:
+ /* Truncated Pre-allocated Trace header */
+ if (hdr->opt_len < 2 + sizeof(*trace))
+ goto drop;
+
+ /* Malformed Pre-allocated Trace header */
+ trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr));
+ if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4)
+ goto drop;
+
+ /* Ignore if the IOAM namespace is unknown */
+ ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id);
+ if (!ns)
+ goto ignore;
+
+ if (!skb_valid_dst(skb))
+ ip6_route_input(skb);
+
+ ioam6_fill_trace_data(skb, ns, trace);
+ break;
+ default:
+ break;
+ }
+
+ignore:
+ return true;
+
+drop:
+ kfree_skb(skb);
+ return false;
+}
+
/* Jumbo payload */
static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
@@ -1000,6 +1057,10 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = {
.func = ipv6_hop_ra,
},
{
+ .type = IPV6_TLV_IOAM,
+ .func = ipv6_hop_ioam,
+ },
+ {
.type = IPV6_TLV_JUMBO,
.func = ipv6_hop_jumbo,
},
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
new file mode 100644
index 000000000000..5e8961004832
--- /dev/null
+++ b/net/ipv6/ioam6.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IPv6 IOAM implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
+#include <linux/rhashtable.h>
+
+#include <net/addrconf.h>
+#include <net/genetlink.h>
+#include <net/ioam6.h>
+
+static void ioam6_ns_release(struct ioam6_namespace *ns)
+{
+ kfree_rcu(ns, rcu);
+}
+
+static void ioam6_sc_release(struct ioam6_schema *sc)
+{
+ kfree_rcu(sc, rcu);
+}
+
+static void ioam6_free_ns(void *ptr, void *arg)
+{
+ struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr;
+
+ if (ns)
+ ioam6_ns_release(ns);
+}
+
+static void ioam6_free_sc(void *ptr, void *arg)
+{
+ struct ioam6_schema *sc = (struct ioam6_schema *)ptr;
+
+ if (sc)
+ ioam6_sc_release(sc);
+}
+
+static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct ioam6_namespace *ns = obj;
+
+ return (ns->id != *(__be16 *)arg->key);
+}
+
+static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct ioam6_schema *sc = obj;
+
+ return (sc->id != *(u32 *)arg->key);
+}
+
+static const struct rhashtable_params rht_ns_params = {
+ .key_len = sizeof(__be16),
+ .key_offset = offsetof(struct ioam6_namespace, id),
+ .head_offset = offsetof(struct ioam6_namespace, head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = ioam6_ns_cmpfn,
+};
+
+static const struct rhashtable_params rht_sc_params = {
+ .key_len = sizeof(u32),
+ .key_offset = offsetof(struct ioam6_schema, id),
+ .head_offset = offsetof(struct ioam6_schema, head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = ioam6_sc_cmpfn,
+};
+
+static struct genl_family ioam6_genl_family;
+
+static const struct nla_policy ioam6_genl_policy_addns[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+ [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 },
+ [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 },
+};
+
+static const struct nla_policy ioam6_genl_policy_delns[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+};
+
+static const struct nla_policy ioam6_genl_policy_addsc[] = {
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+ [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY,
+ .len = IOAM6_MAX_SCHEMA_DATA_LEN },
+};
+
+static const struct nla_policy ioam6_genl_policy_delsc[] = {
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy ioam6_genl_policy_ns_sc[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+ [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG },
+};
+
+static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ u64 data64;
+ u32 data32;
+ __be16 id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID])
+ return -EINVAL;
+
+ id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+ if (ns) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ ns->id = id;
+
+ if (!info->attrs[IOAM6_ATTR_NS_DATA])
+ data32 = IOAM6_U32_UNAVAILABLE;
+ else
+ data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]);
+
+ if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE])
+ data64 = IOAM6_U64_UNAVAILABLE;
+ else
+ data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]);
+
+ ns->data = cpu_to_be32(data32);
+ ns->data_wide = cpu_to_be64(data64);
+
+ err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head,
+ rht_ns_params);
+ if (err)
+ kfree(ns);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ struct ioam6_schema *sc;
+ __be16 id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID])
+ return -EINVAL;
+
+ id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+ if (!ns) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ sc = rcu_dereference_protected(ns->schema,
+ lockdep_is_held(&nsdata->lock));
+
+ err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head,
+ rht_ns_params);
+ if (err)
+ goto out_unlock;
+
+ if (sc)
+ rcu_assign_pointer(sc->ns, NULL);
+
+ ioam6_ns_release(ns);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns,
+ u32 portid,
+ u32 seq,
+ u32 flags,
+ struct sk_buff *skb,
+ u8 cmd)
+{
+ struct ioam6_schema *sc;
+ u64 data64;
+ u32 data32;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ data32 = be32_to_cpu(ns->data);
+ data64 = be64_to_cpu(ns->data_wide);
+
+ if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) ||
+ (data32 != IOAM6_U32_UNAVAILABLE &&
+ nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) ||
+ (data64 != IOAM6_U64_UNAVAILABLE &&
+ nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE,
+ data64, IOAM6_ATTR_PAD)))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+
+ sc = rcu_dereference(ns->schema);
+ if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+
+ rcu_read_unlock();
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpns_start(struct netlink_callback *cb)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&nsdata->namespaces, iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpns_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+ kfree(iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter;
+ struct ioam6_namespace *ns;
+ int err;
+
+ iter = (struct rhashtable_iter *)cb->args[0];
+ rhashtable_walk_start(iter);
+
+ for (;;) {
+ ns = rhashtable_walk_next(iter);
+
+ if (IS_ERR(ns)) {
+ if (PTR_ERR(ns) == -EAGAIN)
+ continue;
+ err = PTR_ERR(ns);
+ goto done;
+ } else if (!ns) {
+ break;
+ }
+
+ err = __ioam6_genl_dumpns_element(ns,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb,
+ IOAM6_CMD_DUMP_NAMESPACES);
+ if (err)
+ goto done;
+ }
+
+ err = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return err;
+}
+
+static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ int len, len_aligned, err;
+ struct ioam6_schema *sc;
+ u32 id;
+
+ if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA])
+ return -EINVAL;
+
+ id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+ if (sc) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]);
+ len_aligned = ALIGN(len, 4);
+
+ sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL);
+ if (!sc) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ sc->id = id;
+ sc->len = len_aligned;
+ sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24));
+ nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len);
+
+ err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head,
+ rht_sc_params);
+ if (err)
+ goto free_sc;
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+free_sc:
+ kfree(sc);
+ goto out_unlock;
+}
+
+static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ struct ioam6_schema *sc;
+ int err;
+ u32 id;
+
+ if (!info->attrs[IOAM6_ATTR_SC_ID])
+ return -EINVAL;
+
+ id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+ if (!sc) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock));
+
+ err = rhashtable_remove_fast(&nsdata->schemas, &sc->head,
+ rht_sc_params);
+ if (err)
+ goto out_unlock;
+
+ if (ns)
+ rcu_assign_pointer(ns->schema, NULL);
+
+ ioam6_sc_release(sc);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc,
+ u32 portid, u32 seq, u32 flags,
+ struct sk_buff *skb, u8 cmd)
+{
+ struct ioam6_namespace *ns;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) ||
+ nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+
+ ns = rcu_dereference(sc->ns);
+ if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+
+ rcu_read_unlock();
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpsc_start(struct netlink_callback *cb)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&nsdata->schemas, iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpsc_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+ kfree(iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter;
+ struct ioam6_schema *sc;
+ int err;
+
+ iter = (struct rhashtable_iter *)cb->args[0];
+ rhashtable_walk_start(iter);
+
+ for (;;) {
+ sc = rhashtable_walk_next(iter);
+
+ if (IS_ERR(sc)) {
+ if (PTR_ERR(sc) == -EAGAIN)
+ continue;
+ err = PTR_ERR(sc);
+ goto done;
+ } else if (!sc) {
+ break;
+ }
+
+ err = __ioam6_genl_dumpsc_element(sc,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb,
+ IOAM6_CMD_DUMP_SCHEMAS);
+ if (err)
+ goto done;
+ }
+
+ err = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return err;
+}
+
+static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_namespace *ns, *ns_ref;
+ struct ioam6_schema *sc, *sc_ref;
+ struct ioam6_pernet_data *nsdata;
+ __be16 ns_id;
+ u32 sc_id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID] ||
+ (!info->attrs[IOAM6_ATTR_SC_ID] &&
+ !info->attrs[IOAM6_ATTR_SC_NONE]))
+ return -EINVAL;
+
+ ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params);
+ if (!ns) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ if (info->attrs[IOAM6_ATTR_SC_NONE]) {
+ sc = NULL;
+ } else {
+ sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id,
+ rht_sc_params);
+ if (!sc) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+ }
+
+ sc_ref = rcu_dereference_protected(ns->schema,
+ lockdep_is_held(&nsdata->lock));
+ if (sc_ref)
+ rcu_assign_pointer(sc_ref->ns, NULL);
+ rcu_assign_pointer(ns->schema, sc);
+
+ if (sc) {
+ ns_ref = rcu_dereference_protected(sc->ns,
+ lockdep_is_held(&nsdata->lock));
+ if (ns_ref)
+ rcu_assign_pointer(ns_ref->schema, NULL);
+ rcu_assign_pointer(sc->ns, ns);
+ }
+
+ err = 0;
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static const struct genl_ops ioam6_genl_ops[] = {
+ {
+ .cmd = IOAM6_CMD_ADD_NAMESPACE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_addns,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_addns,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DEL_NAMESPACE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_delns,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_delns,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DUMP_NAMESPACES,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = ioam6_genl_dumpns_start,
+ .dumpit = ioam6_genl_dumpns,
+ .done = ioam6_genl_dumpns_done,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = IOAM6_CMD_ADD_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_addsc,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_addsc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DEL_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_delsc,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_delsc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DUMP_SCHEMAS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = ioam6_genl_dumpsc_start,
+ .dumpit = ioam6_genl_dumpsc,
+ .done = ioam6_genl_dumpsc_done,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = IOAM6_CMD_NS_SET_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_ns_set_schema,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_ns_sc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1,
+ },
+};
+
+static struct genl_family ioam6_genl_family __ro_after_init = {
+ .name = IOAM6_GENL_NAME,
+ .version = IOAM6_GENL_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = ioam6_genl_ops,
+ .n_ops = ARRAY_SIZE(ioam6_genl_ops),
+ .module = THIS_MODULE,
+};
+
+struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+ return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+}
+
+static void __ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace,
+ struct ioam6_schema *sc,
+ u8 sclen)
+{
+ struct __kernel_sock_timeval ts;
+ u64 raw64;
+ u32 raw32;
+ u16 raw16;
+ u8 *data;
+ u8 byte;
+
+ data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4;
+
+ /* hop_lim and node_id */
+ if (trace->type.bit0) {
+ byte = ipv6_hdr(skb)->hop_limit;
+ if (skb->dev)
+ byte--;
+
+ raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id;
+
+ *(__be32 *)data = cpu_to_be32((byte << 24) | raw32);
+ data += sizeof(__be32);
+ }
+
+ /* ingress_if_id and egress_if_id */
+ if (trace->type.bit1) {
+ if (!skb->dev)
+ raw16 = IOAM6_U16_UNAVAILABLE;
+ else
+ raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id;
+
+ *(__be16 *)data = cpu_to_be16(raw16);
+ data += sizeof(__be16);
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ raw16 = IOAM6_U16_UNAVAILABLE;
+ else
+ raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id;
+
+ *(__be16 *)data = cpu_to_be16(raw16);
+ data += sizeof(__be16);
+ }
+
+ /* timestamp seconds */
+ if (trace->type.bit2) {
+ if (!skb->dev) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ } else {
+ if (!skb->tstamp)
+ __net_timestamp(skb);
+
+ skb_get_new_timestamp(skb, &ts);
+ *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec);
+ }
+ data += sizeof(__be32);
+ }
+
+ /* timestamp subseconds */
+ if (trace->type.bit3) {
+ if (!skb->dev) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ } else {
+ if (!skb->tstamp)
+ __net_timestamp(skb);
+
+ if (!trace->type.bit2)
+ skb_get_new_timestamp(skb, &ts);
+
+ *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec);
+ }
+ data += sizeof(__be32);
+ }
+
+ /* transit delay */
+ if (trace->type.bit4) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* namespace data */
+ if (trace->type.bit5) {
+ *(__be32 *)data = ns->data;
+ data += sizeof(__be32);
+ }
+
+ /* queue depth */
+ if (trace->type.bit6) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* checksum complement */
+ if (trace->type.bit7) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* hop_lim and node_id (wide) */
+ if (trace->type.bit8) {
+ byte = ipv6_hdr(skb)->hop_limit;
+ if (skb->dev)
+ byte--;
+
+ raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide;
+
+ *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64);
+ data += sizeof(__be64);
+ }
+
+ /* ingress_if_id and egress_if_id (wide) */
+ if (trace->type.bit9) {
+ if (!skb->dev)
+ raw32 = IOAM6_U32_UNAVAILABLE;
+ else
+ raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide;
+
+ *(__be32 *)data = cpu_to_be32(raw32);
+ data += sizeof(__be32);
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ raw32 = IOAM6_U32_UNAVAILABLE;
+ else
+ raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide;
+
+ *(__be32 *)data = cpu_to_be32(raw32);
+ data += sizeof(__be32);
+ }
+
+ /* namespace data (wide) */
+ if (trace->type.bit10) {
+ *(__be64 *)data = ns->data_wide;
+ data += sizeof(__be64);
+ }
+
+ /* buffer occupancy */
+ if (trace->type.bit11) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* opaque state snapshot */
+ if (trace->type.bit22) {
+ if (!sc) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8);
+ } else {
+ *(__be32 *)data = sc->hdr;
+ data += sizeof(__be32);
+
+ memcpy(data, sc->data, sc->len);
+ }
+ }
+}
+
+/* called with rcu_read_lock() */
+void ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace)
+{
+ struct ioam6_schema *sc;
+ u8 sclen = 0;
+
+ /* Skip if Overflow flag is set OR
+ * if an unknown type (bit 12-21) is set
+ */
+ if (trace->overflow ||
+ trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
+ trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
+ trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
+ trace->type.bit21) {
+ return;
+ }
+
+ /* NodeLen does not include Opaque State Snapshot length. We need to
+ * take it into account if the corresponding bit is set (bit 22) and
+ * if the current IOAM namespace has an active schema attached to it
+ */
+ sc = rcu_dereference(ns->schema);
+ if (trace->type.bit22) {
+ sclen = sizeof_field(struct ioam6_schema, hdr) / 4;
+
+ if (sc)
+ sclen += sc->len / 4;
+ }
+
+ /* If there is no space remaining, we set the Overflow flag and we
+ * skip without filling the trace
+ */
+ if (!trace->remlen || trace->remlen < trace->nodelen + sclen) {
+ trace->overflow = 1;
+ return;
+ }
+
+ __ioam6_fill_trace_data(skb, ns, trace, sc, sclen);
+ trace->remlen -= trace->nodelen + sclen;
+}
+
+static int __net_init ioam6_net_init(struct net *net)
+{
+ struct ioam6_pernet_data *nsdata;
+ int err = -ENOMEM;
+
+ nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL);
+ if (!nsdata)
+ goto out;
+
+ mutex_init(&nsdata->lock);
+ net->ipv6.ioam6_data = nsdata;
+
+ err = rhashtable_init(&nsdata->namespaces, &rht_ns_params);
+ if (err)
+ goto free_nsdata;
+
+ err = rhashtable_init(&nsdata->schemas, &rht_sc_params);
+ if (err)
+ goto free_rht_ns;
+
+out:
+ return err;
+free_rht_ns:
+ rhashtable_destroy(&nsdata->namespaces);
+free_nsdata:
+ kfree(nsdata);
+ net->ipv6.ioam6_data = NULL;
+ goto out;
+}
+
+static void __net_exit ioam6_net_exit(struct net *net)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+ rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL);
+ rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL);
+
+ kfree(nsdata);
+}
+
+static struct pernet_operations ioam6_net_ops = {
+ .init = ioam6_net_init,
+ .exit = ioam6_net_exit,
+};
+
+int __init ioam6_init(void)
+{
+ int err = register_pernet_subsys(&ioam6_net_ops);
+ if (err)
+ goto out;
+
+ err = genl_register_family(&ioam6_genl_family);
+ if (err)
+ goto out_unregister_pernet_subsys;
+
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+ err = ioam6_iptunnel_init();
+ if (err)
+ goto out_unregister_genl;
+#endif
+
+ pr_info("In-situ OAM (IOAM) with IPv6\n");
+
+out:
+ return err;
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+out_unregister_genl:
+ genl_unregister_family(&ioam6_genl_family);
+#endif
+out_unregister_pernet_subsys:
+ unregister_pernet_subsys(&ioam6_net_ops);
+ goto out;
+}
+
+void ioam6_exit(void)
+{
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+ ioam6_iptunnel_exit();
+#endif
+ genl_unregister_family(&ioam6_genl_family);
+ unregister_pernet_subsys(&ioam6_net_ops);
+}
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
new file mode 100644
index 000000000000..f9ee04541c17
--- /dev/null
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IPv6 IOAM Lightweight Tunnel implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/in6.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_iptunnel.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/lwtunnel.h>
+#include <net/ioam6.h>
+
+#define IOAM6_MASK_SHORT_FIELDS 0xff100000
+#define IOAM6_MASK_WIDE_FIELDS 0xe00000
+
+struct ioam6_lwt_encap {
+ struct ipv6_hopopt_hdr eh;
+ u8 pad[2]; /* 2-octet padding for 4n-alignment */
+ struct ioam6_hdr ioamh;
+ struct ioam6_trace_hdr traceh;
+} __packed;
+
+struct ioam6_lwt {
+ struct ioam6_lwt_encap tuninfo;
+};
+
+static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
+{
+ return (struct ioam6_lwt *)lwt->data;
+}
+
+static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
+{
+ return &ioam6_lwt_state(lwt)->tuninfo;
+}
+
+static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt)
+{
+ return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
+}
+
+static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
+ [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
+};
+
+static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype,
+ struct ioam6_trace_hdr *trace)
+{
+ struct ioam6_trace_hdr *data;
+ struct nlattr *nla;
+ int len;
+
+ len = sizeof(*trace);
+
+ nla = nla_reserve(skb, attrtype, len);
+ if (!nla)
+ return -EMSGSIZE;
+
+ data = nla_data(nla);
+ memcpy(data, trace, len);
+
+ return 0;
+}
+
+static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
+{
+ u32 fields;
+
+ if (!trace->type_be32 || !trace->remlen ||
+ trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4)
+ return false;
+
+ trace->nodelen = 0;
+ fields = be32_to_cpu(trace->type_be32);
+
+ trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
+ * (sizeof(__be32) / 4);
+ trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
+ * (sizeof(__be64) / 4);
+
+ return true;
+}
+
+static int ioam6_build_state(struct net *net, struct nlattr *nla,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
+ struct ioam6_lwt_encap *tuninfo;
+ struct ioam6_trace_hdr *trace;
+ struct lwtunnel_state *s;
+ int len_aligned;
+ int len, err;
+
+ if (family != AF_INET6)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
+ ioam6_iptunnel_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[IOAM6_IPTUNNEL_TRACE]) {
+ NL_SET_ERR_MSG(extack, "missing trace");
+ return -EINVAL;
+ }
+
+ trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
+ if (!ioam6_validate_trace_hdr(trace)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
+ "invalid trace validation");
+ return -EINVAL;
+ }
+
+ len = sizeof(*tuninfo) + trace->remlen * 4;
+ len_aligned = ALIGN(len, 8);
+
+ s = lwtunnel_state_alloc(len_aligned);
+ if (!s)
+ return -ENOMEM;
+
+ tuninfo = ioam6_lwt_info(s);
+ tuninfo->eh.hdrlen = (len_aligned >> 3) - 1;
+ tuninfo->pad[0] = IPV6_TLV_PADN;
+ tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
+ tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
+ tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
+ + trace->remlen * 4;
+
+ memcpy(&tuninfo->traceh, trace, sizeof(*trace));
+
+ len = len_aligned - len;
+ if (len == 1) {
+ tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1;
+ } else if (len > 0) {
+ tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
+ tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2;
+ }
+
+ s->type = LWTUNNEL_ENCAP_IOAM6;
+ s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+
+ *ts = s;
+
+ return 0;
+}
+
+static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo)
+{
+ struct ioam6_trace_hdr *trace;
+ struct ipv6hdr *oldhdr, *hdr;
+ struct ioam6_namespace *ns;
+ int hdrlen, err;
+
+ hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
+
+ err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ if (unlikely(err))
+ return err;
+
+ oldhdr = ipv6_hdr(skb);
+ skb_pull(skb, sizeof(*oldhdr));
+ skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));
+
+ skb_push(skb, sizeof(*oldhdr) + hdrlen);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ hdr = ipv6_hdr(skb);
+ memmove(hdr, oldhdr, sizeof(*oldhdr));
+ tuninfo->eh.nexthdr = hdr->nexthdr;
+
+ skb_set_transport_header(skb, sizeof(*hdr));
+ skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);
+
+ memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
+
+ hdr->nexthdr = NEXTHDR_HOP;
+ hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
+
+ trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
+ + sizeof(struct ipv6_hopopt_hdr) + 2
+ + sizeof(struct ioam6_hdr));
+
+ ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id);
+ if (ns)
+ ioam6_fill_trace_data(skb, ns, trace);
+
+ return 0;
+}
+
+static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate;
+ int err = -EINVAL;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ goto drop;
+
+ /* Only for packets we send and
+ * that do not contain a Hop-by-Hop yet
+ */
+ if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
+ goto out;
+
+ err = ioam6_do_inline(skb, ioam6_lwt_info(lwt));
+ if (unlikely(err))
+ goto drop;
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev));
+ if (unlikely(err))
+ goto drop;
+
+out:
+ return lwt->orig_output(net, sk, skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
+}
+
+static int ioam6_fill_encap_info(struct sk_buff *skb,
+ struct lwtunnel_state *lwtstate)
+{
+ struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+ if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+ return nla_total_size(sizeof(*trace));
+}
+
+static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ struct ioam6_trace_hdr *a_hdr = ioam6_trace(a);
+ struct ioam6_trace_hdr *b_hdr = ioam6_trace(b);
+
+ return (a_hdr->namespace_id != b_hdr->namespace_id);
+}
+
+static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
+ .build_state = ioam6_build_state,
+ .output = ioam6_output,
+ .fill_encap = ioam6_fill_encap_info,
+ .get_encap_size = ioam6_encap_nlsize,
+ .cmp_encap = ioam6_encap_cmp,
+ .owner = THIS_MODULE,
+};
+
+int __init ioam6_iptunnel_init(void)
+{
+ return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
+
+void ioam6_iptunnel_exit(void)
+{
+ lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2d650dc24349..a8f118e469b7 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2449,8 +2449,8 @@ int __init fib6_init(void)
int ret = -ENOMEM;
fib6_node_kmem = kmem_cache_create("fib6_nodes",
- sizeof(struct fib6_node),
- 0, SLAB_HWCACHE_ALIGN,
+ sizeof(struct fib6_node), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
NULL);
if (!fib6_node_kmem)
goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e1b9f7ac8bad..b7b27d94ef61 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
}
}
- mtu = ip6_dst_mtu_forward(dst);
+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b6ddf23d3833..6b8051106aba 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3201,25 +3201,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
{
- struct inet6_dev *idev;
- unsigned int mtu;
-
- mtu = dst_metric_raw(dst, RTAX_MTU);
- if (mtu)
- goto out;
-
- mtu = IPV6_MIN_MTU;
-
- rcu_read_lock();
- idev = __in6_dev_get(dst->dev);
- if (idev)
- mtu = idev->cnf.mtu6;
- rcu_read_unlock();
-
-out:
- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
-
- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+ return ip6_dst_mtu_maybe_forward(dst, false);
}
EXPORT_INDIRECT_CALLABLE(ip6_mtu);
@@ -6638,7 +6620,7 @@ int __init ip6_route_init(void)
ret = -ENOMEM;
ip6_dst_ops_template.kmem_cachep =
kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!ip6_dst_ops_template.kmem_cachep)
goto out;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index df5bea818410..33adc12b697d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -321,7 +321,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* we try harder to allocate.
*/
kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
- kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
+ kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
NULL;
rcu_read_lock();
@@ -334,7 +334,8 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* For root users, retry allocating enough memory for
* the answer.
*/
- kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
+ kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT |
+ __GFP_NOWARN);
if (!kp) {
ret = -ENOMEM;
goto out;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index d7cf26f730d7..d53dd142bf87 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -21,6 +21,7 @@
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+#include <linux/ioam6.h>
static int two = 2;
static int three = 3;
@@ -28,6 +29,8 @@ static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
static u32 rt6_multipath_hash_fields_all_mask =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
+static u32 ioam6_id_max = IOAM6_DEFAULT_ID;
+static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE;
static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -196,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
+ {
+ .procname = "ioam6_id",
+ .data = &init_net.ipv6.sysctl.ioam6_id,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra2 = &ioam6_id_max,
+ },
+ {
+ .procname = "ioam6_id_wide",
+ .data = &init_net.ipv6.sysctl.ioam6_id_wide,
+ .maxlen = sizeof(u64),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra2 = &ioam6_id_wide_max,
+ },
{ }
};
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 1e50908b1b7e..8fe024a0ae46 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -99,7 +99,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
break;
case NFPROTO_IPV6:
- flow_tuple->mtu = ip6_dst_mtu_forward(dst);
+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
break;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 380f95aacdec..24b7cf447bc5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2545,13 +2545,15 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
/* errors reported via destination sk->sk_err, but propagate
* delivery errors if NETLINK_BROADCAST_ERROR flag is set */
err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
+ if (err == -ESRCH)
+ err = 0;
}
if (report) {
int err2;
err2 = nlmsg_unicast(sk, skb, portid);
- if (!err || err == -ESRCH)
+ if (!err)
err = err2;
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ef15d9eb4774..f79679746c62 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -924,7 +924,11 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_USERSPACE_ATTR_PID:
- upcall.portid = nla_get_u32(a);
+ if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+ upcall.portid =
+ ovs_dp_get_upcall_portid(dp, smp_processor_id());
+ else
+ upcall.portid = nla_get_u32(a);
break;
case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index bc164b35e67d..7a4edafdc685 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -133,6 +133,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
static void ovs_dp_masks_rebalance(struct work_struct *work);
+static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
+
/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
@@ -166,6 +168,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
free_percpu(dp->stats_percpu);
kfree(dp->ports);
ovs_meters_exit(dp);
+ kfree(dp->upcall_portids);
kfree(dp);
}
@@ -239,7 +242,12 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
- upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
+ if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+ upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id());
+ else
+ upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
if (unlikely(error))
@@ -1594,16 +1602,67 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+static int ovs_dp_set_upcall_portids(struct datapath *dp,
+ const struct nlattr *ids)
+{
+ struct dp_nlsk_pids *old, *dp_nlsk_pids;
+
+ if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
+ return -EINVAL;
+
+ old = ovsl_dereference(dp->upcall_portids);
+
+ dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
+ GFP_KERNEL);
+ if (!dp_nlsk_pids)
+ return -ENOMEM;
+
+ dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
+ nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
+
+ rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
+
+ kfree_rcu(old, rcu);
+
+ return 0;
+}
+
+u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
+{
+ struct dp_nlsk_pids *dp_nlsk_pids;
+
+ dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
+
+ if (dp_nlsk_pids) {
+ if (cpu_id < dp_nlsk_pids->n_pids) {
+ return dp_nlsk_pids->pids[cpu_id];
+ } else if (dp_nlsk_pids->n_pids > 0 && cpu_id >= dp_nlsk_pids->n_pids) {
+ /* If the number of netlink PIDs is mismatched with the number of
+ * CPUs as seen by the kernel, log this and send the upcall to an
+ * arbitrary socket (0) in order to not drop packets
+ */
+ pr_info_ratelimited("cpu_id mismatch with handler threads");
+ return dp_nlsk_pids->pids[cpu_id % dp_nlsk_pids->n_pids];
+ } else {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+}
+
static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
u32 user_features = 0;
+ int err;
if (a[OVS_DP_ATTR_USER_FEATURES]) {
user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
if (user_features & ~(OVS_DP_F_VPORT_PIDS |
OVS_DP_F_UNALIGNED |
- OVS_DP_F_TC_RECIRC_SHARING))
+ OVS_DP_F_TC_RECIRC_SHARING |
+ OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
return -EOPNOTSUPP;
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
@@ -1624,6 +1683,15 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
dp->user_features = user_features;
+ if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
+ a[OVS_DP_ATTR_PER_CPU_PIDS]) {
+ /* Upcall Netlink Port IDs have been updated */
+ err = ovs_dp_set_upcall_portids(dp,
+ a[OVS_DP_ATTR_PER_CPU_PIDS]);
+ if (err)
+ return err;
+ }
+
if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
static_branch_enable(&tc_recirc_sharing_support);
else
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 38f7d3e66ca6..fcfe6cb46441 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -51,6 +51,21 @@ struct dp_stats_percpu {
};
/**
+ * struct dp_nlsk_pids - array of netlink portids of for a datapath.
+ * This is used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU
+ * is enabled and must be protected by rcu.
+ * @rcu: RCU callback head for deferred destruction.
+ * @n_pids: Size of @pids array.
+ * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets
+ * that miss the flow table.
+ */
+struct dp_nlsk_pids {
+ struct rcu_head rcu;
+ u32 n_pids;
+ u32 pids[];
+};
+
+/**
* struct datapath - datapath for flow-based packet switching
* @rcu: RCU callback head for deferred destruction.
* @list_node: Element in global 'dps' list.
@@ -61,6 +76,7 @@ struct dp_stats_percpu {
* @net: Reference to net namespace.
* @max_headroom: the maximum headroom of all vports in this datapath; it will
* be used by all the internal vports in this dp.
+ * @upcall_portids: RCU protected 'struct dp_nlsk_pids'.
*
* Context: See the comment on locking at the top of datapath.c for additional
* locking information.
@@ -87,6 +103,8 @@ struct datapath {
/* Switch meters. */
struct dp_meter_table meter_tbl;
+
+ struct dp_nlsk_pids __rcu *upcall_portids;
};
/**
@@ -243,6 +261,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *,
const struct sw_flow_key *, const struct dp_upcall_info *,
uint32_t cutlen);
+u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id);
+
const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
u32 portid, u32 seq, u8 cmd);
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index e6f4a6202f82..e71847877248 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1153,14 +1153,14 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rc = put_user(len, (int __user *)argp);
break;
case SIOCGIFADDR:
- if (copy_from_user(&ifr, argp, sizeof(ifr))) {
+ if (get_user_ifreq(&ifr, NULL, argp)) {
rc = -EFAULT;
break;
}
sq = (struct sockaddr_qrtr *)&ifr.ifr_addr;
*sq = ipc->us;
- if (copy_to_user(argp, &ifr, sizeof(ifr))) {
+ if (put_user_ifreq(&ifr, argp)) {
rc = -EFAULT;
break;
}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d17a66aab8ee..998a2374f7ae 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1351,8 +1351,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
module_put(ops->owner);
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- return 0;
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
@@ -1423,8 +1421,6 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
- if (ret > 0)
- return 0;
return ret;
}
@@ -1481,7 +1477,6 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
- int err = 0;
skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
GFP_KERNEL);
@@ -1495,11 +1490,8 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
return -EINVAL;
}
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
- return err;
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e3e79e9bd706..1167cd0be179 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1870,13 +1870,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
}
if (unicast)
- err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = rtnl_unicast(skb, net, portid);
else
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
-
- if (err > 0)
- err = 0;
return err;
}
@@ -1909,15 +1906,13 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
}
if (unicast)
- err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = rtnl_unicast(skb, net, portid);
else
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
- if (err > 0)
- err = 0;
return err;
}
@@ -2711,13 +2706,11 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
}
if (unicast)
- err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = rtnl_unicast(skb, net, portid);
else
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
return err;
}
@@ -2741,7 +2734,7 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
}
if (unicast)
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ return rtnl_unicast(skb, net, portid);
return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f87d07736a14..5e90e9b160e3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1845,7 +1845,6 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
@@ -1856,11 +1855,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
return -EINVAL;
}
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
- return err;
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
static int tclass_del_notify(struct net *net,
@@ -1894,8 +1890,6 @@ static int tclass_del_notify(struct net *net,
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
return err;
}
diff --git a/net/socket.c b/net/socket.c
index 0b2dad3bdf7f..84de89c1ee9d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1088,6 +1088,8 @@ EXPORT_SYMBOL(vlan_ioctl_set);
static long sock_do_ioctl(struct net *net, struct socket *sock,
unsigned int cmd, unsigned long arg)
{
+ struct ifreq ifr;
+ bool need_copyout;
int err;
void __user *argp = (void __user *)arg;
@@ -1100,25 +1102,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
if (err != -ENOIOCTLCMD)
return err;
- if (cmd == SIOCGIFCONF) {
- struct ifconf ifc;
- if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
- return -EFAULT;
- rtnl_lock();
- err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
- rtnl_unlock();
- if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
- err = -EFAULT;
- } else {
- struct ifreq ifr;
- bool need_copyout;
- if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ if (!err && need_copyout)
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
return -EFAULT;
- err = dev_ioctl(net, cmd, &ifr, &need_copyout);
- if (!err && need_copyout)
- if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
- }
+
return err;
}
@@ -1217,6 +1207,11 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
cmd == SIOCGSTAMP_NEW,
false);
break;
+
+ case SIOCGIFCONF:
+ err = dev_ifconf(net, argp);
+ break;
+
default:
err = sock_do_ioctl(net, sock, cmd, arg);
break;
@@ -3126,154 +3121,55 @@ void socket_seq_show(struct seq_file *seq)
}
#endif /* CONFIG_PROC_FS */
-#ifdef CONFIG_COMPAT
-static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
+/* Handle the fact that while struct ifreq has the same *layout* on
+ * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
+ * which are handled elsewhere, it still has different *size* due to
+ * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
+ * resulting in struct ifreq being 32 and 40 bytes respectively).
+ * As a result, if the struct happens to be at the end of a page and
+ * the next page isn't readable/writable, we get a fault. To prevent
+ * that, copy back and forth to the full size.
+ */
+int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
{
- struct compat_ifconf ifc32;
- struct ifconf ifc;
- int err;
+ if (in_compat_syscall()) {
+ struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
- if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
- return -EFAULT;
+ memset(ifr, 0, sizeof(*ifr));
+ if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
+ return -EFAULT;
- ifc.ifc_len = ifc32.ifc_len;
- ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
+ if (ifrdata)
+ *ifrdata = compat_ptr(ifr32->ifr_data);
- rtnl_lock();
- err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
- rtnl_unlock();
- if (err)
- return err;
+ return 0;
+ }
- ifc32.ifc_len = ifc.ifc_len;
- if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
+ if (copy_from_user(ifr, arg, sizeof(*ifr)))
return -EFAULT;
+ if (ifrdata)
+ *ifrdata = ifr->ifr_data;
+
return 0;
}
+EXPORT_SYMBOL(get_user_ifreq);
-static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
+int put_user_ifreq(struct ifreq *ifr, void __user *arg)
{
- struct compat_ethtool_rxnfc __user *compat_rxnfc;
- bool convert_in = false, convert_out = false;
- size_t buf_size = 0;
- struct ethtool_rxnfc __user *rxnfc = NULL;
- struct ifreq ifr;
- u32 rule_cnt = 0, actual_rule_cnt;
- u32 ethcmd;
- u32 data;
- int ret;
-
- if (get_user(data, &ifr32->ifr_ifru.ifru_data))
- return -EFAULT;
+ size_t size = sizeof(*ifr);
- compat_rxnfc = compat_ptr(data);
+ if (in_compat_syscall())
+ size = sizeof(struct compat_ifreq);
- if (get_user(ethcmd, &compat_rxnfc->cmd))
+ if (copy_to_user(arg, ifr, size))
return -EFAULT;
- /* Most ethtool structures are defined without padding.
- * Unfortunately struct ethtool_rxnfc is an exception.
- */
- switch (ethcmd) {
- default:
- break;
- case ETHTOOL_GRXCLSRLALL:
- /* Buffer size is variable */
- if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
- return -EFAULT;
- if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
- return -ENOMEM;
- buf_size += rule_cnt * sizeof(u32);
- fallthrough;
- case ETHTOOL_GRXRINGS:
- case ETHTOOL_GRXCLSRLCNT:
- case ETHTOOL_GRXCLSRULE:
- case ETHTOOL_SRXCLSRLINS:
- convert_out = true;
- fallthrough;
- case ETHTOOL_SRXCLSRLDEL:
- buf_size += sizeof(struct ethtool_rxnfc);
- convert_in = true;
- rxnfc = compat_alloc_user_space(buf_size);
- break;
- }
-
- if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
- return -EFAULT;
-
- ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
-
- if (convert_in) {
- /* We expect there to be holes between fs.m_ext and
- * fs.ring_cookie and at the end of fs, but nowhere else.
- */
- BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
- sizeof(compat_rxnfc->fs.m_ext) !=
- offsetof(struct ethtool_rxnfc, fs.m_ext) +
- sizeof(rxnfc->fs.m_ext));
- BUILD_BUG_ON(
- offsetof(struct compat_ethtool_rxnfc, fs.location) -
- offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
- offsetof(struct ethtool_rxnfc, fs.location) -
- offsetof(struct ethtool_rxnfc, fs.ring_cookie));
-
- if (copy_in_user(rxnfc, compat_rxnfc,
- (void __user *)(&rxnfc->fs.m_ext + 1) -
- (void __user *)rxnfc) ||
- copy_in_user(&rxnfc->fs.ring_cookie,
- &compat_rxnfc->fs.ring_cookie,
- (void __user *)(&rxnfc->fs.location + 1) -
- (void __user *)&rxnfc->fs.ring_cookie))
- return -EFAULT;
- if (ethcmd == ETHTOOL_GRXCLSRLALL) {
- if (put_user(rule_cnt, &rxnfc->rule_cnt))
- return -EFAULT;
- } else if (copy_in_user(&rxnfc->rule_cnt,
- &compat_rxnfc->rule_cnt,
- sizeof(rxnfc->rule_cnt)))
- return -EFAULT;
- }
-
- ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
- if (ret)
- return ret;
-
- if (convert_out) {
- if (copy_in_user(compat_rxnfc, rxnfc,
- (const void __user *)(&rxnfc->fs.m_ext + 1) -
- (const void __user *)rxnfc) ||
- copy_in_user(&compat_rxnfc->fs.ring_cookie,
- &rxnfc->fs.ring_cookie,
- (const void __user *)(&rxnfc->fs.location + 1) -
- (const void __user *)&rxnfc->fs.ring_cookie) ||
- copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
- sizeof(rxnfc->rule_cnt)))
- return -EFAULT;
-
- if (ethcmd == ETHTOOL_GRXCLSRLALL) {
- /* As an optimisation, we only copy the actual
- * number of rules that the underlying
- * function returned. Since Mallory might
- * change the rule count in user memory, we
- * check that it is less than the rule count
- * originally given (as the user buffer size),
- * which has been range-checked.
- */
- if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
- return -EFAULT;
- if (actual_rule_cnt < rule_cnt)
- rule_cnt = actual_rule_cnt;
- if (copy_in_user(&compat_rxnfc->rule_locs[0],
- &rxnfc->rule_locs[0],
- rule_cnt * sizeof(u32)))
- return -EFAULT;
- }
- }
-
return 0;
}
+EXPORT_SYMBOL(put_user_ifreq);
+#ifdef CONFIG_COMPAT
static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
{
compat_uptr_t uptr32;
@@ -3281,7 +3177,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
void __user *saved;
int err;
- if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
+ if (get_user_ifreq(&ifr, NULL, uifr32))
return -EFAULT;
if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
@@ -3293,7 +3189,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
if (!err) {
ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
- if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
+ if (put_user_ifreq(&ifr, uifr32))
err = -EFAULT;
}
return err;
@@ -3317,83 +3213,28 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
unsigned int cmd,
+ unsigned long arg,
struct compat_ifreq __user *uifr32)
{
- struct ifreq __user *uifr;
+ struct ifreq ifr;
+ bool need_copyout;
int err;
- /* Handle the fact that while struct ifreq has the same *layout* on
- * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
- * which are handled elsewhere, it still has different *size* due to
- * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
- * resulting in struct ifreq being 32 and 40 bytes respectively).
- * As a result, if the struct happens to be at the end of a page and
- * the next page isn't readable/writable, we get a fault. To prevent
- * that, copy back and forth to the full size.
- */
-
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
- return -EFAULT;
-
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
-
- if (!err) {
- switch (cmd) {
- case SIOCGIFFLAGS:
- case SIOCGIFMETRIC:
- case SIOCGIFMTU:
- case SIOCGIFMEM:
- case SIOCGIFHWADDR:
- case SIOCGIFINDEX:
- case SIOCGIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCGIFDSTADDR:
- case SIOCGIFNETMASK:
- case SIOCGIFPFLAGS:
- case SIOCGIFTXQLEN:
- case SIOCGMIIPHY:
- case SIOCGMIIREG:
- case SIOCGIFNAME:
- if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
- err = -EFAULT;
- break;
- }
- }
- return err;
-}
+ err = sock->ops->ioctl(sock, cmd, arg);
-static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
- struct compat_ifreq __user *uifr32)
-{
- struct ifreq ifr;
- struct compat_ifmap __user *uifmap32;
- int err;
+ /* If this ioctl is unknown try to hand it down
+ * to the NIC driver.
+ */
+ if (err != -ENOIOCTLCMD)
+ return err;
- uifmap32 = &uifr32->ifr_ifru.ifru_map;
- err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
- err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= get_user(ifr.ifr_map.port, &uifmap32->port);
- if (err)
+ if (get_user_ifreq(&ifr, NULL, uifr32))
return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ if (!err && need_copyout)
+ if (put_user_ifreq(&ifr, uifr32))
+ return -EFAULT;
- err = dev_ioctl(net, cmd, &ifr, NULL);
-
- if (cmd == SIOCGIFMAP && !err) {
- err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
- err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= put_user(ifr.ifr_map.port, &uifmap32->port);
- if (err)
- err = -EFAULT;
- }
return err;
}
@@ -3426,15 +3267,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCSIFBR:
case SIOCGIFBR:
return old_bridge_ioctl(argp);
- case SIOCGIFCONF:
- return compat_dev_ifconf(net, argp);
- case SIOCETHTOOL:
- return ethtool_ioctl(net, argp);
case SIOCWANDEV:
return compat_siocwandev(net, argp);
- case SIOCGIFMAP:
- case SIOCSIFMAP:
- return compat_sioc_ifmap(net, cmd, argp);
case SIOCGSTAMP_OLD:
case SIOCGSTAMPNS_OLD:
if (!sock->ops->gettstamp)
@@ -3442,6 +3276,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
!COMPAT_USE_64BIT_TIME);
+ case SIOCETHTOOL:
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
case SIOCSHWTSTAMP:
@@ -3459,10 +3294,13 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGSKNS:
case SIOCGSTAMP_NEW:
case SIOCGSTAMPNS_NEW:
+ case SIOCGIFCONF:
return sock_ioctl(file, cmd, arg);
case SIOCGIFFLAGS:
case SIOCSIFFLAGS:
+ case SIOCGIFMAP:
+ case SIOCSIFMAP:
case SIOCGIFMETRIC:
case SIOCSIFMETRIC:
case SIOCGIFMTU:
@@ -3499,7 +3337,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCBONDRELEASE:
case SIOCBONDSETHWADDR:
case SIOCBONDCHANGEACTIVE:
- return compat_ifreq_ioctl(net, sock, cmd, argp);
+ return compat_ifreq_ioctl(net, sock, cmd, arg, argp);
case SIOCSARP:
case SIOCGARP:
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 070698dd19bc..0ae3478561f4 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -378,6 +378,266 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
+struct switchdev_nested_priv {
+ bool (*check_cb)(const struct net_device *dev);
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev);
+ const struct net_device *dev;
+ struct net_device *lower_dev;
+};
+
+static int switchdev_lower_dev_walk(struct net_device *lower_dev,
+ struct netdev_nested_priv *priv)
+{
+ struct switchdev_nested_priv *switchdev_priv = priv->data;
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev);
+ bool (*check_cb)(const struct net_device *dev);
+ const struct net_device *dev;
+
+ check_cb = switchdev_priv->check_cb;
+ foreign_dev_check_cb = switchdev_priv->foreign_dev_check_cb;
+ dev = switchdev_priv->dev;
+
+ if (check_cb(lower_dev) && !foreign_dev_check_cb(lower_dev, dev)) {
+ switchdev_priv->lower_dev = lower_dev;
+ return 1;
+ }
+
+ return 0;
+}
+
+static struct net_device *
+switchdev_lower_dev_find(struct net_device *dev,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev))
+{
+ struct switchdev_nested_priv switchdev_priv = {
+ .check_cb = check_cb,
+ .foreign_dev_check_cb = foreign_dev_check_cb,
+ .dev = dev,
+ .lower_dev = NULL,
+ };
+ struct netdev_nested_priv priv = {
+ .data = &switchdev_priv,
+ };
+
+ netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv);
+
+ return switchdev_priv.lower_dev;
+}
+
+static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
+ const struct net_device *orig_dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ const struct switchdev_notifier_info *info = &fdb_info->info;
+ struct net_device *br, *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (check_cb(dev))
+ return add_cb(dev, orig_dev, info->ctx, fdb_info);
+
+ if (netif_is_lag_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ goto maybe_bridged_with_us;
+
+ /* This is a LAG interface that we offload */
+ if (!lag_add_cb)
+ return -EOPNOTSUPP;
+
+ return lag_add_cb(dev, orig_dev, info->ctx, fdb_info);
+ }
+
+ /* Recurse through lower interfaces in case the FDB entry is pointing
+ * towards a bridge device.
+ */
+ if (netif_is_bridge_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ /* This is a bridge interface that we offload */
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ /* Do not propagate FDB entries across bridges */
+ if (netif_is_bridge_master(lower_dev))
+ continue;
+
+ /* Bridge ports might be either us, or LAG interfaces
+ * that we offload.
+ */
+ if (!check_cb(lower_dev) &&
+ !switchdev_lower_dev_find(lower_dev, check_cb,
+ foreign_dev_check_cb))
+ continue;
+
+ err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev,
+ fdb_info, check_cb,
+ foreign_dev_check_cb,
+ add_cb, lag_add_cb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ return 0;
+ }
+
+maybe_bridged_with_us:
+ /* Event is neither on a bridge nor a LAG. Check whether it is on an
+ * interface that is in a bridge with us.
+ */
+ br = netdev_master_upper_dev_get_rcu(dev);
+ if (!br || !netif_is_bridge_master(br))
+ return 0;
+
+ if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ return __switchdev_handle_fdb_add_to_device(br, orig_dev, fdb_info,
+ check_cb, foreign_dev_check_cb,
+ add_cb, lag_add_cb);
+}
+
+int switchdev_handle_fdb_add_to_device(struct net_device *dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ int err;
+
+ err = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info,
+ check_cb,
+ foreign_dev_check_cb,
+ add_cb, lag_add_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device);
+
+static int __switchdev_handle_fdb_del_to_device(struct net_device *dev,
+ const struct net_device *orig_dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ const struct switchdev_notifier_info *info = &fdb_info->info;
+ struct net_device *br, *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (check_cb(dev))
+ return del_cb(dev, orig_dev, info->ctx, fdb_info);
+
+ if (netif_is_lag_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ goto maybe_bridged_with_us;
+
+ /* This is a LAG interface that we offload */
+ if (!lag_del_cb)
+ return -EOPNOTSUPP;
+
+ return lag_del_cb(dev, orig_dev, info->ctx, fdb_info);
+ }
+
+ /* Recurse through lower interfaces in case the FDB entry is pointing
+ * towards a bridge device.
+ */
+ if (netif_is_bridge_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ /* This is a bridge interface that we offload */
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ /* Do not propagate FDB entries across bridges */
+ if (netif_is_bridge_master(lower_dev))
+ continue;
+
+ /* Bridge ports might be either us, or LAG interfaces
+ * that we offload.
+ */
+ if (!check_cb(lower_dev) &&
+ !switchdev_lower_dev_find(lower_dev, check_cb,
+ foreign_dev_check_cb))
+ continue;
+
+ err = __switchdev_handle_fdb_del_to_device(lower_dev, orig_dev,
+ fdb_info, check_cb,
+ foreign_dev_check_cb,
+ del_cb, lag_del_cb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ return 0;
+ }
+
+maybe_bridged_with_us:
+ /* Event is neither on a bridge nor a LAG. Check whether it is on an
+ * interface that is in a bridge with us.
+ */
+ br = netdev_master_upper_dev_get_rcu(dev);
+ if (!br || !netif_is_bridge_master(br))
+ return 0;
+
+ if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ return __switchdev_handle_fdb_del_to_device(br, orig_dev, fdb_info,
+ check_cb, foreign_dev_check_cb,
+ del_cb, lag_del_cb);
+}
+
+int switchdev_handle_fdb_del_to_device(struct net_device *dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ int err;
+
+ err = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info,
+ check_cb,
+ foreign_dev_check_cb,
+ del_cb, lag_del_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device);
+
static int __switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 34a97ea36cc8..9b0b311c7ec1 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
bool connected = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+ struct tipc_skb_cb *skb_cb;
struct sk_buff_head xmitq;
struct tipc_msg *hdr;
struct sk_buff *skb;
@@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
if (unlikely(rc))
goto exit;
skb = skb_peek(&sk->sk_receive_queue);
+ skb_cb = TIPC_SKB_CB(skb);
hdr = buf_msg(skb);
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
@@ -1922,18 +1924,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
- copy = min_t(int, dlen, buflen);
- if (unlikely(copy != dlen))
- m->msg_flags |= MSG_TRUNC;
- rc = skb_copy_datagram_msg(skb, hlen, m, copy);
+ int offset = skb_cb->bytes_read;
+
+ copy = min_t(int, dlen - offset, buflen);
+ rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+ if (unlikely(rc))
+ goto exit;
+ if (unlikely(offset + copy < dlen)) {
+ if (flags & MSG_EOR) {
+ if (!(flags & MSG_PEEK))
+ skb_cb->bytes_read = offset + copy;
+ } else {
+ m->msg_flags |= MSG_TRUNC;
+ skb_cb->bytes_read = 0;
+ }
+ } else {
+ if (flags & MSG_EOR)
+ m->msg_flags |= MSG_EOR;
+ skb_cb->bytes_read = 0;
+ }
} else {
copy = 0;
rc = 0;
- if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
+ if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) {
rc = -ECONNRESET;
+ goto exit;
+ }
}
- if (unlikely(rc))
- goto exit;
/* Mark message as group event if applicable */
if (unlikely(grp_evt)) {
@@ -1956,9 +1973,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
tipc_node_distr_xmit(sock_net(sk), &xmitq);
}
- tsk_advance_rx_queue(sk);
+ if (!skb_cb->bytes_read)
+ tsk_advance_rx_queue(sk);
- if (likely(!connected))
+ if (likely(!connected) || skb_cb->bytes_read)
goto exit;
/* Send connection flow control advertisement when applicable */
diff --git a/net/unix/Makefile b/net/unix/Makefile
index 54e58cc4f945..20491825b4d0 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_UNIX) += unix.o
unix-y := af_unix.o garbage.o
unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o
+unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 23c92ad15c61..89927678c0dc 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -494,6 +494,7 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
sk_error_report(other);
}
}
+ sk->sk_state = other->sk_state = TCP_CLOSE;
}
static void unix_sock_destructor(struct sock *sk)
@@ -669,6 +670,8 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -746,6 +749,7 @@ static const struct proto_ops unix_dgram_ops = {
.listen = sock_no_listen,
.shutdown = unix_shutdown,
.sendmsg = unix_dgram_sendmsg,
+ .read_sock = unix_read_sock,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
@@ -777,10 +781,21 @@ static const struct proto_ops unix_seqpacket_ops = {
.show_fdinfo = unix_show_fdinfo,
};
-static struct proto unix_proto = {
+static void unix_close(struct sock *sk, long timeout)
+{
+ /* Nothing to do here, unix socket does not need a ->close().
+ * This is merely for sockmap.
+ */
+}
+
+struct proto unix_proto = {
.name = "UNIX",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
+ .close = unix_close,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = unix_bpf_update_proto,
+#endif
};
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
@@ -864,6 +879,7 @@ static int unix_release(struct socket *sock)
if (!sk)
return 0;
+ sk->sk_prot->close(sk, 0);
unix_release_sock(sk, 0);
sock->sk = NULL;
@@ -1199,6 +1215,9 @@ restart:
unix_peer(sk) = other;
unix_state_double_unlock(sk, other);
}
+
+ if (unix_peer(sk))
+ sk->sk_state = other->sk_state = TCP_ESTABLISHED;
return 0;
out_unlock:
@@ -1431,12 +1450,10 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
init_peercred(ska);
init_peercred(skb);
- if (ska->sk_type != SOCK_DGRAM) {
- ska->sk_state = TCP_ESTABLISHED;
- skb->sk_state = TCP_ESTABLISHED;
- socka->state = SS_CONNECTED;
- sockb->state = SS_CONNECTED;
- }
+ ska->sk_state = TCP_ESTABLISHED;
+ skb->sk_state = TCP_ESTABLISHED;
+ socka->state = SS_CONNECTED;
+ sockb->state = SS_CONNECTED;
return 0;
}
@@ -2081,11 +2098,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
}
}
-static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+ int flags)
{
struct scm_cookie scm;
- struct sock *sk = sock->sk;
+ struct socket *sock = sk->sk_socket;
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb, *last;
long timeo;
@@ -2188,6 +2205,53 @@ out:
return err;
}
+static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
+{
+ struct sock *sk = sock->sk;
+
+#ifdef CONFIG_BPF_SYSCALL
+ if (sk->sk_prot != &unix_proto)
+ return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT, NULL);
+#endif
+ return __unix_dgram_recvmsg(sk, msg, size, flags);
+}
+
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ int copied = 0;
+
+ while (1) {
+ struct unix_sock *u = unix_sk(sk);
+ struct sk_buff *skb;
+ int used, err;
+
+ mutex_lock(&u->iolock);
+ skb = skb_recv_datagram(sk, 0, 1, &err);
+ mutex_unlock(&u->iolock);
+ if (!skb)
+ return err;
+
+ used = recv_actor(desc, skb, 0, skb->len);
+ if (used <= 0) {
+ if (!copied)
+ copied = used;
+ kfree_skb(skb);
+ break;
+ } else if (used <= skb->len) {
+ copied += used;
+ }
+
+ kfree_skb(skb);
+ if (!desc->count)
+ break;
+ }
+
+ return copied;
+}
+
/*
* Sleep until more data has arrived. But check for races..
*/
@@ -2925,6 +2989,7 @@ static int __init af_unix_init(void)
sock_register(&unix_family_ops);
register_pernet_subsys(&unix_net_ops);
+ unix_bpf_build_proto();
out:
return rc;
}
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
new file mode 100644
index 000000000000..db0cda29fb2f
--- /dev/null
+++ b/net/unix/unix_bpf.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
+
+#include <linux/skmsg.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <net/af_unix.h>
+
+#define unix_sk_has_data(__sk, __psock) \
+ ({ !skb_queue_empty(&__sk->sk_receive_queue) || \
+ !skb_queue_empty(&__psock->ingress_skb) || \
+ !list_empty(&__psock->ingress_msg); \
+ })
+
+static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
+ long timeo)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ struct unix_sock *u = unix_sk(sk);
+ int ret = 0;
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return 1;
+
+ if (!timeo)
+ return ret;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ if (!unix_sk_has_data(sk, psock)) {
+ mutex_unlock(&u->iolock);
+ wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+ mutex_lock(&u->iolock);
+ ret = unix_sk_has_data(sk, psock);
+ }
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return ret;
+}
+
+static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int nonblock, int flags,
+ int *addr_len)
+{
+ struct unix_sock *u = unix_sk(sk);
+ struct sk_psock *psock;
+ int copied, ret;
+
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ return __unix_dgram_recvmsg(sk, msg, len, flags);
+
+ mutex_lock(&u->iolock);
+ if (!skb_queue_empty(&sk->sk_receive_queue) &&
+ sk_psock_queue_empty(psock)) {
+ ret = __unix_dgram_recvmsg(sk, msg, len, flags);
+ goto out;
+ }
+
+msg_bytes_ready:
+ copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ if (!copied) {
+ long timeo;
+ int data;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+ data = unix_msg_wait_data(sk, psock, timeo);
+ if (data) {
+ if (!sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+ ret = __unix_dgram_recvmsg(sk, msg, len, flags);
+ goto out;
+ }
+ copied = -EAGAIN;
+ }
+ ret = copied;
+out:
+ mutex_unlock(&u->iolock);
+ sk_psock_put(sk, psock);
+ return ret;
+}
+
+static struct proto *unix_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_prot_lock);
+static struct proto unix_bpf_prot;
+
+static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
+{
+ *prot = *base;
+ prot->close = sock_map_close;
+ prot->recvmsg = unix_dgram_bpf_recvmsg;
+}
+
+static void unix_bpf_check_needs_rebuild(struct proto *ops)
+{
+ if (unlikely(ops != smp_load_acquire(&unix_prot_saved))) {
+ spin_lock_bh(&unix_prot_lock);
+ if (likely(ops != unix_prot_saved)) {
+ unix_bpf_rebuild_protos(&unix_bpf_prot, ops);
+ smp_store_release(&unix_prot_saved, ops);
+ }
+ spin_unlock_bh(&unix_prot_lock);
+ }
+}
+
+int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+{
+ if (restore) {
+ sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ return 0;
+ }
+
+ unix_bpf_check_needs_rebuild(psock->sk_proto);
+ WRITE_ONCE(sk->sk_prot, &unix_bpf_prot);
+ return 0;
+}
+
+void __init unix_bpf_build_proto(void)
+{
+ unix_bpf_rebuild_protos(&unix_bpf_prot, &unix_proto);
+}