From f144febd93d5ee534fdf23505ab091b2b9088edc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 10 Oct 2013 15:57:59 -0400 Subject: bridge: update mdb expiration timer upon reports. commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b bridge: only expire the mdb entry when query is received changed the mdb expiration timer to be armed only when QUERY is received. Howerver, this causes issues in an environment where the multicast server socket comes and goes very fast while a client is trying to send traffic to it. The root cause is a race where a sequence of LEAVE followed by REPORT messages can race against QUERY messages generated in response to LEAVE. The QUERY ends up starting the expiration timer, and that timer can potentially expire after the new REPORT message has been received signaling the new join operation. This leads to a significant drop in multicast traffic and possible complete stall. The solution is to have REPORT messages update the expiration timer on entries that already exist. CC: Cong Wang CC: Herbert Xu CC: Stephen Hemminger Signed-off-by: Vlad Yasevich Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net/bridge') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d1c578630678..1085f2180f3a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -611,6 +611,9 @@ rehash: break; default: + /* If we have an existing entry, update it's expire timer */ + mod_timer(&mp->timer, + jiffies + br->multicast_membership_interval); goto out; } @@ -680,8 +683,12 @@ static int br_multicast_add_group(struct net_bridge *br, for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { - if (p->port == port) + if (p->port == port) { + /* We already have a portgroup, update the timer. */ + mod_timer(&p->timer, + jiffies + br->multicast_membership_interval); goto out; + } if ((unsigned long)p->port < (unsigned long)port) break; } -- cgit v1.2.3 From 4b6c7879d84ad06a2ac5b964808ed599187a188d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 15 Oct 2013 14:57:45 -0400 Subject: bridge: Correctly clamp MAX forward_delay when enabling STP Commit be4f154d5ef0ca147ab6bcd38857a774133f5450 bridge: Clamp forward_delay when enabling STP had a typo when attempting to clamp maximum forward delay. It is possible to set bridge_forward_delay to be higher then permitted maximum when STP is off. When turning STP on, the higher then allowed delay has to be clamed down to max value. CC: Herbert Xu CC: Stephen Hemminger Signed-off-by: Vlad Yasevich Reviewed-by: Veaceslav Falico Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_stp_if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/bridge') diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 108084a04671..656a6f3e40de 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -134,7 +134,7 @@ static void br_stp_start(struct net_bridge *br) if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY) __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY); - else if (br->bridge_forward_delay < BR_MAX_FORWARD_DELAY) + else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); if (r == 0) { -- cgit v1.2.3 From 8adff41c3d259eb5e313b7b04669eee545925154 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Wed, 16 Oct 2013 17:07:13 +0900 Subject: bridge: Don't use VID 0 and 4095 in vlan filtering IEEE 802.1Q says that: - VID 0 shall not be configured as a PVID, or configured in any Filtering Database entry. - VID 4095 shall not be configured as a PVID, or transmitted in a tag header. This VID value may be used to indicate a wildcard match for the VID in management operations or Filtering Database entries. (See IEEE 802.1Q-2011 6.9.1 and Table 9-2) Don't accept adding these VIDs in the vlan_filtering implementation. Signed-off-by: Toshiaki Makita Reviewed-by: Vlad Yasevich Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 4 +- net/bridge/br_netlink.c | 2 +- net/bridge/br_vlan.c | 97 +++++++++++++++++++++++-------------------------- 3 files changed, 49 insertions(+), 54 deletions(-) (limited to 'net/bridge') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ffd5874f2592..33e8f23acddd 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -700,7 +700,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], vid = nla_get_u16(tb[NDA_VLAN]); - if (vid >= VLAN_N_VID) { + if (!vid || vid >= VLAN_VID_MASK) { pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", vid); return -EINVAL; @@ -794,7 +794,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], vid = nla_get_u16(tb[NDA_VLAN]); - if (vid >= VLAN_N_VID) { + if (!vid || vid >= VLAN_VID_MASK) { pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n", vid); return -EINVAL; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e74ddc1c29a8..f75d92e4f96b 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -243,7 +243,7 @@ static int br_afspec(struct net_bridge *br, vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); - if (vinfo->vid >= VLAN_N_VID) + if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) return -EINVAL; switch (cmd) { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 9a9ffe7e4019..21b6d217872b 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -45,37 +45,34 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) return 0; } - if (vid) { - if (v->port_idx) { - p = v->parent.port; - br = p->br; - dev = p->dev; - } else { - br = v->parent.br; - dev = br->dev; - } - ops = dev->netdev_ops; - - if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { - /* Add VLAN to the device filter if it is supported. - * Stricly speaking, this is not necessary now, since - * devices are made promiscuous by the bridge, but if - * that ever changes this code will allow tagged - * traffic to enter the bridge. - */ - err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), - vid); - if (err) - return err; - } - - err = br_fdb_insert(br, p, dev->dev_addr, vid); - if (err) { - br_err(br, "failed insert local address into bridge " - "forwarding table\n"); - goto out_filt; - } + if (v->port_idx) { + p = v->parent.port; + br = p->br; + dev = p->dev; + } else { + br = v->parent.br; + dev = br->dev; + } + ops = dev->netdev_ops; + + if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { + /* Add VLAN to the device filter if it is supported. + * Stricly speaking, this is not necessary now, since + * devices are made promiscuous by the bridge, but if + * that ever changes this code will allow tagged + * traffic to enter the bridge. + */ + err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), + vid); + if (err) + return err; + } + err = br_fdb_insert(br, p, dev->dev_addr, vid); + if (err) { + br_err(br, "failed insert local address into bridge " + "forwarding table\n"); + goto out_filt; } set_bit(vid, v->vlan_bitmap); @@ -98,7 +95,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) __vlan_delete_pvid(v, vid); clear_bit(vid, v->untagged_bitmap); - if (v->port_idx && vid) { + if (v->port_idx) { struct net_device *dev = v->parent.port->dev; const struct net_device_ops *ops = dev->netdev_ops; @@ -248,7 +245,9 @@ bool br_allowed_egress(struct net_bridge *br, return false; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; @@ -278,7 +277,9 @@ out: return err; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int br_vlan_delete(struct net_bridge *br, u16 vid) { struct net_port_vlans *pv; @@ -289,14 +290,9 @@ int br_vlan_delete(struct net_bridge *br, u16 vid) if (!pv) return -EINVAL; - if (vid) { - /* If the VID !=0 remove fdb for this vid. VID 0 is special - * in that it's the default and is always there in the fdb. - */ - spin_lock_bh(&br->hash_lock); - fdb_delete_by_addr(br, br->dev->dev_addr, vid); - spin_unlock_bh(&br->hash_lock); - } + spin_lock_bh(&br->hash_lock); + fdb_delete_by_addr(br, br->dev->dev_addr, vid); + spin_unlock_bh(&br->hash_lock); __vlan_del(pv, vid); return 0; @@ -329,7 +325,9 @@ unlock: return 0; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { struct net_port_vlans *pv = NULL; @@ -363,7 +361,9 @@ clean_up: return err; } -/* Must be protected by RTNL */ +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) { struct net_port_vlans *pv; @@ -374,14 +374,9 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) if (!pv) return -EINVAL; - if (vid) { - /* If the VID !=0 remove fdb for this vid. VID 0 is special - * in that it's the default and is always there in the fdb. - */ - spin_lock_bh(&port->br->hash_lock); - fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); - spin_unlock_bh(&port->br->hash_lock); - } + spin_lock_bh(&port->br->hash_lock); + fdb_delete_by_addr(port->br, port->dev->dev_addr, vid); + spin_unlock_bh(&port->br->hash_lock); return __vlan_del(pv, vid); } -- cgit v1.2.3 From b90356ce17c2b199cd55530cb9c3cfabe18dbdc3 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Wed, 16 Oct 2013 17:07:14 +0900 Subject: bridge: Apply the PVID to priority-tagged frames IEEE 802.1Q says that when we receive priority-tagged (VID 0) frames use the PVID for the port as its VID. (See IEEE 802.1Q-2011 6.9.1 and Table 9-2) Apply the PVID to not only untagged frames but also priority-tagged frames. Signed-off-by: Toshiaki Makita Reviewed-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'net/bridge') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 21b6d217872b..5a9c44a0c306 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -189,6 +189,8 @@ out: bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, struct sk_buff *skb, u16 *vid) { + int err; + /* If VLAN filtering is disabled on the bridge, all packets are * permitted. */ @@ -201,20 +203,31 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) return false; - if (br_vlan_get_tag(skb, vid)) { + err = br_vlan_get_tag(skb, vid); + if (!*vid) { u16 pvid = br_get_pvid(v); - /* Frame did not have a tag. See if pvid is set - * on this port. That tells us which vlan untagged - * traffic belongs to. + /* Frame had a tag with VID 0 or did not have a tag. + * See if pvid is set on this port. That tells us which + * vlan untagged or priority-tagged traffic belongs to. */ if (pvid == VLAN_N_VID) return false; - /* PVID is set on this port. Any untagged ingress - * frame is considered to belong to this vlan. + /* PVID is set on this port. Any untagged or priority-tagged + * ingress frame is considered to belong to this vlan. */ - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); + if (likely(err)) + /* Untagged Frame. */ + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); + else + /* Priority-tagged Frame. + * At this point, We know that skb->vlan_tci had + * VLAN_TAG_PRESENT bit and its VID field was 0x000. + * We update only VID field and preserve PCP field. + */ + skb->vlan_tci |= pvid; + return true; } -- cgit v1.2.3 From d1c6c708c4da9d104e0b7c116654cb449bff9b5f Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Wed, 16 Oct 2013 17:07:15 +0900 Subject: bridge: Fix the way the PVID is referenced We are using the VLAN_TAG_PRESENT bit to detect whether the PVID is set or not at br_get_pvid(), while we don't care about the bit in adding/deleting the PVID, which makes it impossible to forward any incomming untagged frame with vlan_filtering enabled. Since vid 0 cannot be used for the PVID, we can use vid 0 to indicate that the PVID is not set, which is slightly more efficient than using the VLAN_TAG_PRESENT. Fix the problem by getting rid of using the VLAN_TAG_PRESENT. Signed-off-by: Toshiaki Makita Reviewed-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_private.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/bridge') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index efb57d911569..7ca2ae45b2d5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -643,9 +643,7 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v) * vid wasn't set */ smp_rmb(); - return (v->pvid & VLAN_TAG_PRESENT) ? - (v->pvid & ~VLAN_TAG_PRESENT) : - VLAN_N_VID; + return v->pvid ?: VLAN_N_VID; } #else -- cgit v1.2.3 From dfb5fa32c66496a53ec6a45302d902416b51ade2 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Wed, 16 Oct 2013 17:07:16 +0900 Subject: bridge: Fix updating FDB entries when the PVID is applied We currently set the value that variable vid is pointing, which will be used in FDB later, to 0 at br_allowed_ingress() when we receive untagged or priority-tagged frames, even though the PVID is valid. This leads to FDB updates in such a wrong way that they are learned with VID 0. Update the value to that of PVID if the PVID is applied. Signed-off-by: Toshiaki Makita Reviewed-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_vlan.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/bridge') diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 5a9c44a0c306..53f0990eab58 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -217,6 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* PVID is set on this port. Any untagged or priority-tagged * ingress frame is considered to belong to this vlan. */ + *vid = pvid; if (likely(err)) /* Untagged Frame. */ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid); -- cgit v1.2.3 From 454594f3b93a49ef568cd190c5af31376b105a7b Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Sun, 20 Oct 2013 00:58:57 +0200 Subject: Revert "bridge: only expire the mdb entry when query is received" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While this commit was a good attempt to fix issues occuring when no multicast querier is present, this commit still has two more issues: 1) There are cases where mdb entries do not expire even if there is a querier present. The bridge will unnecessarily continue flooding multicast packets on the according ports. 2) Never removing an mdb entry could be exploited for a Denial of Service by an attacker on the local link, slowly, but steadily eating up all memory. Actually, this commit became obsolete with "bridge: disable snooping if there is no querier" (b00589af3b) which included fixes for a few more cases. Therefore reverting the following commits (the commit stated in the commit message plus three of its follow up fixes): ==================== Revert "bridge: update mdb expiration timer upon reports." This reverts commit f144febd93d5ee534fdf23505ab091b2b9088edc. Revert "bridge: do not call setup_timer() multiple times" This reverts commit 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1. Revert "bridge: fix some kernel warning in multicast timer" This reverts commit c7e8e8a8f7a70b343ca1e0f90a31e35ab2d16de1. Revert "bridge: only expire the mdb entry when query is received" This reverts commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b. ==================== CC: Cong Wang Signed-off-by: Linus Lüssing Reviewed-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/bridge/br_mdb.c | 2 +- net/bridge/br_multicast.c | 47 +++++++++++++++++++++++++++-------------------- net/bridge/br_private.h | 1 - 3 files changed, 28 insertions(+), 22 deletions(-) (limited to 'net/bridge') diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 85a09bb5ca51..b7b1914dfa25 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -453,7 +453,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) call_rcu_bh(&p->rcu, br_multicast_free_pg); err = 0; - if (!mp->ports && !mp->mglist && mp->timer_armed && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); break; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 1085f2180f3a..8b0b610ca2c9 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -272,7 +272,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && !mp->mglist && mp->timer_armed && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); @@ -611,9 +611,6 @@ rehash: break; default: - /* If we have an existing entry, update it's expire timer */ - mod_timer(&mp->timer, - jiffies + br->multicast_membership_interval); goto out; } @@ -623,7 +620,6 @@ rehash: mp->br = br; mp->addr = *group; - setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); @@ -663,6 +659,7 @@ static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; + unsigned long now = jiffies; int err; spin_lock(&br->multicast_lock); @@ -677,18 +674,15 @@ static int br_multicast_add_group(struct net_bridge *br, if (!port) { mp->mglist = true; + mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { - if (p->port == port) { - /* We already have a portgroup, update the timer. */ - mod_timer(&p->timer, - jiffies + br->multicast_membership_interval); - goto out; - } + if (p->port == port) + goto found; if ((unsigned long)p->port < (unsigned long)port) break; } @@ -699,6 +693,8 @@ static int br_multicast_add_group(struct net_bridge *br, rcu_assign_pointer(*pp, p); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); +found: + mod_timer(&p->timer, now + br->multicast_membership_interval); out: err = 0; @@ -1198,9 +1194,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!mp) goto out; - mod_timer(&mp->timer, now + br->multicast_membership_interval); - mp->timer_armed = true; - max_delay *= br->multicast_last_member_count; if (mp->mglist && @@ -1277,9 +1270,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!mp) goto out; - mod_timer(&mp->timer, now + br->multicast_membership_interval); - mp->timer_armed = true; - max_delay *= br->multicast_last_member_count; if (mp->mglist && (timer_pending(&mp->timer) ? @@ -1365,7 +1355,7 @@ static void br_multicast_leave_group(struct net_bridge *br, call_rcu_bh(&p->rcu, br_multicast_free_pg); br_mdb_notify(br->dev, port, group, RTM_DELMDB); - if (!mp->ports && !mp->mglist && mp->timer_armed && + if (!mp->ports && !mp->mglist && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } @@ -1377,12 +1367,30 @@ static void br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (mp->mglist && mp->timer_armed && + if (mp->mglist && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { mod_timer(&mp->timer, time); } + + goto out; + } + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; } out: spin_unlock(&br->multicast_lock); @@ -1805,7 +1813,6 @@ void br_multicast_stop(struct net_bridge *br) hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); - mp->timer_armed = false; call_rcu_bh(&mp->rcu, br_multicast_free_group); } } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 7ca2ae45b2d5..e14c33b42f75 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -126,7 +126,6 @@ struct net_bridge_mdb_entry struct timer_list timer; struct br_ip addr; bool mglist; - bool timer_armed; }; struct net_bridge_mdb_htable -- cgit v1.2.3