From 8617b093d0031837a7be9b32bc674580cfb5f6b5 Mon Sep 17 00:00:00 2001
From: Mohammed Shafi Shajakhan <mohammed@qca.qualcomm.com>
Date: Mon, 20 Feb 2012 10:05:31 +0530
Subject: mac80211: zero initialize count field in ieee80211_tx_rate

rate control algorithms concludes the rate as invalid
with rate[i].idx < -1 , while they do also check for rate[i].count is
non-zero. it would be safer to zero initialize the 'count' field.
recently we had a ath9k rate control crash where the ath9k rate control
in ath_tx_status assumed to check only for rate[i].count being non-zero
in one instance and ended up in using invalid rate index for
'connection monitoring NULL func frames' which eventually lead to the crash.
thanks to Pavel Roskin for fixing it and finding the root cause.
https://bugzilla.redhat.com/show_bug.cgi?id=768639

Cc: stable@vger.kernel.org
Cc: Pavel Roskin <proski@gnu.org>
Signed-off-by: Mohammed Shafi Shajakhan <mohammed@qca.qualcomm.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index ad64f4d5271a..f9b8e819ca63 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -344,7 +344,7 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
 	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
 		info->control.rates[i].idx = -1;
 		info->control.rates[i].flags = 0;
-		info->control.rates[i].count = 1;
+		info->control.rates[i].count = 0;
 	}
 
 	if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
-- 
cgit v1.2.3


From 0dee00686da88cc32753e02e4e8efdd9e326e4d2 Mon Sep 17 00:00:00 2001
From: Mohammed Shafi Shajakhan <mohammed@qca.qualcomm.com>
Date: Mon, 20 Feb 2012 14:44:01 +0530
Subject: mac80211: Fix a warning on changing to monitor mode from STA

nothing needs to be done for monitor/AP_VLAN mode on calling
ieee80211_bss_info_change_notify -> drv_bss_info_changed with the change
flag 'BSS_CHANGED_IDLE'. 'wl1271' seems to use BSS_CHANGED_IDLE only for
STA and IBSS mode. further the non-idle state of the monitor mode is
taken care by the 'count' variable which counts non-idle interfaces.
ieee80211_idle_off(local, "in use") will be called.
this fixes the following WARNING when we have initially STA mode
(network manager running) and not associated, and change it to monitor
mode with network manager disabled and bringing up the monitor mode.
this changes the idle state from 'true' (STA unassociated) to 'false'
(MONITOR mode)
exposed by the commit 405385f8ce7a2ed8f82e216d88b5282142e1288b
"mac80211: set bss_conf.idle when vif is connected"

	WARNING: net/mac80211/main.c:212
	ieee80211_bss_info_change_notify+0x1cf/0x330 [mac80211]()
	Hardware name: 64756D6
	Pid: 3835, comm: ifconfig Tainted: G           O
	3.3.0-rc3-wl #9
	Call Trace:
	  [<c0133b02>] warn_slowpath_common+0x72/0xa0
	  [<fc8e8c3f>] ?
	  ieee80211_bss_info_change_notify+0x1cf/0x330 [mac80211]
	  [<fc8e8c3f>] ?
	  ieee80211_bss_info_change_notify+0x1cf/0x330 [mac80211]
	  [<c0133b52>] warn_slowpath_null+0x22/0x30
	  [<fc8e8c3f>]
	  ieee80211_bss_info_change_notify+0x1cf/0x330 [mac80211]
	  [<fc8f9de3>] __ieee80211_recalc_idle+0x113/0x430
	  [mac80211]
	  [<fc8fabc6>] ieee80211_do_open+0x156/0x7e0 [mac80211]
	  [<fc8f8a25>] ?
	  ieee80211_check_concurrent_iface+0x25/0x180 [mac80211]
	  [<c015dd9f>] ? raw_notifier_call_chain+0x1f/0x30
	  [<fc8fb290>] ieee80211_open+0x40/0x80 [mac80211]
	  [<c05894f6>] __dev_open+0x96/0xe0
	  [<c068fba5>] ? _raw_spin_unlock_bh+0x35/0x40
	  [<c05881d9>] __dev_change_flags+0x109/0x170
	  [<c0589423>] dev_change_flags+0x23/0x60
	  [<c05f3770>] devinet_ioctl+0x6a0/0x770

 ieee80211 phy0: device no longer idle - in use

Cc: Eliad Peller <eliad@wizery.com>
Signed-off-by: Mohammed Shafi Shajakhan <mohammed@qca.qualcomm.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/iface.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 01a21c2f6ab3..8e2137bd87e2 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1332,6 +1332,9 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
 		hw_roc = true;
 
 	list_for_each_entry(sdata, &local->interfaces, list) {
+		if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
+		    sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+			continue;
 		if (sdata->old_idle == sdata->vif.bss_conf.idle)
 			continue;
 		if (!ieee80211_sdata_running(sdata))
-- 
cgit v1.2.3


From e899b1119f1428f2b04dd7e9dba94864c33dd30b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 22 Feb 2012 16:14:32 +0100
Subject: netfilter: bridge: fix module autoload in compat case

We expected 0 if module doesn't exist, which is no longer the case
(42046e2e45c109ba703993c510401a11f716c8df,
netfilter: x_tables: return -ENOENT for non-existant matches/targets).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5864cc491369..8aa4ad0e06af 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1893,10 +1893,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 
 	switch (compat_mwt) {
 	case EBT_COMPAT_MATCH:
-		match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE,
-						name, 0), "ebt_%s", name);
-		if (match == NULL)
-			return -ENOENT;
+		match = xt_request_find_match(NFPROTO_BRIDGE, name, 0);
 		if (IS_ERR(match))
 			return PTR_ERR(match);
 
@@ -1915,10 +1912,7 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 		break;
 	case EBT_COMPAT_WATCHER: /* fallthrough */
 	case EBT_COMPAT_TARGET:
-		wt = try_then_request_module(xt_find_target(NFPROTO_BRIDGE,
-						name, 0), "ebt_%s", name);
-		if (wt == NULL)
-			return -ENOENT;
+		wt = xt_request_find_target(NFPROTO_BRIDGE, name, 0);
 		if (IS_ERR(wt))
 			return PTR_ERR(wt);
 		off = xt_compat_target_offset(wt);
-- 
cgit v1.2.3


From 4c90d3b30334833450ccbb02f452d4972a3c3c3f Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sun, 26 Feb 2012 10:06:19 +0000
Subject: tcp: fix false reordering signal in tcp_shifted_skb

When tcp_shifted_skb() shifts bytes from the skb that is currently
pointed to by 'highest_sack' then the increment of
TCP_SKB_CB(skb)->seq implicitly advances tcp_highest_sack_seq(). This
implicit advancement, combined with the recent fix to pass the correct
SACKed range into tcp_sacktag_one(), caused tcp_sacktag_one() to think
that the newly SACKed range was before the tcp_highest_sack_seq(),
leading to a call to tcp_update_reordering() with a degree of
reordering matching the size of the newly SACKed range (typically just
1 packet, which is a NOP, but potentially larger).

This commit fixes this by simply calling tcp_sacktag_one() before the
TCP_SKB_CB(skb)->seq advancement that can advance our notion of the
highest SACKed sequence.

Correspondingly, we can simplify the code a little now that
tcp_shifted_skb() should update the lost_cnt_hint in all cases where
skb == tp->lost_skb_hint.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53c8ce4046b2..ee42d42b2f45 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1403,8 +1403,16 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
 	BUG_ON(!pcount);
 
-	/* Adjust hint for FACK. Non-FACK is handled in tcp_sacktag_one(). */
-	if (tcp_is_fack(tp) && (skb == tp->lost_skb_hint))
+	/* Adjust counters and hints for the newly sacked sequence
+	 * range but discard the return value since prev is already
+	 * marked. We must tag the range first because the seq
+	 * advancement below implicitly advances
+	 * tcp_highest_sack_seq() when skb is highest_sack.
+	 */
+	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
+			start_seq, end_seq, dup_sack, pcount);
+
+	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
 
 	TCP_SKB_CB(prev)->end_seq += shifted;
@@ -1430,12 +1438,6 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 		skb_shinfo(skb)->gso_type = 0;
 	}
 
-	/* Adjust counters and hints for the newly sacked sequence range but
-	 * discard the return value since prev is already marked.
-	 */
-	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
-			start_seq, end_seq, dup_sack, pcount);
-
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
 
-- 
cgit v1.2.3


From c0638c247f559e1a16ee79e54df14bca2cb679ea Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 2 Mar 2012 21:36:51 +0000
Subject: tcp: don't fragment SACKed skbs in tcp_mark_head_lost()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In tcp_mark_head_lost() we should not attempt to fragment a SACKed skb
to mark the first portion as lost. This is for two primary reasons:

(1) tcp_shifted_skb() coalesces adjacent regions of SACKed skbs. When
doing this, it preserves the sum of their packet counts in order to
reflect the real-world dynamics on the wire. But given that skbs can
have remainders that do not align to MSS boundaries, this packet count
preservation means that for SACKed skbs there is not necessarily a
direct linear relationship between tcp_skb_pcount(skb) and
skb->len. Thus tcp_mark_head_lost()'s previous attempts to fragment
off and mark as lost a prefix of length (packets - oldcnt)*mss from
SACKed skbs were leading to occasional failures of the WARN_ON(len >
skb->len) in tcp_fragment() (which used to be a BUG_ON(); see the
recent "crash in tcp_fragment" thread on netdev).

(2) there is no real point in fragmenting off part of a SACKed skb and
calling tcp_skb_mark_lost() on it, since tcp_skb_mark_lost() is a NOP
for SACKed skbs.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ee42d42b2f45..d9b83d198c3d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2569,6 +2569,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 
 		if (cnt > packets) {
 			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+			    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
 			    (oldcnt >= packets))
 				break;
 
-- 
cgit v1.2.3


From aaca735f4f188641c4786af8f20ed39fcce3809c Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Thu, 1 Mar 2012 08:12:18 +0000
Subject: bridge: Adjust min age inc for HZ > 256

min age increment needs to round up its min age tick for all
HZ values to guarantee message age is increasing.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index dd147d78a588..c9ef3db737d9 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -17,9 +17,9 @@
 #include "br_private_stp.h"
 
 /* since time values in bpdu are in jiffies and then scaled (1/256)
- * before sending, make sure that is at least one.
+ * before sending, make sure that is at least one STP tick.
  */
-#define MESSAGE_AGE_INCR	((HZ < 256) ? 1 : (HZ/256))
+#define MESSAGE_AGE_INCR	((HZ / 256) + 1)
 
 static const char *const br_port_state_names[] = {
 	[BR_STATE_DISABLED] = "disabled",
-- 
cgit v1.2.3


From 709e1b5cd9e1915ad4f6c470ebf6b55d4a911d8c Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@transmode.se>
Date: Thu, 1 Mar 2012 08:12:19 +0000
Subject: bridge: message age needs to increase, not decrease.

commit bridge: send proper message_age in config BPDU
added this gem:
  bpdu.message_age = (jiffies - root->designated_age)
  p->designated_age = jiffies + bpdu->message_age;
Notice how bpdu->message_age is negated when reassigned to
bpdu.message_age. This causes message age to decrease breaking the
STP protocol.

Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index c9ef3db737d9..6751ed4e0c07 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -186,7 +186,7 @@ static void br_record_config_information(struct net_bridge_port *p,
 	p->designated_cost = bpdu->root_path_cost;
 	p->designated_bridge = bpdu->bridge_id;
 	p->designated_port = bpdu->port_id;
-	p->designated_age = jiffies + bpdu->message_age;
+	p->designated_age = jiffies - bpdu->message_age;
 
 	mod_timer(&p->message_age_timer, jiffies
 		  + (p->br->max_age - bpdu->message_age));
-- 
cgit v1.2.3


From a4b64fbe482c7766f7925f03067fc637716bfa3f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 4 Mar 2012 12:32:10 +0000
Subject: rtnetlink: fix rtnl_calcit() and rtnl_dump_ifinfo()

nlmsg_parse() might return an error, so test its return value before
potential random memory accesses.

Errors introduced in commit 115c9b81928 (rtnetlink: Fix problem with
buffer allocation)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Greg Rose <gregory.v.rose@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 606a6e8f3671..f965dce6f20f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1060,11 +1060,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	rcu_read_lock();
 	cb->seq = net->dev_base_seq;
 
-	nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
-		    ifla_policy);
+	if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
+			ifla_policy) >= 0) {
 
-	if (tb[IFLA_EXT_MASK])
-		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+		if (tb[IFLA_EXT_MASK])
+			ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+	}
 
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
@@ -1900,10 +1901,11 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
 	u32 ext_filter_mask = 0;
 	u16 min_ifinfo_dump_size = 0;
 
-	nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, ifla_policy);
-
-	if (tb[IFLA_EXT_MASK])
-		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+	if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX,
+			ifla_policy) >= 0) {
+		if (tb[IFLA_EXT_MASK])
+			ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+	}
 
 	if (!ext_filter_mask)
 		return NLMSG_GOODSIZE;
-- 
cgit v1.2.3


From d1d81d4c3dd886d5fa25a2c4fa1e39cb89613712 Mon Sep 17 00:00:00 2001
From: Ulrich Weber <ulrich.weber@sophos.com>
Date: Mon, 5 Mar 2012 04:52:44 +0000
Subject: bridge: check return value of ipv6_dev_get_saddr()

otherwise source IPv6 address of ICMPV6_MGM_QUERY packet
might be random junk if IPv6 is disabled on interface or
link-local address is not yet ready (DAD).

Signed-off-by: Ulrich Weber <ulrich.weber@sophos.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 568d5bf17534..702a1ae9220b 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -446,8 +446,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	ip6h->nexthdr = IPPROTO_HOPOPTS;
 	ip6h->hop_limit = 1;
 	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
-	ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
-			   &ip6h->saddr);
+	if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
+			       &ip6h->saddr)) {
+		kfree_skb(skb);
+		return NULL;
+	}
 	ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
 	hopopt = (u8 *)(ip6h + 1);
-- 
cgit v1.2.3


From 4648dc97af9d496218a05353b0e442b3dfa6aaab Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Mon, 5 Mar 2012 19:35:04 +0000
Subject: tcp: fix tcp_shift_skb_data() to not shift SACKed data below snd_una

This commit fixes tcp_shift_skb_data() so that it does not shift
SACKed data below snd_una.

This fixes an issue whose symptoms exactly match reports showing
tp->sacked_out going negative since 3.3.0-rc4 (see "WARNING: at
net/ipv4/tcp_input.c:3418" thread on netdev).

Since 2008 (832d11c5cd076abc0aa1eaf7be96c81d1a59ce41)
tcp_shift_skb_data() had been shifting SACKed ranges that were below
snd_una. It checked that the *end* of the skb it was about to shift
from was above snd_una, but did not check that the end of the actual
shifted range was above snd_una; this commit adds that check.

Shifting SACKed ranges below snd_una is problematic because for such
ranges tcp_sacktag_one() short-circuits: it does not declare anything
as SACKed and does not increase sacked_out.

Before the fixes in commits cc9a672ee522d4805495b98680f4a3db5d0a0af9
and daef52bab1fd26e24e8e9578f8fb33ba1d0cb412, shifting SACKed ranges
below snd_una happened to work because tcp_shifted_skb() was always
(incorrectly) passing in to tcp_sacktag_one() an skb whose end_seq
tcp_shift_skb_data() had already guaranteed was beyond snd_una. Hence
tcp_sacktag_one() never short-circuited and always increased
tp->sacked_out in this case.

After those two fixes, my testing has verified that shifting SACKed
ranges below snd_una could cause tp->sacked_out to go negative with
the following sequence of events:

(1) tcp_shift_skb_data() sees an skb whose end_seq is beyond snd_una,
    then shifts a prefix of that skb that is below snd_una

(2) tcp_shifted_skb() increments the packet count of the
    already-SACKed prev sk_buff

(3) tcp_sacktag_one() sees the end of the new SACKed range is below
    snd_una, so it short-circuits and doesn't increase tp->sacked_out

(5) tcp_clean_rtx_queue() sees the SACKed skb has been ACKed,
    decrements tp->sacked_out by this "inflated" pcount that was
    missing a matching increase in tp->sacked_out, and hence
    tp->sacked_out underflows to a u32 like 0xFFFFFFFF, which casted
    to s32 is negative.

(6) this leads to the warnings seen in the recent "WARNING: at
    net/ipv4/tcp_input.c:3418" thread on the netdev list; e.g.:
    tcp_input.c:3418  WARN_ON((int)tp->sacked_out < 0);

More generally, I think this bug can be tickled in some cases where
two or more ACKs from the receiver are lost and then a DSACK arrives
that is immediately above an existing SACKed skb in the write queue.

This fix changes tcp_shift_skb_data() to abort this sequence at step
(1) in the scenario above by noticing that the bytes are below snd_una
and not shifting them.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d9b83d198c3d..b5e315f13641 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1585,6 +1585,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
+	/* tcp_sacktag_one() won't SACK-tag ranges below snd_una */
+	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
+		goto fallback;
+
 	if (!skb_shift(prev, skb, len))
 		goto fallback;
 	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
-- 
cgit v1.2.3


From 848edc69192a38bf9d261032f248b14f47e6af8b Mon Sep 17 00:00:00 2001
From: Santosh Nayak <santoshprasadnayak@gmail.com>
Date: Tue, 6 Mar 2012 01:22:50 +0000
Subject: netfilter: ebtables: fix wrong name length while copying to
 user-space

user-space ebtables expects 32 bytes-long names, but xt_match names
use 29 bytes. We have to copy less 29 bytes and then, make sure we
fill the remaining bytes with zeroes.

Signed-off-by: Santosh Nayak <santoshprasadnayak@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebtables.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 8aa4ad0e06af..15e9575f7207 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1335,7 +1335,12 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m,
     const char *base, char __user *ubase)
 {
 	char __user *hlp = ubase + ((char *)m - base);
-	if (copy_to_user(hlp, m->u.match->name, EBT_FUNCTION_MAXNAMELEN))
+	char name[EBT_FUNCTION_MAXNAMELEN] = {};
+
+	/* ebtables expects 32 bytes long names but xt_match names are 29 bytes
+	   long. Copy 29 bytes and fill remaining bytes with zeroes. */
+	strncpy(name, m->u.match->name, sizeof(name));
+	if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
 		return -EFAULT;
 	return 0;
 }
@@ -1344,7 +1349,10 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w,
     const char *base, char __user *ubase)
 {
 	char __user *hlp = ubase + ((char *)w - base);
-	if (copy_to_user(hlp , w->u.watcher->name, EBT_FUNCTION_MAXNAMELEN))
+	char name[EBT_FUNCTION_MAXNAMELEN] = {};
+
+	strncpy(name, w->u.watcher->name, sizeof(name));
+	if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN))
 		return -EFAULT;
 	return 0;
 }
@@ -1355,10 +1363,12 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
 	int ret;
 	char __user *hlp;
 	const struct ebt_entry_target *t;
+	char name[EBT_FUNCTION_MAXNAMELEN] = {};
 
 	if (e->bitmask == 0)
 		return 0;
 
+	strncpy(name, t->u.target->name, sizeof(name));
 	hlp = ubase + (((char *)e + e->target_offset) - base);
 	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
 
@@ -1368,7 +1378,7 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
 	ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase);
 	if (ret != 0)
 		return ret;
-	if (copy_to_user(hlp, t->u.target->name, EBT_FUNCTION_MAXNAMELEN))
+	if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
 		return -EFAULT;
 	return 0;
 }
-- 
cgit v1.2.3


From 8be619d1e430fd87a02587a2a6830b692cb91b84 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 6 Mar 2012 01:22:51 +0000
Subject: netfilter: ctnetlink: remove incorrect spin_[un]lock_bh on NAT module
 autoload

Since 7d367e0, ctnetlink_new_conntrack is called without holding
the nf_conntrack_lock spinlock. Thus, ctnetlink_parse_nat_setup
does not require to release that spinlock anymore in the NAT module
autoload case.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 30c9d4ca0218..10687692831e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1041,16 +1041,13 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 	if (!parse_nat_setup) {
 #ifdef CONFIG_MODULES
 		rcu_read_unlock();
-		spin_unlock_bh(&nf_conntrack_lock);
 		nfnl_unlock();
 		if (request_module("nf-nat-ipv4") < 0) {
 			nfnl_lock();
-			spin_lock_bh(&nf_conntrack_lock);
 			rcu_read_lock();
 			return -EOPNOTSUPP;
 		}
 		nfnl_lock();
-		spin_lock_bh(&nf_conntrack_lock);
 		rcu_read_lock();
 		if (nfnetlink_parse_nat_setup_hook)
 			return -EAGAIN;
-- 
cgit v1.2.3


From a157b9d5b5b626e46eba2ac4e342da8db25cabc4 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 6 Mar 2012 01:22:53 +0000
Subject: netfilter: bridge: fix wrong pointer dereference

In adf7ff8, a invalid dereference was added in ebt_make_names.

CC [M]  net/bridge/netfilter/ebtables.o
net/bridge/netfilter/ebtables.c: In function `ebt_make_names':
net/bridge/netfilter/ebtables.c:1371:20: warning: `t' may be used uninitialized in this function [-Wuninitialized]

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebtables.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 15e9575f7207..5fe2ff3b01ef 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1368,7 +1368,6 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
 	if (e->bitmask == 0)
 		return 0;
 
-	strncpy(name, t->u.target->name, sizeof(name));
 	hlp = ubase + (((char *)e + e->target_offset) - base);
 	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
 
@@ -1378,6 +1377,7 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
 	ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase);
 	if (ret != 0)
 		return ret;
+	strncpy(name, t->u.target->name, sizeof(name));
 	if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
 		return -EFAULT;
 	return 0;
-- 
cgit v1.2.3


From 739e4505a0e8209622dc71743bfa1c804eacf7f4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 6 Mar 2012 01:22:54 +0000
Subject: bridge: netfilter: don't call iptables on vlan packets if sysctl is
 off

When net.bridge.bridge-nf-filter-vlan-tagged is 0 (default), vlan packets
arriving should not be sent to ip(6)tables by bridge netfilter.

However, it turns out that we currently always send VLAN packets to
netfilter, if ..
a), CONFIG_VLAN_8021Q is enabled ; or
b), CONFIG_VLAN_8021Q is not set but rx vlan offload is enabled
   on the bridge port.

This is because bridge netfilter treats skb with
skb->protocol == ETH_P_IP{V6} as "non-vlan packet".

With rx vlan offload on or CONFIG_VLAN_8021Q=y, the vlan header has
already been removed here, and we cannot rely on skb->protocol alone.

Fix this by only using skb->protocol if the skb has no vlan tag,
or if a vlan tag is present and filter-vlan-tagged bridge netfilter
sysctl is enabled.

We cannot remove the skb->protocol == htons(ETH_P_8021Q) test
because the vlan tag is still around in the CONFIG_VLAN_8021Q=n &&
"ethtool -K $itf rxvlan off" case.

reproducer:
iptables -t raw -I PREROUTING -i br0
iptables -t raw -I PREROUTING -i br0.1

Then send packets to an ip address configured on br0.1 interface.
Even with net.bridge.bridge-nf-filter-vlan-tagged=0, the 1st rule
will match instead of the 2nd one.

With this patch applied, the 2nd rule will match instead.
In the non-local address case, netfilter won't be consulted after
this patch unless the sysctl is switched on.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 84122472656c..dec4f3817133 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -62,6 +62,15 @@ static int brnf_filter_pppoe_tagged __read_mostly = 0;
 #define brnf_filter_pppoe_tagged 0
 #endif
 
+#define IS_IP(skb) \
+	(!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IP))
+
+#define IS_IPV6(skb) \
+	(!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6))
+
+#define IS_ARP(skb) \
+	(!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_ARP))
+
 static inline __be16 vlan_proto(const struct sk_buff *skb)
 {
 	if (vlan_tx_tag_present(skb))
@@ -639,8 +648,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 		return NF_DROP;
 	br = p->br;
 
-	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
-	    IS_PPPOE_IPV6(skb)) {
+	if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb)) {
 		if (!brnf_call_ip6tables && !br->nf_call_ip6tables)
 			return NF_ACCEPT;
 
@@ -651,8 +659,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 	if (!brnf_call_iptables && !br->nf_call_iptables)
 		return NF_ACCEPT;
 
-	if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
-	    !IS_PPPOE_IP(skb))
+	if (!IS_IP(skb) && !IS_VLAN_IP(skb) && !IS_PPPOE_IP(skb))
 		return NF_ACCEPT;
 
 	nf_bridge_pull_encap_header_rcsum(skb);
@@ -701,7 +708,7 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 	struct net_device *in;
 
-	if (skb->protocol != htons(ETH_P_ARP) && !IS_VLAN_ARP(skb)) {
+	if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
 		in = nf_bridge->physindev;
 		if (nf_bridge->mask & BRNF_PKT_TYPE) {
 			skb->pkt_type = PACKET_OTHERHOST;
@@ -718,6 +725,7 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 	return 0;
 }
 
+
 /* This is the 'purely bridged' case.  For IP, we pass the packet to
  * netfilter with indev and outdev set to the bridge device,
  * but we are still able to filter on the 'real' indev/outdev
@@ -744,11 +752,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 	if (!parent)
 		return NF_DROP;
 
-	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
-	    IS_PPPOE_IP(skb))
+	if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
 		pf = PF_INET;
-	else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
-		 IS_PPPOE_IPV6(skb))
+	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
 		pf = PF_INET6;
 	else
 		return NF_ACCEPT;
@@ -795,7 +801,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 	if (!brnf_call_arptables && !br->nf_call_arptables)
 		return NF_ACCEPT;
 
-	if (skb->protocol != htons(ETH_P_ARP)) {
+	if (!IS_ARP(skb)) {
 		if (!IS_VLAN_ARP(skb))
 			return NF_ACCEPT;
 		nf_bridge_pull_encap_header(skb);
@@ -853,11 +859,9 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 	if (!realoutdev)
 		return NF_DROP;
 
-	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
-	    IS_PPPOE_IP(skb))
+	if (IS_IP(skb) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb))
 		pf = PF_INET;
-	else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
-		 IS_PPPOE_IPV6(skb))
+	else if (IS_IPV6(skb) || IS_VLAN_IPV6(skb) || IS_PPPOE_IPV6(skb))
 		pf = PF_INET6;
 	else
 		return NF_ACCEPT;
-- 
cgit v1.2.3


From 741385119706d4370eb7899c5ca96ad125c520e5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 6 Mar 2012 01:22:55 +0000
Subject: netfilter: nf_conntrack: fix early_drop with reliable event delivery

If reliable event delivery is enabled and ctnetlink fails to deliver
the destroy event in early_drop, the conntrack subsystem cannot
drop any the candidate flow that was planned to be evicted.

Reported-by: Kerin Millar <kerframil@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_core.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ed86a3be678e..fa4b82c8ae80 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -635,8 +635,12 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 
 	if (del_timer(&ct->timeout)) {
 		death_by_timeout((unsigned long)ct);
-		dropped = 1;
-		NF_CT_STAT_INC_ATOMIC(net, early_drop);
+		/* Check if we indeed killed this entry. Reliable event
+		   delivery may have inserted it into the dying list. */
+		if (test_bit(IPS_DYING_BIT, &ct->status)) {
+			dropped = 1;
+			NF_CT_STAT_INC_ATOMIC(net, early_drop);
+		}
 	}
 	nf_ct_put(ct);
 	return dropped;
-- 
cgit v1.2.3


From d6ddef9e641d1229d4ec841dc75ae703171c3e92 Mon Sep 17 00:00:00 2001
From: Li Wei <lw@cn.fujitsu.com>
Date: Mon, 5 Mar 2012 14:45:17 +0000
Subject: IPv6: Fix not join all-router mcast group when forwarding set.

When forwarding was set and a new net device is register,
we need add this device to the all-router mcast group.

Signed-off-by: Li Wei <lw@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c02280a4d126..6b8ebc5da0e1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -434,6 +434,10 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 	/* Join all-node multicast group */
 	ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
 
+	/* Join all-router multicast group if forwarding is set */
+	if (ndev->cnf.forwarding && dev && (dev->flags & IFF_MULTICAST))
+		ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
+
 	return ndev;
 }
 
-- 
cgit v1.2.3


From 651a68ea2ce9738b84e928836053b2e0fb5db2ba Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Tue, 6 Mar 2012 15:04:04 -0800
Subject: openvswitch: Honor dp_ifindex, when specified, for vport lookup by
 name.

When OVS_VPORT_ATTR_NAME is specified and dp_ifindex is nonzero, the
logical behavior would be for the vport name lookup scope to be limited
to the specified datapath, but in fact the dp_ifindex value was ignored.
This commit causes the search scope to be honored.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/datapath.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ce64c18b8c79..2c030505b335 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1521,6 +1521,9 @@ static struct vport *lookup_vport(struct ovs_header *ovs_header,
 		vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
 		if (!vport)
 			return ERR_PTR(-ENODEV);
+		if (ovs_header->dp_ifindex &&
+		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
+			return ERR_PTR(-ENODEV);
 		return vport;
 	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
 		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
-- 
cgit v1.2.3


From 81e5d41d7ed4f6c61ba3d2414f4f9ddf6d934ebb Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Tue, 6 Mar 2012 15:05:46 -0800
Subject: openvswitch: Fix checksum update for actions on UDP packets.

When modifying IP addresses or ports on a UDP packet we don't
correctly follow the rules for unchecksummed packets.  This meant
that packets without a checksum can be given a incorrect new checksum
and packets with a checksum can become marked as being unchecksummed.
This fixes it to handle those requirements.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/openvswitch/actions.c | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 2725d1bdf291..48badffaafc1 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2011 Nicira Networks.
+ * Copyright (c) 2007-2012 Nicira Networks.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -145,9 +145,16 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
 			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
 						 *addr, new_addr, 1);
 	} else if (nh->protocol == IPPROTO_UDP) {
-		if (likely(transport_len >= sizeof(struct udphdr)))
-			inet_proto_csum_replace4(&udp_hdr(skb)->check, skb,
-						 *addr, new_addr, 1);
+		if (likely(transport_len >= sizeof(struct udphdr))) {
+			struct udphdr *uh = udp_hdr(skb);
+
+			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+				inet_proto_csum_replace4(&uh->check, skb,
+							 *addr, new_addr, 1);
+				if (!uh->check)
+					uh->check = CSUM_MANGLED_0;
+			}
+		}
 	}
 
 	csum_replace4(&nh->check, *addr, new_addr);
@@ -197,8 +204,22 @@ static void set_tp_port(struct sk_buff *skb, __be16 *port,
 	skb->rxhash = 0;
 }
 
-static int set_udp_port(struct sk_buff *skb,
-			const struct ovs_key_udp *udp_port_key)
+static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
+		set_tp_port(skb, port, new_port, &uh->check);
+
+		if (!uh->check)
+			uh->check = CSUM_MANGLED_0;
+	} else {
+		*port = new_port;
+		skb->rxhash = 0;
+	}
+}
+
+static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key)
 {
 	struct udphdr *uh;
 	int err;
@@ -210,16 +231,15 @@ static int set_udp_port(struct sk_buff *skb,
 
 	uh = udp_hdr(skb);
 	if (udp_port_key->udp_src != uh->source)
-		set_tp_port(skb, &uh->source, udp_port_key->udp_src, &uh->check);
+		set_udp_port(skb, &uh->source, udp_port_key->udp_src);
 
 	if (udp_port_key->udp_dst != uh->dest)
-		set_tp_port(skb, &uh->dest, udp_port_key->udp_dst, &uh->check);
+		set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
 
 	return 0;
 }
 
-static int set_tcp_port(struct sk_buff *skb,
-			const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
 {
 	struct tcphdr *th;
 	int err;
@@ -328,11 +348,11 @@ static int execute_set_action(struct sk_buff *skb,
 		break;
 
 	case OVS_KEY_ATTR_TCP:
-		err = set_tcp_port(skb, nla_data(nested_attr));
+		err = set_tcp(skb, nla_data(nested_attr));
 		break;
 
 	case OVS_KEY_ATTR_UDP:
-		err = set_udp_port(skb, nla_data(nested_attr));
+		err = set_udp(skb, nla_data(nested_attr));
 		break;
 	}
 
-- 
cgit v1.2.3


From d9e179ecec0805c41b17f9a0c3b925d415677772 Mon Sep 17 00:00:00 2001
From: Paulius Zaleckas <paulius.zaleckas@gmail.com>
Date: Tue, 6 Mar 2012 22:25:14 +0000
Subject: bridge: br_log_state() s/entering/entered/

When br_log_state() is reporting state it should say "entered"
istead of "entering" since state at this point is already
changed.

Signed-off-by: Paulius Zaleckas <paulius.zaleckas@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 6751ed4e0c07..8c836d96ba76 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -31,7 +31,7 @@ static const char *const br_port_state_names[] = {
 
 void br_log_state(const struct net_bridge_port *p)
 {
-	br_info(p->br, "port %u(%s) entering %s state\n",
+	br_info(p->br, "port %u(%s) entered %s state\n",
 		(unsigned) p->port_no, p->dev->name,
 		br_port_state_names[p->state]);
 }
-- 
cgit v1.2.3


From 5200959b833ddacf28b6ffce8c331dfd6e0ca797 Mon Sep 17 00:00:00 2001
From: Paulius Zaleckas <paulius.zaleckas@gmail.com>
Date: Tue, 6 Mar 2012 22:25:22 +0000
Subject: bridge: fix state reporting when port is disabled

Now we have:
eth0: link *down*
br0: port 1(eth0) entered *forwarding* state

br_log_state(p) should be called *after* p->state is set
to BR_STATE_DISABLED.

Reported-by: Zilvinas Valinskas <zilvinas@wilibox.com>
Signed-off-by: Paulius Zaleckas <paulius.zaleckas@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_stp_if.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 19308e305d85..f494496373d6 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -98,14 +98,13 @@ void br_stp_disable_port(struct net_bridge_port *p)
 	struct net_bridge *br = p->br;
 	int wasroot;
 
-	br_log_state(p);
-
 	wasroot = br_is_root_bridge(br);
 	br_become_designated_port(p);
 	p->state = BR_STATE_DISABLED;
 	p->topology_change_ack = 0;
 	p->config_pending = 0;
 
+	br_log_state(p);
 	br_ifinfo_notify(RTM_NEWLINK, p);
 
 	del_timer(&p->message_age_timer);
-- 
cgit v1.2.3


From 5faa5df1fa2024bd750089ff21dcc4191798263d Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 6 Mar 2012 21:20:26 +0000
Subject: inetpeer: Invalidate the inetpeer tree along with the routing cache

We initialize the routing metrics with the values cached on the
inetpeer in rt_init_metrics(). So if we have the metrics cached on the
inetpeer, we ignore the user configured fib_metrics.

To fix this issue, we replace the old tree with a fresh initialized
inet_peer_base. The old tree is removed later with a delayed work queue.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  3 ++
 net/ipv4/inetpeer.c    | 80 +++++++++++++++++++++++++++++++++++++++++++++++++-
 net/ipv4/route.c       |  1 +
 3 files changed, 83 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 06b795dd5906..ff04a33acf00 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -41,6 +41,7 @@ struct inet_peer {
 	u32			pmtu_orig;
 	u32			pmtu_learned;
 	struct inetpeer_addr_base redirect_learned;
+	struct list_head	gc_list;
 	/*
 	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
 	 * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
@@ -96,6 +97,8 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr,
 extern void inet_putpeer(struct inet_peer *p);
 extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 
+extern void inetpeer_invalidate_tree(int family);
+
 /*
  * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
  * tcp_ts_stamp if no refcount is taken on inet_peer
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index bf4a9c4808e1..deea2e96b7f2 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/net.h>
+#include <linux/workqueue.h>
 #include <net/ip.h>
 #include <net/inetpeer.h>
 #include <net/secure_seq.h>
@@ -66,6 +67,11 @@
 
 static struct kmem_cache *peer_cachep __read_mostly;
 
+static LIST_HEAD(gc_list);
+static const int gc_delay = 60 * HZ;
+static struct delayed_work gc_work;
+static DEFINE_SPINLOCK(gc_lock);
+
 #define node_height(x) x->avl_height
 
 #define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
@@ -102,6 +108,50 @@ int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries m
 int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
 
+static void inetpeer_gc_worker(struct work_struct *work)
+{
+	struct inet_peer *p, *n;
+	LIST_HEAD(list);
+
+	spin_lock_bh(&gc_lock);
+	list_replace_init(&gc_list, &list);
+	spin_unlock_bh(&gc_lock);
+
+	if (list_empty(&list))
+		return;
+
+	list_for_each_entry_safe(p, n, &list, gc_list) {
+
+		if(need_resched())
+			cond_resched();
+
+		if (p->avl_left != peer_avl_empty) {
+			list_add_tail(&p->avl_left->gc_list, &list);
+			p->avl_left = peer_avl_empty;
+		}
+
+		if (p->avl_right != peer_avl_empty) {
+			list_add_tail(&p->avl_right->gc_list, &list);
+			p->avl_right = peer_avl_empty;
+		}
+
+		n = list_entry(p->gc_list.next, struct inet_peer, gc_list);
+
+		if (!atomic_read(&p->refcnt)) {
+			list_del(&p->gc_list);
+			kmem_cache_free(peer_cachep, p);
+		}
+	}
+
+	if (list_empty(&list))
+		return;
+
+	spin_lock_bh(&gc_lock);
+	list_splice(&list, &gc_list);
+	spin_unlock_bh(&gc_lock);
+
+	schedule_delayed_work(&gc_work, gc_delay);
+}
 
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
@@ -126,6 +176,7 @@ void __init inet_initpeers(void)
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
 
+	INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
@@ -449,7 +500,7 @@ relookup:
 		p->pmtu_orig = 0;
 		p->redirect_genid = 0;
 		memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
-
+		INIT_LIST_HEAD(&p->gc_list);
 
 		/* Link the node. */
 		link_to_pool(p, base);
@@ -509,3 +560,30 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
 	return rc;
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
+
+void inetpeer_invalidate_tree(int family)
+{
+	struct inet_peer *old, *new, *prev;
+	struct inet_peer_base *base = family_to_base(family);
+
+	write_seqlock_bh(&base->lock);
+
+	old = base->root;
+	if (old == peer_avl_empty_rcu)
+		goto out;
+
+	new = peer_avl_empty_rcu;
+
+	prev = cmpxchg(&base->root, old, new);
+	if (prev == old) {
+		base->total = 0;
+		spin_lock(&gc_lock);
+		list_add_tail(&prev->gc_list, &gc_list);
+		spin_unlock(&gc_lock);
+		schedule_delayed_work(&gc_work, gc_delay);
+	}
+
+out:
+	write_sequnlock_bh(&base->lock);
+}
+EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bcacf54e5418..23ce0c1287ab 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -938,6 +938,7 @@ static void rt_cache_invalidate(struct net *net)
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
 	redirect_genid++;
+	inetpeer_invalidate_tree(AF_INET);
 }
 
 /*
-- 
cgit v1.2.3


From ac3f48de09d8f4b73397047e413fadff7f65cfa7 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 6 Mar 2012 21:21:10 +0000
Subject: route: Remove redirect_genid

As we invalidate the inetpeer tree along with the routing cache now,
we don't need a genid to reset the redirect handling when the routing
cache is flushed.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |  1 -
 net/ipv4/inetpeer.c    |  1 -
 net/ipv4/route.c       | 11 ++---------
 3 files changed, 2 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index ff04a33acf00..b94765e38e80 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -35,7 +35,6 @@ struct inet_peer {
 
 	u32			metrics[RTAX_MAX];
 	u32			rate_tokens;	/* rate limiting for ICMP */
-	int			redirect_genid;
 	unsigned long		rate_last;
 	unsigned long		pmtu_expires;
 	u32			pmtu_orig;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index deea2e96b7f2..d4d61b694fab 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -498,7 +498,6 @@ relookup:
 		p->rate_last = 0;
 		p->pmtu_expires = 0;
 		p->pmtu_orig = 0;
-		p->redirect_genid = 0;
 		memset(&p->redirect_learned, 0, sizeof(p->redirect_learned));
 		INIT_LIST_HEAD(&p->gc_list);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 23ce0c1287ab..019774796174 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -132,7 +132,6 @@ static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
 static int rt_chain_length_max __read_mostly	= 20;
-static int redirect_genid;
 
 static struct delayed_work expires_work;
 static unsigned long expires_ljiffies;
@@ -937,7 +936,6 @@ static void rt_cache_invalidate(struct net *net)
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-	redirect_genid++;
 	inetpeer_invalidate_tree(AF_INET);
 }
 
@@ -1486,10 +1484,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 				peer = rt->peer;
 				if (peer) {
-					if (peer->redirect_learned.a4 != new_gw ||
-					    peer->redirect_genid != redirect_genid) {
+					if (peer->redirect_learned.a4 != new_gw) {
 						peer->redirect_learned.a4 = new_gw;
-						peer->redirect_genid = redirect_genid;
 						atomic_inc(&__rt_peer_genid);
 					}
 					check_peer_redir(&rt->dst, peer);
@@ -1794,8 +1790,6 @@ static void ipv4_validate_peer(struct rtable *rt)
 		if (peer) {
 			check_peer_pmtu(&rt->dst, peer);
 
-			if (peer->redirect_genid != redirect_genid)
-				peer->redirect_learned.a4 = 0;
 			if (peer->redirect_learned.a4 &&
 			    peer->redirect_learned.a4 != rt->rt_gateway)
 				check_peer_redir(&rt->dst, peer);
@@ -1959,8 +1953,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 		dst_init_metrics(&rt->dst, peer->metrics, false);
 
 		check_peer_pmtu(&rt->dst, peer);
-		if (peer->redirect_genid != redirect_genid)
-			peer->redirect_learned.a4 = 0;
+
 		if (peer->redirect_learned.a4 &&
 		    peer->redirect_learned.a4 != rt->rt_gateway) {
 			rt->rt_gateway = peer->redirect_learned.a4;
-- 
cgit v1.2.3