From cd3bafc73d11eb51cb2d3691629718431e1768ce Mon Sep 17 00:00:00 2001
From: Hajime Tazaki <tazaki@sfc.wide.ad.jp>
Date: Wed, 4 Feb 2015 23:31:10 +0900
Subject: xfrm6: Fix a offset value for network header in _decode_session6

When a network-layer header has multiple IPv6 extension headers, then offset
for mobility header goes wrong. This regression breaks an xfrm policy lookup
for a particular receive packet. Binding update packets of Mobile IPv6
are all discarded without this fix.

Fixes: de3b7a06dfe1 ("xfrm6: Fix transport header offset in _decode_session6.")
Signed-off-by: Hajime Tazaki <tazaki@sfc.wide.ad.jp>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv6/xfrm6_policy.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 48bf5a06847b..8d2d01b4800a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -200,6 +200,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 		case IPPROTO_MH:
+			offset += ipv6_optlen(exthdr);
 			if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
 				struct ip6_mh *mh;
 
-- 
cgit v1.2.3


From 044a832a7779c0638bea2d0fea901c055b995f4a Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Mon, 12 Jan 2015 13:38:49 +0100
Subject: xfrm: Fix local error reporting crash with interfamily tunnels

We set the outer mode protocol too early. As a result, the
local error handler might dispatch to the wrong address family
and report the error to a wrong socket type. We fix this by
setting the outer protocol to the skb after we accessed the
inner mode for the last time, right before we do the atcual
encapsulation where we switch finally to the outer mode.

Reported-by: Chris Ruehl <chris.ruehl@gtsys.com.hk>
Tested-by: Chris Ruehl <chris.ruehl@gtsys.com.hk>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/xfrm4_output.c | 2 +-
 net/ipv6/xfrm6_output.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d5f6bd9a210a..dab73813cb92 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -63,6 +63,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 		return err;
 
 	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+	skb->protocol = htons(ETH_P_IP);
 
 	return x->outer_mode->output2(x, skb);
 }
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
 int xfrm4_output_finish(struct sk_buff *skb)
 {
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-	skb->protocol = htons(ETH_P_IP);
 
 #ifdef CONFIG_NETFILTER
 	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index ca3f29b98ae5..010f8bd2d577 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -114,6 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 		return err;
 
 	skb->ignore_df = 1;
+	skb->protocol = htons(ETH_P_IPV6);
 
 	return x->outer_mode->output2(x, skb);
 }
@@ -122,7 +123,6 @@ EXPORT_SYMBOL(xfrm6_prepare_output);
 int xfrm6_output_finish(struct sk_buff *skb)
 {
 	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-	skb->protocol = htons(ETH_P_IPV6);
 
 #ifdef CONFIG_NETFILTER
 	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-- 
cgit v1.2.3


From ac37e2515c1a89c477459a2020b6bfdedabdb91b Mon Sep 17 00:00:00 2001
From: huaibin Wang <huaibin.wang@6wind.com>
Date: Wed, 11 Feb 2015 18:10:36 +0100
Subject: xfrm: release dst_orig in case of error in xfrm_lookup()

dst_orig should be released on error. Function like __xfrm_route_forward()
expects that behavior.
Since a recent commit, xfrm_lookup() may also be called by xfrm_lookup_route(),
which expects the opposite.
Let's introduce a new flag (XFRM_LOOKUP_KEEP_DST_REF) to tell what should be
done in case of error.

Fixes: f92ee61982d("xfrm: Generate blackhole routes only from route lookup functions")
Signed-off-by: huaibin Wang <huaibin.wang@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/dst.h      |  1 +
 net/xfrm/xfrm_policy.c | 12 ++++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/dst.h b/include/net/dst.h
index a8ae4e760778..0fb99a26e973 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -481,6 +481,7 @@ void dst_init(void);
 enum {
 	XFRM_LOOKUP_ICMP = 1 << 0,
 	XFRM_LOOKUP_QUEUE = 1 << 1,
+	XFRM_LOOKUP_KEEP_DST_REF = 1 << 2,
 };
 
 struct flowi;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cee479bc655c..638af0655aaf 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2269,11 +2269,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 		 * have the xfrm_state's. We need to wait for KM to
 		 * negotiate new SA's or bail out with error.*/
 		if (net->xfrm.sysctl_larval_drop) {
-			dst_release(dst);
-			xfrm_pols_put(pols, drop_pols);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-
-			return ERR_PTR(-EREMOTE);
+			err = -EREMOTE;
+			goto error;
 		}
 
 		err = -EAGAIN;
@@ -2324,7 +2322,8 @@ nopol:
 error:
 	dst_release(dst);
 dropdst:
-	dst_release(dst_orig);
+	if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
+		dst_release(dst_orig);
 	xfrm_pols_put(pols, drop_pols);
 	return ERR_PTR(err);
 }
@@ -2338,7 +2337,8 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
 				    struct sock *sk, int flags)
 {
 	struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
-					    flags | XFRM_LOOKUP_QUEUE);
+					    flags | XFRM_LOOKUP_QUEUE |
+					    XFRM_LOOKUP_KEEP_DST_REF);
 
 	if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
 		return make_blackhole(net, dst_orig->ops->family, dst_orig);
-- 
cgit v1.2.3


From 2156d321b879cdadb95a633d046169cfebdbf784 Mon Sep 17 00:00:00 2001
From: Arturo Borrero <arturo.borrero.glez@gmail.com>
Date: Sat, 21 Feb 2015 19:30:55 +0100
Subject: netfilter: nft_compat: don't truncate ethernet protocol type to u8

Use u16 for protocol and then cast it to __be16

>> net/netfilter/nft_compat.c:140:37: sparse: incorrect type in assignment (different base types)
   net/netfilter/nft_compat.c:140:37:    expected restricted __be16 [usertype] ethproto
   net/netfilter/nft_compat.c:140:37:    got unsigned char [unsigned] [usertype] proto
>> net/netfilter/nft_compat.c:351:37: sparse: incorrect type in assignment (different base types)
   net/netfilter/nft_compat.c:351:37:    expected restricted __be16 [usertype] ethproto
   net/netfilter/nft_compat.c:351:37:    got unsigned char [unsigned] [usertype] proto

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_compat.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 1279cd85663e..213584cf04b3 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -123,7 +123,7 @@ static void
 nft_target_set_tgchk_param(struct xt_tgchk_param *par,
 			   const struct nft_ctx *ctx,
 			   struct xt_target *target, void *info,
-			   union nft_entry *entry, u8 proto, bool inv)
+			   union nft_entry *entry, u16 proto, bool inv)
 {
 	par->net	= ctx->net;
 	par->table	= ctx->table->name;
@@ -137,7 +137,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
 		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
 		break;
 	case NFPROTO_BRIDGE:
-		entry->ebt.ethproto = proto;
+		entry->ebt.ethproto = (__force __be16)proto;
 		entry->ebt.invflags = inv ? EBT_IPROTO : 0;
 		break;
 	}
@@ -171,7 +171,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
 	[NFTA_RULE_COMPAT_FLAGS]	= { .type = NLA_U32 },
 };
 
-static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv)
+static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
 {
 	struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
 	u32 flags;
@@ -203,7 +203,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	struct xt_target *target = expr->ops->data;
 	struct xt_tgchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
-	u8 proto = 0;
+	u16 proto = 0;
 	bool inv = false;
 	union nft_entry e = {};
 	int ret;
@@ -334,7 +334,7 @@ static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
 static void
 nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
 			  struct xt_match *match, void *info,
-			  union nft_entry *entry, u8 proto, bool inv)
+			  union nft_entry *entry, u16 proto, bool inv)
 {
 	par->net	= ctx->net;
 	par->table	= ctx->table->name;
@@ -348,7 +348,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
 		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
 		break;
 	case NFPROTO_BRIDGE:
-		entry->ebt.ethproto = proto;
+		entry->ebt.ethproto = (__force __be16)proto;
 		entry->ebt.invflags = inv ? EBT_IPROTO : 0;
 		break;
 	}
@@ -385,7 +385,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	struct xt_match *match = expr->ops->data;
 	struct xt_mtchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
-	u8 proto = 0;
+	u16 proto = 0;
 	bool inv = false;
 	union nft_entry e = {};
 	int ret;
-- 
cgit v1.2.3


From 02263db00b6cb98701332aa257c07ca549c2324b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 20 Feb 2015 17:11:10 +0100
Subject: netfilter: nf_tables: fix addition/deletion of elements from
 commit/abort

We have several problems in this path:

1) There is a use-after-free when removing individual elements from
   the commit path.

2) We have to uninit() the data part of the element from the abort
   path to avoid a chain refcount leak.

3) We have to check for set->flags to see if there's a mapping, instead
   of the element flags.

4) We have to check for !(flags & NFT_SET_ELEM_INTERVAL_END) to skip
   elements that are part of the interval that have no data part, so
   they don't need to be uninit().

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 199fd0f27b0e..a8c94620f20e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3612,12 +3612,11 @@ static int nf_tables_commit(struct sk_buff *skb)
 						 &te->elem,
 						 NFT_MSG_DELSETELEM, 0);
 			te->set->ops->get(te->set, &te->elem);
-			te->set->ops->remove(te->set, &te->elem);
 			nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
-			if (te->elem.flags & NFT_SET_MAP) {
-				nft_data_uninit(&te->elem.data,
-						te->set->dtype);
-			}
+			if (te->set->flags & NFT_SET_MAP &&
+			    !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
+				nft_data_uninit(&te->elem.data, te->set->dtype);
+			te->set->ops->remove(te->set, &te->elem);
 			nft_trans_destroy(trans);
 			break;
 		}
@@ -3658,7 +3657,7 @@ static int nf_tables_abort(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nft_trans *trans, *next;
-	struct nft_set *set;
+	struct nft_trans_elem *te;
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
@@ -3719,9 +3718,13 @@ static int nf_tables_abort(struct sk_buff *skb)
 			break;
 		case NFT_MSG_NEWSETELEM:
 			nft_trans_elem_set(trans)->nelems--;
-			set = nft_trans_elem_set(trans);
-			set->ops->get(set, &nft_trans_elem(trans));
-			set->ops->remove(set, &nft_trans_elem(trans));
+			te = (struct nft_trans_elem *)trans->data;
+			te->set->ops->get(te->set, &te->elem);
+			nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
+			if (te->set->flags & NFT_SET_MAP &&
+			    !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
+				nft_data_uninit(&te->elem.data, te->set->dtype);
+			te->set->ops->remove(te->set, &te->elem);
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELSETELEM:
-- 
cgit v1.2.3


From 528c943f3bb919aef75ab2fff4f00176f09a4019 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 21 Feb 2015 21:03:10 +0200
Subject: ipvs: add missing ip_vs_pe_put in sync code

ip_vs_conn_fill_param_sync() gets in param.pe a module
reference for persistence engine from __ip_vs_pe_getbyname()
but forgets to put it. Problem occurs in backup for
sync protocol v1 (2.6.39).

Also, pe_data usually comes in sync messages for
connection templates and ip_vs_conn_new() copies
the pointer only in this case. Make sure pe_data
is not leaked if it comes unexpectedly for normal
connections. Leak can happen only if bogus messages
are sent to backup server.

Fixes: fe5e7a1efb66 ("IPVS: Backup, Adding Version 1 receive capability")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_sync.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index c47ffd7a0a70..d93ceeb3ef04 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -896,6 +896,8 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
 			IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
 			return;
 		}
+		if (!(flags & IP_VS_CONN_F_TEMPLATE))
+			kfree(param->pe_data);
 	}
 
 	if (opt)
@@ -1169,6 +1171,7 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
 				(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
 				);
 #endif
+	ip_vs_pe_put(param.pe);
 	return 0;
 	/* Error exit */
 out:
-- 
cgit v1.2.3


From d0c22119f574b851e63360c6b8660fe9593bbc3c Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Mon, 2 Mar 2015 14:28:52 -0500
Subject: mac80211: drop unencrypted frames in mesh fwding

The mesh forwarding path was not checking that data
frames were protected when running an encrypted network;
add the necessary check.

Cc: stable@vger.kernel.org
Reported-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 1101563357ea..944bdc04e913 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2214,6 +2214,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	hdr = (struct ieee80211_hdr *) skb->data;
 	mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
 
+	if (ieee80211_drop_unencrypted(rx, hdr->frame_control))
+		return RX_DROP_MONITOR;
+
 	/* frame is in RMC, don't forward */
 	if (ieee80211_is_data(hdr->frame_control) &&
 	    is_multicast_ether_addr(hdr->addr1) &&
-- 
cgit v1.2.3


From aa75ebc275b2a91b193654a177daf900ad6703f0 Mon Sep 17 00:00:00 2001
From: Michal Kazior <michal.kazior@tieto.com>
Date: Tue, 10 Feb 2015 12:48:44 +0100
Subject: mac80211: disable u-APSD queues by default

Some APs experience problems when working with
U-APSD. Decreasing the probability of that
happening by using legacy mode for all ACs but VO
isn't enough.

Cisco 4410N originally forced us to enable VO by
default only because it treated non-VO ACs as
legacy.

However some APs (notably Netgear R7000) silently
reclassify packets to different ACs. Since u-APSD
ACs require trigger frames for frame retrieval
clients would never see some frames (e.g. ARP
responses) or would fetch them accidentally after
a long time.

It makes little sense to enable u-APSD queues by
default because it needs userspace applications to
be aware of it to actually take advantage of the
possible additional powersavings. Implicitly
depending on driver autotrigger frame support
doesn't make much sense.

Cc: stable@vger.kernel.org
Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 3afe36824703..c0e089c194f1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -58,13 +58,24 @@ struct ieee80211_local;
 #define IEEE80211_UNSET_POWER_LEVEL	INT_MIN
 
 /*
- * Some APs experience problems when working with U-APSD. Decrease the
- * probability of that happening by using legacy mode for all ACs but VO.
- * The AP that caused us trouble was a Cisco 4410N. It ignores our
- * setting, and always treats non-VO ACs as legacy.
+ * Some APs experience problems when working with U-APSD. Decreasing the
+ * probability of that happening by using legacy mode for all ACs but VO isn't
+ * enough.
+ *
+ * Cisco 4410N originally forced us to enable VO by default only because it
+ * treated non-VO ACs as legacy.
+ *
+ * However some APs (notably Netgear R7000) silently reclassify packets to
+ * different ACs. Since u-APSD ACs require trigger frames for frame retrieval
+ * clients would never see some frames (e.g. ARP responses) or would fetch them
+ * accidentally after a long time.
+ *
+ * It makes little sense to enable u-APSD queues by default because it needs
+ * userspace applications to be aware of it to actually take advantage of the
+ * possible additional powersavings. Implicitly depending on driver autotrigger
+ * frame support doesn't make much sense.
  */
-#define IEEE80211_DEFAULT_UAPSD_QUEUES \
-	IEEE80211_WMM_IE_STA_QOSINFO_AC_VO
+#define IEEE80211_DEFAULT_UAPSD_QUEUES 0
 
 #define IEEE80211_DEFAULT_MAX_SP_LEN		\
 	IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL
-- 
cgit v1.2.3


From 8670c3a55e91cb27a4b4d4d4c4fa35b0149e1abf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 Mar 2015 20:04:18 +0000
Subject: netfilter: nf_tables: fix transaction race condition

A race condition exists in the rule transaction code for rules that
get added and removed within the same transaction.

The new rule starts out as inactive in the current and active in the
next generation and is inserted into the ruleset. When it is deleted,
it is additionally set to inactive in the next generation as well.

On commit the next generation is begun, then the actions are finalized.
For the new rule this would mean clearing out the inactive bit for
the previously current, now next generation.

However nft_rule_clear() clears out the bits for *both* generations,
activating the rule in the current generation, where it should be
deactivated due to being deleted. The rule will thus be active until
the deletion is finalized, removing the rule from the ruleset.

Similarly, when aborting a transaction for the same case, the undo
of insertion will remove it from the RCU protected rule list, the
deletion will clear out all bits. However until the next RCU
synchronization after all operations have been undone, the rule is
active on CPUs which can still see the rule on the list.

Generally, there may never be any modifications of the current
generations' inactive bit since this defeats the entire purpose of
atomicity. Change nft_rule_clear() to only touch the next generations
bit to fix this.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a8c94620f20e..6fb532bf0fdb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -227,7 +227,7 @@ nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
 
 static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
 {
-	rule->genmask = 0;
+	rule->genmask &= ~(1 << gencursor_next(net));
 }
 
 static int
-- 
cgit v1.2.3


From 9889840f5988ecfd43b00c9abb83c1804e21406b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 Mar 2015 20:04:19 +0000
Subject: netfilter: nf_tables: check for overflow of rule dlen field

Check that the space required for the expressions doesn't exceed the
size of the dlen field, which would lead to the iterators crashing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6fb532bf0fdb..7baafd5ab520 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1968,6 +1968,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			n++;
 		}
 	}
+	/* Check for overflow of dlen field */
+	err = -EFBIG;
+	if (size >= 1 << 12)
+		goto err1;
 
 	if (nla[NFTA_RULE_USERDATA])
 		ulen = nla_len(nla[NFTA_RULE_USERDATA]);
-- 
cgit v1.2.3


From 86f1ec32318159a24de349f0a38e79b9d2b3131a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 3 Mar 2015 20:04:20 +0000
Subject: netfilter: nf_tables: fix userdata length overflow

The NFT_USERDATA_MAXLEN is defined to 256, however we only have a u8
to store its size. Introduce a struct nft_userdata which contains a
length field and indicate its presence using a single bit in the rule.

The length field of struct nft_userdata is also a u8, however we don't
store zero sized data, so the actual length is udata->len + 1.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 22 +++++++++++++++++++---
 net/netfilter/nf_tables_api.c     | 28 +++++++++++++++++++---------
 2 files changed, 38 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 9eaaa7884586..decb9a095ae7 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -119,6 +119,22 @@ int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
 			   const struct nft_data *data,
 			   enum nft_data_types type);
 
+
+/**
+ *	struct nft_userdata - user defined data associated with an object
+ *
+ *	@len: length of the data
+ *	@data: content
+ *
+ *	The presence of user data is indicated in an object specific fashion,
+ *	so a length of zero can't occur and the value "len" indicates data
+ *	of length len + 1.
+ */
+struct nft_userdata {
+	u8			len;
+	unsigned char		data[0];
+};
+
 /**
  *	struct nft_set_elem - generic representation of set elements
  *
@@ -380,7 +396,7 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
  *	@handle: rule handle
  *	@genmask: generation mask
  *	@dlen: length of expression data
- *	@ulen: length of user data (used for comments)
+ *	@udata: user data is appended to the rule
  *	@data: expression data
  */
 struct nft_rule {
@@ -388,7 +404,7 @@ struct nft_rule {
 	u64				handle:42,
 					genmask:2,
 					dlen:12,
-					ulen:8;
+					udata:1;
 	unsigned char			data[]
 		__attribute__((aligned(__alignof__(struct nft_expr))));
 };
@@ -476,7 +492,7 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
 	return (struct nft_expr *)&rule->data[rule->dlen];
 }
 
-static inline void *nft_userdata(const struct nft_rule *rule)
+static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
 {
 	return (void *)&rule->data[rule->dlen];
 }
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7baafd5ab520..74e4b876c96e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1711,9 +1711,12 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
 	}
 	nla_nest_end(skb, list);
 
-	if (rule->ulen &&
-	    nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule)))
-		goto nla_put_failure;
+	if (rule->udata) {
+		struct nft_userdata *udata = nft_userdata(rule);
+		if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1,
+			    udata->data) < 0)
+			goto nla_put_failure;
+	}
 
 	nlmsg_end(skb, nlh);
 	return 0;
@@ -1896,11 +1899,12 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *old_rule = NULL;
+	struct nft_userdata *udata;
 	struct nft_trans *trans = NULL;
 	struct nft_expr *expr;
 	struct nft_ctx ctx;
 	struct nlattr *tmp;
-	unsigned int size, i, n, ulen = 0;
+	unsigned int size, i, n, ulen = 0, usize = 0;
 	int err, rem;
 	bool create;
 	u64 handle, pos_handle;
@@ -1973,11 +1977,14 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	if (size >= 1 << 12)
 		goto err1;
 
-	if (nla[NFTA_RULE_USERDATA])
+	if (nla[NFTA_RULE_USERDATA]) {
 		ulen = nla_len(nla[NFTA_RULE_USERDATA]);
+		if (ulen > 0)
+			usize = sizeof(struct nft_userdata) + ulen;
+	}
 
 	err = -ENOMEM;
-	rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL);
+	rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
 	if (rule == NULL)
 		goto err1;
 
@@ -1985,10 +1992,13 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 
 	rule->handle = handle;
 	rule->dlen   = size;
-	rule->ulen   = ulen;
+	rule->udata  = ulen ? 1 : 0;
 
-	if (ulen)
-		nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen);
+	if (ulen) {
+		udata = nft_userdata(rule);
+		udata->len = ulen - 1;
+		nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen);
+	}
 
 	expr = nft_expr_first(rule);
 	for (i = 0; i < n; i++) {
-- 
cgit v1.2.3


From 59900e0a019e7c2bdb7809a03ed5742d311b15b3 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 4 Mar 2015 17:55:27 +0100
Subject: netfilter: nf_tables: fix error handling of rule replacement

In general, if a transaction object is added to the list successfully,
we can rely on the abort path to undo what we've done. This allows us to
simplify the error handling of the rule replacement path in
nf_tables_newrule().

This implicitly fixes an unnecessary removal of the old rule, which
needs to be left in place if we fail to replace.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 74e4b876c96e..6ab777912237 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2045,12 +2045,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 
 err3:
 	list_del_rcu(&rule->list);
-	if (trans) {
-		list_del_rcu(&nft_trans_rule(trans)->list);
-		nft_rule_clear(net, nft_trans_rule(trans));
-		nft_trans_destroy(trans);
-		chain->use++;
-	}
 err2:
 	nf_tables_rule_destroy(&ctx, rule);
 err1:
-- 
cgit v1.2.3


From 9145736d4862145684009d6a72a6e61324a9439e Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Tue, 3 Mar 2015 23:16:16 +0900
Subject: net: ping: Return EAFNOSUPPORT when appropriate.

1. For an IPv4 ping socket, ping_check_bind_addr does not check
   the family of the socket address that's passed in. Instead,
   make it behave like inet_bind, which enforces either that the
   address family is AF_INET, or that the family is AF_UNSPEC and
   the address is 0.0.0.0.
2. For an IPv6 ping socket, ping_check_bind_addr returns EINVAL
   if the socket family is not AF_INET6. Return EAFNOSUPPORT
   instead, for consistency with inet6_bind.
3. Make ping_v4_sendmsg and ping_v6_sendmsg return EAFNOSUPPORT
   instead of EINVAL if an incorrect socket address structure is
   passed in.
4. Make IPv6 ping sockets be IPv6-only. The code does not support
   IPv4, and it cannot easily be made to support IPv4 because
   the protocol numbers for ICMP and ICMPv6 are different. This
   makes connect(::ffff:192.0.2.1) fail with EAFNOSUPPORT instead
   of making the socket unusable.

Among other things, this fixes an oops that can be triggered by:

    int s = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
    struct sockaddr_in6 sin6 = {
        .sin6_family = AF_INET6,
        .sin6_addr = in6addr_any,
    };
    bind(s, (struct sockaddr *) &sin6, sizeof(sin6));

Change-Id: If06ca86d9f1e4593c0d6df174caca3487c57a241
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ping.c | 12 ++++++++++--
 net/ipv6/ping.c |  5 +++--
 2 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index e9f66e1cda50..208d5439e59b 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -259,6 +259,9 @@ int ping_init_sock(struct sock *sk)
 	kgid_t low, high;
 	int ret = 0;
 
+	if (sk->sk_family == AF_INET6)
+		sk->sk_ipv6only = 1;
+
 	inet_get_ping_group_range_net(net, &low, &high);
 	if (gid_lte(low, group) && gid_lte(group, high))
 		return 0;
@@ -305,6 +308,11 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
 		if (addr_len < sizeof(*addr))
 			return -EINVAL;
 
+		if (addr->sin_family != AF_INET &&
+		    !(addr->sin_family == AF_UNSPEC &&
+		      addr->sin_addr.s_addr == htonl(INADDR_ANY)))
+			return -EAFNOSUPPORT;
+
 		pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
 			 sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
 
@@ -330,7 +338,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
 			return -EINVAL;
 
 		if (addr->sin6_family != AF_INET6)
-			return -EINVAL;
+			return -EAFNOSUPPORT;
 
 		pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
 			 sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));
@@ -716,7 +724,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 		if (msg->msg_namelen < sizeof(*usin))
 			return -EINVAL;
 		if (usin->sin_family != AF_INET)
-			return -EINVAL;
+			return -EAFNOSUPPORT;
 		daddr = usin->sin_addr.s_addr;
 		/* no remote port */
 	} else {
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index bd46f736f61d..a2dfff6ff227 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -102,9 +102,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	if (msg->msg_name) {
 		DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name);
-		if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
-		    u->sin6_family != AF_INET6) {
+		if (msg->msg_namelen < sizeof(*u))
 			return -EINVAL;
+		if (u->sin6_family != AF_INET6) {
+			return -EAFNOSUPPORT;
 		}
 		if (sk->sk_bound_dev_if &&
 		    sk->sk_bound_dev_if != u->sin6_scope_id) {
-- 
cgit v1.2.3


From 3e32e733d1bbb3f227259dc782ef01d5706bdae0 Mon Sep 17 00:00:00 2001
From: Alexander Drozdov <al.drozdov@gmail.com>
Date: Thu, 5 Mar 2015 10:29:39 +0300
Subject: ipv4: ip_check_defrag should not assume that skb_network_offset is
 zero

ip_check_defrag() may be used by af_packet to defragment outgoing packets.
skb_network_offset() of af_packet's outgoing packets is not zero.

Signed-off-by: Alexander Drozdov <al.drozdov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2c8d98e728c0..145a50c4d566 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -659,27 +659,30 @@ EXPORT_SYMBOL(ip_defrag);
 struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
 {
 	struct iphdr iph;
+	int netoff;
 	u32 len;
 
 	if (skb->protocol != htons(ETH_P_IP))
 		return skb;
 
-	if (skb_copy_bits(skb, 0, &iph, sizeof(iph)) < 0)
+	netoff = skb_network_offset(skb);
+
+	if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0)
 		return skb;
 
 	if (iph.ihl < 5 || iph.version != 4)
 		return skb;
 
 	len = ntohs(iph.tot_len);
-	if (skb->len < len || len < (iph.ihl * 4))
+	if (skb->len < netoff + len || len < (iph.ihl * 4))
 		return skb;
 
 	if (ip_is_fragment(&iph)) {
 		skb = skb_share_check(skb, GFP_ATOMIC);
 		if (skb) {
-			if (!pskb_may_pull(skb, iph.ihl*4))
+			if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
 				return skb;
-			if (pskb_trim_rcsum(skb, len))
+			if (pskb_trim_rcsum(skb, netoff + len))
 				return skb;
 			memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 			if (ip_defrag(skb, user))
-- 
cgit v1.2.3


From 6c09fa09d468d730eecd7122122175da772d3b09 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 5 Mar 2015 08:03:06 -0800
Subject: tcp: align tcp_xmit_size_goal() on tcp_tso_autosize()

With some mss values, it is possible tcp_xmit_size_goal() puts
one segment more in TSO packet than tcp_tso_autosize().

We send then one TSO packet followed by one single MSS.

It is not a serious bug, but we can do slightly better, especially
for drivers using netif_set_gso_max_size() to lower gso_max_size.

Using same formula avoids these corner cases and makes
tcp_xmit_size_goal() a bit faster.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Fixes: 605ad7f184b6 ("tcp: refine TSO autosizing")
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9d72a0fcd928..995a2259bcfc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -835,17 +835,13 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 				       int large_allowed)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 new_size_goal, size_goal, hlen;
+	u32 new_size_goal, size_goal;
 
 	if (!large_allowed || !sk_can_gso(sk))
 		return mss_now;
 
-	/* Maybe we should/could use sk->sk_prot->max_header here ? */
-	hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
-	       inet_csk(sk)->icsk_ext_hdr_len +
-	       tp->tcp_header_len;
-
-	new_size_goal = sk->sk_gso_max_size - 1 - hlen;
+	/* Note : tcp_tso_autosize() will eventually split this later */
+	new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
 	new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
 
 	/* We try hard to avoid divides here */
-- 
cgit v1.2.3


From c247f0534cc5a5a547a343903f42295a471844e2 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sat, 7 Mar 2015 20:33:22 -0500
Subject: ip: fix error queue empty skb handling

When reading from the error queue, msg_name and msg_control are only
populated for some errors. A new exception for empty timestamp skbs
added a false positive on icmp errors without payload.

`traceroute -M udpconn` only displayed gateways that return payload
with the icmp error: the embedded network headers are pulled before
sock_queue_err_skb, leaving an skb with skb->len == 0 otherwise.

Fix this regression by refining when msg_name and msg_control
branches are taken. The solutions for the two fields are independent.

msg_name only makes sense for errors that configure serr->port and
serr->addr_offset. Test the first instead of skb->len. This also fixes
another issue. saddr could hold the wrong data, as serr->addr_offset
is not initialized  in some code paths, pointing to the start of the
network header. It is only valid when serr->port is set (non-zero).

msg_control support differs between IPv4 and IPv6. IPv4 only honors
requests for ICMP and timestamps with SOF_TIMESTAMPING_OPT_CMSG. The
skb->len test can simply be removed, because skb->dev is also tested
and never true for empty skbs. IPv6 honors requests for all errors
aside from local errors and timestamps on empty skbs.

In both cases, make the policy more explicit by moving this logic to
a new function that decides whether to process msg_control and that
optionally prepares the necessary fields in skb->cb[]. After this
change, the IPv4 and IPv6 paths are more similar.

The last case is rxrpc. Here, simply refine to only match timestamps.

Fixes: 49ca0d8bfaf3 ("net-timestamp: no-payload option")

Reported-by: Jan Niehusmann <jan@gondor.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>

----

Changes
  v1->v2
  - fix local origin test inversion in ip6_datagram_support_cmsg
  - make v4 and v6 code paths more similar by introducing analogous
    ipv4_datagram_support_cmsg
  - fix compile bug in rxrpc
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 33 +++++++++++++++++++++++----------
 net/ipv6/datagram.c    | 39 ++++++++++++++++++++++++++++-----------
 net/rxrpc/ar-error.c   |  4 ++--
 3 files changed, 53 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 31d8c71986b4..5cd99271d3a6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -432,17 +432,32 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 		kfree_skb(skb);
 }
 
-static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk,
-					  const struct sk_buff *skb,
-					  int ee_origin)
+/* IPv4 supports cmsg on all imcp errors and some timestamps
+ *
+ * Timestamp code paths do not initialize the fields expected by cmsg:
+ * the PKTINFO fields in skb->cb[]. Fill those in here.
+ */
+static bool ipv4_datagram_support_cmsg(const struct sock *sk,
+				       struct sk_buff *skb,
+				       int ee_origin)
 {
-	struct in_pktinfo *info = PKTINFO_SKB_CB(skb);
+	struct in_pktinfo *info;
+
+	if (ee_origin == SO_EE_ORIGIN_ICMP)
+		return true;
 
-	if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) ||
-	    (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
+	if (ee_origin == SO_EE_ORIGIN_LOCAL)
+		return false;
+
+	/* Support IP_PKTINFO on tstamp packets if requested, to correlate
+	 * timestamp with egress dev. Not possible for packets without dev
+	 * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
+	 */
+	if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
 	    (!skb->dev))
 		return false;
 
+	info = PKTINFO_SKB_CB(skb);
 	info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
 	info->ipi_ifindex = skb->dev->ifindex;
 	return true;
@@ -483,7 +498,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	if (sin && skb->len) {
+	if (sin && serr->port) {
 		sin->sin_family = AF_INET;
 		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
 						   serr->addr_offset);
@@ -496,9 +511,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 	sin = &errhdr.offender;
 	memset(sin, 0, sizeof(*sin));
 
-	if (skb->len &&
-	    (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
-	     ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
+	if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
 		sin->sin_family = AF_INET;
 		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		if (inet_sk(sk)->cmsg_flags)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c215be70cac0..ace8daca5c83 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -325,14 +325,34 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 	kfree_skb(skb);
 }
 
-static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb)
+/* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL.
+ *
+ * At one point, excluding local errors was a quick test to identify icmp/icmp6
+ * errors. This is no longer true, but the test remained, so the v6 stack,
+ * unlike v4, also honors cmsg requests on all wifi and timestamp errors.
+ *
+ * Timestamp code paths do not initialize the fields expected by cmsg:
+ * the PKTINFO fields in skb->cb[]. Fill those in here.
+ */
+static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
+				      struct sock_exterr_skb *serr)
 {
-	int ifindex = skb->dev ? skb->dev->ifindex : -1;
+	if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+	    serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6)
+		return true;
+
+	if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
+		return false;
+
+	if (!skb->dev)
+		return false;
 
 	if (skb->protocol == htons(ETH_P_IPV6))
-		IP6CB(skb)->iif = ifindex;
+		IP6CB(skb)->iif = skb->dev->ifindex;
 	else
-		PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex;
+		PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
+
+	return true;
 }
 
 /*
@@ -369,7 +389,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	if (sin && skb->len) {
+	if (sin && serr->port) {
 		const unsigned char *nh = skb_network_header(skb);
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
@@ -394,14 +414,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
 	sin = &errhdr.offender;
 	memset(sin, 0, sizeof(*sin));
-	if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) {
+
+	if (ip6_datagram_support_cmsg(skb, serr)) {
 		sin->sin6_family = AF_INET6;
-		if (np->rxopt.all) {
-			if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
-			    serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6)
-				ip6_datagram_prepare_pktinfo_errqueue(skb);
+		if (np->rxopt.all)
 			ip6_datagram_recv_common_ctl(sk, msg, skb);
-		}
 		if (skb->protocol == htons(ETH_P_IPV6)) {
 			sin->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (np->rxopt.all)
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index 5394b6be46ec..0610efa83d72 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -42,7 +42,8 @@ void rxrpc_UDP_error_report(struct sock *sk)
 		_leave("UDP socket errqueue empty");
 		return;
 	}
-	if (!skb->len) {
+	serr = SKB_EXT_ERR(skb);
+	if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
 		_leave("UDP empty message");
 		kfree_skb(skb);
 		return;
@@ -50,7 +51,6 @@ void rxrpc_UDP_error_report(struct sock *sk)
 
 	rxrpc_new_skb(skb);
 
-	serr = SKB_EXT_ERR(skb);
 	addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset);
 	port = serr->port;
 
-- 
cgit v1.2.3


From 969439016d2cf61fef53a973d7e6d2061c3793b1 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 23 Feb 2015 20:37:54 +0100
Subject: can: add missing initialisations in CAN related skbuffs

When accessing CAN network interfaces with AF_PACKET sockets e.g. by dhclient
this can lead to a skb_under_panic due to missing skb initialisations.

Add the missing initialisations at the CAN skbuff creation times on driver
level (rx path) and in the network layer (tx path).

Reported-by: Austin Schuh <austin@peloton-tech.com>
Reported-by: Daniel Steer <daniel.steer@mclaren.com>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c | 8 ++++++++
 net/can/af_can.c      | 3 +++
 2 files changed, 11 insertions(+)

(limited to 'net')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 3c82e02e3dae..b0f69248cb71 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -579,6 +579,10 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+
 	can_skb_reserve(skb);
 	can_skb_prv(skb)->ifindex = dev->ifindex;
 
@@ -603,6 +607,10 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev,
 	skb->pkt_type = PACKET_BROADCAST;
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+	skb_reset_mac_header(skb);
+	skb_reset_network_header(skb);
+	skb_reset_transport_header(skb);
+
 	can_skb_reserve(skb);
 	can_skb_prv(skb)->ifindex = dev->ifindex;
 
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 66e08040ced7..32d710eaf1fc 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -259,6 +259,9 @@ int can_send(struct sk_buff *skb, int loop)
 		goto inval_skb;
 	}
 
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 
-- 
cgit v1.2.3


From 82f17091e68254d1612b42cf23291cad63cfaf04 Mon Sep 17 00:00:00 2001
From: Francesco Ruggeri <fruggeri@arista.com>
Date: Mon, 9 Mar 2015 11:51:04 -0700
Subject: net: delete stale packet_mclist entries

When an interface is deleted from a net namespace the ifindex in the
corresponding entries in PF_PACKET sockets' mclists becomes stale.
This can create inconsistencies if later an interface with the same ifindex
is moved from a different namespace (not that unlikely since ifindexes are
per-namespace).
In particular we saw problems with dev->promiscuity, resulting
in "promiscuity touches roof, set promiscuity failed. promiscuity
feature of device might be broken" warnings and EOVERFLOW failures of
setsockopt(PACKET_ADD_MEMBERSHIP).
This patch deletes the mclist entries for interfaces that are deleted.
Since this now causes setsockopt(PACKET_DROP_MEMBERSHIP) to fail with
EADDRNOTAVAIL if called after the interface is deleted, also make
packet_mc_drop not fail.

Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5bf1e968a728..f8db7064d81c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3123,11 +3123,18 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 	return 0;
 }
 
-static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
+static void packet_dev_mclist_delete(struct net_device *dev,
+				     struct packet_mclist **mlp)
 {
-	for ( ; i; i = i->next) {
-		if (i->ifindex == dev->ifindex)
-			packet_dev_mc(dev, i, what);
+	struct packet_mclist *ml;
+
+	while ((ml = *mlp) != NULL) {
+		if (ml->ifindex == dev->ifindex) {
+			packet_dev_mc(dev, ml, -1);
+			*mlp = ml->next;
+			kfree(ml);
+		} else
+			mlp = &ml->next;
 	}
 }
 
@@ -3204,12 +3211,11 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
 					packet_dev_mc(dev, ml, -1);
 				kfree(ml);
 			}
-			rtnl_unlock();
-			return 0;
+			break;
 		}
 	}
 	rtnl_unlock();
-	return -EADDRNOTAVAIL;
+	return 0;
 }
 
 static void packet_flush_mclist(struct sock *sk)
@@ -3559,7 +3565,7 @@ static int packet_notifier(struct notifier_block *this,
 		switch (msg) {
 		case NETDEV_UNREGISTER:
 			if (po->mclist)
-				packet_dev_mclist(dev, po->mclist, -1);
+				packet_dev_mclist_delete(dev, &po->mclist);
 			/* fallthrough */
 
 		case NETDEV_DOWN:
-- 
cgit v1.2.3


From e6441bae326271090755e1707196ad05aa1dc703 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Mon, 9 Mar 2015 16:16:22 -0400
Subject: tipc: fix bug in link failover handling

In commit c637c1035534867b85b78b453c38c495b58e2c5a
("tipc: resolve race problem at unicast message reception") we
introduced a new mechanism for delivering buffers upwards from link
to socket layer.

That code contains a bug in how we handle the new link input queue
during failover. When a link is reset, some of its users may be blocked
because of congestion, and in order to resolve this, we add any pending
wakeup pseudo messages to the link's input queue, and deliver them to
the socket. This misses the case where the other, remaining link also
may have congested users. Currently, the owner node's reference to the
remaining link's input queue is unconditionally overwritten by the
reset link's input queue. This has the effect that wakeup events from
the remaining link may be unduely delayed (but not lost) for a
potentially long period.

We fix this by adding the pending events from the reset link to the
input queue that is currently referenced by the node, whichever one
it is.

This commit should be applied to both net and net-next.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index a4cf364316de..14f09b3cb87c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -464,10 +464,11 @@ void tipc_link_reset(struct tipc_link *l_ptr)
 	/* Clean up all queues, except inputq: */
 	__skb_queue_purge(&l_ptr->outqueue);
 	__skb_queue_purge(&l_ptr->deferred_queue);
-	skb_queue_splice_init(&l_ptr->wakeupq, &l_ptr->inputq);
-	if (!skb_queue_empty(&l_ptr->inputq))
+	if (!owner->inputq)
+		owner->inputq = &l_ptr->inputq;
+	skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq);
+	if (!skb_queue_empty(owner->inputq))
 		owner->action_flags |= TIPC_MSG_EVT;
-	owner->inputq = &l_ptr->inputq;
 	l_ptr->next_out = NULL;
 	l_ptr->unacked_window = 0;
 	l_ptr->checkpoint = 1;
-- 
cgit v1.2.3


From 5778d39d070b4ac5f889928175b7f2d53ae7504e Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 9 Mar 2015 17:03:40 -0700
Subject: net_sched: fix struct tc_u_hnode layout in u32

We dynamically allocate divisor+1 entries for ->ht[] in tc_u_hnode:

  ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);

So ->ht is supposed to be the last field of this struct, however
this is broken, since an rcu head is appended after it.

Fixes: 1ce87720d456 ("net: sched: make cls_u32 lockless")
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_u32.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 09487afbfd51..95fdf4e40051 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -78,8 +78,11 @@ struct tc_u_hnode {
 	struct tc_u_common	*tp_c;
 	int			refcnt;
 	unsigned int		divisor;
-	struct tc_u_knode __rcu	*ht[1];
 	struct rcu_head		rcu;
+	/* The 'ht' field MUST be the last field in structure to allow for
+	 * more entries allocated at end of structure.
+	 */
+	struct tc_u_knode __rcu	*ht[1];
 };
 
 struct tc_u_common {
-- 
cgit v1.2.3


From 7768eed8bf1d2e5eefa38c573f15f737a9824052 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 10 Mar 2015 19:03:46 +0100
Subject: net: add comment for sock_efree() usage

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Acked-by: Alexander Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 93c8b20c91e4..78e89eb7eb70 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1655,6 +1655,10 @@ void sock_rfree(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_rfree);
 
+/*
+ * Buffer destructor for skbs that are not used directly in read or write
+ * path, e.g. for error handler skbs. Automatically called from kfree_skb.
+ */
 void sock_efree(struct sk_buff *skb)
 {
 	sock_put(skb->sk);
-- 
cgit v1.2.3


From 4363890079674db7b00cf1bb0e6fa430e846e86b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 10 Mar 2015 21:58:32 -0400
Subject: net: Handle unregister properly when netdev namespace change fails.

If rtnl_newlink() fails on it's call to dev_change_net_namespace(), we
have to make use of the ->dellink() method, if present, just like we
do when rtnl_configure_link() fails.

Fixes: 317f4810e45e ("rtnl: allow to create device with IFLA_LINK_NETNSID set")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 25b4b5d23485..ee0608bb3bc0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2166,28 +2166,28 @@ replay:
 			}
 		}
 		err = rtnl_configure_link(dev, ifm);
-		if (err < 0) {
-			if (ops->newlink) {
-				LIST_HEAD(list_kill);
-
-				ops->dellink(dev, &list_kill);
-				unregister_netdevice_many(&list_kill);
-			} else {
-				unregister_netdevice(dev);
-			}
-			goto out;
-		}
-
+		if (err < 0)
+			goto out_unregister;
 		if (link_net) {
 			err = dev_change_net_namespace(dev, dest_net, ifname);
 			if (err < 0)
-				unregister_netdevice(dev);
+				goto out_unregister;
 		}
 out:
 		if (link_net)
 			put_net(link_net);
 		put_net(dest_net);
 		return err;
+out_unregister:
+		if (ops->newlink) {
+			LIST_HEAD(list_kill);
+
+			ops->dellink(dev, &list_kill);
+			unregister_netdevice_many(&list_kill);
+		} else {
+			unregister_netdevice(dev);
+		}
+		goto out;
 	}
 }
 
-- 
cgit v1.2.3


From 9949afa42be0b76f5832db112ce51bb6b35b2abb Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 10 Mar 2015 17:17:03 -0400
Subject: tcp: fix tcp_cong_avoid_ai() credit accumulation bug with decreases
 in w

The recent change to tcp_cong_avoid_ai() to handle stretch ACKs
introduced a bug where snd_cwnd_cnt could accumulate a very large
value while w was large, and then if w was reduced snd_cwnd could be
incremented by a large delta, leading to a large burst and high packet
loss. This was tickled when CUBIC's bictcp_update() sets "ca->cnt =
100 * cwnd".

This bug crept in while preparing the upstream version of
814d488c6126.

Testing: This patch has been tested in datacenter netperf transfers
and live youtube.com and google.com servers.

Fixes: 814d488c6126 ("tcp: fix the timid additive increase on stretch ACKs")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cong.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index d694088214cd..62856e185a93 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -378,6 +378,12 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
  */
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
 {
+	/* If credits accumulated at a higher w, apply them gently now. */
+	if (tp->snd_cwnd_cnt >= w) {
+		tp->snd_cwnd_cnt = 0;
+		tp->snd_cwnd++;
+	}
+
 	tp->snd_cwnd_cnt += acked;
 	if (tp->snd_cwnd_cnt >= w) {
 		u32 delta = tp->snd_cwnd_cnt / w;
-- 
cgit v1.2.3


From d578e18ce93f5d33a7120fd57c453e22a4c0fc37 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Tue, 10 Mar 2015 17:17:04 -0400
Subject: tcp: restore 1.5x per RTT limit to CUBIC cwnd growth in congestion
 avoidance

Commit 814d488c6126 ("tcp: fix the timid additive increase on stretch
ACKs") fixed a bug where tcp_cong_avoid_ai() would either credit a
connection with an increase of snd_cwnd_cnt, or increase snd_cwnd, but
not both, resulting in cwnd increasing by 1 packet on at most every
alternate invocation of tcp_cong_avoid_ai().

Although the commit correctly implemented the CUBIC algorithm, which
can increase cwnd by as much as 1 packet per 1 packet ACKed (2x per
RTT), in practice that could be too aggressive: in tests on network
paths with small buffers, YouTube server retransmission rates nearly
doubled.

This commit restores CUBIC to a maximum cwnd growth rate of 1 packet
per 2 packets ACKed (1.5x per RTT). In YouTube tests this restored
retransmit rates to low levels.

Testing: This patch has been tested in datacenter netperf transfers
and live youtube.com and google.com servers.

Fixes: 9cd981dcf174 ("tcp: fix stretch ACK bugs in CUBIC")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 4b276d1ed980..06d3d665a9fd 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -306,8 +306,10 @@ tcp_friendliness:
 		}
 	}
 
-	if (ca->cnt == 0)			/* cannot be zero */
-		ca->cnt = 1;
+	/* The maximum rate of cwnd increase CUBIC allows is 1 packet per
+	 * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT.
+	 */
+	ca->cnt = max(ca->cnt, 2U);
 }
 
 static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
-- 
cgit v1.2.3


From b1cb59cf2efe7971d3d72a7b963d09a512d994c9 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Wed, 11 Mar 2015 14:29:17 +0300
Subject: net: sysctl_net_core: check SNDBUF and RCVBUF for min length

sysctl has sysctl.net.core.rmem_*/wmem_* parameters which can be
set to incorrect values. Given that 'struct sk_buff' allocates from
rcvbuf, incorrectly set buffer length could result to memory
allocation failures. For example, set them as follows:

    # sysctl net.core.rmem_default=64
      net.core.wmem_default = 64
    # sysctl net.core.wmem_default=64
      net.core.wmem_default = 64
    # ping localhost -s 1024 -i 0 > /dev/null

This could result to the following failure:

skbuff: skb_over_panic: text:ffffffff81628db4 len:-32 put:-32
head:ffff88003a1cc200 data:ffff88003a1cc200 tail:0xffffffe0 end:0xc0 dev:<NULL>
kernel BUG at net/core/skbuff.c:102!
invalid opcode: 0000 [#1] SMP
...
task: ffff88003b7f5550 ti: ffff88003ae88000 task.ti: ffff88003ae88000
RIP: 0010:[<ffffffff8155fbd1>]  [<ffffffff8155fbd1>] skb_put+0xa1/0xb0
RSP: 0018:ffff88003ae8bc68  EFLAGS: 00010296
RAX: 000000000000008d RBX: 00000000ffffffe0 RCX: 0000000000000000
RDX: ffff88003fdcf598 RSI: ffff88003fdcd9c8 RDI: ffff88003fdcd9c8
RBP: ffff88003ae8bc88 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000001 R11: 00000000000002b2 R12: 0000000000000000
R13: 0000000000000000 R14: ffff88003d3f7300 R15: ffff88000012a900
FS:  00007fa0e2b4a840(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000d0f7e0 CR3: 000000003b8fb000 CR4: 00000000000006f0
Stack:
 ffff88003a1cc200 00000000ffffffe0 00000000000000c0 ffffffff818cab1d
 ffff88003ae8bd68 ffffffff81628db4 ffff88003ae8bd48 ffff88003b7f5550
 ffff880031a09408 ffff88003b7f5550 ffff88000012aa48 ffff88000012ab00
Call Trace:
 [<ffffffff81628db4>] unix_stream_sendmsg+0x2c4/0x470
 [<ffffffff81556f56>] sock_write_iter+0x146/0x160
 [<ffffffff811d9612>] new_sync_write+0x92/0xd0
 [<ffffffff811d9cd6>] vfs_write+0xd6/0x180
 [<ffffffff811da499>] SyS_write+0x59/0xd0
 [<ffffffff81651532>] system_call_fastpath+0x12/0x17
Code: 00 00 48 89 44 24 10 8b 87 c8 00 00 00 48 89 44 24 08 48 8b 87 d8 00
      00 00 48 c7 c7 30 db 91 81 48 89 04 24 31 c0 e8 4f a8 0e 00 <0f> 0b
      eb fe 66 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 48 83
RIP  [<ffffffff8155fbd1>] skb_put+0xa1/0xb0
RSP <ffff88003ae8bc68>
Kernel panic - not syncing: Fatal exception

Moreover, the possible minimum is 1, so we can get another kernel panic:
...
BUG: unable to handle kernel paging request at ffff88013caee5c0
IP: [<ffffffff815604cf>] __alloc_skb+0x12f/0x1f0
...

Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sysctl_net_core.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 433424804284..8ce351ffceb1 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -25,6 +25,8 @@
 static int zero = 0;
 static int one = 1;
 static int ushort_max = USHRT_MAX;
+static int min_sndbuf = SOCK_MIN_SNDBUF;
+static int min_rcvbuf = SOCK_MIN_RCVBUF;
 
 static int net_msg_warn;	/* Unused, but still a sysctl */
 
@@ -237,7 +239,7 @@ static struct ctl_table net_core_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= &min_sndbuf,
 	},
 	{
 		.procname	= "rmem_max",
@@ -245,7 +247,7 @@ static struct ctl_table net_core_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= &min_rcvbuf,
 	},
 	{
 		.procname	= "wmem_default",
@@ -253,7 +255,7 @@ static struct ctl_table net_core_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= &min_sndbuf,
 	},
 	{
 		.procname	= "rmem_default",
@@ -261,7 +263,7 @@ static struct ctl_table net_core_table[] = {
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
+		.extra1		= &min_rcvbuf,
 	},
 	{
 		.procname	= "dev_weight",
-- 
cgit v1.2.3


From c29390c6dfeee0944ac6b5610ebbe403944378fc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 11 Mar 2015 18:42:02 -0700
Subject: xps: must clear sender_cpu before forwarding

John reported that my previous commit added a regression
on his router.

This is because sender_cpu & napi_id share a common location,
so get_xps_queue() can see garbage and perform an out of bound access.

We need to make sure sender_cpu is cleared before doing the transmit,
otherwise any NIC busy poll enabled (skb_mark_napi_id()) can trigger
this bug.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: John <jw@nuclearfallout.net>
Bisected-by: John <jw@nuclearfallout.net>
Fixes: 2bd82484bb4c ("xps: fix xps for stacked devices")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 +++++++
 net/core/skbuff.c      | 2 +-
 net/ipv4/ip_forward.c  | 1 +
 net/ipv6/ip6_output.c  | 1 +
 4 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 30007afe70b3..f54d6659713a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -948,6 +948,13 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
 	to->l4_hash = from->l4_hash;
 };
 
+static inline void skb_sender_cpu_clear(struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	skb->sender_cpu = 0;
+#endif
+}
+
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f80507823531..434e78e5254d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4173,7 +4173,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
-	skb->sender_cpu = 0;
+	skb_sender_cpu_clear(skb);
 	skb_init_secmark(skb);
 	secpath_reset(skb);
 	nf_reset(skb);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 787b3c294ce6..d9bc28ac5d1b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,6 +67,7 @@ static int ip_forward_finish(struct sk_buff *skb)
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
+	skb_sender_cpu_clear(skb);
 	return dst_output(skb);
 }
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0a04a37305d5..7e80b61b51ff 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -318,6 +318,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
+	skb_sender_cpu_clear(skb);
 	return dst_output(skb);
 }
 
-- 
cgit v1.2.3


From 3a8dd9711e0792f64394edafadd66c2d1f1904df Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 11 Mar 2015 15:43:55 -0400
Subject: sock: fix possible NULL sk dereference in __skb_tstamp_tx

Test that sk != NULL before reading sk->sk_tsflags.

Fixes: 49ca0d8bfaf3 ("net-timestamp: no-payload option")
Reported-by: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 434e78e5254d..8e4ac97c8477 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3733,9 +3733,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 		     struct sock *sk, int tstype)
 {
 	struct sk_buff *skb;
-	bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+	bool tsonly;
 
-	if (!sk || !skb_may_tx_timestamp(sk, tsonly))
+	if (!sk)
+		return;
+
+	tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+	if (!skb_may_tx_timestamp(sk, tsonly))
 		return;
 
 	if (tsonly)
-- 
cgit v1.2.3


From f862e07cf95d5b62a5fc5e981dd7d0dbaf33a501 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Mar 2015 22:46:59 +0100
Subject: rds: avoid potential stack overflow

The rds_iw_update_cm_id function stores a large 'struct rds_sock' object
on the stack in order to pass a pair of addresses. This happens to just
fit withint the 1024 byte stack size warning limit on x86, but just
exceed that limit on ARM, which gives us this warning:

net/rds/iw_rdma.c:200:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=]

As the use of this large variable is basically bogus, we can rearrange
the code to not do that. Instead of passing an rds socket into
rds_iw_get_device, we now just pass the two addresses that we have
available in rds_iw_update_cm_id, and we change rds_iw_get_mr accordingly,
to create two address structures on the stack there.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/iw_rdma.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index a817705ce2d0..dba8d0864f18 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -88,7 +88,9 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
 			int *unpinned);
 static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
 
-static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id)
+static int rds_iw_get_device(struct sockaddr_in *src, struct sockaddr_in *dst,
+			     struct rds_iw_device **rds_iwdev,
+			     struct rdma_cm_id **cm_id)
 {
 	struct rds_iw_device *iwdev;
 	struct rds_iw_cm_id *i_cm_id;
@@ -112,15 +114,15 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd
 				src_addr->sin_port,
 				dst_addr->sin_addr.s_addr,
 				dst_addr->sin_port,
-				rs->rs_bound_addr,
-				rs->rs_bound_port,
-				rs->rs_conn_addr,
-				rs->rs_conn_port);
+				src->sin_addr.s_addr,
+				src->sin_port,
+				dst->sin_addr.s_addr,
+				dst->sin_port);
 #ifdef WORKING_TUPLE_DETECTION
-			if (src_addr->sin_addr.s_addr == rs->rs_bound_addr &&
-			    src_addr->sin_port == rs->rs_bound_port &&
-			    dst_addr->sin_addr.s_addr == rs->rs_conn_addr &&
-			    dst_addr->sin_port == rs->rs_conn_port) {
+			if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr &&
+			    src_addr->sin_port == src->sin_port &&
+			    dst_addr->sin_addr.s_addr == dst->sin_addr.s_addr &&
+			    dst_addr->sin_port == dst->sin_port) {
 #else
 			/* FIXME - needs to compare the local and remote
 			 * ipaddr/port tuple, but the ipaddr is the only
@@ -128,7 +130,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd
 			 * zero'ed.  It doesn't appear to be properly populated
 			 * during connection setup...
 			 */
-			if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) {
+			if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr) {
 #endif
 				spin_unlock_irq(&iwdev->spinlock);
 				*rds_iwdev = iwdev;
@@ -180,19 +182,13 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i
 {
 	struct sockaddr_in *src_addr, *dst_addr;
 	struct rds_iw_device *rds_iwdev_old;
-	struct rds_sock rs;
 	struct rdma_cm_id *pcm_id;
 	int rc;
 
 	src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
 	dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
 
-	rs.rs_bound_addr = src_addr->sin_addr.s_addr;
-	rs.rs_bound_port = src_addr->sin_port;
-	rs.rs_conn_addr = dst_addr->sin_addr.s_addr;
-	rs.rs_conn_port = dst_addr->sin_port;
-
-	rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id);
+	rc = rds_iw_get_device(src_addr, dst_addr, &rds_iwdev_old, &pcm_id);
 	if (rc)
 		rds_iw_remove_cm_id(rds_iwdev, cm_id);
 
@@ -598,9 +594,17 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
 	struct rds_iw_device *rds_iwdev;
 	struct rds_iw_mr *ibmr = NULL;
 	struct rdma_cm_id *cm_id;
+	struct sockaddr_in src = {
+		.sin_addr.s_addr = rs->rs_bound_addr,
+		.sin_port = rs->rs_bound_port,
+	};
+	struct sockaddr_in dst = {
+		.sin_addr.s_addr = rs->rs_conn_addr,
+		.sin_port = rs->rs_conn_port,
+	};
 	int ret;
 
-	ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id);
+	ret = rds_iw_get_device(&src, &dst, &rds_iwdev, &cm_id);
 	if (ret || !cm_id) {
 		ret = -ENODEV;
 		goto out;
-- 
cgit v1.2.3


From 8051a2a518fcf3827a143470083ad6008697ff17 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 12 Mar 2015 11:53:41 +1030
Subject: 9p/trans_virtio: fix hot-unplug

On device hot-unplug, 9p/virtio currently will kfree channel while
it might still be in use.

Of course, it might stay used forever, so it's an extremely ugly hack,
but it seems better than use-after-free that we have now.

[ Unused variable removed, whitespace cleanup, msg single-lined --RR ]
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 net/9p/trans_virtio.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index d8e376a5f0f1..36a1a739ad68 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -658,14 +658,30 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 static void p9_virtio_remove(struct virtio_device *vdev)
 {
 	struct virtio_chan *chan = vdev->priv;
-
-	if (chan->inuse)
-		p9_virtio_close(chan->client);
-	vdev->config->del_vqs(vdev);
+	unsigned long warning_time;
 
 	mutex_lock(&virtio_9p_lock);
+
+	/* Remove self from list so we don't get new users. */
 	list_del(&chan->chan_list);
+	warning_time = jiffies;
+
+	/* Wait for existing users to close. */
+	while (chan->inuse) {
+		mutex_unlock(&virtio_9p_lock);
+		msleep(250);
+		if (time_after(jiffies, warning_time + 10 * HZ)) {
+			dev_emerg(&vdev->dev,
+				  "p9_virtio_remove: waiting for device in use.\n");
+			warning_time = jiffies;
+		}
+		mutex_lock(&virtio_9p_lock);
+	}
+
 	mutex_unlock(&virtio_9p_lock);
+
+	vdev->config->del_vqs(vdev);
+
 	sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
 	kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
 	kfree(chan->tag);
-- 
cgit v1.2.3


From c8e2c80d7ec00d020320f905822bf49c5ad85250 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 13 Mar 2015 09:49:59 -0700
Subject: inet_diag: fix possible overflow in inet_diag_dump_one_icsk()

inet_diag_dump_one_icsk() allocates too small skb.

Add inet_sk_attr_size() helper right before inet_sk_diag_fill()
so that it can be updated if/when new attributes are added.

iproute2/ss currently does not use this dump_one() interface,
this might explain nobody noticed this problem yet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 81751f12645f..592aff37366b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -71,6 +71,20 @@ static inline void inet_diag_unlock_handler(
 	mutex_unlock(&inet_diag_table_mutex);
 }
 
+static size_t inet_sk_attr_size(void)
+{
+	return	  nla_total_size(sizeof(struct tcp_info))
+		+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+		+ nla_total_size(1) /* INET_DIAG_TOS */
+		+ nla_total_size(1) /* INET_DIAG_TCLASS */
+		+ nla_total_size(sizeof(struct inet_diag_meminfo))
+		+ nla_total_size(sizeof(struct inet_diag_msg))
+		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+		+ nla_total_size(TCP_CA_NAME_MAX)
+		+ nla_total_size(sizeof(struct tcpvegas_info))
+		+ 64;
+}
+
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      struct user_namespace *user_ns,		      	
@@ -326,9 +340,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
 	if (err)
 		goto out;
 
-	rep = nlmsg_new(sizeof(struct inet_diag_msg) +
-			sizeof(struct inet_diag_meminfo) +
-			sizeof(struct tcp_info) + 64, GFP_KERNEL);
+	rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
 	if (!rep) {
 		err = -ENOMEM;
 		goto out;
-- 
cgit v1.2.3


From 4c906c279886550d2aaac6facf71d709158e4e3c Mon Sep 17 00:00:00 2001
From: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Date: Fri, 13 Mar 2015 07:08:22 -0700
Subject: bridge: reset bridge mtu after deleting an interface

On adding an interface br_add_if() sets the MTU to the min of
all the interfaces. Do the same thing on removing an interface too
in br_del_if.

Signed-off-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index b087d278c679..1849d96b3c91 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -563,6 +563,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
 	 */
 	del_nbp(p);
 
+	dev_set_mtu(br->dev, br_min_mtu(br));
+
 	spin_lock_bh(&br->lock);
 	changed_addr = br_stp_recalculate_bridge_id(br);
 	spin_unlock_bh(&br->lock);
-- 
cgit v1.2.3


From 3eeff778e00c956875c70b145c52638c313dfb23 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Sat, 14 Mar 2015 05:22:21 +0000
Subject: caif: fix MSG_OOB test in caif_seqpkt_recvmsg()

It should be checking flags, not msg->msg_flags.  It's ->sendmsg()
instances that need to look for that in ->msg_flags, ->recvmsg() ones
(including the other ->recvmsg() instance in that file, as well as
unix_dgram_recvmsg() this one claims to be imitating) check in flags.
Braino had been introduced in commit dcda13 ("caif: Bugfix - use MSG_TRUNC
in receive") back in 2010, so it goes quite a while back.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 769b185fefbd..a6e2da0bc718 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -281,7 +281,7 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
 	int copylen;
 
 	ret = -EOPNOTSUPP;
-	if (m->msg_flags&MSG_OOB)
+	if (flags & MSG_OOB)
 		goto read_error;
 
 	skb = skb_recv_datagram(sk, flags, 0 , &ret);
-- 
cgit v1.2.3


From 7d985ed1dca5c90535d67ce92ef6ca520302340a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Sat, 14 Mar 2015 05:34:56 +0000
Subject: rxrpc: bogus MSG_PEEK test in rxrpc_recvmsg()

[I would really like an ACK on that one from dhowells; it appears to be
quite straightforward, but...]

MSG_PEEK isn't passed to ->recvmsg() via msg->msg_flags; as the matter of
fact, neither the kernel users of rxrpc, nor the syscalls ever set that bit
in there.  It gets passed via flags; in fact, another such check in the same
function is done correctly - as flags & MSG_PEEK.

It had been that way (effectively disabled) for 8 years, though, so the patch
needs beating up - that case had never been tested.  If it is correct, it's
-stable fodder.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-recvmsg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 4575485ad1b4..19a560626dc4 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -87,7 +87,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 		if (!skb) {
 			/* nothing remains on the queue */
 			if (copied &&
-			    (msg->msg_flags & MSG_PEEK || timeo == 0))
+			    (flags & MSG_PEEK || timeo == 0))
 				goto out;
 
 			/* wait for a message to turn up */
-- 
cgit v1.2.3


From 0f611d28fc2e13cfec64e1c544c16a086886805a Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Thu, 12 Mar 2015 08:53:30 +0200
Subject: mac80211: count interfaces correctly for combination checks

Since moving the interface combination checks to mac80211, it's
broken because it now only considers interfaces with an assigned
channel context, so for example any interface that isn't active
can still be up, which is clearly an issue; also, in particular
P2P-Device wdevs are an issue since they never have a chanctx.

Fix this by counting running interfaces instead the ones with a
channel context assigned.

Cc: stable@vger.kernel.org [3.16+]
Fixes: 73de86a38962b ("cfg80211/mac80211: move interface counting for combination check to mac80211")
Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
[rewrite commit message, dig out the commit it fixes]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 8428f4a95479..747bdcf72e92 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3178,7 +3178,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
 		wdev_iter = &sdata_iter->wdev;
 
 		if (sdata_iter == sdata ||
-		    rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL ||
+		    !ieee80211_sdata_running(sdata_iter) ||
 		    local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
 			continue;
 
-- 
cgit v1.2.3


From 70a3fd6c61c46c07c63cab935dca9a17d8de1709 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 12 Mar 2015 08:53:29 +0200
Subject: mac80211: ask for ECSA IE to be considered for beacon parse CRC

When a beacon from the AP contains only the ECSA IE, and not a CSA IE
as well, this ECSA IE is not considered for calculating the CRC and
the beacon might be dropped as not being interesting. This is clearly
wrong, it should be handled and the channel switch should be executed.

Fix this by including the ECSA IE ID in the bitmap of interesting IEs.

Reported-by: Gil Tribush <gil.tribush@intel.com>
Reviewed-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 10ac6324c1d0..cde8cd3d6595 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3204,7 +3204,8 @@ static const u64 care_about_ies =
 	(1ULL << WLAN_EID_CHANNEL_SWITCH) |
 	(1ULL << WLAN_EID_PWR_CONSTRAINT) |
 	(1ULL << WLAN_EID_HT_CAPABILITY) |
-	(1ULL << WLAN_EID_HT_OPERATION);
+	(1ULL << WLAN_EID_HT_OPERATION) |
+	(1ULL << WLAN_EID_EXT_CHANSWITCH_ANN);
 
 static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_mgmt *mgmt, size_t len,
-- 
cgit v1.2.3


From 496fcc294daab18799e190c0264863d653588d1f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 12 Mar 2015 08:53:27 +0200
Subject: nl80211: ignore HT/VHT capabilities without QoS/WMM

As HT/VHT depend heavily on QoS/WMM, it's not a good idea to
let userspace add clients that have HT/VHT but not QoS/WMM.
Since it does so in certain cases we've observed (client is
using HT IEs but not QoS/WMM) just ignore the HT/VHT info at
this point and don't pass it down to the drivers which might
unconditionally use it.

Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index be2501538011..b6f84f6a2a09 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4400,6 +4400,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 		return -EINVAL;
 
+	/* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT
+	 * as userspace might just pass through the capabilities from the IEs
+	 * directly, rather than enforcing this restriction and returning an
+	 * error in this case.
+	 */
+	if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) {
+		params.ht_capa = NULL;
+		params.vht_capa = NULL;
+	}
+
 	/* When you run into this, adjust the code below for the new flag */
 	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
 
-- 
cgit v1.2.3


From f84eaa1068315409ffbef57e6fea312180787db3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 12 Mar 2015 08:53:26 +0200
Subject: mac80211: ignore CSA to same channel

If the AP is confused and starts doing a CSA to the same channel,
just ignore that request instead of trying to act it out since it
was likely sent in error anyway.

In the case of the bug I was investigating the GO was misbehaving
and sending out a beacon with CSA IEs still included after having
actually done the channel switch.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/mlme.c        | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c0e089c194f1..8d53d65bd2ab 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -464,6 +464,7 @@ struct ieee80211_if_managed {
 	unsigned int flags;
 
 	bool csa_waiting_bcn;
+	bool csa_ignored_same_chan;
 
 	bool beacon_crc_valid;
 	u32 beacon_crc;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cde8cd3d6595..142f66aece18 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1150,6 +1150,17 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
+	if (cfg80211_chandef_identical(&csa_ie.chandef,
+				       &sdata->vif.bss_conf.chandef)) {
+		if (ifmgd->csa_ignored_same_chan)
+			return;
+		sdata_info(sdata,
+			   "AP %pM tries to chanswitch to same channel, ignore\n",
+			   ifmgd->associated->bssid);
+		ifmgd->csa_ignored_same_chan = true;
+		return;
+	}
+
 	mutex_lock(&local->mtx);
 	mutex_lock(&local->chanctx_mtx);
 	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
@@ -1210,6 +1221,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	sdata->vif.csa_active = true;
 	sdata->csa_chandef = csa_ie.chandef;
 	sdata->csa_block_tx = csa_ie.mode;
+	ifmgd->csa_ignored_same_chan = false;
 
 	if (sdata->csa_block_tx)
 		ieee80211_stop_vif_queues(local, sdata,
@@ -2090,6 +2102,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	sdata->vif.csa_active = false;
 	ifmgd->csa_waiting_bcn = false;
+	ifmgd->csa_ignored_same_chan = false;
 	if (sdata->csa_block_tx) {
 		ieee80211_wake_vif_queues(local, sdata,
 					  IEEE80211_QUEUE_STOP_REASON_CSA);
-- 
cgit v1.2.3


From 37355565ba57fd45f78f0934305be2761b641f8f Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 16 Mar 2015 15:56:05 +0100
Subject: ip6_tunnel: fix error code when tunnel exists

After commit 2b0bb01b6edb, the kernel returns -ENOBUFS when user tries to add
an existing tunnel with ioctl API:
$ ip -6 tunnel add ip6tnl1 mode ip6ip6 dev eth1
add tunnel "ip6tnl0" failed: No buffer space available

It's confusing, the right error is EEXIST.

This patch also change a bit the code returned:
 - ENOBUFS -> ENOMEM
 - ENOENT -> ENODEV

Fixes: 2b0bb01b6edb ("ip6_tunnel: Return an error when adding an existing tunnel.")
CC: Steffen Klassert <steffen.klassert@secunet.com>
Reported-by: Pierre Cheynier <me@pierre-cheynier.net>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 266a264ec212..ddd94eca19b3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -314,7 +314,7 @@ out:
  *   Create tunnel matching given parameters.
  *
  * Return:
- *   created tunnel or NULL
+ *   created tunnel or error pointer
  **/
 
 static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
@@ -322,7 +322,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 	struct net_device *dev;
 	struct ip6_tnl *t;
 	char name[IFNAMSIZ];
-	int err;
+	int err = -ENOMEM;
 
 	if (p->name[0])
 		strlcpy(name, p->name, IFNAMSIZ);
@@ -348,7 +348,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 failed_free:
 	ip6_dev_free(dev);
 failed:
-	return NULL;
+	return ERR_PTR(err);
 }
 
 /**
@@ -362,7 +362,7 @@ failed:
  *   tunnel device is created and registered for use.
  *
  * Return:
- *   matching tunnel or NULL
+ *   matching tunnel or error pointer
  **/
 
 static struct ip6_tnl *ip6_tnl_locate(struct net *net,
@@ -380,13 +380,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
 		if (ipv6_addr_equal(local, &t->parms.laddr) &&
 		    ipv6_addr_equal(remote, &t->parms.raddr)) {
 			if (create)
-				return NULL;
+				return ERR_PTR(-EEXIST);
 
 			return t;
 		}
 	}
 	if (!create)
-		return NULL;
+		return ERR_PTR(-ENODEV);
 	return ip6_tnl_create(net, p);
 }
 
@@ -1420,7 +1420,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			}
 			ip6_tnl_parm_from_user(&p1, &p);
 			t = ip6_tnl_locate(net, &p1, 0);
-			if (t == NULL)
+			if (IS_ERR(t))
 				t = netdev_priv(dev);
 		} else {
 			memset(&p, 0, sizeof(p));
@@ -1445,7 +1445,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		ip6_tnl_parm_from_user(&p1, &p);
 		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
 		if (cmd == SIOCCHGTUNNEL) {
-			if (t != NULL) {
+			if (!IS_ERR(t)) {
 				if (t->dev != dev) {
 					err = -EEXIST;
 					break;
@@ -1457,14 +1457,15 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			else
 				err = ip6_tnl_update(t, &p1);
 		}
-		if (t) {
+		if (!IS_ERR(t)) {
 			err = 0;
 			ip6_tnl_parm_to_user(&p, &t->parms);
 			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 				err = -EFAULT;
 
-		} else
-			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+		} else {
+			err = PTR_ERR(t);
+		}
 		break;
 	case SIOCDELTUNNEL:
 		err = -EPERM;
@@ -1478,7 +1479,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			err = -ENOENT;
 			ip6_tnl_parm_from_user(&p1, &p);
 			t = ip6_tnl_locate(net, &p1, 0);
-			if (t == NULL)
+			if (IS_ERR(t))
 				break;
 			err = -EPERM;
 			if (t->dev == ip6n->fb_tnl_dev)
@@ -1672,12 +1673,13 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 			   struct nlattr *tb[], struct nlattr *data[])
 {
 	struct net *net = dev_net(dev);
-	struct ip6_tnl *nt;
+	struct ip6_tnl *nt, *t;
 
 	nt = netdev_priv(dev);
 	ip6_tnl_netlink_parms(data, &nt->parms);
 
-	if (ip6_tnl_locate(net, &nt->parms, 0))
+	t = ip6_tnl_locate(net, &nt->parms, 0);
+	if (!IS_ERR(t))
 		return -EEXIST;
 
 	return ip6_tnl_create2(dev);
@@ -1697,8 +1699,7 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
 	ip6_tnl_netlink_parms(data, &p);
 
 	t = ip6_tnl_locate(net, &p, 0);
-
-	if (t) {
+	if (!IS_ERR(t)) {
 		if (t->dev != dev)
 			return -EEXIST;
 	} else
-- 
cgit v1.2.3


From cb7cf8a33ff73cf638481d1edf883d8968f934f8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 16 Mar 2015 12:19:24 -0700
Subject: inet: Clean up inet_csk_wait_for_connect() vs. might_sleep()

I got the following trace with current net-next kernel :

[14723.885290] WARNING: CPU: 26 PID: 22658 at kernel/sched/core.c:7285 __might_sleep+0x89/0xa0()
[14723.885325] do not call blocking ops when !TASK_RUNNING; state=1 set at [<ffffffff810e8734>] prepare_to_wait_exclusive+0x34/0xa0
[14723.885355] CPU: 26 PID: 22658 Comm: netserver Not tainted 4.0.0-dbg-DEV #1379
[14723.885359]  ffffffff81a223a8 ffff881fae9e7ca8 ffffffff81650b5d 0000000000000001
[14723.885364]  ffff881fae9e7cf8 ffff881fae9e7ce8 ffffffff810a72e7 0000000000000000
[14723.885367]  ffffffff81a57620 000000000000093a 0000000000000000 ffff881fae9e7e64
[14723.885371] Call Trace:
[14723.885377]  [<ffffffff81650b5d>] dump_stack+0x4c/0x65
[14723.885382]  [<ffffffff810a72e7>] warn_slowpath_common+0x97/0xe0
[14723.885386]  [<ffffffff810a73e6>] warn_slowpath_fmt+0x46/0x50
[14723.885390]  [<ffffffff810f4c5d>] ? trace_hardirqs_on_caller+0x10d/0x1d0
[14723.885393]  [<ffffffff810e8734>] ? prepare_to_wait_exclusive+0x34/0xa0
[14723.885396]  [<ffffffff810e8734>] ? prepare_to_wait_exclusive+0x34/0xa0
[14723.885399]  [<ffffffff810ccdc9>] __might_sleep+0x89/0xa0
[14723.885403]  [<ffffffff81581846>] lock_sock_nested+0x36/0xb0
[14723.885406]  [<ffffffff815829a3>] ? release_sock+0x173/0x1c0
[14723.885411]  [<ffffffff815ea1f7>] inet_csk_accept+0x157/0x2a0
[14723.885415]  [<ffffffff810e8900>] ? abort_exclusive_wait+0xc0/0xc0
[14723.885419]  [<ffffffff8161b96d>] inet_accept+0x2d/0x150
[14723.885424]  [<ffffffff8157db6f>] SYSC_accept4+0xff/0x210
[14723.885428]  [<ffffffff8165a451>] ? retint_swapgs+0xe/0x44
[14723.885431]  [<ffffffff810f4c5d>] ? trace_hardirqs_on_caller+0x10d/0x1d0
[14723.885437]  [<ffffffff81369c0e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[14723.885441]  [<ffffffff8157ef40>] SyS_accept+0x10/0x20
[14723.885444]  [<ffffffff81659872>] system_call_fastpath+0x12/0x17
[14723.885447] ---[ end trace ff74cd83355b1873 ]---

In commit 26cabd31259ba43f68026ce3f62b78094124333f
Peter added a sched_annotate_sleep() in sk_wait_event()

Is the following patch needed as well ?

Alternative would be to use sk_wait_event() from inet_csk_wait_for_connect()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 14d02ea905b6..3e44b9b0b78e 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -268,6 +268,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 		release_sock(sk);
 		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
 			timeo = schedule_timeout(timeo);
+		sched_annotate_sleep();
 		lock_sock(sk);
 		err = 0;
 		if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
-- 
cgit v1.2.3


From ced585c83b27deca427c606a34dd3eaa6b96d82b Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 17 Mar 2015 20:25:57 +0100
Subject: act_bpf: allow non-default TC_ACT opcodes as BPF exec outcome

Revisiting commit d23b8ad8ab23 ("tc: add BPF based action") with regards
to eBPF support, I was thinking that it might be better to improve
return semantics from a BPF program invoked through BPF_PROG_RUN().

Currently, in case filter_res is 0, we overwrite the default action
opcode with TC_ACT_SHOT. A default action opcode configured through tc's
m_bpf can be: TC_ACT_RECLASSIFY, TC_ACT_PIPE, TC_ACT_SHOT, TC_ACT_UNSPEC,
TC_ACT_OK.

In cls_bpf, we have the possibility to overwrite the default class
associated with the classifier in case filter_res is _not_ 0xffffffff
(-1).

That allows us to fold multiple [e]BPF programs into a single one, where
they would otherwise need to be defined as a separate classifier with
its own classid, needlessly redoing parsing work, etc.

Similarly, we could do better in act_bpf: Since above TC_ACT* opcodes
are exported to UAPI anyway, we reuse them for return-code-to-tc-opcode
mapping, where we would allow above possibilities. Thus, like in cls_bpf,
a filter_res of 0xffffffff (-1) means that the configured _default_ action
is used. Any unkown return code from the BPF program would fail in
tcf_bpf() with TC_ACT_UNSPEC.

Should we one day want to make use of TC_ACT_STOLEN or TC_ACT_QUEUED,
which both have the same semantics, we have the option to either use
that as a default action (filter_res of 0xffffffff) or non-default BPF
return code.

All that will allow us to transparently use tcf_bpf() for both BPF
flavours.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_bpf.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 82c5d7fc1988..5f6288fa3f12 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -25,21 +25,41 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
 		   struct tcf_result *res)
 {
 	struct tcf_bpf *b = a->priv;
-	int action;
-	int filter_res;
+	int action, filter_res;
 
 	spin_lock(&b->tcf_lock);
+
 	b->tcf_tm.lastuse = jiffies;
 	bstats_update(&b->tcf_bstats, skb);
-	action = b->tcf_action;
 
 	filter_res = BPF_PROG_RUN(b->filter, skb);
-	if (filter_res == 0) {
-		/* Return code 0 from the BPF program
-		 * is being interpreted as a drop here.
-		 */
-		action = TC_ACT_SHOT;
+
+	/* A BPF program may overwrite the default action opcode.
+	 * Similarly as in cls_bpf, if filter_res == -1 we use the
+	 * default action specified from tc.
+	 *
+	 * In case a different well-known TC_ACT opcode has been
+	 * returned, it will overwrite the default one.
+	 *
+	 * For everything else that is unkown, TC_ACT_UNSPEC is
+	 * returned.
+	 */
+	switch (filter_res) {
+	case TC_ACT_PIPE:
+	case TC_ACT_RECLASSIFY:
+	case TC_ACT_OK:
+		action = filter_res;
+		break;
+	case TC_ACT_SHOT:
+		action = filter_res;
 		b->tcf_qstats.drops++;
+		break;
+	case TC_ACT_UNSPEC:
+		action = b->tcf_action;
+		break;
+	default:
+		action = TC_ACT_UNSPEC;
+		break;
 	}
 
 	spin_unlock(&b->tcf_lock);
-- 
cgit v1.2.3