From 2b70fe5aba0dd00d81173243c7ab04c66aeb67d8 Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Tue, 28 Apr 2015 13:33:21 +0200
Subject: net: sched: act_connmark: don't zap skb->nfct

This action is meant to be passive, i.e. we should not alter skb->nfct:
If nfct is present just leave it alone.

Compile tested only.

Cc: Jamal Hadi Salim
Signed-off-by: Florian Westphal
Acked-by: Pablo Neira Ayuso
Signed-off-by: David S. Miller
---
 net/sched/act_connmark.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 8e472518f9f6..295d14bd6c67 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -63,7 +63,6 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
 		skb->mark = c->mark;
 		/* using overlimits stats to count how many packets marked */
 		ca->tcf_qstats.overlimits++;
-		nf_ct_put(c);
 		goto out;
 	}
 
@@ -82,7 +81,6 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
 	nf_ct_put(c);
 
 out:
-	skb->nfct = NULL;
 	spin_unlock(&ca->tcf_lock);
 	return ca->tcf_action;
 }
-- 
cgit v1.2.3


From a5d28090405038ca1f40c13f38d6d4285456efee Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Thu, 30 Apr 2015 09:40:40 -0700
Subject: codel: fix maxpacket/mtu confusion

Under presence of TSO/GSO/GRO packets, codel at low rates can be quite
useless. In following example, not a single packet was ever dropped,
while average delay in codel queue is ~100 ms !

qdisc codel 0: parent 1:12 limit 16000p target 5.0ms interval 100.0ms
 Sent 134376498 bytes 88797 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 13626b 3p requeues 0
  count 0 lastcount 0 ldelay 96.9ms drop_next 0us
  maxpacket 9084 ecn_mark 0 drop_overlimit 0

This comes from a confusion of what should be the minimal backlog.
It is pretty clear it is not 64KB or whatever max GSO packet ever
reached the qdisc.

codel intent was to use MTU of the device.

After the fix, we finally drop some packets, and rtt/cwnd of my single
TCP flow are meeting our expectations.

qdisc codel 0: parent 1:12 limit 16000p target 5.0ms interval 100.0ms
 Sent 102798497 bytes 67912 pkt (dropped 1365, overlimits 0 requeues 0)
 backlog 6056b 3p requeues 0
  count 1 lastcount 1 ldelay 36.3ms drop_next 0us
  maxpacket 10598 ecn_mark 0 drop_overlimit 0

Signed-off-by: Eric Dumazet
Cc: Kathleen Nichols
Cc: Dave Taht
Cc: Van Jacobson
Signed-off-by: David S. Miller
---
 include/net/codel.h      | 10 +++++++---
 net/sched/sch_codel.c    |  2 +-
 net/sched/sch_fq_codel.c |  2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'net/sched')

diff --git a/include/net/codel.h b/include/net/codel.h
index aeee28081245..1e18005f7f65 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -120,11 +120,13 @@ static inline u32 codel_time_to_us(codel_time_t val)
  * struct codel_params - contains codel parameters
  * @target:	target queue size (in time units)
  * @interval:	width of moving time window
+ * @mtu:	device mtu, or minimal queue backlog in bytes.
  * @ecn:	is Explicit Congestion Notification enabled
  */
 struct codel_params {
 	codel_time_t	target;
 	codel_time_t	interval;
+	u32		mtu;
 	bool		ecn;
 };
 
@@ -166,10 +168,12 @@ struct codel_stats {
 	u32		ecn_mark;
 };
 
-static void codel_params_init(struct codel_params *params)
+static void codel_params_init(struct codel_params *params,
+			      const struct Qdisc *sch)
 {
 	params->interval = MS2TIME(100);
 	params->target = MS2TIME(5);
+	params->mtu = psched_mtu(qdisc_dev(sch));
 	params->ecn = false;
 }
 
@@ -180,7 +184,7 @@ static void codel_vars_init(struct codel_vars *vars)
 
 static void codel_stats_init(struct codel_stats *stats)
 {
-	stats->maxpacket = 256;
+	stats->maxpacket = 0;
 }
 
 /*
@@ -234,7 +238,7 @@ static bool codel_should_drop(const struct sk_buff *skb,
 		stats->maxpacket = qdisc_pkt_len(skb);
 
 	if (codel_time_before(vars->ldelay, params->target) ||
-	    sch->qstats.backlog <= stats->maxpacket) {
+	    sch->qstats.backlog <= params->mtu) {
 		/* went below - stay below for at least interval */
 		vars->first_above_time = 0;
 		return false;
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index de28f8e968e8..7a0bdb16ac92 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -164,7 +164,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt)
 
 	sch->limit = DEFAULT_CODEL_LIMIT;
 
-	codel_params_init(&q->params);
+	codel_params_init(&q->params, sch);
 	codel_vars_init(&q->vars);
 	codel_stats_init(&q->stats);
 
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 1e52decb7b59..c244c45b78d7 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -391,7 +391,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
 	q->perturbation = prandom_u32();
 	INIT_LIST_HEAD(&q->new_flows);
 	INIT_LIST_HEAD(&q->old_flows);
-	codel_params_init(&q->cparams);
+	codel_params_init(&q->cparams, sch);
 	codel_stats_init(&q->cstats);
 	q->cparams.ecn = true;
 
-- 
cgit v1.2.3
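The check being changed is the early-exit test in codel_should_drop(): as long
as the backlog is at or below the threshold, codel stays out of (or leaves)
dropping state no matter how long packets have been sojourning. The following
self-contained user-space sketch (toy_codel, below_threshold_old/new and the
numbers are illustrative stand-ins, not kernel code) shows why a threshold
derived from the largest GSO packet ever seen can keep dropping disabled at
low rates, while a device-MTU threshold does not:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the qdisc state; not kernel definitions. */
struct toy_codel {
	uint32_t backlog;      /* bytes currently queued */
	uint32_t maxpacket;    /* largest packet seen so far (old threshold) */
	uint32_t mtu;          /* device MTU (new threshold) */
	bool     above_target; /* sojourn time has exceeded the target */
};

/* The test that keeps codel out of dropping state, before the fix. */
static bool below_threshold_old(const struct toy_codel *q)
{
	return !q->above_target || q->backlog <= q->maxpacket;
}

/* The same test after the fix: compare against the device MTU instead. */
static bool below_threshold_new(const struct toy_codel *q)
{
	return !q->above_target || q->backlog <= q->mtu;
}

int main(void)
{
	/* One ~9KB GSO burst was seen earlier, so maxpacket is large; only
	 * 8000 bytes are queued now, yet delay is already above target. */
	struct toy_codel q = {
		.backlog = 8000, .maxpacket = 9084, .mtu = 1514,
		.above_target = true,
	};

	printf("old check (backlog <= maxpacket): %s\n",
	       below_threshold_old(&q) ? "dropping stays disabled"
					: "dropping can engage");
	printf("new check (backlog <= mtu):       %s\n",
	       below_threshold_new(&q) ? "dropping stays disabled"
					: "dropping can engage");
	return 0;
}

With these illustrative numbers the old comparison never lets the dropper
engage, matching the "dropped 0" statistics quoted in the commit message,
while the MTU-based comparison does.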
From d744318574090c3b796915d9d710bdb17db191a1 Mon Sep 17 00:00:00 2001
From: WANG Cong
Date: Tue, 5 May 2015 15:22:02 -0700
Subject: net_sched: fix a use-after-free in tc_ctl_tfilter()

When tcf_destroy() returns true, tp could be already destroyed,
we should not use tp->next after that.

For long term, we probably should move tp list to list_head.

Fixes: 1e052be69d04 ("net_sched: destroy proto tp when all filters are gone")
Cc: Jamal Hadi Salim
Signed-off-by: Cong Wang
Acked-by: Jamal Hadi Salim
Signed-off-by: David S. Miller
---
 net/sched/cls_api.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8b0470e418dc..b6ef9a04de06 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -308,12 +308,11 @@ replay:
 	case RTM_DELTFILTER:
 		err = tp->ops->delete(tp, fh);
 		if (err == 0) {
-			tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
-			if (tcf_destroy(tp, false)) {
-				struct tcf_proto *next = rtnl_dereference(tp->next);
+			struct tcf_proto *next = rtnl_dereference(tp->next);
 
+			tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+			if (tcf_destroy(tp, false))
 				RCU_INIT_POINTER(*back, next);
-			}
 		}
 		goto errout;
 	case RTM_GETTFILTER:
-- 
cgit v1.2.3
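The ordering rule behind this fix is generic: once tcf_destroy() may have
freed tp, any later read of tp->next is a use-after-free, so the next pointer
must be loaded while tp is still known to be valid and used from a local copy
afterwards. A self-contained sketch of the same pattern on a plain singly
linked list (node, destroy() and delete_head() are made-up names for
illustration, not the kernel code):

#include <stdio.h>
#include <stdlib.h>

struct node {
	int          id;
	struct node *next;
};

/* Frees the node; after this call, n must not be touched again. */
static void destroy(struct node *n)
{
	free(n);
}

/* Unlink and destroy the list head, in the order the cls_api fix uses:
 * read head->next *before* destroy(), never after. */
static void delete_head(struct node **headp)
{
	struct node *head = *headp;
	struct node *next = head->next;	/* load while head is still valid */

	destroy(head);			/* head is dangling from here on  */
	*headp = next;			/* safe: uses the saved pointer   */
}

int main(void)
{
	struct node *b = malloc(sizeof(*b));
	struct node *a = malloc(sizeof(*a));
	struct node *list;

	b->id = 2; b->next = NULL;
	a->id = 1; a->next = b;
	list = a;

	delete_head(&list);
	printf("new head: %d\n", list->id);	/* prints 2 */
	free(b);
	return 0;
}

Moving the read of tp->next above tfilter_notify() and tcf_destroy(), as the
patch does, is exactly this reordering.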
From 145a42b3a964c6647464f05bd58aa33787de7f75 Mon Sep 17 00:00:00 2001
From: David Ward
Date: Sat, 9 May 2015 22:01:47 -0400
Subject: net_sched: gred: use correct backlog value in WRED mode

In WRED mode, the backlog for a single virtual queue (VQ) should not be
used to determine queue behavior; instead the backlog is summed across
all VQs. This sum is currently used when calculating the average queue
lengths. It also needs to be used when determining if the queue's hard
limit has been reached, or when reporting each VQ's backlog via netlink.

q->backlog will only be used if the queue switches out of WRED mode.

Signed-off-by: David Ward
Signed-off-by: David S. Miller
---
 net/sched/sch_gred.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index a4ca4517cdc8..634529e0ce6b 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -229,7 +229,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			break;
 	}
 
-	if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
+	if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) {
 		q->backlog += qdisc_pkt_len(skb);
 		return qdisc_enqueue_tail(skb, sch);
 	}
@@ -553,7 +553,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 		opt.limit = q->limit;
 		opt.DP = q->DP;
-		opt.backlog = q->backlog;
+		opt.backlog = gred_backlog(table, q, sch);
 		opt.prio = q->prio;
 		opt.qth_min = q->parms.qth_min >> q->parms.Wlog;
 		opt.qth_max = q->parms.qth_max >> q->parms.Wlog;
-- 
cgit v1.2.3
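The helper the patch switches to, gred_backlog(), picks between the per-VQ
counter and the total queue occupancy depending on the operating mode. A
simplified user-space sketch of that choice (toy_gred, toy_backlog() and the
numbers are illustrative stand-ins, not the kernel implementation, which sums
via the qdisc-wide backlog counter) shows how a per-VQ check can admit a
packet that the summed backlog would have rejected:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_VQS 4

/* Simplified stand-ins for the GRED scheduler state. */
struct toy_vq {
	uint32_t backlog;	/* bytes queued for this virtual queue */
	uint32_t limit;		/* hard limit, in bytes */
};

struct toy_gred {
	bool          wred_mode;
	struct toy_vq vq[MAX_VQS];
};

/* Backlog to test against the limit: per-VQ normally, summed in WRED mode. */
static uint32_t toy_backlog(const struct toy_gred *t, unsigned int dp)
{
	uint32_t sum = 0;

	if (!t->wred_mode)
		return t->vq[dp].backlog;

	for (unsigned int i = 0; i < MAX_VQS; i++)
		sum += t->vq[i].backlog;
	return sum;
}

int main(void)
{
	struct toy_gred t = {
		.wred_mode = true,
		.vq = { { 900, 3000 }, { 1200, 3000 }, { 800, 3000 }, { 0, 3000 } },
	};
	unsigned int dp = 0;	/* the VQ this packet was classified into */
	uint32_t pkt_len = 1500;

	/* Per-VQ check sees only 900 bytes, so the packet is admitted even
	 * though ~2900 bytes are already queued across all VQs. */
	printf("per-VQ check: %s\n",
	       t.vq[dp].backlog + pkt_len <= t.vq[dp].limit ? "enqueue" : "drop");
	printf("summed check: %s\n",
	       toy_backlog(&t, dp) + pkt_len <= t.vq[dp].limit ? "enqueue" : "drop");
	return 0;
}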