summaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig25
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/act_api.c20
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ipt.c48
-rw-r--r--net/sched/act_mirred.c4
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c2
-rw-r--r--net/sched/act_police.c10
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c203
-rw-r--r--net/sched/cls_api.c20
-rw-r--r--net/sched/cls_flow.c80
-rw-r--r--net/sched/cls_route.c12
-rw-r--r--net/sched/cls_u32.c28
-rw-r--r--net/sched/em_cmp.c9
-rw-r--r--net/sched/sch_api.c695
-rw-r--r--net/sched/sch_atm.c37
-rw-r--r--net/sched/sch_cbq.c194
-rw-r--r--net/sched/sch_dsmark.c30
-rw-r--r--net/sched/sch_fifo.c49
-rw-r--r--net/sched/sch_generic.c411
-rw-r--r--net/sched/sch_gred.c14
-rw-r--r--net/sched/sch_hfsc.c124
-rw-r--r--net/sched/sch_htb.c242
-rw-r--r--net/sched/sch_ingress.c4
-rw-r--r--net/sched/sch_multiq.c477
-rw-r--r--net/sched/sch_netem.c88
-rw-r--r--net/sched/sch_prio.c169
-rw-r--r--net/sched/sch_red.c39
-rw-r--r--net/sched/sch_sfq.c34
-rw-r--r--net/sched/sch_tbf.c54
-rw-r--r--net/sched/sch_teql.c57
33 files changed, 2150 insertions, 1037 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 82adfe6447d7..6767e54155db 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -106,16 +106,14 @@ config NET_SCH_PRIO
To compile this code as a module, choose M here: the
module will be called sch_prio.
-config NET_SCH_RR
- tristate "Multi Band Round Robin Queuing (RR)"
- select NET_SCH_PRIO
+config NET_SCH_MULTIQ
+ tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)"
---help---
- Say Y here if you want to use an n-band round robin packet
- scheduler.
+ Say Y here if you want to use an n-band queue packet scheduler
+ to support devices that have multiple hardware transmit queues.
- The module uses sch_prio for its framework and is aliased as
- sch_rr, so it will load sch_prio, although it is referred
- to using sch_rr.
+ To compile this code as a module, choose M here: the
+ module will be called sch_multiq.
config NET_SCH_RED
tristate "Random Early Detection (RED)"
@@ -487,6 +485,17 @@ config NET_ACT_SIMP
To compile this code as a module, choose M here: the
module will be called simple.
+config NET_ACT_SKBEDIT
+ tristate "SKB Editing"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to change skb priority or queue_mapping settings.
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called skbedit.
+
config NET_CLS_IND
bool "Incoming device classification"
depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 1d2b0f7df848..e60c9925b269 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
+obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
@@ -26,6 +27,7 @@ obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
+obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 74e662cbb2c5..9974b3f04f05 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -41,7 +41,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
return;
}
}
- BUG_TRAP(0);
+ WARN_ON(1);
}
EXPORT_SYMBOL(tcf_hash_destroy);
@@ -205,10 +205,9 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
{
struct tcf_common *p = NULL;
if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
- if (bind) {
+ if (bind)
p->tcfc_bindcnt++;
- p->tcfc_refcnt++;
- }
+ p->tcfc_refcnt++;
a->priv = p;
}
return p;
@@ -752,7 +751,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
struct nlattr *tb[TCA_ACT_MAX+1];
struct nlattr *kind;
struct tc_action *a = create_a(0);
- int err = -EINVAL;
+ int err = -ENOMEM;
if (a == NULL) {
printk("tca_action_flush: couldnt create tc_action\n");
@@ -763,7 +762,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
if (!skb) {
printk("tca_action_flush: failed skb alloc\n");
kfree(a);
- return -ENOBUFS;
+ return err;
}
b = skb_tail_pointer(skb);
@@ -791,6 +790,8 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
if (err < 0)
goto nla_put_failure;
+ if (err == 0)
+ goto noflush_out;
nla_nest_end(skb, nest);
@@ -808,6 +809,7 @@ nla_put_failure:
nlmsg_failure:
module_put(a->ops->owner);
err_out:
+noflush_out:
kfree_skb(skb);
kfree(a);
return err;
@@ -825,8 +827,10 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
return ret;
if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
- if (tb[0] != NULL && tb[1] == NULL)
- return tca_action_flush(tb[0], n, pid);
+ if (tb[1] != NULL)
+ return tca_action_flush(tb[1], n, pid);
+ else
+ return -EINVAL;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 422872c4f14b..ac04289da5d7 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -139,7 +139,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result
#else
action = gact->tcf_action;
#endif
- gact->tcf_bstats.bytes += skb->len;
+ gact->tcf_bstats.bytes += qdisc_pkt_len(skb);
gact->tcf_bstats.packets++;
if (action == TC_ACT_SHOT)
gact->tcf_qstats.drops++;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index da696fd3e341..0453d79ebf57 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -40,6 +40,7 @@ static struct tcf_hashinfo ipt_hash_info = {
static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
{
+ struct xt_tgchk_param par;
struct xt_target *target;
int ret = 0;
@@ -49,29 +50,30 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
return -ENOENT;
t->u.kernel.target = target;
-
- ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
- table, hook, 0, 0);
- if (ret) {
+ par.table = table;
+ par.entryinfo = NULL;
+ par.target = target;
+ par.targinfo = t->data;
+ par.hook_mask = hook;
+ par.family = NFPROTO_IPV4;
+
+ ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
+ if (ret < 0) {
module_put(t->u.kernel.target->me);
return ret;
}
- if (t->u.kernel.target->checkentry
- && !t->u.kernel.target->checkentry(table, NULL,
- t->u.kernel.target, t->data,
- hook)) {
- module_put(t->u.kernel.target->me);
- ret = -EINVAL;
- }
-
- return ret;
+ return 0;
}
static void ipt_destroy_target(struct ipt_entry_target *t)
{
- if (t->u.kernel.target->destroy)
- t->u.kernel.target->destroy(t->u.kernel.target, t->data);
- module_put(t->u.kernel.target->me);
+ struct xt_tgdtor_param par = {
+ .target = t->u.kernel.target,
+ .targinfo = t->data,
+ };
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
}
static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
@@ -196,6 +198,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
{
int ret = 0, result = 0;
struct tcf_ipt *ipt = a->priv;
+ struct xt_target_param par;
if (skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -205,16 +208,19 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
spin_lock(&ipt->tcf_lock);
ipt->tcf_tm.lastuse = jiffies;
- ipt->tcf_bstats.bytes += skb->len;
+ ipt->tcf_bstats.bytes += qdisc_pkt_len(skb);
ipt->tcf_bstats.packets++;
/* yes, we have to worry about both in and out dev
worry later - danger - this API seems to have changed
from earlier kernels */
- ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL,
- ipt->tcfi_hook,
- ipt->tcfi_t->u.kernel.target,
- ipt->tcfi_t->data);
+ par.in = skb->dev;
+ par.out = NULL;
+ par.hooknum = ipt->tcfi_hook;
+ par.target = ipt->tcfi_t->u.kernel.target;
+ par.targinfo = ipt->tcfi_t->data;
+ ret = par.target->target(skb, &par);
+
switch (ret) {
case NF_ACCEPT:
result = TC_ACT_OK;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1aff005d95cd..70341c020b6d 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -164,7 +164,7 @@ bad_mirred:
if (skb2 != NULL)
kfree_skb(skb2);
m->tcf_qstats.overlimits++;
- m->tcf_bstats.bytes += skb->len;
+ m->tcf_bstats.bytes += qdisc_pkt_len(skb);
m->tcf_bstats.packets++;
spin_unlock(&m->tcf_lock);
/* should we be asking for packet to be dropped?
@@ -184,7 +184,7 @@ bad_mirred:
goto bad_mirred;
}
- m->tcf_bstats.bytes += skb2->len;
+ m->tcf_bstats.bytes += qdisc_pkt_len(skb2);
m->tcf_bstats.packets++;
if (!(at & AT_EGRESS))
if (m->tcfm_ok_push)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 0a3c8339767a..7b39ed485bca 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -124,7 +124,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
egress = p->flags & TCA_NAT_FLAG_EGRESS;
action = p->tcf_action;
- p->tcf_bstats.bytes += skb->len;
+ p->tcf_bstats.bytes += qdisc_pkt_len(skb);
p->tcf_bstats.packets++;
spin_unlock(&p->tcf_lock);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3cc4cb9e500e..d5f4e3404864 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -182,7 +182,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
bad:
p->tcf_qstats.overlimits++;
done:
- p->tcf_bstats.bytes += skb->len;
+ p->tcf_bstats.bytes += qdisc_pkt_len(skb);
p->tcf_bstats.packets++;
spin_unlock(&p->tcf_lock);
return p->tcf_action;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0898120bbcc0..38015b493947 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -116,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p)
return;
}
}
- BUG_TRAP(0);
+ WARN_ON(1);
}
static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -272,7 +272,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
spin_lock(&police->tcf_lock);
- police->tcf_bstats.bytes += skb->len;
+ police->tcf_bstats.bytes += qdisc_pkt_len(skb);
police->tcf_bstats.packets++;
if (police->tcfp_ewma_rate &&
@@ -282,7 +282,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
return police->tcf_action;
}
- if (skb->len <= police->tcfp_mtu) {
+ if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
if (police->tcfp_R_tab == NULL) {
spin_unlock(&police->tcf_lock);
return police->tcfp_result;
@@ -295,12 +295,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
ptoks = toks + police->tcfp_ptoks;
if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
ptoks = (long)L2T_P(police, police->tcfp_mtu);
- ptoks -= L2T_P(police, skb->len);
+ ptoks -= L2T_P(police, qdisc_pkt_len(skb));
}
toks += police->tcfp_toks;
if (toks > (long)police->tcfp_burst)
toks = police->tcfp_burst;
- toks -= L2T(police, skb->len);
+ toks -= L2T(police, qdisc_pkt_len(skb));
if ((toks|ptoks) >= 0) {
police->tcfp_t_c = now;
police->tcfp_toks = toks;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 1d421d059caf..e7851ce92cfe 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -41,7 +41,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
spin_lock(&d->tcf_lock);
d->tcf_tm.lastuse = jiffies;
- d->tcf_bstats.bytes += skb->len;
+ d->tcf_bstats.bytes += qdisc_pkt_len(skb);
d->tcf_bstats.packets++;
/* print policy string followed by _ then packet count
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
new file mode 100644
index 000000000000..fe9777e77f35
--- /dev/null
+++ b/net/sched/act_skbedit.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_skbedit.h>
+/*
+ * Storage for skbedit action instances: a table of SKBEDIT_TAB_MASK + 1
+ * buckets plus the rwlock, bundled into skbedit_hash_info, which is what
+ * the tcf_hash_*() helpers in this file are given.  skbedit_idx_gen is
+ * passed to tcf_hash_create() when a new instance is created.
+ */
+#define SKBEDIT_TAB_MASK 15
+static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1];
+static u32 skbedit_idx_gen;
+static DEFINE_RWLOCK(skbedit_lock);
+
+static struct tcf_hashinfo skbedit_hash_info = {
+ .htab = tcf_skbedit_ht,
+ .hmask = SKBEDIT_TAB_MASK,
+ .lock = &skbedit_lock,
+};
+/*
+ * The .act callback of act_skbedit_ops.
+ *
+ * Under d->tcf_lock it refreshes the instance's lastuse timestamp and its
+ * byte/packet counters, then applies the edits selected in d->flags:
+ * SKBEDIT_F_PRIORITY overwrites skb->priority, SKBEDIT_F_QUEUE_MAPPING
+ * sets the skb's queue mapping.
+ *
+ * Returns d->tcf_action (read after the lock has been released).  The
+ * res argument is not used by this function.
+ */
+static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_skbedit *d = a->priv;
+
+ spin_lock(&d->tcf_lock);
+ d->tcf_tm.lastuse = jiffies;
+ d->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ d->tcf_bstats.packets++;
+
+ if (d->flags & SKBEDIT_F_PRIORITY)
+ skb->priority = d->priority;
+ if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
+ skb->dev->real_num_tx_queues > d->queue_mapping) /* mapping must be below the device's real TX queue count */
+ skb_set_queue_mapping(skb, d->queue_mapping);
+
+ spin_unlock(&d->tcf_lock);
+ return d->tcf_action;
+}
+/*
+ * Attribute policy handed to nla_parse_nested() in tcf_skbedit_init().
+ * Each entry gives the payload size expected for that attribute.
+ * NOTE(review): no .type is set, so .len is presumably treated as a
+ * minimum length - confirm against the nla_policy documentation.
+ */
+static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
+ [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) },
+ [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) },
+ [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) },
+};
+/*
+ * The .init callback of act_skbedit_ops: create a skbedit action instance
+ * or update an existing one from netlink attributes.
+ *
+ * @nla:  nested attribute holding the TCA_SKBEDIT_* attributes
+ * @est:  forwarded to tcf_hash_create() when a new instance is made
+ * @a:    action being configured; passed on to the tcf_hash_*() helpers
+ * @ovr:  non-zero allows an already existing instance to be updated
+ * @bind: forwarded to tcf_hash_check(), tcf_hash_create() and
+ *        tcf_hash_release()
+ *
+ * TCA_SKBEDIT_PARMS is mandatory and at least one of TCA_SKBEDIT_PRIORITY
+ * and TCA_SKBEDIT_QUEUE_MAPPING has to be present.
+ *
+ * Returns ACT_P_CREATED when a new instance was created and 0 when an
+ * existing one was updated.  Errors: the nla_parse_nested() error code,
+ * -EINVAL for a missing @nla or missing attributes, -ENOMEM when
+ * tcf_hash_create() fails, -EEXIST when the instance exists and @ovr is 0.
+ */
+static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
+ struct tc_skbedit *parm;
+ struct tcf_skbedit *d;
+ struct tcf_common *pc;
+ u32 flags = 0, *priority = NULL;
+ u16 *queue_mapping = NULL;
+ int ret = 0, err;
+
+ if (nla == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_SKBEDIT_PARMS] == NULL)
+ return -EINVAL;
+
+ if (tb[TCA_SKBEDIT_PRIORITY] != NULL) {
+ flags |= SKBEDIT_F_PRIORITY;
+ priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]);
+ }
+
+ if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) {
+ flags |= SKBEDIT_F_QUEUE_MAPPING;
+ queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
+ }
+ if (!flags) /* neither a priority nor a queue mapping was supplied */
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
+
+ pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info);
+ if (!pc) { /* no existing instance was found: create one */
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
+ &skbedit_idx_gen, &skbedit_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+
+ d = to_skbedit(pc);
+ ret = ACT_P_CREATED;
+ } else {
+ d = to_skbedit(pc);
+ if (!ovr) { /* instance exists and overriding was not requested */
+ tcf_hash_release(pc, bind, &skbedit_hash_info);
+ return -EEXIST;
+ }
+ }
+
+ spin_lock_bh(&d->tcf_lock); /* settings are written under the instance lock */
+
+ d->flags = flags;
+ if (flags & SKBEDIT_F_PRIORITY)
+ d->priority = *priority;
+ if (flags & SKBEDIT_F_QUEUE_MAPPING)
+ d->queue_mapping = *queue_mapping;
+ d->tcf_action = parm->action;
+
+ spin_unlock_bh(&d->tcf_lock);
+
+ if (ret == ACT_P_CREATED) /* a new instance is hashed only once it is fully set up */
+ tcf_hash_insert(pc, &skbedit_hash_info);
+ return ret;
+}
+/*
+ * The .cleanup callback of act_skbedit_ops: releases the instance stored
+ * in a->priv through tcf_hash_release(), passing @bind along.
+ *
+ * Returns the tcf_hash_release() result, or 0 when a->priv is NULL.
+ */
+static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_skbedit *d = a->priv;
+
+ if (d)
+ return tcf_hash_release(&d->common, bind, &skbedit_hash_info);
+ return 0;
+}
+
+/*
+ * The .dump callback of act_skbedit_ops: write one skbedit action instance
+ * into a netlink message.
+ *
+ * @skb:  message being built
+ * @a:    action whose a->priv is the tcf_skbedit instance to dump
+ * @bind: subtracted from the reported bind count
+ * @ref:  subtracted from the reported reference count
+ *
+ * Emits TCA_SKBEDIT_PARMS, then TCA_SKBEDIT_PRIORITY and/or
+ * TCA_SKBEDIT_QUEUE_MAPPING according to d->flags, then TCA_SKBEDIT_TM.
+ *
+ * Returns skb->len on success.  If an attribute does not fit, the message
+ * is trimmed back to where this dump started and -1 is returned.
+ */
+static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+				   int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_skbedit *d = a->priv;
+	/*
+	 * opt is copied to user space in full (sizeof(opt)), so it must not
+	 * carry stack garbage: the designated initializer zeroes every
+	 * member that is not named here.
+	 */
+	struct tc_skbedit opt = {
+		.index   = d->tcf_index,
+		.refcnt  = d->tcf_refcnt - ref,
+		.bindcnt = d->tcf_bindcnt - bind,
+		.action  = d->tcf_action,
+	};
+	struct tcf_t t;
+
+	NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt);
+	if (d->flags & SKBEDIT_F_PRIORITY)
+		NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
+			&d->priority);
+	if (d->flags & SKBEDIT_F_QUEUE_MAPPING)
+		NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING,
+			sizeof(d->queue_mapping), &d->queue_mapping);
+	/* install and lastuse are reported as ages relative to now */
+	t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+	NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t);
+	return skb->len;
+
+nla_put_failure:
+	/* NLA_PUT() jumps here when the attribute does not fit */
+	nlmsg_trim(skb, b);
+	return -1;
+}
+/*
+ * Operations table for the "skbedit" action.  It is registered with
+ * tcf_register_action() in skbedit_init_module() and unregistered in
+ * skbedit_cleanup_module(); walking is left to tcf_generic_walker.
+ */
+static struct tc_action_ops act_skbedit_ops = {
+ .kind = "skbedit",
+ .hinfo = &skbedit_hash_info,
+ .type = TCA_ACT_SKBEDIT,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_skbedit,
+ .dump = tcf_skbedit_dump,
+ .cleanup = tcf_skbedit_cleanup,
+ .init = tcf_skbedit_init,
+ .walk = tcf_generic_walker,
+};
+
+MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
+MODULE_DESCRIPTION("SKB Editing");
+MODULE_LICENSE("GPL");
+/* Module entry point: registers act_skbedit_ops and returns the result. */
+static int __init skbedit_init_module(void)
+{
+ return tcf_register_action(&act_skbedit_ops);
+}
+/* Module exit point: unregisters act_skbedit_ops. */
+static void __exit skbedit_cleanup_module(void)
+{
+ tcf_unregister_action(&act_skbedit_ops);
+}
+
+module_init(skbedit_init_module);
+module_exit(skbedit_cleanup_module);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9360fc81e8c7..8eb79e92e94c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -120,6 +120,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
+ spinlock_t *root_lock;
struct tcmsg *t;
u32 protocol;
u32 prio;
@@ -166,7 +167,8 @@ replay:
/* Find qdisc */
if (!parent) {
- q = dev->qdisc_sleeping;
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
+ q = dev_queue->qdisc_sleeping;
parent = q->handle;
} else {
q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
@@ -203,6 +205,8 @@ replay:
}
}
+ root_lock = qdisc_root_sleeping_lock(q);
+
if (tp == NULL) {
/* Proto-tcf does not exist, create new one */
@@ -262,10 +266,10 @@ replay:
goto errout;
}
- qdisc_lock_tree(dev);
+ spin_lock_bh(root_lock);
tp->next = *back;
*back = tp;
- qdisc_unlock_tree(dev);
+ spin_unlock_bh(root_lock);
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
goto errout;
@@ -274,9 +278,9 @@ replay:
if (fh == 0) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
- qdisc_lock_tree(dev);
+ spin_lock_bh(root_lock);
*back = tp->next;
- qdisc_unlock_tree(dev);
+ spin_unlock_bh(root_lock);
tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
tcf_destroy(tp);
@@ -334,7 +338,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
tcm->tcm_family = AF_UNSPEC;
tcm->tcm__pad1 = 0;
tcm->tcm__pad1 = 0;
- tcm->tcm_ifindex = tp->q->dev->ifindex;
+ tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
tcm->tcm_parent = tp->classid;
tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind);
@@ -390,6 +394,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ struct netdev_queue *dev_queue;
int t;
int s_t;
struct net_device *dev;
@@ -408,8 +413,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
return skb->len;
+ dev_queue = netdev_get_tx_queue(dev, 0);
if (!tcm->tcm_parent)
- q = dev->qdisc_sleeping;
+ q = dev_queue->qdisc_sleeping;
else
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
if (!q)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 971b867e0484..0ebaff637e31 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -36,6 +36,8 @@ struct flow_filter {
struct list_head list;
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
+ struct timer_list perturb_timer;
+ u32 perturb_period;
u32 handle;
u32 nkeys;
@@ -47,11 +49,9 @@ struct flow_filter {
u32 addend;
u32 divisor;
u32 baseclass;
+ u32 hashrnd;
};
-static u32 flow_hashrnd __read_mostly;
-static int flow_hashrnd_initted __read_mostly;
-
static const struct tcf_ext_map flow_ext_map = {
.action = TCA_FLOW_ACT,
.police = TCA_FLOW_POLICE,
@@ -67,9 +67,9 @@ static inline u32 addr_fold(void *addr)
static u32 flow_get_src(const struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
return ntohl(ip_hdr(skb)->saddr);
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
default:
return addr_fold(skb->sk);
@@ -79,9 +79,9 @@ static u32 flow_get_src(const struct sk_buff *skb)
static u32 flow_get_dst(const struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
return ntohl(ip_hdr(skb)->daddr);
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
default:
return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
@@ -91,9 +91,9 @@ static u32 flow_get_dst(const struct sk_buff *skb)
static u32 flow_get_proto(const struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
return ip_hdr(skb)->protocol;
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
return ipv6_hdr(skb)->nexthdr;
default:
return 0;
@@ -120,7 +120,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb)
u32 res = 0;
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP): {
+ case htons(ETH_P_IP): {
struct iphdr *iph = ip_hdr(skb);
if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -128,7 +128,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb)
res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
break;
}
- case __constant_htons(ETH_P_IPV6): {
+ case htons(ETH_P_IPV6): {
struct ipv6hdr *iph = ipv6_hdr(skb);
if (has_ports(iph->nexthdr))
@@ -147,7 +147,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb)
u32 res = 0;
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP): {
+ case htons(ETH_P_IP): {
struct iphdr *iph = ip_hdr(skb);
if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -155,7 +155,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb)
res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
break;
}
- case __constant_htons(ETH_P_IPV6): {
+ case htons(ETH_P_IPV6): {
struct ipv6hdr *iph = ipv6_hdr(skb);
if (has_ports(iph->nexthdr))
@@ -213,9 +213,9 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
static u32 flow_get_nfct_src(const struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, src.u3.ip));
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
}
fallback:
@@ -225,9 +225,9 @@ fallback:
static u32 flow_get_nfct_dst(const struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, dst.u3.ip));
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
}
fallback:
@@ -348,7 +348,7 @@ static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
}
if (f->mode == FLOW_MODE_HASH)
- classid = jhash2(keys, f->nkeys, flow_hashrnd);
+ classid = jhash2(keys, f->nkeys, f->hashrnd);
else {
classid = keys[0];
classid = (classid & f->mask) ^ f->xor;
@@ -369,6 +369,15 @@ static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
return -1;
}
+static void flow_perturbation(unsigned long arg)
+{
+ struct flow_filter *f = (struct flow_filter *)arg;
+ /*
+  * Callback of f->perturb_timer; arg is the flow_filter that owns the
+  * timer.  Draws a new random f->hashrnd and re-arms the timer
+  * perturb_period jiffies ahead, unless the period is zero.
+  */
+ get_random_bytes(&f->hashrnd, 4);
+ if (f->perturb_period)
+ mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
+}
+
static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_KEYS] = { .type = NLA_U32 },
[TCA_FLOW_MODE] = { .type = NLA_U32 },
@@ -381,6 +390,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_ACT] = { .type = NLA_NESTED },
[TCA_FLOW_POLICE] = { .type = NLA_NESTED },
[TCA_FLOW_EMATCHES] = { .type = NLA_NESTED },
+ [TCA_FLOW_PERTURB] = { .type = NLA_U32 },
};
static int flow_change(struct tcf_proto *tp, unsigned long base,
@@ -394,6 +404,7 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
struct tcf_exts e;
struct tcf_ematch_tree t;
unsigned int nkeys = 0;
+ unsigned int perturb_period = 0;
u32 baseclass = 0;
u32 keymask = 0;
u32 mode;
@@ -442,6 +453,14 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
mode = nla_get_u32(tb[TCA_FLOW_MODE]);
if (mode != FLOW_MODE_HASH && nkeys > 1)
goto err2;
+
+ if (mode == FLOW_MODE_HASH)
+ perturb_period = f->perturb_period;
+ if (tb[TCA_FLOW_PERTURB]) {
+ if (mode != FLOW_MODE_HASH)
+ goto err2;
+ perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
+ }
} else {
err = -EINVAL;
if (!handle)
@@ -455,6 +474,12 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
if (mode != FLOW_MODE_HASH && nkeys > 1)
goto err2;
+ if (tb[TCA_FLOW_PERTURB]) {
+ if (mode != FLOW_MODE_HASH)
+ goto err2;
+ perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
+ }
+
if (TC_H_MAJ(baseclass) == 0)
baseclass = TC_H_MAKE(tp->q->handle, baseclass);
if (TC_H_MIN(baseclass) == 0)
@@ -467,6 +492,11 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
f->handle = handle;
f->mask = ~0U;
+
+ get_random_bytes(&f->hashrnd, 4);
+ f->perturb_timer.function = flow_perturbation;
+ f->perturb_timer.data = (unsigned long)f;
+ init_timer_deferrable(&f->perturb_timer);
}
tcf_exts_change(tp, &f->exts, &e);
@@ -495,6 +525,11 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
if (baseclass)
f->baseclass = baseclass;
+ f->perturb_period = perturb_period;
+ del_timer(&f->perturb_timer);
+ if (perturb_period)
+ mod_timer(&f->perturb_timer, jiffies + perturb_period);
+
if (*arg == 0)
list_add_tail(&f->list, &head->filters);
@@ -512,6 +547,7 @@ err1:
static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
{
+ del_timer_sync(&f->perturb_timer);
tcf_exts_destroy(tp, &f->exts);
tcf_em_tree_destroy(tp, &f->ematches);
kfree(f);
@@ -532,11 +568,6 @@ static int flow_init(struct tcf_proto *tp)
{
struct flow_head *head;
- if (!flow_hashrnd_initted) {
- get_random_bytes(&flow_hashrnd, 4);
- flow_hashrnd_initted = 1;
- }
-
head = kzalloc(sizeof(*head), GFP_KERNEL);
if (head == NULL)
return -ENOBUFS;
@@ -605,6 +636,9 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh,
if (f->baseclass)
NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);
+ if (f->perturb_period)
+ NLA_PUT_U32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ);
+
if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
goto nla_put_failure;
#ifdef CONFIG_NET_EMATCH
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 784dcb870b98..e3d8455eebc2 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -73,11 +73,13 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif)
}
static inline
-void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id)
+void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
{
- qdisc_lock_tree(dev);
+ spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
+
+ spin_lock_bh(root_lock);
memset(head->fastmap, 0, sizeof(head->fastmap));
- qdisc_unlock_tree(dev);
+ spin_unlock_bh(root_lock);
}
static inline void
@@ -302,7 +304,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
*fp = f->next;
tcf_tree_unlock(tp);
- route4_reset_fastmap(tp->q->dev, head, f->id);
+ route4_reset_fastmap(tp->q, head, f->id);
route4_delete_filter(tp, f);
/* Strip tree */
@@ -500,7 +502,7 @@ reinsert:
}
tcf_tree_unlock(tp);
- route4_reset_fastmap(tp->q->dev, head, f->id);
+ route4_reset_fastmap(tp->q, head, f->id);
*arg = (unsigned long)f;
return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4d755444c449..246f9065ce34 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -75,7 +75,6 @@ struct tc_u_hnode
struct tc_u_common
{
- struct tc_u_common *next;
struct tc_u_hnode *hlist;
struct Qdisc *q;
int refcnt;
@@ -87,8 +86,6 @@ static const struct tcf_ext_map u32_ext_map = {
.police = TCA_U32_POLICE
};
-static struct tc_u_common *u32_list;
-
static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift)
{
unsigned h = ntohl(key & sel->hmask)>>fshift;
@@ -287,9 +284,7 @@ static int u32_init(struct tcf_proto *tp)
struct tc_u_hnode *root_ht;
struct tc_u_common *tp_c;
- for (tp_c = u32_list; tp_c; tp_c = tp_c->next)
- if (tp_c->q == tp->q)
- break;
+ tp_c = tp->q->u32_node;
root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
if (root_ht == NULL)
@@ -307,8 +302,7 @@ static int u32_init(struct tcf_proto *tp)
return -ENOBUFS;
}
tp_c->q = tp->q;
- tp_c->next = u32_list;
- u32_list = tp_c;
+ tp->q->u32_node = tp_c;
}
tp_c->refcnt++;
@@ -351,7 +345,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
}
}
}
- BUG_TRAP(0);
+ WARN_ON(1);
return 0;
}
@@ -374,7 +368,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode **hn;
- BUG_TRAP(!ht->refcnt);
+ WARN_ON(ht->refcnt);
u32_clear_hnode(tp, ht);
@@ -386,7 +380,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
}
}
- BUG_TRAP(0);
+ WARN_ON(1);
return -ENOENT;
}
@@ -395,21 +389,15 @@ static void u32_destroy(struct tcf_proto *tp)
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);
- BUG_TRAP(root_ht != NULL);
+ WARN_ON(root_ht == NULL);
if (root_ht && --root_ht->refcnt == 0)
u32_destroy_hnode(tp, root_ht);
if (--tp_c->refcnt == 0) {
struct tc_u_hnode *ht;
- struct tc_u_common **tp_cp;
- for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) {
- if (*tp_cp == tp_c) {
- *tp_cp = tp_c->next;
- break;
- }
- }
+ tp->q->u32_node = NULL;
for (ht = tp_c->hlist; ht; ht = ht->next) {
ht->refcnt--;
@@ -419,7 +407,7 @@ static void u32_destroy(struct tcf_proto *tp)
while ((ht = tp_c->hlist) != NULL) {
tp_c->hlist = ht->next;
- BUG_TRAP(ht->refcnt == 0);
+ WARN_ON(ht->refcnt != 0);
kfree(ht);
}
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index cc49c932641d..bc450397487a 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/tc_ematch/tc_em_cmp.h>
+#include <asm/unaligned.h>
#include <net/pkt_cls.h>
static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp)
@@ -37,8 +38,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
break;
case TCF_EM_ALIGN_U16:
- val = *ptr << 8;
- val |= *(ptr+1);
+ val = get_unaligned_be16(ptr);
if (cmp_needs_transformation(cmp))
val = be16_to_cpu(val);
@@ -47,10 +47,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
case TCF_EM_ALIGN_U32:
/* Worth checking boundries? The branching seems
* to get worse. Visit again. */
- val = *ptr << 24;
- val |= *(ptr+1) << 16;
- val |= *(ptr+2) << 8;
- val |= *(ptr+3);
+ val = get_unaligned_be32(ptr);
if (cmp_needs_transformation(cmp))
val = be32_to_cpu(val);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c40773cdbe45..1122c952aa99 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -27,6 +27,7 @@
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
+#include <linux/lockdep.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -99,7 +100,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
---requeue
requeues once dequeued packet. It is used for non-standard or
- just buggy devices, which can defer output even if dev->tbusy=0.
+ just buggy devices, which can defer output even if netif_queue_stopped()=0.
---reset
@@ -183,17 +184,70 @@ EXPORT_SYMBOL(unregister_qdisc);
(root qdisc, all its children, children of children etc.)
*/
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
struct Qdisc *q;
- list_for_each_entry(q, &dev->qdisc_list, list) {
+ if (!(root->flags & TCQ_F_BUILTIN) &&
+ root->handle == handle)
+ return root;
+
+ list_for_each_entry(q, &root->list, list) {
if (q->handle == handle)
return q;
}
return NULL;
}
+/*
+ * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
+ * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
+ */
+static DEFINE_SPINLOCK(qdisc_list_lock);
+
+/*
+ * Append @q to the list headed by its sleeping root qdisc.
+ * Qdiscs whose parent is TC_H_ROOT and ingress qdiscs are skipped.
+ */
+static void qdisc_list_add(struct Qdisc *q)
+{
+	if (q->parent == TC_H_ROOT || (q->flags & TCQ_F_INGRESS))
+		return;
+
+	spin_lock_bh(&qdisc_list_lock);
+	list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
+	spin_unlock_bh(&qdisc_list_lock);
+}
+
+/*
+ * Unlink @q from the qdisc list under qdisc_list_lock.
+ * Qdiscs whose parent is TC_H_ROOT and ingress qdiscs are skipped,
+ * mirroring the condition used when adding.
+ */
+void qdisc_list_del(struct Qdisc *q)
+{
+	if (q->parent == TC_H_ROOT || (q->flags & TCQ_F_INGRESS))
+		return;
+
+	spin_lock_bh(&qdisc_list_lock);
+	list_del(&q->list);
+	spin_unlock_bh(&qdisc_list_lock);
+}
+EXPORT_SYMBOL(qdisc_list_del);
+
+/*
+ * Find the qdisc with the given @handle on @dev.
+ *
+ * Each TX queue's sleeping root is searched in index order; if none
+ * matches, the RX queue's sleeping root is searched.  Returns the
+ * matching qdisc or NULL.  The search runs under qdisc_list_lock.
+ */
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+{
+	struct Qdisc *found = NULL;
+	unsigned int idx;
+
+	spin_lock_bh(&qdisc_list_lock);
+
+	for (idx = 0; idx < dev->num_tx_queues && !found; idx++) {
+		struct netdev_queue *txq = netdev_get_tx_queue(dev, idx);
+
+		found = qdisc_match_from_root(txq->qdisc_sleeping, handle);
+	}
+
+	if (!found)
+		found = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping,
+					      handle);
+
+	spin_unlock_bh(&qdisc_list_lock);
+
+	return found;
+}
+
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
@@ -277,15 +331,137 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
}
EXPORT_SYMBOL(qdisc_put_rtab);
+static LIST_HEAD(qdisc_stab_list);
+static DEFINE_SPINLOCK(qdisc_stab_lock);
+
+static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
+ [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
+ [TCA_STAB_DATA] = { .type = NLA_BINARY },
+};
+
+/*
+ * Obtain a size table described by the nested attribute @opt.
+ *
+ * TCA_STAB_BASE (a struct tc_sizespec) is mandatory.  When its tsize is
+ * non-zero, TCA_STAB_DATA must be present and hold exactly tsize u16
+ * entries.
+ *
+ * If an identical table is already on qdisc_stab_list its refcnt is
+ * incremented and it is returned; otherwise a new table with refcnt 1
+ * is allocated and appended to the list.
+ *
+ * Returns the table, or ERR_PTR(): the nla_parse_nested() error,
+ * -EINVAL for a missing/inconsistent attribute, -ENOMEM on allocation
+ * failure.
+ */
+static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
+{
+	struct nlattr *tb[TCA_STAB_MAX + 1];
+	struct qdisc_size_table *stab;
+	struct tc_sizespec *s;
+	unsigned int tsize = 0;
+	u16 *tab = NULL;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
+	if (err < 0)
+		return ERR_PTR(err);
+	if (!tb[TCA_STAB_BASE])
+		return ERR_PTR(-EINVAL);
+
+	s = nla_data(tb[TCA_STAB_BASE]);
+
+	if (s->tsize > 0) {
+		if (!tb[TCA_STAB_DATA])
+			return ERR_PTR(-EINVAL);
+		tab = nla_data(tb[TCA_STAB_DATA]);
+		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
+	}
+
+	/*
+	 * s was already dereferenced above, so it needs no NULL test here;
+	 * only the advertised vs. supplied table size has to agree.
+	 */
+	if (tsize != s->tsize || (!tab && tsize > 0))
+		return ERR_PTR(-EINVAL);
+
+	spin_lock(&qdisc_stab_lock);
+
+	/* Share an existing table when both header and data match. */
+	list_for_each_entry(stab, &qdisc_stab_list, list) {
+		if (memcmp(&stab->szopts, s, sizeof(*s)))
+			continue;
+		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
+			continue;
+		stab->refcnt++;
+		spin_unlock(&qdisc_stab_lock);
+		return stab;
+	}
+
+	spin_unlock(&qdisc_stab_lock);
+
+	/* GFP_KERNEL allocation is done with the spinlock released. */
+	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
+	if (!stab)
+		return ERR_PTR(-ENOMEM);
+
+	stab->refcnt = 1;
+	stab->szopts = *s;
+	if (tsize > 0)
+		memcpy(stab->data, tab, tsize * sizeof(u16));
+
+	spin_lock(&qdisc_stab_lock);
+	list_add_tail(&stab->list, &qdisc_stab_list);
+	spin_unlock(&qdisc_stab_lock);
+
+	return stab;
+}
+
+/*
+ * Drop one reference on @tab (NULL is accepted and ignored).
+ * When the count reaches zero the table is unlinked from its list
+ * and freed, all under qdisc_stab_lock.
+ */
+void qdisc_put_stab(struct qdisc_size_table *tab)
+{
+	if (tab == NULL)
+		return;
+
+	spin_lock(&qdisc_stab_lock);
+
+	tab->refcnt--;
+	if (tab->refcnt == 0) {
+		list_del(&tab->list);
+		kfree(tab);
+	}
+
+	spin_unlock(&qdisc_stab_lock);
+}
+EXPORT_SYMBOL(qdisc_put_stab);
+
+/*
+ * Emit a nested TCA_STAB attribute containing the table's
+ * TCA_STAB_BASE header into @skb.
+ *
+ * Returns skb->len on success, -1 if the attribute could not be added.
+ */
+static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, TCA_STAB);
+	/* Do not hand a failed (NULL) nest to nla_nest_end(). */
+	if (nest == NULL)
+		goto nla_put_failure;
+	NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
+	nla_nest_end(skb, nest);
+
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+}
+
+/*
+ * Compute the length of @skb as seen through size table @stab and
+ * store it in qdisc_skb_cb(skb)->pkt_len.
+ *
+ * The base length is skb->len plus the configured overhead.  If the
+ * table has entries, the length is mapped through stab->data[]: slots
+ * past the end are extrapolated from the last entry.  The stored
+ * result is never less than 1.
+ */
+void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+	const struct tc_sizespec *spec = &stab->szopts;
+	int len = skb->len + spec->overhead;
+
+	if (likely(spec->tsize)) {
+		int slot = len + spec->cell_align;
+
+		if (unlikely(slot < 0))
+			slot = 0;
+		slot >>= spec->cell_log;
+
+		if (unlikely(slot >= spec->tsize))
+			len = stab->data[spec->tsize - 1] *
+					(slot / spec->tsize) +
+					stab->data[slot % spec->tsize];
+		else
+			len = stab->data[slot];
+
+		len <<= spec->size_log;
+	}
+
+	if (unlikely(len < 1))
+		len = 1;
+	qdisc_skb_cb(skb)->pkt_len = len;
+}
+EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
timer);
- struct net_device *dev = wd->qdisc->dev;
wd->qdisc->flags &= ~TCQ_F_THROTTLED;
smp_wmb();
- netif_schedule(dev);
+ __netif_schedule(qdisc_root(wd->qdisc));
return HRTIMER_NORESTART;
}
@@ -302,6 +478,10 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
{
ktime_t time;
+ if (test_bit(__QDISC_STATE_DEACTIVATED,
+ &qdisc_root_sleeping(wd->qdisc)->state))
+ return;
+
wd->qdisc->flags |= TCQ_F_THROTTLED;
time = ktime_set(0, 0);
time = ktime_add_ns(time, PSCHED_US2NS(expires));
@@ -316,6 +496,110 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
+/*
+ * Allocate and initialise an array of @n hash list heads.
+ * Arrays up to PAGE_SIZE bytes come from kmalloc(), larger ones from
+ * __get_free_pages().  Returns NULL on allocation failure.
+ */
+static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
+{
+	unsigned int bytes = n * sizeof(struct hlist_head);
+	struct hlist_head *table;
+	unsigned int idx;
+
+	if (bytes > PAGE_SIZE)
+		table = (struct hlist_head *)
+			__get_free_pages(GFP_KERNEL, get_order(bytes));
+	else
+		table = kmalloc(bytes, GFP_KERNEL);
+
+	if (table == NULL)
+		return NULL;
+
+	for (idx = 0; idx < n; idx++)
+		INIT_HLIST_HEAD(&table[idx]);
+
+	return table;
+}
+
+/*
+ * Release an array of @n hash list heads, using the same size test as
+ * the allocator to pick between kfree() and free_pages().
+ */
+static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
+{
+	unsigned int bytes = n * sizeof(struct hlist_head);
+
+	if (bytes > PAGE_SIZE) {
+		free_pages((unsigned long)h, get_order(bytes));
+		return;
+	}
+	kfree(h);
+}
+
+/*
+ * Double the size of @clhash once it holds more than 3/4 as many
+ * elements as buckets.  Does nothing if the threshold is not exceeded
+ * or if the new table cannot be allocated.
+ *
+ * Entries are moved to the new table and the table is switched while
+ * holding sch_tree_lock(@sch); the old table is freed after unlocking.
+ */
+void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
+{
+	struct Qdisc_class_common *cl;
+	struct hlist_node *pos, *tmp;
+	struct hlist_head *old_tbl, *new_tbl;
+	unsigned int old_sz, new_sz, new_mask;
+	unsigned int bucket, slot;
+
+	/* Rehash when load factor exceeds 0.75 */
+	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
+		return;
+
+	new_sz = clhash->hashsize * 2;
+	new_mask = new_sz - 1;
+	new_tbl = qdisc_class_hash_alloc(new_sz);
+	if (!new_tbl)
+		return;
+
+	old_tbl = clhash->hash;
+	old_sz = clhash->hashsize;
+
+	sch_tree_lock(sch);
+	for (bucket = 0; bucket < old_sz; bucket++) {
+		hlist_for_each_entry_safe(cl, pos, tmp, &old_tbl[bucket], hnode) {
+			slot = qdisc_class_hash(cl->classid, new_mask);
+			hlist_add_head(&cl->hnode, &new_tbl[slot]);
+		}
+	}
+	clhash->hash = new_tbl;
+	clhash->hashsize = new_sz;
+	clhash->hashmask = new_mask;
+	sch_tree_unlock(sch);
+
+	qdisc_class_hash_free(old_tbl, old_sz);
+}
+EXPORT_SYMBOL(qdisc_class_hash_grow);
+
+/*
+ * Set up @clhash as an empty table with four buckets.
+ * Returns 0 on success or -ENOMEM if the bucket array cannot be
+ * allocated.
+ */
+int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
+{
+	unsigned int buckets = 4;
+
+	clhash->hash = qdisc_class_hash_alloc(buckets);
+	if (!clhash->hash)
+		return -ENOMEM;
+
+	clhash->hashelems = 0;
+	clhash->hashsize = buckets;
+	clhash->hashmask = buckets - 1;
+	return 0;
+}
+EXPORT_SYMBOL(qdisc_class_hash_init);
+
+/* Free the bucket array owned by @clhash. */
+void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
+{
+	struct hlist_head *table = clhash->hash;
+	unsigned int buckets = clhash->hashsize;
+
+	qdisc_class_hash_free(table, buckets);
+}
+EXPORT_SYMBOL(qdisc_class_hash_destroy);
+
+/*
+ * Link class @cl into @clhash at the bucket selected by its classid
+ * and bump the element count.
+ */
+void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
+			     struct Qdisc_class_common *cl)
+{
+	struct hlist_head *bucket;
+
+	INIT_HLIST_NODE(&cl->hnode);
+	bucket = &clhash->hash[qdisc_class_hash(cl->classid, clhash->hashmask)];
+	hlist_add_head(&cl->hnode, bucket);
+	clhash->hashelems++;
+}
+EXPORT_SYMBOL(qdisc_class_hash_insert);
+
+/* Unlink class @cl from its bucket and decrement the element count. */
+void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
+			     struct Qdisc_class_common *cl)
+{
+	struct hlist_node *node = &cl->hnode;
+
+	hlist_del(node);
+	clhash->hashelems--;
+}
+EXPORT_SYMBOL(qdisc_class_hash_remove);
+
/* Allocate an unique handle from space managed by kernel */
static u32 qdisc_alloc_handle(struct net_device *dev)
@@ -332,47 +616,28 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
return i>0 ? autohandle : 0;
}
-/* Attach toplevel qdisc to device dev */
+/* Attach toplevel qdisc to device queue. */
-static struct Qdisc *
-dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
+static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
+ struct Qdisc *qdisc)
{
- struct Qdisc *oqdisc;
-
- if (dev->flags & IFF_UP)
- dev_deactivate(dev);
-
- qdisc_lock_tree(dev);
- if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
- oqdisc = dev->qdisc_ingress;
- /* Prune old scheduler */
- if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
- /* delete */
- qdisc_reset(oqdisc);
- dev->qdisc_ingress = NULL;
- } else { /* new */
- dev->qdisc_ingress = qdisc;
- }
-
- } else {
-
- oqdisc = dev->qdisc_sleeping;
+ struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
+ spinlock_t *root_lock;
- /* Prune old scheduler */
- if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
- qdisc_reset(oqdisc);
+ root_lock = qdisc_lock(oqdisc);
+ spin_lock_bh(root_lock);
- /* ... and graft new one */
- if (qdisc == NULL)
- qdisc = &noop_qdisc;
- dev->qdisc_sleeping = qdisc;
- dev->qdisc = &noop_qdisc;
- }
+ /* Prune old scheduler */
+ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
+ qdisc_reset(oqdisc);
- qdisc_unlock_tree(dev);
+ /* ... and graft new one */
+ if (qdisc == NULL)
+ qdisc = &noop_qdisc;
+ dev_queue->qdisc_sleeping = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
- if (dev->flags & IFF_UP)
- dev_activate(dev);
+ spin_unlock_bh(root_lock);
return oqdisc;
}
@@ -389,7 +654,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
return;
- sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
if (sch == NULL) {
WARN_ON(parentid != TC_H_ROOT);
return;
@@ -405,26 +670,61 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
-/* Graft qdisc "new" to class "classid" of qdisc "parent" or
- to device "dev".
+/*
+ * Send a qdisc notification for the @old -> @new transition (unless
+ * both are NULL), then destroy @old if there is one.
+ */
+static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
+			       struct Qdisc *old, struct Qdisc *new)
+{
+	if (!new && !old)
+		return;
+
+	qdisc_notify(skb, n, clid, old, new);
+
+	if (old != NULL)
+		qdisc_destroy(old);
+}
- Old qdisc is not destroyed but returned in *old.
+/* Graft qdisc "new" to class "classid" of qdisc "parent" or
+ * to device "dev".
+ *
+ * When appropriate send a netlink notification using 'skb'
+ * and "n".
+ *
+ * On success, destroy old qdisc.
*/
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
- u32 classid,
- struct Qdisc *new, struct Qdisc **old)
+ struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
+ struct Qdisc *new, struct Qdisc *old)
{
+ struct Qdisc *q = old;
int err = 0;
- struct Qdisc *q = *old;
-
if (parent == NULL) {
- if (q && q->flags&TCQ_F_INGRESS) {
- *old = dev_graft_qdisc(dev, q);
- } else {
- *old = dev_graft_qdisc(dev, new);
+ unsigned int i, num_q, ingress;
+
+ ingress = 0;
+ num_q = dev->num_tx_queues;
+ if ((q && q->flags & TCQ_F_INGRESS) ||
+ (new && new->flags & TCQ_F_INGRESS)) {
+ num_q = 1;
+ ingress = 1;
}
+
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+
+ for (i = 0; i < num_q; i++) {
+ struct netdev_queue *dev_queue = &dev->rx_queue;
+
+ if (!ingress)
+ dev_queue = netdev_get_tx_queue(dev, i);
+
+ old = dev_graft_qdisc(dev_queue, new);
+ if (new && i > 0)
+ atomic_inc(&new->refcnt);
+
+ notify_and_destroy(skb, n, classid, old, new);
+ }
+
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
} else {
const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
@@ -433,14 +733,20 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (cops) {
unsigned long cl = cops->get(parent, classid);
if (cl) {
- err = cops->graft(parent, cl, new, old);
+ err = cops->graft(parent, cl, new, &old);
cops->put(parent, cl);
}
}
+ if (!err)
+ notify_and_destroy(skb, n, classid, old, new);
}
return err;
}
+/* lockdep annotation is needed for ingress; egress gets it only for name */
+static struct lock_class_key qdisc_tx_lock;
+static struct lock_class_key qdisc_rx_lock;
+
/*
Allocate and initialize new qdisc.
@@ -448,13 +754,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
*/
static struct Qdisc *
-qdisc_create(struct net_device *dev, u32 parent, u32 handle,
- struct nlattr **tca, int *errp)
+qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
+ u32 parent, u32 handle, struct nlattr **tca, int *errp)
{
int err;
struct nlattr *kind = tca[TCA_KIND];
struct Qdisc *sch;
struct Qdisc_ops *ops;
+ struct qdisc_size_table *stab;
ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
@@ -489,7 +796,7 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
if (ops == NULL)
goto err_out;
- sch = qdisc_alloc(dev, ops);
+ sch = qdisc_alloc(dev_queue, ops);
if (IS_ERR(sch)) {
err = PTR_ERR(sch);
goto err_out2;
@@ -499,25 +806,40 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
if (handle == TC_H_INGRESS) {
sch->flags |= TCQ_F_INGRESS;
- sch->stats_lock = &dev->ingress_lock;
handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
} else {
- sch->stats_lock = &dev->queue_lock;
if (handle == 0) {
handle = qdisc_alloc_handle(dev);
err = -ENOMEM;
if (handle == 0)
goto err_out3;
}
+ lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
}
sch->handle = handle;
if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
+ if (tca[TCA_STAB]) {
+ stab = qdisc_get_stab(tca[TCA_STAB]);
+ if (IS_ERR(stab)) {
+ err = PTR_ERR(stab);
+ goto err_out3;
+ }
+ sch->stab = stab;
+ }
if (tca[TCA_RATE]) {
+ spinlock_t *root_lock;
+
+ if ((sch->parent != TC_H_ROOT) &&
+ !(sch->flags & TCQ_F_INGRESS))
+ root_lock = qdisc_root_sleeping_lock(sch);
+ else
+ root_lock = qdisc_lock(sch);
+
err = gen_new_estimator(&sch->bstats, &sch->rate_est,
- sch->stats_lock,
- tca[TCA_RATE]);
+ root_lock, tca[TCA_RATE]);
if (err) {
/*
* Any broken qdiscs that would require
@@ -529,13 +851,13 @@ qdisc_create(struct net_device *dev, u32 parent, u32 handle,
goto err_out3;
}
}
- qdisc_lock_tree(dev);
- list_add_tail(&sch->list, &dev->qdisc_list);
- qdisc_unlock_tree(dev);
+
+ qdisc_list_add(sch);
return sch;
}
err_out3:
+ qdisc_put_stab(sch->stab);
dev_put(dev);
kfree((char *) sch - sch->padded);
err_out2:
@@ -547,18 +869,30 @@ err_out:
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
- if (tca[TCA_OPTIONS]) {
- int err;
+ struct qdisc_size_table *stab = NULL;
+ int err = 0;
+ if (tca[TCA_OPTIONS]) {
if (sch->ops->change == NULL)
return -EINVAL;
err = sch->ops->change(sch, tca[TCA_OPTIONS]);
if (err)
return err;
}
+
+ if (tca[TCA_STAB]) {
+ stab = qdisc_get_stab(tca[TCA_STAB]);
+ if (IS_ERR(stab))
+ return PTR_ERR(stab);
+ }
+
+ qdisc_put_stab(sch->stab);
+ sch->stab = stab;
+
if (tca[TCA_RATE])
gen_replace_estimator(&sch->bstats, &sch->rate_est,
- sch->stats_lock, tca[TCA_RATE]);
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
return 0;
}
@@ -634,10 +968,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
return -ENOENT;
q = qdisc_leaf(p, clid);
} else { /* ingress */
- q = dev->qdisc_ingress;
+ q = dev->rx_queue.qdisc_sleeping;
}
} else {
- q = dev->qdisc_sleeping;
+ struct netdev_queue *dev_queue;
+ dev_queue = netdev_get_tx_queue(dev, 0);
+ q = dev_queue->qdisc_sleeping;
}
if (!q)
return -ENOENT;
@@ -657,14 +993,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
return -EINVAL;
if (q->handle == 0)
return -ENOENT;
- if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
+ if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
return err;
- if (q) {
- qdisc_notify(skb, n, clid, q, NULL);
- qdisc_lock_tree(dev);
- qdisc_destroy(q);
- qdisc_unlock_tree(dev);
- }
} else {
qdisc_notify(skb, n, clid, NULL, q);
}
@@ -708,10 +1038,12 @@ replay:
return -ENOENT;
q = qdisc_leaf(p, clid);
} else { /*ingress */
- q = dev->qdisc_ingress;
+ q = dev->rx_queue.qdisc_sleeping;
}
} else {
- q = dev->qdisc_sleeping;
+ struct netdev_queue *dev_queue;
+ dev_queue = netdev_get_tx_queue(dev, 0);
+ q = dev_queue->qdisc_sleeping;
}
/* It may be default qdisc, ignore it */
@@ -788,10 +1120,12 @@ create_n_graft:
if (!(n->nlmsg_flags&NLM_F_CREATE))
return -ENOENT;
if (clid == TC_H_INGRESS)
- q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent,
+ q = qdisc_create(dev, &dev->rx_queue,
+ tcm->tcm_parent, tcm->tcm_parent,
tca, &err);
else
- q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle,
+ q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
+ tcm->tcm_parent, tcm->tcm_handle,
tca, &err);
if (q == NULL) {
if (err == -EAGAIN)
@@ -800,24 +1134,13 @@ create_n_graft:
}
graft:
- if (1) {
- struct Qdisc *old_q = NULL;
- err = qdisc_graft(dev, p, clid, q, &old_q);
- if (err) {
- if (q) {
- qdisc_lock_tree(dev);
- qdisc_destroy(q);
- qdisc_unlock_tree(dev);
- }
- return err;
- }
- qdisc_notify(skb, n, clid, old_q, q);
- if (old_q) {
- qdisc_lock_tree(dev);
- qdisc_destroy(old_q);
- qdisc_unlock_tree(dev);
- }
+ err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
+ if (err) {
+ if (q)
+ qdisc_destroy(q);
+ return err;
}
+
return 0;
}
@@ -834,7 +1157,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
tcm->tcm_family = AF_UNSPEC;
tcm->tcm__pad1 = 0;
tcm->tcm__pad2 = 0;
- tcm->tcm_ifindex = q->dev->ifindex;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
tcm->tcm_parent = clid;
tcm->tcm_handle = q->handle;
tcm->tcm_info = atomic_read(&q->refcnt);
@@ -843,8 +1166,11 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
goto nla_put_failure;
q->qstats.qlen = q->q.qlen;
- if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
- TCA_XSTATS, q->stats_lock, &d) < 0)
+ if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+ qdisc_root_sleeping_lock(q), &d) < 0)
goto nla_put_failure;
if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
@@ -894,13 +1220,57 @@ err_out:
return -EINVAL;
}
+/* True when @q carries TCQ_F_BUILTIN and must be left out of dumps. */
+static bool tc_qdisc_dump_ignore(struct Qdisc *q)
+{
+	return (q->flags & TCQ_F_BUILTIN) != 0;
+}
+
+/*
+ * Dump @root followed by every qdisc on its list into @skb.
+ *
+ * *@q_idx_p is the running qdisc index; entries with an index below
+ * @s_q_idx are counted but not emitted.  Qdiscs rejected by
+ * tc_qdisc_dump_ignore() are counted but not emitted either.
+ *
+ * Returns 0 when everything was processed (or @root is NULL), -1 when
+ * tc_fill_qdisc() reported failure; in that case *@q_idx_p is left at
+ * the index of the qdisc that failed.
+ */
+static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
+			      struct netlink_callback *cb,
+			      int *q_idx_p, int s_q_idx)
+{
+	struct Qdisc *q;
+	int idx = *q_idx_p;
+	int err = -1;
+
+	if (root == NULL)
+		return 0;
+
+	/* The root itself is not on its own list, so handle it first. */
+	if (idx >= s_q_idx && !tc_qdisc_dump_ignore(root) &&
+	    tc_fill_qdisc(skb, root, root->parent, NETLINK_CB(cb->skb).pid,
+			  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+		goto save;
+	idx++;
+
+	list_for_each_entry(q, &root->list, list) {
+		if (idx >= s_q_idx && !tc_qdisc_dump_ignore(q) &&
+		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+			goto save;
+		idx++;
+	}
+
+	err = 0;
+save:
+	*q_idx_p = idx;
+	return err;
+}
+
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
int idx, q_idx;
int s_idx, s_q_idx;
struct net_device *dev;
- struct Qdisc *q;
if (net != &init_net)
return 0;
@@ -910,21 +1280,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
read_lock(&dev_base_lock);
idx = 0;
for_each_netdev(&init_net, dev) {
+ struct netdev_queue *dev_queue;
+
if (idx < s_idx)
goto cont;
if (idx > s_idx)
s_q_idx = 0;
q_idx = 0;
- list_for_each_entry(q, &dev->qdisc_list, list) {
- if (q_idx < s_q_idx) {
- q_idx++;
- continue;
- }
- if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
- goto done;
- q_idx++;
- }
+
+ dev_queue = netdev_get_tx_queue(dev, 0);
+ if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
+ goto done;
+
+ dev_queue = &dev->rx_queue;
+ if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
+ goto done;
+
cont:
idx++;
}
@@ -949,6 +1320,7 @@ done:
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
struct net *net = sock_net(skb->sk);
+ struct netdev_queue *dev_queue;
struct tcmsg *tcm = NLMSG_DATA(n);
struct nlattr *tca[TCA_MAX + 1];
struct net_device *dev;
@@ -986,6 +1358,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
/* Step 1. Determine qdisc handle X:0 */
+ dev_queue = netdev_get_tx_queue(dev, 0);
if (pid != TC_H_ROOT) {
u32 qid1 = TC_H_MAJ(pid);
@@ -996,7 +1369,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
} else if (qid1) {
qid = qid1;
} else if (qid == 0)
- qid = dev->qdisc_sleeping->handle;
+ qid = dev_queue->qdisc_sleeping->handle;
/* Now qid is genuine qdisc handle consistent
both with parent and child.
@@ -1007,7 +1380,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
pid = TC_H_MAKE(qid, pid);
} else {
if (qid == 0)
- qid = dev->qdisc_sleeping->handle;
+ qid = dev_queue->qdisc_sleeping->handle;
}
/* OK. Locate qdisc */
@@ -1080,7 +1453,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
tcm = NLMSG_DATA(nlh);
tcm->tcm_family = AF_UNSPEC;
- tcm->tcm_ifindex = q->dev->ifindex;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
tcm->tcm_parent = q->handle;
tcm->tcm_handle = q->handle;
tcm->tcm_info = 0;
@@ -1088,8 +1461,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
goto nla_put_failure;
- if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
- TCA_XSTATS, q->stats_lock, &d) < 0)
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+ qdisc_root_sleeping_lock(q), &d) < 0)
goto nla_put_failure;
if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
@@ -1140,15 +1513,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}
+/*
+ * Dump the classes of a single qdisc @q via its class walker.
+ *
+ * The qdisc is skipped (only *@t_p is advanced) when it is ignored for
+ * dumps, when its index *@t_p is below @s_t, when it has no class ops,
+ * or when tcm->tcm_parent is set and its major part differs from
+ * q->handle.
+ *
+ * Returns 0 after advancing *@t_p, or -1 without advancing it when the
+ * walker set its stop flag.  cb->args[1] receives the walker's count.
+ */
+static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
+				struct tcmsg *tcm, struct netlink_callback *cb,
+				int *t_p, int s_t)
+{
+	struct qdisc_dump_args arg;
+	int skip;
+
+	skip = tc_qdisc_dump_ignore(q) || *t_p < s_t || !q->ops->cl_ops;
+	if (!skip && tcm->tcm_parent)
+		skip = TC_H_MAJ(tcm->tcm_parent) != q->handle;
+	if (skip)
+		goto next;
+
+	/* Past the starting qdisc: clear cb->args[1] onward before walking. */
+	if (*t_p > s_t)
+		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+
+	arg.skb = skb;
+	arg.cb = cb;
+	arg.w.fn = qdisc_class_dump;
+	arg.w.stop = 0;
+	arg.w.skip = cb->args[1];
+	arg.w.count = 0;
+	q->ops->cl_ops->walk(q, &arg.w);
+	cb->args[1] = arg.w.count;
+	if (arg.w.stop)
+		return -1;
+
+next:
+	(*t_p)++;
+	return 0;
+}
+
+/*
+ * Dump classes for @root and then for each qdisc on its list.
+ * Returns 0 on completion (or when @root is NULL) and -1 as soon as
+ * one per-qdisc dump reports failure.
+ */
+static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
+			       struct tcmsg *tcm, struct netlink_callback *cb,
+			       int *t_p, int s_t)
+{
+	struct Qdisc *q;
+	int err = 0;
+
+	if (root == NULL)
+		return 0;
+
+	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
+		err = -1;
+
+	if (!err) {
+		list_for_each_entry(q, &root->list, list) {
+			if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) {
+				err = -1;
+				break;
+			}
+		}
+	}
+
+	return err;
+}
+
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
struct net *net = sock_net(skb->sk);
- int t;
- int s_t;
+ struct netdev_queue *dev_queue;
struct net_device *dev;
- struct Qdisc *q;
- struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
- struct qdisc_dump_args arg;
+ int t, s_t;
if (net != &init_net)
return 0;
@@ -1161,28 +1581,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- list_for_each_entry(q, &dev->qdisc_list, list) {
- if (t < s_t || !q->ops->cl_ops ||
- (tcm->tcm_parent &&
- TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
- t++;
- continue;
- }
- if (t > s_t)
- memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
- arg.w.fn = qdisc_class_dump;
- arg.skb = skb;
- arg.cb = cb;
- arg.w.stop = 0;
- arg.w.skip = cb->args[1];
- arg.w.count = 0;
- q->ops->cl_ops->walk(q, &arg.w);
- cb->args[1] = arg.w.count;
- if (arg.w.stop)
- break;
- t++;
- }
+ dev_queue = netdev_get_tx_queue(dev, 0);
+ if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
+ goto done;
+
+ dev_queue = &dev->rx_queue;
+ if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
+ goto done;
+done:
cb->args[0] = t;
dev_put(dev);
@@ -1252,12 +1659,12 @@ void tcf_destroy(struct tcf_proto *tp)
kfree(tp);
}
-void tcf_destroy_chain(struct tcf_proto *fl)
+void tcf_destroy_chain(struct tcf_proto **fl)
{
struct tcf_proto *tp;
- while ((tp = fl) != NULL) {
- fl = tp->next;
+ while ((tp = *fl) != NULL) {
+ *fl = tp->next;
tcf_destroy(tp);
}
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 335273416384..43d37256c15e 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -160,9 +160,9 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
*prev = flow->next;
pr_debug("atm_tc_put: qdisc %p\n", flow->q);
qdisc_destroy(flow->q);
- tcf_destroy_chain(flow->filter_list);
+ tcf_destroy_chain(&flow->filter_list);
if (flow->sock) {
- pr_debug("atm_tc_put: f_count %d\n",
+ pr_debug("atm_tc_put: f_count %ld\n",
file_count(flow->sock->file));
flow->vcc->pop = flow->old_pop;
sockfd_put(flow->sock);
@@ -259,7 +259,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
sock = sockfd_lookup(fd, &error);
if (!sock)
return error; /* f_count++ */
- pr_debug("atm_tc_change: f_count %d\n", file_count(sock->file));
+ pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file));
if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
error = -EPROTOTYPE;
goto err_out;
@@ -296,7 +296,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
goto err_out;
}
flow->filter_list = NULL;
- flow->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
+ flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, classid);
if (!flow->q)
flow->q = &noop_qdisc;
pr_debug("atm_tc_change: qdisc %p\n", flow->q);
@@ -414,7 +415,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
kfree_skb(skb);
- return NET_XMIT_SUCCESS;
+ return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
kfree_skb(skb);
goto drop;
@@ -428,17 +429,19 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#endif
}
- ret = flow->q->enqueue(skb, flow->q);
+ ret = qdisc_enqueue(skb, flow->q);
if (ret != 0) {
drop: __maybe_unused
- sch->qstats.drops++;
- if (flow)
- flow->qstats.drops++;
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ if (flow)
+ flow->qstats.drops++;
+ }
return ret;
}
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
sch->bstats.packets++;
- flow->bstats.bytes += skb->len;
+ flow->bstats.bytes += qdisc_pkt_len(skb);
flow->bstats.packets++;
/*
* Okay, this may seem weird. We pretend we've dropped the packet if
@@ -454,7 +457,7 @@ drop: __maybe_unused
return 0;
}
tasklet_schedule(&p->task);
- return NET_XMIT_BYPASS;
+ return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}
/*
@@ -529,7 +532,7 @@ static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
if (!ret) {
sch->q.qlen++;
sch->qstats.requeues++;
- } else {
+ } else if (net_xmit_drop_count(ret)) {
sch->qstats.drops++;
p->link.qstats.drops++;
}
@@ -555,7 +558,8 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
p->flows = &p->link;
- p->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
+ p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, sch->handle);
if (!p->link.q)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
@@ -586,10 +590,11 @@ static void atm_tc_destroy(struct Qdisc *sch)
struct atm_flow_data *flow;
pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
+ for (flow = p->flows; flow; flow = flow->next)
+ tcf_destroy_chain(&flow->filter_list);
+
/* races ? */
while ((flow = p->flows)) {
- tcf_destroy_chain(flow->filter_list);
- flow->filter_list = NULL;
if (flow->ref > 1)
printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
flow->ref);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 09969c1fbc08..8b06fa900482 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -73,11 +73,10 @@ struct cbq_sched_data;
struct cbq_class
{
- struct cbq_class *next; /* hash table link */
+ struct Qdisc_class_common common;
struct cbq_class *next_alive; /* next class with backlog in this priority band */
/* Parameters */
- u32 classid;
unsigned char priority; /* class priority */
unsigned char priority2; /* priority to be used after overlimit */
unsigned char ewma_log; /* time constant for idle time calculation */
@@ -144,7 +143,7 @@ struct cbq_class
struct cbq_sched_data
{
- struct cbq_class *classes[16]; /* Hash table of all classes */
+ struct Qdisc_class_hash clhash; /* Hash table of all classes */
int nclasses[TC_CBQ_MAXPRIO+1];
unsigned quanta[TC_CBQ_MAXPRIO+1];
@@ -177,23 +176,15 @@ struct cbq_sched_data
#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
-
-static __inline__ unsigned cbq_hash(u32 h)
-{
- h ^= h>>8;
- h ^= h>>4;
- return h&0xF;
-}
-
static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
{
- struct cbq_class *cl;
+ struct Qdisc_class_common *clc;
- for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
- if (cl->classid == classid)
- return cl;
- return NULL;
+ clc = qdisc_class_find(&q->clhash, classid);
+ if (clc == NULL)
+ return NULL;
+ return container_of(clc, struct cbq_class, common);
}
#ifdef CONFIG_NET_CLS_ACT
@@ -239,7 +230,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
(cl = cbq_class_lookup(q, prio)) != NULL)
return cl;
- *qerr = NET_XMIT_BYPASS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
for (;;) {
int result = 0;
defmap = head->defaults;
@@ -265,7 +256,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
- *qerr = NET_XMIT_SUCCESS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
case TC_ACT_RECLASSIFY:
@@ -379,7 +370,6 @@ static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- int len = skb->len;
int uninitialized_var(ret);
struct cbq_class *cl = cbq_classify(skb, sch, &ret);
@@ -387,7 +377,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
q->rx_class = cl;
#endif
if (cl == NULL) {
- if (ret == NET_XMIT_BYPASS)
+ if (ret & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
return ret;
@@ -396,19 +386,22 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#ifdef CONFIG_NET_CLS_ACT
cl->q->__parent = sch;
#endif
- if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
+ ret = qdisc_enqueue(skb, cl->q);
+ if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
sch->bstats.packets++;
- sch->bstats.bytes+=len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
cbq_mark_toplevel(q, cl);
if (!cl->next_alive)
cbq_activate_class(cl);
return ret;
}
- sch->qstats.drops++;
- cbq_mark_toplevel(q, cl);
- cl->qstats.drops++;
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ cbq_mark_toplevel(q, cl);
+ cl->qstats.drops++;
+ }
return ret;
}
@@ -439,8 +432,10 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
cbq_activate_class(cl);
return 0;
}
- sch->qstats.drops++;
- cl->qstats.drops++;
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ cl->qstats.drops++;
+ }
return ret;
}
@@ -526,6 +521,10 @@ static void cbq_ovl_delay(struct cbq_class *cl)
struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
psched_tdiff_t delay = cl->undertime - q->now;
+ if (test_bit(__QDISC_STATE_DEACTIVATED,
+ &qdisc_root_sleeping(cl->qdisc)->state))
+ return;
+
if (!cl->delayed) {
psched_time_t sched = q->now;
ktime_t expires;
@@ -659,14 +658,13 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
}
sch->flags &= ~TCQ_F_THROTTLED;
- netif_schedule(sch->dev);
+ __netif_schedule(qdisc_root(sch));
return HRTIMER_NORESTART;
}
#ifdef CONFIG_NET_CLS_ACT
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
- int len = skb->len;
struct Qdisc *sch = child->__parent;
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = q->rx_class;
@@ -674,21 +672,24 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
q->rx_class = NULL;
if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
+ int ret;
cbq_mark_toplevel(q, cl);
q->rx_class = cl;
cl->q->__parent = sch;
- if (cl->q->enqueue(skb, cl->q) == 0) {
+ ret = qdisc_enqueue(skb, cl->q);
+ if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
sch->bstats.packets++;
- sch->bstats.bytes+=len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
if (!cl->next_alive)
cbq_activate_class(cl);
return 0;
}
- sch->qstats.drops++;
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
return 0;
}
@@ -889,7 +890,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
if (skb == NULL)
goto skip_class;
- cl->deficit -= skb->len;
+ cl->deficit -= qdisc_pkt_len(skb);
q->tx_class = cl;
q->tx_borrowed = borrow;
if (borrow != cl) {
@@ -897,11 +898,11 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
borrow->xstats.borrows++;
cl->xstats.borrows++;
#else
- borrow->xstats.borrows += skb->len;
- cl->xstats.borrows += skb->len;
+ borrow->xstats.borrows += qdisc_pkt_len(skb);
+ cl->xstats.borrows += qdisc_pkt_len(skb);
#endif
}
- q->tx_len = skb->len;
+ q->tx_len = qdisc_pkt_len(skb);
if (cl->deficit <= 0) {
q->active[prio] = cl;
@@ -1071,13 +1072,14 @@ static void cbq_adjust_levels(struct cbq_class *this)
static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
{
struct cbq_class *cl;
- unsigned h;
+ struct hlist_node *n;
+ unsigned int h;
if (q->quanta[prio] == 0)
return;
- for (h=0; h<16; h++) {
- for (cl = q->classes[h]; cl; cl = cl->next) {
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
/* BUGGGG... Beware! This expression suffer of
arithmetic overflows!
*/
@@ -1085,9 +1087,9 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
q->quanta[prio];
}
- if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
- printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
- cl->quantum = cl->qdisc->dev->mtu/2 + 1;
+ if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
+ printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
+ cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
}
}
}
@@ -1114,10 +1116,12 @@ static void cbq_sync_defmap(struct cbq_class *cl)
if (split->defaults[i])
continue;
- for (h=0; h<16; h++) {
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ struct hlist_node *n;
struct cbq_class *c;
- for (c = q->classes[h]; c; c = c->next) {
+ hlist_for_each_entry(c, n, &q->clhash.hash[h],
+ common.hnode) {
if (c->split == split && c->level < level &&
c->defmap&(1<<i)) {
split->defaults[i] = c;
@@ -1135,12 +1139,12 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
if (splitid == 0) {
if ((split = cl->split) == NULL)
return;
- splitid = split->classid;
+ splitid = split->common.classid;
}
- if (split == NULL || split->classid != splitid) {
+ if (split == NULL || split->common.classid != splitid) {
for (split = cl->tparent; split; split = split->tparent)
- if (split->classid == splitid)
+ if (split->common.classid == splitid)
break;
}
@@ -1163,13 +1167,7 @@ static void cbq_unlink_class(struct cbq_class *this)
struct cbq_class *cl, **clp;
struct cbq_sched_data *q = qdisc_priv(this->qdisc);
- for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
- if (cl == this) {
- *clp = cl->next;
- cl->next = NULL;
- break;
- }
- }
+ qdisc_class_hash_remove(&q->clhash, &this->common);
if (this->tparent) {
clp=&this->sibling;
@@ -1188,19 +1186,17 @@ static void cbq_unlink_class(struct cbq_class *this)
this->tparent->children = NULL;
}
} else {
- BUG_TRAP(this->sibling == this);
+ WARN_ON(this->sibling != this);
}
}
static void cbq_link_class(struct cbq_class *this)
{
struct cbq_sched_data *q = qdisc_priv(this->qdisc);
- unsigned h = cbq_hash(this->classid);
struct cbq_class *parent = this->tparent;
this->sibling = this;
- this->next = q->classes[h];
- q->classes[h] = this;
+ qdisc_class_hash_insert(&q->clhash, &this->common);
if (parent == NULL)
return;
@@ -1242,6 +1238,7 @@ cbq_reset(struct Qdisc* sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl;
+ struct hlist_node *n;
int prio;
unsigned h;
@@ -1258,8 +1255,8 @@ cbq_reset(struct Qdisc* sch)
for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
q->active[prio] = NULL;
- for (h = 0; h < 16; h++) {
- for (cl = q->classes[h]; cl; cl = cl->next) {
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
qdisc_reset(cl->q);
cl->next_alive = NULL;
@@ -1406,11 +1403,16 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
return -EINVAL;
+ err = qdisc_class_hash_init(&q->clhash);
+ if (err < 0)
+ goto put_rtab;
+
q->link.refcnt = 1;
q->link.sibling = &q->link;
- q->link.classid = sch->handle;
+ q->link.common.classid = sch->handle;
q->link.qdisc = sch;
- if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
sch->handle)))
q->link.q = &noop_qdisc;
@@ -1419,7 +1421,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
q->link.cpriority = TC_CBQ_MAXPRIO-1;
q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
q->link.overlimit = cbq_ovl_classic;
- q->link.allot = psched_mtu(sch->dev);
+ q->link.allot = psched_mtu(qdisc_dev(sch));
q->link.quantum = q->link.allot;
q->link.weight = q->link.R_tab->rate.rate;
@@ -1441,6 +1443,10 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
cbq_addprio(q, &q->link);
return 0;
+
+put_rtab:
+ qdisc_put_rtab(q->link.R_tab);
+ return err;
}
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
@@ -1521,7 +1527,7 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
struct tc_cbq_fopt opt;
if (cl->split || cl->defmap) {
- opt.split = cl->split ? cl->split->classid : 0;
+ opt.split = cl->split ? cl->split->common.classid : 0;
opt.defmap = cl->defmap;
opt.defchange = ~0;
NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
@@ -1602,10 +1608,10 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
struct nlattr *nest;
if (cl->tparent)
- tcm->tcm_parent = cl->tparent->classid;
+ tcm->tcm_parent = cl->tparent->common.classid;
else
tcm->tcm_parent = TC_H_ROOT;
- tcm->tcm_handle = cl->classid;
+ tcm->tcm_handle = cl->common.classid;
tcm->tcm_info = cl->q->handle;
nest = nla_nest_start(skb, TCA_OPTIONS);
@@ -1650,8 +1656,10 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (cl) {
if (new == NULL) {
- if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
- cl->classid)) == NULL)
+ new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ cl->common.classid);
+ if (new == NULL)
return -ENOBUFS;
} else {
#ifdef CONFIG_NET_CLS_ACT
@@ -1702,9 +1710,9 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- BUG_TRAP(!cl->filters);
+ WARN_ON(cl->filters);
- tcf_destroy_chain(cl->filter_list);
+ tcf_destroy_chain(&cl->filter_list);
qdisc_destroy(cl->q);
qdisc_put_rtab(cl->R_tab);
gen_kill_estimator(&cl->bstats, &cl->rate_est);
@@ -1716,6 +1724,7 @@ static void
cbq_destroy(struct Qdisc* sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
+ struct hlist_node *n, *next;
struct cbq_class *cl;
unsigned h;
@@ -1727,20 +1736,16 @@ cbq_destroy(struct Qdisc* sch)
* classes from root to leafs which means that filters can still
* be bound to classes which have been destroyed already. --TGR '04
*/
- for (h = 0; h < 16; h++) {
- for (cl = q->classes[h]; cl; cl = cl->next) {
- tcf_destroy_chain(cl->filter_list);
- cl->filter_list = NULL;
- }
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
+ tcf_destroy_chain(&cl->filter_list);
}
- for (h = 0; h < 16; h++) {
- struct cbq_class *next;
-
- for (cl = q->classes[h]; cl; cl = next) {
- next = cl->next;
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
+ common.hnode)
cbq_destroy_class(sch, cl);
- }
}
+ qdisc_class_hash_destroy(&q->clhash);
}
static void cbq_put(struct Qdisc *sch, unsigned long arg)
@@ -1749,12 +1754,13 @@ static void cbq_put(struct Qdisc *sch, unsigned long arg)
if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_ACT
+ spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct cbq_sched_data *q = qdisc_priv(sch);
- spin_lock_bh(&sch->dev->queue_lock);
+ spin_lock_bh(root_lock);
if (q->rx_class == cl)
q->rx_class = NULL;
- spin_unlock_bh(&sch->dev->queue_lock);
+ spin_unlock_bh(root_lock);
#endif
cbq_destroy_class(sch, cl);
@@ -1783,7 +1789,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (cl) {
/* Check parent */
if (parentid) {
- if (cl->tparent && cl->tparent->classid != parentid)
+ if (cl->tparent &&
+ cl->tparent->common.classid != parentid)
return -EINVAL;
if (!cl->tparent && parentid != TC_H_ROOT)
return -EINVAL;
@@ -1832,7 +1839,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (tca[TCA_RATE])
gen_replace_estimator(&cl->bstats, &cl->rate_est,
- &sch->dev->queue_lock,
+ qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
return 0;
}
@@ -1883,9 +1890,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cl->R_tab = rtab;
rtab = NULL;
cl->refcnt = 1;
- if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
+ if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, classid)))
cl->q = &noop_qdisc;
- cl->classid = classid;
+ cl->common.classid = classid;
cl->tparent = parent;
cl->qdisc = sch;
cl->allot = parent->allot;
@@ -1918,9 +1926,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
sch_tree_unlock(sch);
+ qdisc_class_hash_grow(sch, &q->clhash);
+
if (tca[TCA_RATE])
gen_new_estimator(&cl->bstats, &cl->rate_est,
- &sch->dev->queue_lock, tca[TCA_RATE]);
+ qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
*arg = (unsigned long)cl;
return 0;
@@ -2010,15 +2020,15 @@ static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl;
+ struct hlist_node *n;
unsigned h;
if (arg->stop)
return;
- for (h = 0; h < 16; h++) {
- struct cbq_class *cl;
-
- for (cl = q->classes[h]; cl; cl = cl->next) {
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
if (arg->count < arg->skip) {
arg->count++;
continue;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 64465bacbe79..ba43aab3a851 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -60,7 +60,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
sch, p, new, old);
if (new == NULL) {
- new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
sch->handle);
if (new == NULL)
new = &noop_qdisc;
@@ -201,7 +202,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (p->set_tc_index) {
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
if (skb_cow_head(skb, sizeof(struct iphdr)))
goto drop;
@@ -209,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
& ~INET_ECN_MASK;
break;
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
goto drop;
@@ -235,7 +236,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
kfree_skb(skb);
- return NET_XMIT_SUCCESS;
+ return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
goto drop;
@@ -251,13 +252,14 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}
- err = p->q->enqueue(skb, p->q);
+ err = qdisc_enqueue(skb, p->q);
if (err != NET_XMIT_SUCCESS) {
- sch->qstats.drops++;
+ if (net_xmit_drop_count(err))
+ sch->qstats.drops++;
return err;
}
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
sch->bstats.packets++;
sch->q.qlen++;
@@ -266,7 +268,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
drop:
kfree_skb(skb);
sch->qstats.drops++;
- return NET_XMIT_BYPASS;
+ return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}
static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
@@ -287,11 +289,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
pr_debug("index %d->%d\n", skb->tc_index, index);
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
p->value[index]);
break;
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
p->value[index]);
break;
@@ -320,7 +322,8 @@ static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
err = p->q->ops->requeue(skb, p->q);
if (err != NET_XMIT_SUCCESS) {
- sch->qstats.drops++;
+ if (net_xmit_drop_count(err))
+ sch->qstats.drops++;
return err;
}
@@ -390,7 +393,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
p->default_index = default_index;
p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
- p->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
+ p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, sch->handle);
if (p->q == NULL)
p->q = &noop_qdisc;
@@ -416,7 +420,7 @@ static void dsmark_destroy(struct Qdisc *sch)
pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
- tcf_destroy_chain(p->filter_list);
+ tcf_destroy_chain(&p->filter_list);
qdisc_destroy(p->q);
kfree(p->mask);
}
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 95ed48221652..23d258bfe8ac 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -27,7 +27,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
- if (likely(sch->qstats.backlog + skb->len <= q->limit))
+ if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit))
return qdisc_enqueue_tail(skb, sch);
return qdisc_reshape_fail(skb, sch);
@@ -48,10 +48,10 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
struct fifo_sched_data *q = qdisc_priv(sch);
if (opt == NULL) {
- u32 limit = sch->dev->tx_queue_len ? : 1;
+ u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
if (sch->ops == &bfifo_qdisc_ops)
- limit *= sch->dev->mtu;
+ limit *= qdisc_dev(sch)->mtu;
q->limit = limit;
} else {
@@ -107,3 +107,46 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
EXPORT_SYMBOL(bfifo_qdisc_ops);
+
+/* Pass size change message down to embedded FIFO */
+int fifo_set_limit(struct Qdisc *q, unsigned int limit)
+{
+ struct nlattr *nla;
+ int ret = -ENOMEM;
+
+ /* Hack to avoid sending change message to non-FIFO */
+ if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
+ return 0;
+
+ nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
+ if (nla) {
+ nla->nla_type = RTM_NEWQDISC;
+ nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
+ ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
+
+ ret = q->ops->change(q, nla);
+ kfree(nla);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(fifo_set_limit);
+
+struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
+ unsigned int limit)
+{
+ struct Qdisc *q;
+ int err = -ENOMEM;
+
+ q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ ops, TC_H_MAKE(sch->handle, 1));
+ if (q) {
+ err = fifo_set_limit(q, limit);
+ if (err < 0) {
+ qdisc_destroy(q);
+ q = NULL;
+ }
+ }
+
+ return q ? : ERR_PTR(err);
+}
+EXPORT_SYMBOL(fifo_create_dflt);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d355e5e47fe3..7b5572d6beb5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -29,71 +29,56 @@
/* Main transmission queue. */
/* Modifications to data participating in scheduling must be protected with
- * dev->queue_lock spinlock.
+ * qdisc_lock(qdisc) spinlock.
*
* The idea is the following:
- * - enqueue, dequeue are serialized via top level device
- * spinlock dev->queue_lock.
- * - ingress filtering is serialized via top level device
- * spinlock dev->ingress_lock.
+ * - enqueue, dequeue are serialized via qdisc root lock
+ * - ingress filtering is also serialized via qdisc root lock
* - updates to tree and tree walking are only done under the rtnl mutex.
*/
-void qdisc_lock_tree(struct net_device *dev)
- __acquires(dev->queue_lock)
- __acquires(dev->ingress_lock)
-{
- spin_lock_bh(&dev->queue_lock);
- spin_lock(&dev->ingress_lock);
-}
-EXPORT_SYMBOL(qdisc_lock_tree);
-
-void qdisc_unlock_tree(struct net_device *dev)
- __releases(dev->ingress_lock)
- __releases(dev->queue_lock)
-{
- spin_unlock(&dev->ingress_lock);
- spin_unlock_bh(&dev->queue_lock);
-}
-EXPORT_SYMBOL(qdisc_unlock_tree);
-
static inline int qdisc_qlen(struct Qdisc *q)
{
return q->q.qlen;
}
-static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
- struct Qdisc *q)
+static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
- if (unlikely(skb->next))
- dev->gso_skb = skb;
- else
- q->ops->requeue(skb, q);
+ q->gso_skb = skb;
+ q->qstats.requeues++;
+ __netif_schedule(q);
- netif_schedule(dev);
return 0;
}
-static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
- struct Qdisc *q)
+static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = q->gso_skb;
+
+ if (unlikely(skb)) {
+ struct net_device *dev = qdisc_dev(q);
+ struct netdev_queue *txq;
- if ((skb = dev->gso_skb))
- dev->gso_skb = NULL;
- else
+ /* check the reason of requeuing without tx lock first */
+ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+ q->gso_skb = NULL;
+ else
+ skb = NULL;
+ } else {
skb = q->dequeue(q);
+ }
return skb;
}
static inline int handle_dev_cpu_collision(struct sk_buff *skb,
- struct net_device *dev,
+ struct netdev_queue *dev_queue,
struct Qdisc *q)
{
int ret;
- if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
+ if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
/*
* Same CPU holding the lock. It may be a transient
* configuration error, when hard_start_xmit() recurses. We
@@ -103,7 +88,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
kfree_skb(skb);
if (net_ratelimit())
printk(KERN_WARNING "Dead loop on netdevice %s, "
- "fix it urgently!\n", dev->name);
+ "fix it urgently!\n", dev_queue->dev->name);
ret = qdisc_qlen(q);
} else {
/*
@@ -111,22 +96,22 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
* some time.
*/
__get_cpu_var(netdev_rx_stat).cpu_collision++;
- ret = dev_requeue_skb(skb, dev, q);
+ ret = dev_requeue_skb(skb, q);
}
return ret;
}
/*
- * NOTE: Called under dev->queue_lock with locally disabled BH.
+ * NOTE: Called under qdisc_lock(q) with locally disabled BH.
*
- * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
- * device at a time. dev->queue_lock serializes queue accesses for
- * this device AND dev->qdisc pointer itself.
+ * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
+ * this queue.
*
* netif_tx_lock serializes accesses to device driver.
*
- * dev->queue_lock and netif_tx_lock are mutually exclusive,
+ * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
* if one is grabbed, another must be free.
*
* Note, that this procedure can be called by a watchdog timer
@@ -136,27 +121,33 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
* >0 - queue is not empty.
*
*/
-static inline int qdisc_restart(struct net_device *dev)
+static inline int qdisc_restart(struct Qdisc *q)
{
- struct Qdisc *q = dev->qdisc;
- struct sk_buff *skb;
+ struct netdev_queue *txq;
int ret = NETDEV_TX_BUSY;
+ struct net_device *dev;
+ spinlock_t *root_lock;
+ struct sk_buff *skb;
/* Dequeue packet */
- if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
+ if (unlikely((skb = dequeue_skb(q)) == NULL))
return 0;
+ root_lock = qdisc_lock(q);
- /* And release queue */
- spin_unlock(&dev->queue_lock);
+ /* And release qdisc */
+ spin_unlock(root_lock);
- HARD_TX_LOCK(dev, smp_processor_id());
- if (!netif_subqueue_stopped(dev, skb))
- ret = dev_hard_start_xmit(skb, dev);
- HARD_TX_UNLOCK(dev);
+ dev = qdisc_dev(q);
+ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
- spin_lock(&dev->queue_lock);
- q = dev->qdisc;
+ HARD_TX_LOCK(dev, txq, smp_processor_id());
+ if (!netif_tx_queue_stopped(txq) &&
+ !netif_tx_queue_frozen(txq))
+ ret = dev_hard_start_xmit(skb, dev, txq);
+ HARD_TX_UNLOCK(dev, txq);
+
+ spin_lock(root_lock);
switch (ret) {
case NETDEV_TX_OK:
@@ -166,7 +157,7 @@ static inline int qdisc_restart(struct net_device *dev)
case NETDEV_TX_LOCKED:
/* Driver try lock failed */
- ret = handle_dev_cpu_collision(skb, dev, q);
+ ret = handle_dev_cpu_collision(skb, txq, q);
break;
default:
@@ -175,33 +166,34 @@ static inline int qdisc_restart(struct net_device *dev)
printk(KERN_WARNING "BUG %s code %d qlen %d\n",
dev->name, ret, q->q.qlen);
- ret = dev_requeue_skb(skb, dev, q);
+ ret = dev_requeue_skb(skb, q);
break;
}
+ if (ret && (netif_tx_queue_stopped(txq) ||
+ netif_tx_queue_frozen(txq)))
+ ret = 0;
+
return ret;
}
-void __qdisc_run(struct net_device *dev)
+void __qdisc_run(struct Qdisc *q)
{
unsigned long start_time = jiffies;
- while (qdisc_restart(dev)) {
- if (netif_queue_stopped(dev))
- break;
-
+ while (qdisc_restart(q)) {
/*
* Postpone processing if
* 1. another process needs the CPU;
* 2. we've been doing it for too long.
*/
if (need_resched() || jiffies != start_time) {
- netif_schedule(dev);
+ __netif_schedule(q);
break;
}
}
- clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
+ clear_bit(__QDISC_STATE_RUNNING, &q->state);
}
static void dev_watchdog(unsigned long arg)
@@ -209,19 +201,34 @@ static void dev_watchdog(unsigned long arg)
struct net_device *dev = (struct net_device *)arg;
netif_tx_lock(dev);
- if (dev->qdisc != &noop_qdisc) {
+ if (!qdisc_tx_is_noop(dev)) {
if (netif_device_present(dev) &&
netif_running(dev) &&
netif_carrier_ok(dev)) {
- if (netif_queue_stopped(dev) &&
- time_after(jiffies, dev->trans_start + dev->watchdog_timeo)) {
+ int some_queue_stopped = 0;
+ unsigned int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq;
- printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n",
- dev->name);
+ txq = netdev_get_tx_queue(dev, i);
+ if (netif_tx_queue_stopped(txq)) {
+ some_queue_stopped = 1;
+ break;
+ }
+ }
+
+ if (some_queue_stopped &&
+ time_after(jiffies, (dev->trans_start +
+ dev->watchdog_timeo))) {
+ char drivername[64];
+ WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
+ dev->name, netdev_drivername(dev, drivername, 64));
dev->tx_timeout(dev);
- WARN_ON_ONCE(1);
}
- if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo)))
+ if (!mod_timer(&dev->watchdog_timer,
+ round_jiffies(jiffies +
+ dev->watchdog_timeo)))
dev_hold(dev);
}
}
@@ -317,12 +324,19 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
+static struct netdev_queue noop_netdev_queue = {
+ .qdisc = &noop_qdisc,
+};
+
struct Qdisc noop_qdisc = {
.enqueue = noop_enqueue,
.dequeue = noop_dequeue,
.flags = TCQ_F_BUILTIN,
.ops = &noop_qdisc_ops,
.list = LIST_HEAD_INIT(noop_qdisc.list),
+ .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
+ .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
+ .dev_queue = &noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);
@@ -335,12 +349,20 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
+static struct Qdisc noqueue_qdisc;
+static struct netdev_queue noqueue_netdev_queue = {
+ .qdisc = &noqueue_qdisc,
+};
+
static struct Qdisc noqueue_qdisc = {
.enqueue = NULL,
.dequeue = noop_dequeue,
.flags = TCQ_F_BUILTIN,
.ops = &noqueue_qdisc_ops,
.list = LIST_HEAD_INIT(noqueue_qdisc.list),
+ .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
+ .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
+ .dev_queue = &noqueue_netdev_queue,
};
@@ -364,7 +386,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
struct sk_buff_head *list = prio2list(skb, qdisc);
- if (skb_queue_len(list) < qdisc->dev->tx_queue_len) {
+ if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
qdisc->q.qlen++;
return __qdisc_enqueue_tail(skb, qdisc, list);
}
@@ -440,7 +462,8 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.owner = THIS_MODULE,
};
-struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
+struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
+ struct Qdisc_ops *ops)
{
void *p;
struct Qdisc *sch;
@@ -458,28 +481,30 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
sch->padded = (char *) sch - (char *) p;
INIT_LIST_HEAD(&sch->list);
+ skb_queue_head_init(&sch->requeue);
skb_queue_head_init(&sch->q);
sch->ops = ops;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
- sch->dev = dev;
- dev_hold(dev);
+ sch->dev_queue = dev_queue;
+ dev_hold(qdisc_dev(sch));
atomic_set(&sch->refcnt, 1);
return sch;
errout:
- return ERR_PTR(-err);
+ return ERR_PTR(err);
}
-struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
+struct Qdisc * qdisc_create_dflt(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ struct Qdisc_ops *ops,
unsigned int parentid)
{
struct Qdisc *sch;
- sch = qdisc_alloc(dev, ops);
+ sch = qdisc_alloc(dev_queue, ops);
if (IS_ERR(sch))
goto errout;
- sch->stats_lock = &dev->queue_lock;
sch->parent = parentid;
if (!ops->init || ops->init(sch, NULL) == 0)
@@ -491,7 +516,7 @@ errout:
}
EXPORT_SYMBOL(qdisc_create_dflt);
-/* Under dev->queue_lock and BH! */
+/* Under qdisc_lock(qdisc) and BH! */
void qdisc_reset(struct Qdisc *qdisc)
{
@@ -502,17 +527,6 @@ void qdisc_reset(struct Qdisc *qdisc)
}
EXPORT_SYMBOL(qdisc_reset);
-/* this is the rcu callback function to clean up a qdisc when there
- * are no further references to it */
-
-static void __qdisc_destroy(struct rcu_head *head)
-{
- struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
- kfree((char *) qdisc - qdisc->padded);
-}
-
-/* Under dev->queue_lock and BH! */
-
void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
@@ -521,7 +535,11 @@ void qdisc_destroy(struct Qdisc *qdisc)
!atomic_dec_and_test(&qdisc->refcnt))
return;
- list_del(&qdisc->list);
+#ifdef CONFIG_NET_SCHED
+ qdisc_list_del(qdisc);
+
+ qdisc_put_stab(qdisc->stab);
+#endif
gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
if (ops->reset)
ops->reset(qdisc);
@@ -529,65 +547,141 @@ void qdisc_destroy(struct Qdisc *qdisc)
ops->destroy(qdisc);
module_put(ops->owner);
- dev_put(qdisc->dev);
- call_rcu(&qdisc->q_rcu, __qdisc_destroy);
+ dev_put(qdisc_dev(qdisc));
+
+ kfree_skb(qdisc->gso_skb);
+ __skb_queue_purge(&qdisc->requeue);
+
+ kfree((char *) qdisc - qdisc->padded);
}
EXPORT_SYMBOL(qdisc_destroy);
+static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+ if (txq->qdisc_sleeping != &noop_qdisc)
+ return false;
+ }
+ return true;
+}
+
+static void attach_one_default_qdisc(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_unused)
+{
+ struct Qdisc *qdisc;
+
+ if (dev->tx_queue_len) {
+ qdisc = qdisc_create_dflt(dev, dev_queue,
+ &pfifo_fast_ops, TC_H_ROOT);
+ if (!qdisc) {
+ printk(KERN_INFO "%s: activation failed\n", dev->name);
+ return;
+ }
+ } else {
+ qdisc = &noqueue_qdisc;
+ }
+ dev_queue->qdisc_sleeping = qdisc;
+}
+
+static void transition_one_qdisc(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_need_watchdog)
+{
+ struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
+ int *need_watchdog_p = _need_watchdog;
+
+ if (!(new_qdisc->flags & TCQ_F_BUILTIN))
+ clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
+
+ rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
+ if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
+ *need_watchdog_p = 1;
+}
+
void dev_activate(struct net_device *dev)
{
+ int need_watchdog;
+
/* No queueing discipline is attached to device;
create default one i.e. pfifo_fast for devices,
which need queueing and noqueue_qdisc for
virtual interfaces
*/
- if (dev->qdisc_sleeping == &noop_qdisc) {
- struct Qdisc *qdisc;
- if (dev->tx_queue_len) {
- qdisc = qdisc_create_dflt(dev, &pfifo_fast_ops,
- TC_H_ROOT);
- if (qdisc == NULL) {
- printk(KERN_INFO "%s: activation failed\n", dev->name);
- return;
- }
- list_add_tail(&qdisc->list, &dev->qdisc_list);
- } else {
- qdisc = &noqueue_qdisc;
- }
- dev->qdisc_sleeping = qdisc;
- }
+ if (dev_all_qdisc_sleeping_noop(dev))
+ netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
if (!netif_carrier_ok(dev))
/* Delay activation until next carrier-on event */
return;
- spin_lock_bh(&dev->queue_lock);
- rcu_assign_pointer(dev->qdisc, dev->qdisc_sleeping);
- if (dev->qdisc != &noqueue_qdisc) {
+ need_watchdog = 0;
+ netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
+ transition_one_qdisc(dev, &dev->rx_queue, NULL);
+
+ if (need_watchdog) {
dev->trans_start = jiffies;
dev_watchdog_up(dev);
}
- spin_unlock_bh(&dev->queue_lock);
}
-void dev_deactivate(struct net_device *dev)
+static void dev_deactivate_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_qdisc_default)
{
+ struct Qdisc *qdisc_default = _qdisc_default;
struct Qdisc *qdisc;
- struct sk_buff *skb;
- int running;
- spin_lock_bh(&dev->queue_lock);
- qdisc = dev->qdisc;
- dev->qdisc = &noop_qdisc;
+ qdisc = dev_queue->qdisc;
+ if (qdisc) {
+ spin_lock_bh(qdisc_lock(qdisc));
- qdisc_reset(qdisc);
+ if (!(qdisc->flags & TCQ_F_BUILTIN))
+ set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
- skb = dev->gso_skb;
- dev->gso_skb = NULL;
- spin_unlock_bh(&dev->queue_lock);
+ rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+ qdisc_reset(qdisc);
- kfree_skb(skb);
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+}
+
+static bool some_qdisc_is_busy(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *dev_queue;
+ spinlock_t *root_lock;
+ struct Qdisc *q;
+ int val;
+
+ dev_queue = netdev_get_tx_queue(dev, i);
+ q = dev_queue->qdisc_sleeping;
+ root_lock = qdisc_lock(q);
+
+ spin_lock_bh(root_lock);
+
+ val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
+ test_bit(__QDISC_STATE_SCHED, &q->state));
+
+ spin_unlock_bh(root_lock);
+
+ if (val)
+ return true;
+ }
+ return false;
+}
+
+void dev_deactivate(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
+ dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
dev_watchdog_down(dev);
@@ -595,55 +689,46 @@ void dev_deactivate(struct net_device *dev)
synchronize_rcu();
/* Wait for outstanding qdisc_run calls. */
- do {
- while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
- yield();
+ while (some_qdisc_is_busy(dev))
+ yield();
+}
- /*
- * Double-check inside queue lock to ensure that all effects
- * of the queue run are visible when we return.
- */
- spin_lock_bh(&dev->queue_lock);
- running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
- spin_unlock_bh(&dev->queue_lock);
+static void dev_init_scheduler_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_qdisc)
+{
+ struct Qdisc *qdisc = _qdisc;
- /*
- * The running flag should never be set at this point because
- * we've already set dev->qdisc to noop_qdisc *inside* the same
- * pair of spin locks. That is, if any qdisc_run starts after
- * our initial test it should see the noop_qdisc and then
- * clear the RUNNING bit before dropping the queue lock. So
- * if it is set here then we've found a bug.
- */
- } while (WARN_ON_ONCE(running));
+ dev_queue->qdisc = qdisc;
+ dev_queue->qdisc_sleeping = qdisc;
}
void dev_init_scheduler(struct net_device *dev)
{
- qdisc_lock_tree(dev);
- dev->qdisc = &noop_qdisc;
- dev->qdisc_sleeping = &noop_qdisc;
- INIT_LIST_HEAD(&dev->qdisc_list);
- qdisc_unlock_tree(dev);
+ netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
+ dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}
-void dev_shutdown(struct net_device *dev)
+static void shutdown_scheduler_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_qdisc_default)
{
- struct Qdisc *qdisc;
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *qdisc_default = _qdisc_default;
+
+ if (qdisc) {
+ rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+ dev_queue->qdisc_sleeping = qdisc_default;
- qdisc_lock_tree(dev);
- qdisc = dev->qdisc_sleeping;
- dev->qdisc = &noop_qdisc;
- dev->qdisc_sleeping = &noop_qdisc;
- qdisc_destroy(qdisc);
-#if defined(CONFIG_NET_SCH_INGRESS) || defined(CONFIG_NET_SCH_INGRESS_MODULE)
- if ((qdisc = dev->qdisc_ingress) != NULL) {
- dev->qdisc_ingress = NULL;
qdisc_destroy(qdisc);
}
-#endif
- BUG_TRAP(!timer_pending(&dev->watchdog_timer));
- qdisc_unlock_tree(dev);
+}
+
+void dev_shutdown(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
+ shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
+ WARN_ON(timer_pending(&dev->watchdog_timer));
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index c89fba56db56..c1ad6b8de105 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -164,7 +164,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
* if no default DP has been configured. This
* allows for DP flows to be left untouched.
*/
- if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len)
+ if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
return qdisc_enqueue_tail(skb, sch);
else
goto drop;
@@ -188,7 +188,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
}
q->packetsin++;
- q->bytesin += skb->len;
+ q->bytesin += qdisc_pkt_len(skb);
if (gred_wred_mode(t))
gred_load_wred_set(t, q);
@@ -226,8 +226,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
break;
}
- if (q->backlog + skb->len <= q->limit) {
- q->backlog += skb->len;
+ if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
+ q->backlog += qdisc_pkt_len(skb);
return qdisc_enqueue_tail(skb, sch);
}
@@ -254,7 +254,7 @@ static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
} else {
if (red_is_idling(&q->parms))
red_end_of_idle_period(&q->parms);
- q->backlog += skb->len;
+ q->backlog += qdisc_pkt_len(skb);
}
return qdisc_requeue(skb, sch);
@@ -277,7 +277,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
"VQ 0x%x after dequeue, screwing up "
"backlog.\n", tc_index_to_dp(skb));
} else {
- q->backlog -= skb->len;
+ q->backlog -= qdisc_pkt_len(skb);
if (!q->backlog && !gred_wred_mode(t))
red_start_of_idle_period(&q->parms);
@@ -299,7 +299,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
skb = qdisc_dequeue_tail(sch);
if (skb) {
- unsigned int len = skb->len;
+ unsigned int len = qdisc_pkt_len(skb);
struct gred_sched_data *q;
u16 dp = tc_index_to_dp(skb);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index fdfaa3fcc16d..c1e77da8cd09 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -113,7 +113,7 @@ enum hfsc_class_flags
struct hfsc_class
{
- u32 classid; /* class id */
+ struct Qdisc_class_common cl_common;
unsigned int refcnt; /* usage count */
struct gnet_stats_basic bstats;
@@ -134,7 +134,6 @@ struct hfsc_class
struct rb_node vt_node; /* parent's vt_tree member */
struct rb_root cf_tree; /* active children sorted by cl_f */
struct rb_node cf_node; /* parent's cf_heap member */
- struct list_head hlist; /* hash list member */
struct list_head dlist; /* drop list member */
u64 cl_total; /* total work in bytes */
@@ -177,13 +176,11 @@ struct hfsc_class
unsigned long cl_nactive; /* number of active children */
};
-#define HFSC_HSIZE 16
-
struct hfsc_sched
{
u16 defcls; /* default class id */
struct hfsc_class root; /* root class */
- struct list_head clhash[HFSC_HSIZE]; /* class hash */
+ struct Qdisc_class_hash clhash; /* class hash */
struct rb_root eligible; /* eligible tree */
struct list_head droplist; /* active leaf class list (for
dropping) */
@@ -898,7 +895,7 @@ qdisc_peek_len(struct Qdisc *sch)
printk("qdisc_peek_len: non work-conserving qdisc ?\n");
return 0;
}
- len = skb->len;
+ len = qdisc_pkt_len(skb);
if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
if (net_ratelimit())
printk("qdisc_peek_len: failed to requeue\n");
@@ -933,26 +930,16 @@ hfsc_adjust_levels(struct hfsc_class *cl)
} while ((cl = cl->cl_parent) != NULL);
}
-static inline unsigned int
-hfsc_hash(u32 h)
-{
- h ^= h >> 8;
- h ^= h >> 4;
-
- return h & (HFSC_HSIZE - 1);
-}
-
static inline struct hfsc_class *
hfsc_find_class(u32 classid, struct Qdisc *sch)
{
struct hfsc_sched *q = qdisc_priv(sch);
- struct hfsc_class *cl;
+ struct Qdisc_class_common *clc;
- list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) {
- if (cl->classid == classid)
- return cl;
- }
- return NULL;
+ clc = qdisc_class_find(&q->clhash, classid);
+ if (clc == NULL)
+ return NULL;
+ return container_of(clc, struct hfsc_class, cl_common);
}
static void
@@ -1032,7 +1019,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl != NULL) {
if (parentid) {
- if (cl->cl_parent && cl->cl_parent->classid != parentid)
+ if (cl->cl_parent &&
+ cl->cl_parent->cl_common.classid != parentid)
return -EINVAL;
if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
return -EINVAL;
@@ -1057,7 +1045,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (tca[TCA_RATE])
gen_replace_estimator(&cl->bstats, &cl->rate_est,
- &sch->dev->queue_lock,
+ qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
return 0;
}
@@ -1091,11 +1079,12 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (usc != NULL)
hfsc_change_usc(cl, usc, 0);
+ cl->cl_common.classid = classid;
cl->refcnt = 1;
- cl->classid = classid;
cl->sched = q;
cl->cl_parent = parent;
- cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
+ cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, classid);
if (cl->qdisc == NULL)
cl->qdisc = &noop_qdisc;
INIT_LIST_HEAD(&cl->children);
@@ -1103,7 +1092,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->cf_tree = RB_ROOT;
sch_tree_lock(sch);
- list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]);
+ qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
list_add_tail(&cl->siblings, &parent->children);
if (parent->level == 0)
hfsc_purge_queue(sch, parent);
@@ -1111,9 +1100,11 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->cl_pcvtoff = parent->cl_cvtoff;
sch_tree_unlock(sch);
+ qdisc_class_hash_grow(sch, &q->clhash);
+
if (tca[TCA_RATE])
gen_new_estimator(&cl->bstats, &cl->rate_est,
- &sch->dev->queue_lock, tca[TCA_RATE]);
+ qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
*arg = (unsigned long)cl;
return 0;
}
@@ -1123,7 +1114,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
{
struct hfsc_sched *q = qdisc_priv(sch);
- tcf_destroy_chain(cl->filter_list);
+ tcf_destroy_chain(&cl->filter_list);
qdisc_destroy(cl->qdisc);
gen_kill_estimator(&cl->bstats, &cl->rate_est);
if (cl != &q->root)
@@ -1145,7 +1136,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
hfsc_adjust_levels(cl->cl_parent);
hfsc_purge_queue(sch, cl);
- list_del(&cl->hlist);
+ qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
if (--cl->refcnt == 0)
hfsc_destroy_class(sch, cl);
@@ -1168,14 +1159,14 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
if (cl->level == 0)
return cl;
- *qerr = NET_XMIT_BYPASS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
tcf = q->root.filter_list;
while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
- *qerr = NET_XMIT_SUCCESS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
}
@@ -1211,8 +1202,9 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (cl->level > 0)
return -EINVAL;
if (new == NULL) {
- new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
- cl->classid);
+ new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ cl->cl_common.classid);
if (new == NULL)
new = &noop_qdisc;
}
@@ -1345,8 +1337,9 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
struct hfsc_class *cl = (struct hfsc_class *)arg;
struct nlattr *nest;
- tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
- tcm->tcm_handle = cl->classid;
+ tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->cl_common.classid :
+ TC_H_ROOT;
+ tcm->tcm_handle = cl->cl_common.classid;
if (cl->level == 0)
tcm->tcm_info = cl->qdisc->handle;
@@ -1390,14 +1383,16 @@ static void
hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
struct hfsc_sched *q = qdisc_priv(sch);
+ struct hlist_node *n;
struct hfsc_class *cl;
unsigned int i;
if (arg->stop)
return;
- for (i = 0; i < HFSC_HSIZE; i++) {
- list_for_each_entry(cl, &q->clhash[i], hlist) {
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i],
+ cl_common.hnode) {
if (arg->count < arg->skip) {
arg->count++;
continue;
@@ -1433,23 +1428,25 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct tc_hfsc_qopt *qopt;
- unsigned int i;
+ int err;
if (opt == NULL || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
qopt = nla_data(opt);
q->defcls = qopt->defcls;
- for (i = 0; i < HFSC_HSIZE; i++)
- INIT_LIST_HEAD(&q->clhash[i]);
+ err = qdisc_class_hash_init(&q->clhash);
+ if (err < 0)
+ return err;
q->eligible = RB_ROOT;
INIT_LIST_HEAD(&q->droplist);
skb_queue_head_init(&q->requeue);
+ q->root.cl_common.classid = sch->handle;
q->root.refcnt = 1;
- q->root.classid = sch->handle;
q->root.sched = q;
- q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
sch->handle);
if (q->root.qdisc == NULL)
q->root.qdisc = &noop_qdisc;
@@ -1457,7 +1454,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
q->root.vt_tree = RB_ROOT;
q->root.cf_tree = RB_ROOT;
- list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);
+ qdisc_class_hash_insert(&q->clhash, &q->root.cl_common);
+ qdisc_class_hash_grow(sch, &q->clhash);
qdisc_watchdog_init(&q->watchdog, sch);
@@ -1520,10 +1518,11 @@ hfsc_reset_qdisc(struct Qdisc *sch)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl;
+ struct hlist_node *n;
unsigned int i;
- for (i = 0; i < HFSC_HSIZE; i++) {
- list_for_each_entry(cl, &q->clhash[i], hlist)
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
hfsc_reset_class(cl);
}
__skb_queue_purge(&q->requeue);
@@ -1537,13 +1536,20 @@ static void
hfsc_destroy_qdisc(struct Qdisc *sch)
{
struct hfsc_sched *q = qdisc_priv(sch);
- struct hfsc_class *cl, *next;
+ struct hlist_node *n, *next;
+ struct hfsc_class *cl;
unsigned int i;
- for (i = 0; i < HFSC_HSIZE; i++) {
- list_for_each_entry_safe(cl, next, &q->clhash[i], hlist)
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
+ tcf_destroy_chain(&cl->filter_list);
+ }
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+ cl_common.hnode)
hfsc_destroy_class(sch, cl);
}
+ qdisc_class_hash_destroy(&q->clhash);
__skb_queue_purge(&q->requeue);
qdisc_watchdog_cancel(&q->watchdog);
}
@@ -1568,32 +1574,32 @@ static int
hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct hfsc_class *cl;
- unsigned int len;
int err;
cl = hfsc_classify(skb, sch, &err);
if (cl == NULL) {
- if (err == NET_XMIT_BYPASS)
+ if (err & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
return err;
}
- len = skb->len;
- err = cl->qdisc->enqueue(skb, cl->qdisc);
+ err = qdisc_enqueue(skb, cl->qdisc);
if (unlikely(err != NET_XMIT_SUCCESS)) {
- cl->qstats.drops++;
- sch->qstats.drops++;
+ if (net_xmit_drop_count(err)) {
+ cl->qstats.drops++;
+ sch->qstats.drops++;
+ }
return err;
}
if (cl->qdisc->q.qlen == 1)
- set_active(cl, len);
+ set_active(cl, qdisc_pkt_len(skb));
cl->bstats.packets++;
- cl->bstats.bytes += len;
+ cl->bstats.bytes += qdisc_pkt_len(skb);
sch->bstats.packets++;
- sch->bstats.bytes += len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -1643,9 +1649,9 @@ hfsc_dequeue(struct Qdisc *sch)
return NULL;
}
- update_vf(cl, skb->len, cur_time);
+ update_vf(cl, qdisc_pkt_len(skb), cur_time);
if (realtime)
- cl->cl_cumul += skb->len;
+ cl->cl_cumul += qdisc_pkt_len(skb);
if (cl->qdisc->q.qlen != 0) {
if (cl->cl_flags & HFSC_RSC) {
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 6807c97985a5..d14f02056ae6 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -24,8 +24,6 @@
* Jiri Fojtasek
* fixed requeue routine
* and many others. thanks.
- *
- * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -53,7 +51,6 @@
one less than their parent.
*/
-#define HTB_HSIZE 16 /* classid hash size */
static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */
@@ -74,8 +71,8 @@ enum htb_cmode {
/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class {
+ struct Qdisc_class_common common;
/* general class parameters */
- u32 classid;
struct gnet_stats_basic bstats;
struct gnet_stats_queue qstats;
struct gnet_stats_rate_est rate_est;
@@ -84,10 +81,8 @@ struct htb_class {
/* topology */
int level; /* our level (see above) */
+ unsigned int children;
struct htb_class *parent; /* parent class */
- struct hlist_node hlist; /* classid hash list item */
- struct list_head sibling; /* sibling list item */
- struct list_head children; /* children list */
union {
struct htb_class_leaf {
@@ -142,8 +137,7 @@ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
}
struct htb_sched {
- struct list_head root; /* root classes list */
- struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */
+ struct Qdisc_class_hash clhash;
struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
/* self list - roots of self generating tree */
@@ -165,7 +159,6 @@ struct htb_sched {
/* filters for qdisc itself */
struct tcf_proto *filter_list;
- int filter_cnt;
int rate2quantum; /* quant = rate / rate2quantum */
psched_time_t now; /* cached dequeue time */
@@ -178,32 +171,16 @@ struct htb_sched {
long direct_pkts;
};
-/* compute hash of size HTB_HSIZE for given handle */
-static inline int htb_hash(u32 h)
-{
-#if HTB_HSIZE != 16
-#error "Declare new hash for your HTB_HSIZE"
-#endif
- h ^= h >> 8; /* stolen from cbq_hash */
- h ^= h >> 4;
- return h & 0xf;
-}
-
/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
- struct hlist_node *p;
- struct htb_class *cl;
+ struct Qdisc_class_common *clc;
- if (TC_H_MAJ(handle) != sch->handle)
+ clc = qdisc_class_find(&q->clhash, handle);
+ if (clc == NULL)
return NULL;
-
- hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
- if (cl->classid == handle)
- return cl;
- }
- return NULL;
+ return container_of(clc, struct htb_class, common);
}
/**
@@ -237,14 +214,14 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
return cl;
- *qerr = NET_XMIT_BYPASS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
tcf = q->filter_list;
while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
- *qerr = NET_XMIT_SUCCESS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
}
@@ -284,7 +261,7 @@ static void htb_add_to_id_tree(struct rb_root *root,
parent = *p;
c = rb_entry(parent, struct htb_class, node[prio]);
- if (cl->classid > c->classid)
+ if (cl->common.classid > c->common.classid)
p = &parent->rb_right;
else
p = &parent->rb_left;
@@ -448,7 +425,7 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
/* we are removing child which is pointed to from
parent feed - forget the pointer but remember
classid */
- p->un.inner.last_ptr_id[prio] = cl->classid;
+ p->un.inner.last_ptr_id[prio] = cl->common.classid;
p->un.inner.ptr[prio] = NULL;
}
@@ -547,7 +524,7 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
*/
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
- BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
+ WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
if (!cl->prio_activity) {
cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
@@ -565,7 +542,7 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
*/
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
- BUG_TRAP(cl->prio_activity);
+ WARN_ON(!cl->prio_activity);
htb_deactivate_prios(q, cl);
cl->prio_activity = 0;
@@ -590,26 +567,27 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
#ifdef CONFIG_NET_CLS_ACT
} else if (!cl) {
- if (ret == NET_XMIT_BYPASS)
+ if (ret & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
return ret;
#endif
- } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
- NET_XMIT_SUCCESS) {
- sch->qstats.drops++;
- cl->qstats.drops++;
- return NET_XMIT_DROP;
+ } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ cl->qstats.drops++;
+ }
+ return ret;
} else {
cl->bstats.packets +=
skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
- cl->bstats.bytes += skb->len;
+ cl->bstats.bytes += qdisc_pkt_len(skb);
htb_activate(q, cl);
}
sch->q.qlen++;
sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
return NET_XMIT_SUCCESS;
}
@@ -634,16 +612,18 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
}
#ifdef CONFIG_NET_CLS_ACT
} else if (!cl) {
- if (ret == NET_XMIT_BYPASS)
+ if (ret & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
return ret;
#endif
- } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
+ } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
NET_XMIT_SUCCESS) {
- sch->qstats.drops++;
- cl->qstats.drops++;
- return NET_XMIT_DROP;
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ cl->qstats.drops++;
+ }
+ return ret;
} else
htb_activate(q, cl);
@@ -666,7 +646,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
int level, struct sk_buff *skb)
{
- int bytes = skb->len;
+ int bytes = qdisc_pkt_len(skb);
long toks, diff;
enum htb_cmode old_mode;
@@ -753,10 +733,10 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
while (n) {
struct htb_class *cl =
rb_entry(n, struct htb_class, node[prio]);
- if (id == cl->classid)
+ if (id == cl->common.classid)
return n;
- if (id > cl->classid) {
+ if (id > cl->common.classid) {
n = n->rb_right;
} else {
r = n;
@@ -781,7 +761,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
u32 *pid;
} stk[TC_HTB_MAXDEPTH], *sp = stk;
- BUG_TRAP(tree->rb_node);
+ WARN_ON(!tree->rb_node);
sp->root = tree->rb_node;
sp->pptr = pptr;
sp->pid = pid;
@@ -801,7 +781,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
*sp->pptr = (*sp->pptr)->rb_left;
if (sp > stk) {
sp--;
- BUG_TRAP(*sp->pptr);
+ WARN_ON(!*sp->pptr);
if (!*sp->pptr)
return NULL;
htb_next_rb_node(sp->pptr);
@@ -816,7 +796,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
sp->pid = cl->un.inner.last_ptr_id + prio;
}
}
- BUG_TRAP(0);
+ WARN_ON(1);
return NULL;
}
@@ -834,7 +814,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
do {
next:
- BUG_TRAP(cl);
+ WARN_ON(!cl);
if (!cl)
return NULL;
@@ -866,7 +846,7 @@ next:
if (!cl->warned) {
printk(KERN_WARNING
"htb: class %X isn't work conserving ?!\n",
- cl->classid);
+ cl->common.classid);
cl->warned = 1;
}
q->nwc_hit++;
@@ -879,7 +859,8 @@ next:
} while (cl != start);
if (likely(skb != NULL)) {
- if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
+ cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
+ if (cl->un.leaf.deficit[level] < 0) {
cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
ptr[0]) + prio);
@@ -977,13 +958,12 @@ static unsigned int htb_drop(struct Qdisc *sch)
static void htb_reset(struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
- int i;
-
- for (i = 0; i < HTB_HSIZE; i++) {
- struct hlist_node *p;
- struct htb_class *cl;
+ struct htb_class *cl;
+ struct hlist_node *n;
+ unsigned int i;
- hlist_for_each_entry(cl, p, q->hash + i, hlist) {
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
if (cl->level)
memset(&cl->un.inner, 0, sizeof(cl->un.inner));
else {
@@ -1041,16 +1021,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
return -EINVAL;
}
- INIT_LIST_HEAD(&q->root);
- for (i = 0; i < HTB_HSIZE; i++)
- INIT_HLIST_HEAD(q->hash + i);
+ err = qdisc_class_hash_init(&q->clhash);
+ if (err < 0)
+ return err;
for (i = 0; i < TC_HTB_NUMPRIO; i++)
INIT_LIST_HEAD(q->drops + i);
qdisc_watchdog_init(&q->watchdog, sch);
skb_queue_head_init(&q->direct_queue);
- q->direct_qlen = sch->dev->tx_queue_len;
+ q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
q->direct_qlen = 2;
@@ -1063,11 +1043,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
+ spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct htb_sched *q = qdisc_priv(sch);
struct nlattr *nest;
struct tc_htb_glob gopt;
- spin_lock_bh(&sch->dev->queue_lock);
+ spin_lock_bh(root_lock);
gopt.direct_pkts = q->direct_pkts;
gopt.version = HTB_VER;
@@ -1081,11 +1062,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
nla_nest_end(skb, nest);
- spin_unlock_bh(&sch->dev->queue_lock);
+ spin_unlock_bh(root_lock);
return skb->len;
nla_put_failure:
- spin_unlock_bh(&sch->dev->queue_lock);
+ spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
@@ -1094,12 +1075,13 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct htb_class *cl = (struct htb_class *)arg;
+ spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct nlattr *nest;
struct tc_htb_opt opt;
- spin_lock_bh(&sch->dev->queue_lock);
- tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
- tcm->tcm_handle = cl->classid;
+ spin_lock_bh(root_lock);
+ tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
+ tcm->tcm_handle = cl->common.classid;
if (!cl->level && cl->un.leaf.q)
tcm->tcm_info = cl->un.leaf.q->handle;
@@ -1119,11 +1101,11 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
nla_nest_end(skb, nest);
- spin_unlock_bh(&sch->dev->queue_lock);
+ spin_unlock_bh(root_lock);
return skb->len;
nla_put_failure:
- spin_unlock_bh(&sch->dev->queue_lock);
+ spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
@@ -1153,8 +1135,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (cl && !cl->level) {
if (new == NULL &&
- (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
- cl->classid))
+ (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ cl->common.classid))
== NULL)
return -ENOBUFS;
sch_tree_lock(sch);
@@ -1195,12 +1178,9 @@ static inline int htb_parent_last_child(struct htb_class *cl)
if (!cl->parent)
/* the root class */
return 0;
-
- if (!(cl->parent->children.next == &cl->sibling &&
- cl->parent->children.prev == &cl->sibling))
+ if (cl->parent->children > 1)
/* not the last child */
return 0;
-
return 1;
}
@@ -1209,7 +1189,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
{
struct htb_class *parent = cl->parent;
- BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);
+ WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
if (parent->cmode != HTB_CAN_SEND)
htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
@@ -1228,32 +1208,15 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
- struct htb_sched *q = qdisc_priv(sch);
-
if (!cl->level) {
- BUG_TRAP(cl->un.leaf.q);
+ WARN_ON(!cl->un.leaf.q);
qdisc_destroy(cl->un.leaf.q);
}
gen_kill_estimator(&cl->bstats, &cl->rate_est);
qdisc_put_rtab(cl->rate);
qdisc_put_rtab(cl->ceil);
- tcf_destroy_chain(cl->filter_list);
-
- while (!list_empty(&cl->children))
- htb_destroy_class(sch, list_entry(cl->children.next,
- struct htb_class, sibling));
-
- /* note: this delete may happen twice (see htb_delete) */
- hlist_del_init(&cl->hlist);
- list_del(&cl->sibling);
-
- if (cl->prio_activity)
- htb_deactivate(q, cl);
-
- if (cl->cmode != HTB_CAN_SEND)
- htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
-
+ tcf_destroy_chain(&cl->filter_list);
kfree(cl);
}
@@ -1261,18 +1224,27 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
static void htb_destroy(struct Qdisc *sch)
{
struct htb_sched *q = qdisc_priv(sch);
+ struct hlist_node *n, *next;
+ struct htb_class *cl;
+ unsigned int i;
qdisc_watchdog_cancel(&q->watchdog);
/* This line used to be after htb_destroy_class call below
and surprisingly it worked in 2.4. But it must precede it
because filter need its target class alive to be able to call
unbind_filter on it (without Oops). */
- tcf_destroy_chain(q->filter_list);
-
- while (!list_empty(&q->root))
- htb_destroy_class(sch, list_entry(q->root.next,
- struct htb_class, sibling));
+ tcf_destroy_chain(&q->filter_list);
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
+ tcf_destroy_chain(&cl->filter_list);
+ }
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+ common.hnode)
+ htb_destroy_class(sch, cl);
+ }
+ qdisc_class_hash_destroy(&q->clhash);
__skb_queue_purge(&q->direct_queue);
}
@@ -1287,12 +1259,13 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
// TODO: why don't allow to delete subtree ? references ? does
// tc subsys quarantee us that in htb_destroy it holds no class
// refs so that we can remove children safely there ?
- if (!list_empty(&cl->children) || cl->filter_cnt)
+ if (cl->children || cl->filter_cnt)
return -EBUSY;
if (!cl->level && htb_parent_last_child(cl)) {
- new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
- cl->parent->classid);
+ new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ cl->parent->common.classid);
last_child = 1;
}
@@ -1305,11 +1278,16 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
}
/* delete from hash and active; remainder in destroy_class */
- hlist_del_init(&cl->hlist);
+ qdisc_class_hash_remove(&q->clhash, &cl->common);
+ if (cl->parent)
+ cl->parent->children--;
if (cl->prio_activity)
htb_deactivate(q, cl);
+ if (cl->cmode != HTB_CAN_SEND)
+ htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+
if (last_child)
htb_parent_to_leaf(q, cl, new_q);
@@ -1394,12 +1372,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
goto failure;
gen_new_estimator(&cl->bstats, &cl->rate_est,
- &sch->dev->queue_lock,
+ qdisc_root_sleeping_lock(sch),
tca[TCA_RATE] ? : &est.nla);
cl->refcnt = 1;
- INIT_LIST_HEAD(&cl->sibling);
- INIT_HLIST_NODE(&cl->hlist);
- INIT_LIST_HEAD(&cl->children);
+ cl->children = 0;
INIT_LIST_HEAD(&cl->un.leaf.drop_list);
RB_CLEAR_NODE(&cl->pq_node);
@@ -1409,7 +1385,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
so that can't be used inside of sch_tree_lock
-- thanks to Karlis Peisenieks */
- new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
+ new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, classid);
sch_tree_lock(sch);
if (parent && !parent->level) {
unsigned int qlen = parent->un.leaf.q->q.qlen;
@@ -1433,7 +1410,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* leaf (we) needs elementary qdisc */
cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
- cl->classid = classid;
+ cl->common.classid = classid;
cl->parent = parent;
/* set class to be in HTB_CAN_SEND state */
@@ -1444,13 +1421,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->cmode = HTB_CAN_SEND;
/* attach to the hash list and parent's family */
- hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
- list_add_tail(&cl->sibling,
- parent ? &parent->children : &q->root);
+ qdisc_class_hash_insert(&q->clhash, &cl->common);
+ if (parent)
+ parent->children++;
} else {
if (tca[TCA_RATE])
gen_replace_estimator(&cl->bstats, &cl->rate_est,
- &sch->dev->queue_lock,
+ qdisc_root_sleeping_lock(sch),
tca[TCA_RATE]);
sch_tree_lock(sch);
}
@@ -1462,13 +1439,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
printk(KERN_WARNING
"HTB: quantum of class %X is small. Consider r2q change.\n",
- cl->classid);
+ cl->common.classid);
cl->un.leaf.quantum = 1000;
}
if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
printk(KERN_WARNING
"HTB: quantum of class %X is big. Consider r2q change.\n",
- cl->classid);
+ cl->common.classid);
cl->un.leaf.quantum = 200000;
}
if (hopt->quantum)
@@ -1491,6 +1468,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
cl->ceil = ctab;
sch_tree_unlock(sch);
+ qdisc_class_hash_grow(sch, &q->clhash);
+
*arg = (unsigned long)cl;
return 0;
@@ -1514,7 +1493,6 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
u32 classid)
{
- struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = htb_find(classid, sch);
/*if (cl && !cl->level) return 0;
@@ -1528,35 +1506,29 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
*/
if (cl)
cl->filter_cnt++;
- else
- q->filter_cnt++;
return (unsigned long)cl;
}
static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
- struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
if (cl)
cl->filter_cnt--;
- else
- q->filter_cnt--;
}
static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
struct htb_sched *q = qdisc_priv(sch);
- int i;
+ struct htb_class *cl;
+ struct hlist_node *n;
+ unsigned int i;
if (arg->stop)
return;
- for (i = 0; i < HTB_HSIZE; i++) {
- struct hlist_node *p;
- struct htb_class *cl;
-
- hlist_for_each_entry(cl, p, q->hash + i, hlist) {
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
if (arg->count < arg->skip) {
arg->count++;
continue;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 274b1ddb160c..4a2b77374358 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -77,7 +77,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
result = tc_classify(skb, p->filter_list, &res);
sch->bstats.packets++;
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
switch (result) {
case TC_ACT_SHOT:
result = TC_ACT_SHOT;
@@ -104,7 +104,7 @@ static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
- tcf_destroy_chain(p->filter_list);
+ tcf_destroy_chain(&p->filter_list);
}
static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
new file mode 100644
index 000000000000..915f3149dde2
--- /dev/null
+++ b/net/sched/sch_multiq.c
@@ -0,0 +1,477 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+struct multiq_sched_data {
+ u16 bands; /* bands currently in use; multiq_tune() sets this from real_num_tx_queues */
+ u16 max_bands; /* length of queues[]; multiq_init() sets this from num_tx_queues */
+ u16 curband; /* round-robin cursor advanced by multiq_dequeue() */
+ struct tcf_proto *filter_list; /* filter chain handed to tc_classify() */
+ struct Qdisc **queues; /* per-band child qdiscs, allocated in multiq_init() */
+};
+
+
+/*
+ * Select the child qdisc that @skb should be queued on.
+ *
+ * The filter chain is run first.  With CONFIG_NET_CLS_ACT a STOLEN,
+ * QUEUED or SHOT verdict makes this return NULL; for STOLEN/QUEUED
+ * *qerr is switched from SUCCESS|BYPASS to SUCCESS|STOLEN.
+ * Otherwise the band is the skb's queue mapping, and a mapping that
+ * is not below q->bands falls back to band 0.
+ */
+static struct Qdisc *
+multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct tcf_result res;
+	u32 band;
+	int err;
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	err = tc_classify(skb, q->filter_list, &res);
+#ifdef CONFIG_NET_CLS_ACT
+	if (err == TC_ACT_STOLEN || err == TC_ACT_QUEUED) {
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		return NULL;
+	}
+	if (err == TC_ACT_SHOT)
+		return NULL;
+#endif
+	band = skb_get_queue_mapping(skb);
+	if (band >= q->bands)
+		band = 0;
+
+	return q->queues[band];
+}
+
+/*
+ * Enqueue @skb on the band chosen by multiq_classify().
+ *
+ * Returns NET_XMIT_SUCCESS after updating byte/packet counters and
+ * qlen, or the child's (or classifier's) return code otherwise.
+ */
+static int
+multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct Qdisc *child;
+	int rc;
+
+	child = multiq_classify(skb, sch, &rc);
+#ifdef CONFIG_NET_CLS_ACT
+	if (!child) {
+		/* only a bypass verdict is accounted as a drop */
+		if (rc & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return rc;
+	}
+#endif
+
+	rc = qdisc_enqueue(skb, child);
+	if (rc != NET_XMIT_SUCCESS) {
+		if (net_xmit_drop_count(rc))
+			sch->qstats.drops++;
+		return rc;
+	}
+
+	sch->bstats.bytes += qdisc_pkt_len(skb);
+	sch->bstats.packets++;
+	sch->q.qlen++;
+	return NET_XMIT_SUCCESS;
+}
+
+
+/*
+ * Hand @skb back to the child qdisc selected by multiq_classify().
+ *
+ * On success qlen and the requeue counter are bumped and the
+ * round-robin cursor is stepped back one band (wrapping to the last
+ * band from 0).  On failure the child's return code is passed up.
+ */
+static int
+multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *child;
+	int rc;
+
+	child = multiq_classify(skb, sch, &rc);
+#ifdef CONFIG_NET_CLS_ACT
+	if (!child) {
+		if (rc & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return rc;
+	}
+#endif
+
+	rc = child->ops->requeue(skb, child);
+	if (rc != NET_XMIT_SUCCESS) {
+		if (net_xmit_drop_count(rc))
+			sch->qstats.drops++;
+		return rc;
+	}
+
+	sch->q.qlen++;
+	sch->qstats.requeues++;
+	q->curband = q->curband ? q->curband - 1 : q->bands - 1;
+	return NET_XMIT_SUCCESS;
+}
+
+
+/*
+ * Pull one packet, visiting each band at most once per call.
+ *
+ * The cursor is advanced before every attempt so successive calls
+ * start at different bands.  Bands whose device subqueue is stopped
+ * are skipped to avoid excessive requeues.  Returns NULL when no band
+ * yields a packet.
+ */
+static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int tries;
+
+	for (tries = 0; tries < q->bands; tries++) {
+		struct Qdisc *child;
+		struct sk_buff *skb;
+
+		if (++q->curband >= q->bands)
+			q->curband = 0;
+
+		if (__netif_subqueue_stopped(qdisc_dev(sch), q->curband))
+			continue;
+
+		child = q->queues[q->curband];
+		skb = child->dequeue(child);
+		if (!skb)
+			continue;
+
+		sch->q.qlen--;
+		return skb;
+	}
+
+	return NULL;
+}
+
+/*
+ * Drop one packet, trying the highest-numbered band first.
+ *
+ * Returns the non-zero length reported by the first child whose
+ * ->drop() succeeds, or 0 if no child dropped anything.
+ */
+static unsigned int multiq_drop(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int band = q->bands;
+
+	while (band-- > 0) {
+		struct Qdisc *child = q->queues[band];
+		unsigned int len;
+
+		if (!child->ops->drop)
+			continue;
+
+		len = child->ops->drop(child);
+		if (len == 0)
+			continue;
+
+		sch->q.qlen--;
+		return len;
+	}
+
+	return 0;
+}
+
+
+/* Reset every active band, then clear qlen and the round-robin cursor. */
+static void
+multiq_reset(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	u16 band = 0;
+
+	while (band < q->bands) {
+		qdisc_reset(q->queues[band]);
+		band++;
+	}
+
+	sch->q.qlen = 0;
+	q->curband = 0;
+}
+
+/*
+ * Tear down the qdisc: destroy the filter chain, destroy the child of
+ * every active band, then free the queues array itself.
+ */
+static void
+multiq_destroy(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int band = 0;
+
+	tcf_destroy_chain(&q->filter_list);
+
+	while (band < q->bands) {
+		qdisc_destroy(q->queues[band]);
+		band++;
+	}
+
+	kfree(q->queues);
+}
+
+/*
+ * Apply configuration from @opt.
+ *
+ * Fails with -EINVAL if the device is not multiqueue or the attribute
+ * is shorter than struct tc_multiq_qopt.  The band count in the
+ * message is overwritten with the device's real_num_tx_queues.
+ * Children above the new band count are replaced by noop_qdisc under
+ * the tree lock; afterwards every active band still holding
+ * noop_qdisc gets a default pfifo, which is created outside the lock
+ * and swapped in under it.  A failed pfifo creation is skipped
+ * silently.  Returns 0 otherwise.
+ */
+static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct tc_multiq_qopt *qopt;
+	struct Qdisc *old, *fresh;
+	int band;
+
+	if (!netif_is_multiqueue(qdisc_dev(sch)))
+		return -EINVAL;
+	if (nla_len(opt) < sizeof(*qopt))
+		return -EINVAL;
+
+	qopt = nla_data(opt);
+	qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
+
+	sch_tree_lock(sch);
+	q->bands = qopt->bands;
+	for (band = q->bands; band < q->max_bands; band++) {
+		if (q->queues[band] == &noop_qdisc)
+			continue;
+
+		old = xchg(&q->queues[band], &noop_qdisc);
+		qdisc_tree_decrease_qlen(old, old->q.qlen);
+		qdisc_destroy(old);
+	}
+	sch_tree_unlock(sch);
+
+	for (band = 0; band < q->bands; band++) {
+		if (q->queues[band] != &noop_qdisc)
+			continue;
+
+		fresh = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+					  &pfifo_qdisc_ops,
+					  TC_H_MAKE(sch->handle, band + 1));
+		if (!fresh)
+			continue;
+
+		sch_tree_lock(sch);
+		old = xchg(&q->queues[band], fresh);
+		/* the slot may have been filled while the lock was not held */
+		if (old != &noop_qdisc) {
+			qdisc_tree_decrease_qlen(old, old->q.qlen);
+			qdisc_destroy(old);
+		}
+		sch_tree_unlock(sch);
+	}
+
+	return 0;
+}
+
+/*
+ * Set up a new multiq qdisc.
+ *
+ * Allocates one child slot per device tx queue (num_tx_queues), fills
+ * every slot with noop_qdisc and then lets multiq_tune() apply @opt.
+ *
+ * Returns 0 on success, -EINVAL when @opt is missing, -ENOBUFS when
+ * the slot array cannot be allocated, or the error from multiq_tune().
+ * On any failure q->queues is left NULL so no stale pointer to freed
+ * memory remains in the private data.
+ */
+static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int i, err;
+
+	q->queues = NULL;
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	q->max_bands = qdisc_dev(sch)->num_tx_queues;
+
+	q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
+	if (!q->queues)
+		return -ENOBUFS;
+	for (i = 0; i < q->max_bands; i++)
+		q->queues[i] = &noop_qdisc;
+
+	err = multiq_tune(sch, opt);
+	if (err) {
+		kfree(q->queues);
+		/* do not leave a dangling pointer behind */
+		q->queues = NULL;
+	}
+
+	return err;
+}
+
+/*
+ * Emit the current band counts as a TCA_OPTIONS attribute.
+ *
+ * Returns skb->len on success.  If NLA_PUT jumps to nla_put_failure
+ * the message is trimmed back to where it was on entry and -1 is
+ * returned.
+ */
+static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	unsigned char *start = skb_tail_pointer(skb);
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct tc_multiq_qopt opt;
+
+	opt.max_bands = q->max_bands;
+	opt.bands = q->bands;
+
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, start);
+	return -1;
+}
+
+/*
+ * Replace the child of class @arg (1-based) with @new, or with
+ * noop_qdisc when @new is NULL.  The previous child is returned in
+ * *old after its queue length has been subtracted from the tree and
+ * it has been reset.  Returns -EINVAL for a class outside the active
+ * bands, 0 otherwise.
+ */
+static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+			struct Qdisc **old)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+	struct Qdisc *prev;
+
+	if (band >= q->bands)
+		return -EINVAL;
+
+	if (!new)
+		new = &noop_qdisc;
+
+	sch_tree_lock(sch);
+	prev = q->queues[band];
+	*old = prev;
+	q->queues[band] = new;
+	qdisc_tree_decrease_qlen(prev, prev->q.qlen);
+	qdisc_reset(prev);
+	sch_tree_unlock(sch);
+
+	return 0;
+}
+
+/*
+ * Return the child qdisc of class @arg (1-based), or NULL when the
+ * class does not map to an active band.
+ */
+static struct Qdisc *
+multiq_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long band = arg - 1;
+
+	return band < q->bands ? q->queues[band] : NULL;
+}
+
+/*
+ * Map @classid to a class handle: the minor number when it names an
+ * active band (1..bands), otherwise 0.
+ */
+static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned long minor = TC_H_MIN(classid);
+
+	/* minor == 0 wraps to a huge value and is rejected as well */
+	return (minor - 1 < q->bands) ? minor : 0;
+}
+
+/* Resolve @classid exactly as multiq_get() does; @parent is not used. */
+static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
+				 u32 classid)
+{
+	unsigned long cl = multiq_get(sch, classid);
+
+	return cl;
+}
+
+
+/* Intentionally empty: this function does nothing with @q or @cl. */
+static void multiq_put(struct Qdisc *q, unsigned long cl)
+{
+}
+
+/*
+ * Class "change" hook.  No per-class parameter is modified; the call
+ * only checks that *arg names an existing class.
+ *
+ * Classes are numbered 1..q->bands.  Returns 0 for a valid class and
+ * -ENOENT otherwise; a class of 0 wraps to a huge value in the
+ * unsigned subtraction and is rejected too.  The comparison is ">="
+ * so that class bands + 1 is refused, matching the bound used by
+ * multiq_get(), multiq_leaf() and multiq_graft().
+ */
+static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent,
+			 struct nlattr **tca, unsigned long *arg)
+{
+	unsigned long cl = *arg;
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 >= q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+/*
+ * Class "delete" hook.  Nothing is removed; the call only checks that
+ * @cl names an existing class.
+ *
+ * Returns 0 for a class in 1..q->bands and -ENOENT otherwise.  The
+ * comparison is ">=" so that class bands + 1 is refused, matching the
+ * bound used by multiq_get(), multiq_leaf() and multiq_graft().
+ */
+static int multiq_delete(struct Qdisc *sch, unsigned long cl)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 >= q->bands)
+		return -ENOENT;
+	return 0;
+}
+
+
+/*
+ * Fill @tcm for class @cl (1-based): the class minor is OR-ed into
+ * tcm_handle and tcm_info receives the handle of the band's child.
+ *
+ * Returns 0 on success, -ENOENT when @cl is not in 1..q->bands.  The
+ * bound is ">=" because the index used below is cl - 1: with ">" a
+ * class of bands + 1 passed the check and read q->queues[bands],
+ * which lies past the end of the array when bands equals max_bands.
+ */
+static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	if (cl - 1 >= q->bands)
+		return -ENOENT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	if (q->queues[cl-1])
+		tcm->tcm_info = q->queues[cl-1]->handle;
+	return 0;
+}
+
+/*
+ * Copy the basic and queue statistics of class @cl's child qdisc
+ * into @d.  Returns 0 on success, -1 as soon as either copy fails.
+ * @cl is used as an index without a range check here.
+ */
+static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				   struct gnet_dump *d)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *child = q->queues[cl - 1];
+
+	if (gnet_stats_copy_basic(d, &child->bstats) < 0)
+		return -1;
+	if (gnet_stats_copy_queue(d, &child->qstats) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Invoke arg->fn for every active band (class ids 1..bands), honouring
+ * arg->skip.  A negative return from the callback sets arg->stop and
+ * ends the walk without counting that class.
+ */
+static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	int band;
+
+	if (arg->stop)
+		return;
+
+	for (band = 0; band < q->bands; band++) {
+		/* classes still inside the skip window are only counted */
+		if (arg->count >= arg->skip &&
+		    arg->fn(sch, band + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+/*
+ * Return the address of the qdisc-level filter chain when @cl is 0;
+ * any non-zero class yields NULL.
+ */
+static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+
+	return cl ? NULL : &q->filter_list;
+}
+
+static const struct Qdisc_class_ops multiq_class_ops = {
+ .graft = multiq_graft,
+ .leaf = multiq_leaf,
+ .get = multiq_get,
+ .put = multiq_put, /* empty function */
+ .change = multiq_change, /* only validates the class number */
+ .delete = multiq_delete, /* only validates the class number */
+ .walk = multiq_walk,
+ .tcf_chain = multiq_find_tcf,
+ .bind_tcf = multiq_bind, /* forwards to multiq_get() */
+ .unbind_tcf = multiq_put, /* same empty function as .put */
+ .dump = multiq_dump_class,
+ .dump_stats = multiq_dump_class_stats,
+};
+
+static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
+ .next = NULL,
+ .cl_ops = &multiq_class_ops,
+ .id = "multiq",
+ .priv_size = sizeof(struct multiq_sched_data), /* size of the state read via qdisc_priv() */
+ .enqueue = multiq_enqueue,
+ .dequeue = multiq_dequeue,
+ .requeue = multiq_requeue,
+ .drop = multiq_drop,
+ .init = multiq_init,
+ .reset = multiq_reset,
+ .destroy = multiq_destroy,
+ .change = multiq_tune, /* the same routine multiq_init() calls */
+ .dump = multiq_dump,
+ .owner = THIS_MODULE,
+};
+
+/* Module entry point: register the multiq qdisc ops and return the result. */
+static int __init multiq_module_init(void)
+{
+	int err = register_qdisc(&multiq_qdisc_ops);
+
+	return err;
+}
+
+static void __exit multiq_module_exit(void)
+{
+ unregister_qdisc(&multiq_qdisc_ops); /* undoes the registration made in multiq_module_init() */
+}
+
+module_init(multiq_module_init)
+module_exit(multiq_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index c9c649b26eaa..a11959908d9a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -82,6 +82,13 @@ struct netem_skb_cb {
psched_time_t time_to_send;
};
+/*
+ * Return netem's per-packet control block, which is stored in the
+ * data area of the generic qdisc control block.  The build fails if
+ * skb->cb is too small to hold both structures.
+ */
+static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
+{
+	struct qdisc_skb_cb *qcb = qdisc_skb_cb(skb);
+
+	BUILD_BUG_ON(sizeof(skb->cb) <
+		sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
+	return (struct netem_skb_cb *)qcb->data;
+}
+
/* init_crandom - initialize correlated random number generator
* Use entropy source for initial seed.
*/
@@ -169,7 +176,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (count == 0) {
sch->qstats.drops++;
kfree_skb(skb);
- return NET_XMIT_BYPASS;
+ return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}
skb_orphan(skb);
@@ -180,11 +187,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* skb will be queued.
*/
if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
- struct Qdisc *rootq = sch->dev->qdisc;
+ struct Qdisc *rootq = qdisc_root(sch);
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
q->duplicate = 0;
- rootq->enqueue(skb2, rootq);
+ qdisc_enqueue_root(skb2, rootq);
q->duplicate = dupsave;
}
@@ -205,7 +212,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
}
- cb = (struct netem_skb_cb *)skb->cb;
+ cb = netem_skb_cb(skb);
if (q->gap == 0 /* not doing reordering */
|| q->counter < q->gap /* inside last reordering gap */
|| q->reorder < get_crandom(&q->reorder_cor)) {
@@ -218,7 +225,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
now = psched_get_time();
cb->time_to_send = now + delay;
++q->counter;
- ret = q->qdisc->enqueue(skb, q->qdisc);
+ ret = qdisc_enqueue(skb, q->qdisc);
} else {
/*
* Do re-ordering by putting one out of N packets at the front
@@ -231,10 +238,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (likely(ret == NET_XMIT_SUCCESS)) {
sch->q.qlen++;
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
sch->bstats.packets++;
- } else
+ } else if (net_xmit_drop_count(ret)) {
sch->qstats.drops++;
+ }
pr_debug("netem: enqueue ret %d\n", ret);
return ret;
@@ -277,8 +285,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
skb = q->qdisc->dequeue(q->qdisc);
if (skb) {
- const struct netem_skb_cb *cb
- = (const struct netem_skb_cb *)skb->cb;
+ const struct netem_skb_cb *cb = netem_skb_cb(skb);
psched_time_t now = psched_get_time();
/* if more time remaining? */
@@ -310,28 +317,6 @@ static void netem_reset(struct Qdisc *sch)
qdisc_watchdog_cancel(&q->watchdog);
}
-/* Pass size change message down to embedded FIFO */
-static int set_fifo_limit(struct Qdisc *q, int limit)
-{
- struct nlattr *nla;
- int ret = -ENOMEM;
-
- /* Hack to avoid sending change message to non-FIFO */
- if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
- return 0;
-
- nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
- if (nla) {
- nla->nla_type = RTM_NEWQDISC;
- nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
- ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
-
- ret = q->ops->change(q, nla);
- kfree(nla);
- }
- return ret;
-}
-
/*
* Distribution data is a variable size payload containing
* signed 16 bit values.
@@ -341,6 +326,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
struct netem_sched_data *q = qdisc_priv(sch);
unsigned long n = nla_len(attr)/sizeof(__s16);
const __s16 *data = nla_data(attr);
+ spinlock_t *root_lock;
struct disttable *d;
int i;
@@ -355,9 +341,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
for (i = 0; i < n; i++)
d->table[i] = data[i];
- spin_lock_bh(&sch->dev->queue_lock);
+ root_lock = qdisc_root_sleeping_lock(sch);
+
+ spin_lock_bh(root_lock);
d = xchg(&q->delay_dist, d);
- spin_unlock_bh(&sch->dev->queue_lock);
+ spin_unlock_bh(root_lock);
kfree(d);
return 0;
@@ -400,6 +388,20 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
};
+/*
+ * Parse the attributes that follow a fixed header of @len bytes
+ * inside @nla.
+ *
+ * Returns -EINVAL when @nla is shorter than the aligned header.  When
+ * the remainder is too short to hold even an empty attribute, @tb is
+ * zeroed and 0 is returned.  Otherwise the remainder is passed to
+ * nla_parse() and its result is returned.
+ */
+static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+		      const struct nla_policy *policy, int len)
+{
+	int rest = nla_len(nla) - NLA_ALIGN(len);
+
+	if (rest < 0)
+		return -EINVAL;
+
+	if (rest < nla_attr_size(0)) {
+		memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+		return 0;
+	}
+
+	return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+			 rest, policy);
+}
+
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
@@ -411,12 +413,12 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy,
- qopt, sizeof(*qopt));
+ qopt = nla_data(opt);
+ ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
if (ret < 0)
return ret;
- ret = set_fifo_limit(q->qdisc, qopt->limit);
+ ret = fifo_set_limit(q->qdisc, qopt->limit);
if (ret) {
pr_debug("netem: can't set fifo limit\n");
return ret;
@@ -476,7 +478,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct fifo_sched_data *q = qdisc_priv(sch);
struct sk_buff_head *list = &sch->q;
- psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
+ psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
struct sk_buff *skb;
if (likely(skb_queue_len(list) < q->limit)) {
@@ -487,8 +489,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
}
skb_queue_reverse_walk(list, skb) {
- const struct netem_skb_cb *cb
- = (const struct netem_skb_cb *)skb->cb;
+ const struct netem_skb_cb *cb = netem_skb_cb(skb);
if (tnext >= cb->time_to_send)
break;
@@ -496,8 +497,8 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
__skb_queue_after(list, skb, nskb);
- sch->qstats.backlog += nskb->len;
- sch->bstats.bytes += nskb->len;
+ sch->qstats.backlog += qdisc_pkt_len(nskb);
+ sch->bstats.bytes += qdisc_pkt_len(nskb);
sch->bstats.packets++;
return NET_XMIT_SUCCESS;
@@ -517,7 +518,7 @@ static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
q->limit = ctl->limit;
} else
- q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
+ q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
q->oldest = PSCHED_PASTPERFECT;
return 0;
@@ -558,7 +559,8 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
qdisc_watchdog_init(&q->watchdog, sch);
- q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
+ q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &tfifo_qdisc_ops,
TC_H_MAKE(sch->handle, 1));
if (!q->qdisc) {
pr_debug("netem: qdisc create failed\n");
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 4aa2b45dad0a..504a78cdb718 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -24,11 +24,9 @@
struct prio_sched_data
{
int bands;
- int curband; /* for round-robin */
struct tcf_proto *filter_list;
u8 prio2band[TC_PRIO_MAX+1];
struct Qdisc *queues[TCQ_PRIO_BANDS];
- int mq;
};
@@ -40,14 +38,14 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
struct tcf_result res;
int err;
- *qerr = NET_XMIT_BYPASS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
err = tc_classify(skb, q->filter_list, &res);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
- *qerr = NET_XMIT_SUCCESS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return NULL;
}
@@ -55,17 +53,14 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
if (!q->filter_list || err < 0) {
if (TC_H_MAJ(band))
band = 0;
- band = q->prio2band[band&TC_PRIO_MAX];
- goto out;
+ return q->queues[q->prio2band[band&TC_PRIO_MAX]];
}
band = res.classid;
}
band = TC_H_MIN(band) - 1;
if (band >= q->bands)
- band = q->prio2band[0];
-out:
- if (q->mq)
- skb_set_queue_mapping(skb, band);
+ return q->queues[q->prio2band[0]];
+
return q->queues[band];
}
@@ -79,20 +74,22 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#ifdef CONFIG_NET_CLS_ACT
if (qdisc == NULL) {
- if (ret == NET_XMIT_BYPASS)
+ if (ret & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
return ret;
}
#endif
- if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) {
- sch->bstats.bytes += skb->len;
+ ret = qdisc_enqueue(skb, qdisc);
+ if (ret == NET_XMIT_SUCCESS) {
+ sch->bstats.bytes += qdisc_pkt_len(skb);
sch->bstats.packets++;
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
- sch->qstats.drops++;
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
return ret;
}
@@ -106,7 +103,7 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
qdisc = prio_classify(skb, sch, &ret);
#ifdef CONFIG_NET_CLS_ACT
if (qdisc == NULL) {
- if (ret == NET_XMIT_BYPASS)
+ if (ret & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
return ret;
@@ -116,74 +113,31 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
sch->q.qlen++;
sch->qstats.requeues++;
- return 0;
+ return NET_XMIT_SUCCESS;
}
- sch->qstats.drops++;
- return NET_XMIT_DROP;
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
+ return ret;
}
-static struct sk_buff *
-prio_dequeue(struct Qdisc* sch)
+static struct sk_buff *prio_dequeue(struct Qdisc* sch)
{
- struct sk_buff *skb;
struct prio_sched_data *q = qdisc_priv(sch);
int prio;
- struct Qdisc *qdisc;
for (prio = 0; prio < q->bands; prio++) {
- /* Check if the target subqueue is available before
- * pulling an skb. This way we avoid excessive requeues
- * for slower queues.
- */
- if (!__netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) {
- qdisc = q->queues[prio];
- skb = qdisc->dequeue(qdisc);
- if (skb) {
- sch->q.qlen--;
- return skb;
- }
+ struct Qdisc *qdisc = q->queues[prio];
+ struct sk_buff *skb = qdisc->dequeue(qdisc);
+ if (skb) {
+ sch->q.qlen--;
+ return skb;
}
}
return NULL;
}
-static struct sk_buff *rr_dequeue(struct Qdisc* sch)
-{
- struct sk_buff *skb;
- struct prio_sched_data *q = qdisc_priv(sch);
- struct Qdisc *qdisc;
- int bandcount;
-
- /* Only take one pass through the queues. If nothing is available,
- * return nothing.
- */
- for (bandcount = 0; bandcount < q->bands; bandcount++) {
- /* Check if the target subqueue is available before
- * pulling an skb. This way we avoid excessive requeues
- * for slower queues. If the queue is stopped, try the
- * next queue.
- */
- if (!__netif_subqueue_stopped(sch->dev,
- (q->mq ? q->curband : 0))) {
- qdisc = q->queues[q->curband];
- skb = qdisc->dequeue(qdisc);
- if (skb) {
- sch->q.qlen--;
- q->curband++;
- if (q->curband >= q->bands)
- q->curband = 0;
- return skb;
- }
- }
- q->curband++;
- if (q->curband >= q->bands)
- q->curband = 0;
- }
- return NULL;
-}
-
static unsigned int prio_drop(struct Qdisc* sch)
{
struct prio_sched_data *q = qdisc_priv(sch);
@@ -219,7 +173,7 @@ prio_destroy(struct Qdisc* sch)
int prio;
struct prio_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(q->filter_list);
+ tcf_destroy_chain(&q->filter_list);
for (prio=0; prio<q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
@@ -228,45 +182,22 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
{
struct prio_sched_data *q = qdisc_priv(sch);
struct tc_prio_qopt *qopt;
- struct nlattr *tb[TCA_PRIO_MAX + 1];
- int err;
int i;
- err = nla_parse_nested_compat(tb, TCA_PRIO_MAX, opt, NULL, qopt,
- sizeof(*qopt));
- if (err < 0)
- return err;
-
- q->bands = qopt->bands;
- /* If we're multiqueue, make sure the number of incoming bands
- * matches the number of queues on the device we're associating with.
- * If the number of bands requested is zero, then set q->bands to
- * dev->egress_subqueue_count. Also, the root qdisc must be the
- * only one that is enabled for multiqueue, since it's the only one
- * that interacts with the underlying device.
- */
- q->mq = nla_get_flag(tb[TCA_PRIO_MQ]);
- if (q->mq) {
- if (sch->parent != TC_H_ROOT)
- return -EINVAL;
- if (netif_is_multiqueue(sch->dev)) {
- if (q->bands == 0)
- q->bands = sch->dev->egress_subqueue_count;
- else if (q->bands != sch->dev->egress_subqueue_count)
- return -EINVAL;
- } else
- return -EOPNOTSUPP;
- }
+ if (nla_len(opt) < sizeof(*qopt))
+ return -EINVAL;
+ qopt = nla_data(opt);
- if (q->bands > TCQ_PRIO_BANDS || q->bands < 2)
+ if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
return -EINVAL;
for (i=0; i<=TC_PRIO_MAX; i++) {
- if (qopt->priomap[i] >= q->bands)
+ if (qopt->priomap[i] >= qopt->bands)
return -EINVAL;
}
sch_tree_lock(sch);
+ q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
@@ -281,7 +212,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
for (i=0; i<q->bands; i++) {
if (q->queues[i] == &noop_qdisc) {
struct Qdisc *child;
- child = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
+ child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
TC_H_MAKE(sch->handle, i + 1));
if (child) {
sch_tree_lock(sch);
@@ -322,20 +254,12 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct prio_sched_data *q = qdisc_priv(sch);
unsigned char *b = skb_tail_pointer(skb);
- struct nlattr *nest;
struct tc_prio_qopt opt;
opt.bands = q->bands;
memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
- nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt);
- if (nest == NULL)
- goto nla_put_failure;
- if (q->mq) {
- if (nla_put_flag(skb, TCA_PRIO_MQ) < 0)
- goto nla_put_failure;
- }
- nla_nest_compat_end(skb, nest);
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
return skb->len;
@@ -507,44 +431,17 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
-static struct Qdisc_ops rr_qdisc_ops __read_mostly = {
- .next = NULL,
- .cl_ops = &prio_class_ops,
- .id = "rr",
- .priv_size = sizeof(struct prio_sched_data),
- .enqueue = prio_enqueue,
- .dequeue = rr_dequeue,
- .requeue = prio_requeue,
- .drop = prio_drop,
- .init = prio_init,
- .reset = prio_reset,
- .destroy = prio_destroy,
- .change = prio_tune,
- .dump = prio_dump,
- .owner = THIS_MODULE,
-};
-
static int __init prio_module_init(void)
{
- int err;
-
- err = register_qdisc(&prio_qdisc_ops);
- if (err < 0)
- return err;
- err = register_qdisc(&rr_qdisc_ops);
- if (err < 0)
- unregister_qdisc(&prio_qdisc_ops);
- return err;
+ return register_qdisc(&prio_qdisc_ops);
}
static void __exit prio_module_exit(void)
{
unregister_qdisc(&prio_qdisc_ops);
- unregister_qdisc(&rr_qdisc_ops);
}
module_init(prio_module_init)
module_exit(prio_module_exit)
MODULE_LICENSE("GPL");
-MODULE_ALIAS("sch_rr");
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 5c569853b9c0..5da05839e225 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -92,12 +92,12 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
break;
}
- ret = child->enqueue(skb, child);
+ ret = qdisc_enqueue(skb, child);
if (likely(ret == NET_XMIT_SUCCESS)) {
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
sch->bstats.packets++;
sch->q.qlen++;
- } else {
+ } else if (net_xmit_drop_count(ret)) {
q->stats.pdrop++;
sch->qstats.drops++;
}
@@ -174,33 +174,6 @@ static void red_destroy(struct Qdisc *sch)
qdisc_destroy(q->qdisc);
}
-static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
-{
- struct Qdisc *q;
- struct nlattr *nla;
- int ret;
-
- q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
- TC_H_MAKE(sch->handle, 1));
- if (q) {
- nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
- GFP_KERNEL);
- if (nla) {
- nla->nla_type = RTM_NEWQDISC;
- nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
- ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
-
- ret = q->ops->change(q, nla);
- kfree(nla);
-
- if (ret == 0)
- return q;
- }
- qdisc_destroy(q);
- }
- return NULL;
-}
-
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
@@ -228,9 +201,9 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
ctl = nla_data(tb[TCA_RED_PARMS]);
if (ctl->limit > 0) {
- child = red_create_dflt(sch, ctl->limit);
- if (child == NULL)
- return -ENOMEM;
+ child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
+ if (IS_ERR(child))
+ return PTR_ERR(child);
}
sch_tree_lock(sch);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index f0463d757a98..fe1508ef0d3d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -119,7 +119,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
u32 h, h2;
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
{
const struct iphdr *iph = ip_hdr(skb);
h = iph->daddr;
@@ -134,7 +134,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
h2 ^= *(((u32*)iph) + iph->ihl);
break;
}
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
{
struct ipv6hdr *iph = ipv6_hdr(skb);
h = iph->daddr.s6_addr32[3];
@@ -171,14 +171,14 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
if (!q->filter_list)
return sfq_hash(q, skb) + 1;
- *qerr = NET_XMIT_BYPASS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tc_classify(skb, q->filter_list, &res);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
- *qerr = NET_XMIT_SUCCESS;
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
return 0;
}
@@ -245,7 +245,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
if (d > 1) {
sfq_index x = q->dep[d + SFQ_DEPTH].next;
skb = q->qs[x].prev;
- len = skb->len;
+ len = qdisc_pkt_len(skb);
__skb_unlink(skb, &q->qs[x]);
kfree_skb(skb);
sfq_dec(q, x);
@@ -261,7 +261,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
q->next[q->tail] = q->next[d];
q->allot[q->next[d]] += q->quantum;
skb = q->qs[d].prev;
- len = skb->len;
+ len = qdisc_pkt_len(skb);
__skb_unlink(skb, &q->qs[d]);
kfree_skb(skb);
sfq_dec(q, d);
@@ -285,7 +285,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
hash = sfq_classify(skb, sch, &ret);
if (hash == 0) {
- if (ret == NET_XMIT_BYPASS)
+ if (ret & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
return ret;
@@ -305,7 +305,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (q->qs[x].qlen >= q->limit)
return qdisc_drop(skb, sch);
- sch->qstats.backlog += skb->len;
+ sch->qstats.backlog += qdisc_pkt_len(skb);
__skb_queue_tail(&q->qs[x], skb);
sfq_inc(q, x);
if (q->qs[x].qlen == 1) { /* The flow is new */
@@ -320,7 +320,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}
if (++sch->q.qlen <= q->limit) {
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
sch->bstats.packets++;
return 0;
}
@@ -339,7 +339,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
hash = sfq_classify(skb, sch, &ret);
if (hash == 0) {
- if (ret == NET_XMIT_BYPASS)
+ if (ret & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
return ret;
@@ -352,7 +352,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
q->hash[x] = hash;
}
- sch->qstats.backlog += skb->len;
+ sch->qstats.backlog += qdisc_pkt_len(skb);
__skb_queue_head(&q->qs[x], skb);
/* If selected queue has length q->limit+1, this means that
* all another queues are empty and we do simple tail drop.
@@ -363,7 +363,7 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
skb = q->qs[x].prev;
__skb_unlink(skb, &q->qs[x]);
sch->qstats.drops++;
- sch->qstats.backlog -= skb->len;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
kfree_skb(skb);
return NET_XMIT_CN;
}
@@ -411,7 +411,7 @@ sfq_dequeue(struct Qdisc *sch)
skb = __skb_dequeue(&q->qs[a]);
sfq_dec(q, a);
sch->q.qlen--;
- sch->qstats.backlog -= skb->len;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
/* Is the slot empty? */
if (q->qs[a].qlen == 0) {
@@ -423,7 +423,7 @@ sfq_dequeue(struct Qdisc *sch)
}
q->next[q->tail] = a;
q->allot[a] += q->quantum;
- } else if ((q->allot[a] -= skb->len) <= 0) {
+ } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
q->tail = a;
a = q->next[a];
q->allot[a] += q->quantum;
@@ -461,7 +461,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
return -EINVAL;
sch_tree_lock(sch);
- q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+ q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
q->perturb_period = ctl->perturb_period * HZ;
if (ctl->limit)
q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
@@ -502,7 +502,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
q->max_depth = 0;
q->tail = SFQ_DEPTH;
if (opt == NULL) {
- q->quantum = psched_mtu(sch->dev);
+ q->quantum = psched_mtu(qdisc_dev(sch));
q->perturb_period = 0;
q->perturbation = net_random();
} else {
@@ -520,7 +520,7 @@ static void sfq_destroy(struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- tcf_destroy_chain(q->filter_list);
+ tcf_destroy_chain(&q->filter_list);
q->perturb_period = 0;
del_timer_sync(&q->perturb_timer);
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 0b7d78f59d8c..94c61598b86a 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -123,23 +123,18 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
struct tbf_sched_data *q = qdisc_priv(sch);
int ret;
- if (skb->len > q->max_size) {
- sch->qstats.drops++;
-#ifdef CONFIG_NET_CLS_ACT
- if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
-#endif
- kfree_skb(skb);
-
- return NET_XMIT_DROP;
- }
+ if (qdisc_pkt_len(skb) > q->max_size)
+ return qdisc_reshape_fail(skb, sch);
- if ((ret = q->qdisc->enqueue(skb, q->qdisc)) != 0) {
- sch->qstats.drops++;
+ ret = qdisc_enqueue(skb, q->qdisc);
+ if (ret != 0) {
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
return ret;
}
sch->q.qlen++;
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
sch->bstats.packets++;
return 0;
}
@@ -180,7 +175,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
psched_time_t now;
long toks;
long ptoks = 0;
- unsigned int len = skb->len;
+ unsigned int len = qdisc_pkt_len(skb);
now = psched_get_time();
toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
@@ -242,34 +237,6 @@ static void tbf_reset(struct Qdisc* sch)
qdisc_watchdog_cancel(&q->watchdog);
}
-static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
-{
- struct Qdisc *q;
- struct nlattr *nla;
- int ret;
-
- q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
- TC_H_MAKE(sch->handle, 1));
- if (q) {
- nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
- GFP_KERNEL);
- if (nla) {
- nla->nla_type = RTM_NEWQDISC;
- nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
- ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
-
- ret = q->ops->change(q, nla);
- kfree(nla);
-
- if (ret == 0)
- return q;
- }
- qdisc_destroy(q);
- }
-
- return NULL;
-}
-
static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
[TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
[TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
@@ -322,8 +289,11 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
goto done;
if (qopt->limit > 0) {
- if ((child = tbf_create_dflt_qdisc(sch, qopt->limit)) == NULL)
+ child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
goto done;
+ }
}
sch_tree_lock(sch);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 0444fd0f0d22..d35ef059abb1 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -78,12 +78,12 @@ struct teql_sched_data
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
- struct net_device *dev = sch->dev;
+ struct net_device *dev = qdisc_dev(sch);
struct teql_sched_data *q = qdisc_priv(sch);
if (q->q.qlen < dev->tx_queue_len) {
__skb_queue_tail(&q->q, skb);
- sch->bstats.bytes += skb->len;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
sch->bstats.packets++;
return 0;
}
@@ -107,17 +107,19 @@ static struct sk_buff *
teql_dequeue(struct Qdisc* sch)
{
struct teql_sched_data *dat = qdisc_priv(sch);
+ struct netdev_queue *dat_queue;
struct sk_buff *skb;
skb = __skb_dequeue(&dat->q);
+ dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
if (skb == NULL) {
- struct net_device *m = dat->m->dev->qdisc->dev;
+ struct net_device *m = qdisc_dev(dat_queue->qdisc);
if (m) {
dat->m->slaves = sch;
netif_wake_queue(m);
}
}
- sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
+ sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
return skb;
}
@@ -153,10 +155,16 @@ teql_destroy(struct Qdisc* sch)
if (q == master->slaves) {
master->slaves = NEXT_SLAVE(q);
if (q == master->slaves) {
+ struct netdev_queue *txq;
+ spinlock_t *root_lock;
+
+ txq = netdev_get_tx_queue(master->dev, 0);
master->slaves = NULL;
- spin_lock_bh(&master->dev->queue_lock);
- qdisc_reset(master->dev->qdisc);
- spin_unlock_bh(&master->dev->queue_lock);
+
+ root_lock = qdisc_root_sleeping_lock(txq->qdisc);
+ spin_lock_bh(root_lock);
+ qdisc_reset(txq->qdisc);
+ spin_unlock_bh(root_lock);
}
}
skb_queue_purge(&dat->q);
@@ -170,7 +178,7 @@ teql_destroy(struct Qdisc* sch)
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
- struct net_device *dev = sch->dev;
+ struct net_device *dev = qdisc_dev(sch);
struct teql_master *m = (struct teql_master*)sch->ops;
struct teql_sched_data *q = qdisc_priv(sch);
@@ -216,7 +224,8 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
- struct teql_sched_data *q = qdisc_priv(dev->qdisc);
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
+ struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
struct neighbour *mn = skb->dst->neighbour;
struct neighbour *n = q->ncache;
@@ -252,7 +261,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
static inline int teql_resolve(struct sk_buff *skb,
struct sk_buff *skb_res, struct net_device *dev)
{
- if (dev->qdisc == &noop_qdisc)
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
+ if (txq->qdisc == &noop_qdisc)
return -ENODEV;
if (dev->header_ops == NULL ||
@@ -268,7 +278,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
struct Qdisc *start, *q;
int busy;
int nores;
- int len = skb->len;
+ unsigned int len = qdisc_pkt_len(skb); /* read before xmit: skb may be freed */
int subq = skb_get_queue_mapping(skb);
struct sk_buff *skb_res = NULL;
@@ -282,12 +292,13 @@ restart:
goto drop;
do {
- struct net_device *slave = q->dev;
+ struct net_device *slave = qdisc_dev(q);
+ struct netdev_queue *slave_txq;
- if (slave->qdisc_sleeping != q)
+ slave_txq = netdev_get_tx_queue(slave, 0);
+ if (slave_txq->qdisc_sleeping != q)
continue;
- if (netif_queue_stopped(slave) ||
- __netif_subqueue_stopped(slave, subq) ||
+ if (__netif_subqueue_stopped(slave, subq) ||
!netif_running(slave)) {
busy = 1;
continue;
@@ -295,18 +306,18 @@ restart:
switch (teql_resolve(skb, skb_res, slave)) {
case 0:
- if (netif_tx_trylock(slave)) {
- if (!netif_queue_stopped(slave) &&
- !__netif_subqueue_stopped(slave, subq) &&
+ if (__netif_tx_trylock(slave_txq)) {
+ if (!netif_tx_queue_stopped(slave_txq) &&
+ !netif_tx_queue_frozen(slave_txq) &&
slave->hard_start_xmit(skb, slave) == 0) {
- netif_tx_unlock(slave);
+ __netif_tx_unlock(slave_txq);
master->slaves = NEXT_SLAVE(q);
netif_wake_queue(dev);
master->stats.tx_packets++;
master->stats.tx_bytes += len;
return 0;
}
- netif_tx_unlock(slave);
+ __netif_tx_unlock(slave_txq);
}
if (netif_queue_stopped(dev))
busy = 1;
@@ -352,7 +363,7 @@ static int teql_master_open(struct net_device *dev)
q = m->slaves;
do {
- struct net_device *slave = q->dev;
+ struct net_device *slave = qdisc_dev(q);
if (slave == NULL)
return -EUNATCH;
@@ -403,7 +414,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
q = m->slaves;
if (q) {
do {
- if (new_mtu > q->dev->mtu)
+ if (new_mtu > qdisc_dev(q)->mtu)
return -EINVAL;
} while ((q=NEXT_SLAVE(q)) != m->slaves);
}