diff options
Diffstat (limited to 'net/sched')
33 files changed, 528 insertions, 785 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index f4544dd86476..475df8449be9 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -46,62 +46,6 @@ config NET_SCH_FIFO if NET_SCHED -choice - prompt "Packet scheduler clock source" - default NET_SCH_CLK_GETTIMEOFDAY - ---help--- - Packet schedulers need a monotonic clock that increments at a static - rate. The kernel provides several suitable interfaces, each with - different properties: - - - high resolution (us or better) - - fast to read (minimal locking, no i/o access) - - synchronized on all processors - - handles cpu clock frequency changes - - but nothing provides all of the above. - -config NET_SCH_CLK_JIFFIES - bool "Timer interrupt" - ---help--- - Say Y here if you want to use the timer interrupt (jiffies) as clock - source. This clock source is fast, synchronized on all processors and - handles cpu clock frequency changes, but its resolution is too low - for accurate shaping except at very low speed. - -config NET_SCH_CLK_GETTIMEOFDAY - bool "gettimeofday" - ---help--- - Say Y here if you want to use gettimeofday as clock source. This clock - source has high resolution, is synchronized on all processors and - handles cpu clock frequency changes, but it is slow. - - Choose this if you need a high resolution clock source but can't use - the CPU's cycle counter. - -# don't allow on SMP x86 because they can have unsynchronized TSCs. -# gettimeofday is a good alternative -config NET_SCH_CLK_CPU - bool "CPU cycle counter" - depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64 - ---help--- - Say Y here if you want to use the CPU's cycle counter as clock source. - This is a cheap and high resolution clock source, but on some - architectures it is not synchronized on all processors and doesn't - handle cpu clock frequency changes. - - The useable cycle counters are: - - x86/x86_64 - Timestamp Counter - alpha - Cycle Counter - sparc64 - %ticks register - ppc64 - Time base - ia64 - Interval Time Counter - - Choose this if your CPU's cycle counter is working properly. - -endchoice - comment "Queueing/Scheduling" config NET_SCH_CBQ diff --git a/net/sched/Makefile b/net/sched/Makefile index ff2d6e5e282c..020767a204d4 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -17,7 +17,6 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o -obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cb21617a5670..711dd26c95c3 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -25,12 +25,12 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/skbuff.h> -#include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/kmod.h> #include <net/sock.h> #include <net/sch_generic.h> #include <net/act_api.h> +#include <net/netlink.h> void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) { @@ -93,15 +93,15 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, continue; a->priv = p; a->order = n_i; - r = (struct rtattr*) skb->tail; + r = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, a->order, 0, NULL); err = tcf_action_dump_1(skb, a, 0, 0); if (err < 0) { index--; - skb_trim(skb, (u8*)r - skb->data); + nlmsg_trim(skb, r); goto done; } - r->rta_len = skb->tail - (u8*)r; + r->rta_len = skb_tail_pointer(skb) - (u8 *)r; n_i++; if (n_i >= TCA_ACT_MAX_PRIO) goto done; @@ -114,7 +114,7 @@ done: return n_i; rtattr_failure: - skb_trim(skb, (u8*)r - skb->data); + nlmsg_trim(skb, r); goto done; } @@ -125,7 +125,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, struct rtattr *r ; int i= 0, n_i = 0; - r = (struct rtattr*) skb->tail; + r = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, a->order, 0, NULL); RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); for (i = 0; i < (hinfo->hmask + 1); i++) { @@ -140,11 +140,11 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, } } RTA_PUT(skb, TCA_FCNT, 4, &n_i); - r->rta_len = skb->tail - (u8*)r; + r->rta_len = skb_tail_pointer(skb) - (u8 *)r; return n_i; rtattr_failure: - skb_trim(skb, (u8*)r - skb->data); + nlmsg_trim(skb, r); return -EINVAL; } @@ -423,7 +423,7 @@ int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { int err = -EINVAL; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *r; if (a->ops == NULL || a->ops->dump == NULL) @@ -432,15 +432,15 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); if (tcf_action_copy_stats(skb, a, 0)) goto rtattr_failure; - r = (struct rtattr*) skb->tail; + r = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_OPTIONS, 0, NULL); if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { - r->rta_len = skb->tail - (u8*)r; + r->rta_len = skb_tail_pointer(skb) - (u8 *)r; return err; } rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -449,17 +449,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) { struct tc_action *a; int err = -EINVAL; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *r ; while ((a = act) != NULL) { - r = (struct rtattr*) skb->tail; + r = (struct rtattr *)skb_tail_pointer(skb); act = a->next; RTA_PUT(skb, a->order, 0, NULL); err = tcf_action_dump_1(skb, a, bind, ref); if (err < 0) goto errout; - r->rta_len = skb->tail - (u8*)r; + r->rta_len = skb_tail_pointer(skb) - (u8 *)r; } return 0; @@ -467,7 +467,7 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref) rtattr_failure: err = -EINVAL; errout: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return err; } @@ -635,7 +635,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, { struct tcamsg *t; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *x; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); @@ -645,20 +645,20 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, t->tca__pad1 = 0; t->tca__pad2 = 0; - x = (struct rtattr*) skb->tail; + x = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); if (tcf_action_dump(skb, a, bind, ref) < 0) goto rtattr_failure; - x->rta_len = skb->tail - (u8*)x; + x->rta_len = skb_tail_pointer(skb) - (u8 *)x; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: nlmsg_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -767,7 +767,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) return -ENOBUFS; } - b = (unsigned char *)skb->tail; + b = skb_tail_pointer(skb); if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0) goto err_out; @@ -783,16 +783,16 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) t->tca__pad1 = 0; t->tca__pad2 = 0; - x = (struct rtattr *) skb->tail; + x = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); err = a->ops->walk(skb, &dcb, RTM_DELACTION, a); if (err < 0) goto rtattr_failure; - x->rta_len = skb->tail - (u8 *) x; + x->rta_len = skb_tail_pointer(skb) - (u8 *)x; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); @@ -884,7 +884,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, if (!skb) return -ENOBUFS; - b = (unsigned char *)skb->tail; + b = skb_tail_pointer(skb); nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags); t = NLMSG_DATA(nlh); @@ -892,15 +892,15 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, t->tca__pad1 = 0; t->tca__pad2 = 0; - x = (struct rtattr*) skb->tail; + x = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); if (tcf_action_dump(skb, a, 0, 0) < 0) goto rtattr_failure; - x->rta_len = skb->tail - (u8*)x; + x->rta_len = skb_tail_pointer(skb) - (u8 *)x; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); @@ -1015,7 +1015,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *x; struct tc_action_ops *a_o; struct tc_action a; @@ -1048,7 +1048,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) t->tca__pad1 = 0; t->tca__pad2 = 0; - x = (struct rtattr *) skb->tail; + x = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_ACT_TAB, 0, NULL); ret = a_o->walk(skb, cb, RTM_GETACTION, &a); @@ -1056,12 +1056,12 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) goto rtattr_failure; if (ret > 0) { - x->rta_len = skb->tail - (u8 *) x; + x->rta_len = skb_tail_pointer(skb) - (u8 *)x; ret = skb->len; } else - skb_trim(skb, (u8*)x - skb->data); + nlmsg_trim(skb, x); - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; if (NETLINK_CB(cb->skb).pid && ret) nlh->nlmsg_flags |= NLM_F_MULTI; module_put(a_o->owner); @@ -1070,20 +1070,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) rtattr_failure: nlmsg_failure: module_put(a_o->owner); - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return skb->len; } static int __init tc_action_init(void) { - struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC]; - - if (link_p) { - link_p[RTM_NEWACTION-RTM_BASE].doit = tc_ctl_action; - link_p[RTM_DELACTION-RTM_BASE].doit = tc_ctl_action; - link_p[RTM_GETACTION-RTM_BASE].doit = tc_ctl_action; - link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action; - } + rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL); + rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL); + rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action); return 0; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 87d0faf32867..7517f3791541 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -28,6 +28,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/proc_fs.h> +#include <net/netlink.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_gact.h> @@ -155,7 +156,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_gact opt; struct tcf_gact *gact = a->priv; struct tcf_t t; @@ -181,7 +182,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 47f0b1324239..00b05f422d45 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -30,6 +30,7 @@ #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/kmod.h> +#include <net/netlink.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_ipt.h> @@ -245,7 +246,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tcf_ipt *ipt = a->priv; struct ipt_entry_target *t; struct tcf_t tm; @@ -277,7 +278,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); kfree(t); return -1; } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 68f26cb278f9..de21c92faaa2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/proc_fs.h> +#include <net/netlink.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_mirred.h> @@ -198,7 +199,7 @@ bad_mirred: skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->dev = dev; - skb2->input_dev = skb->dev; + skb2->iif = skb->dev->ifindex; dev_queue_xmit(skb2); spin_unlock(&m->tcf_lock); return m->tcf_action; @@ -206,7 +207,7 @@ bad_mirred: static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = a->priv; struct tc_mirred opt; struct tcf_t t; @@ -225,7 +226,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 3d6a2fcc9ce4..45b3cda86a21 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -27,6 +27,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/proc_fs.h> +#include <net/netlink.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_pedit.h> @@ -136,7 +137,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, } } - pptr = skb->nh.raw; + pptr = skb_network_header(skb); spin_lock(&p->tcf_lock); @@ -195,7 +196,7 @@ done: static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tcf_pedit *p = a->priv; struct tc_pedit *opt; struct tcf_t t; @@ -226,7 +227,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); kfree(opt); return -1; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 10a5a5c36f76..616f465f407e 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -30,6 +30,7 @@ #include <linux/init.h> #include <net/sock.h> #include <net/act_api.h> +#include <net/netlink.h> #define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) #define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) @@ -80,7 +81,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c continue; a->priv = p; a->order = index; - r = (struct rtattr*) skb->tail; + r = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, a->order, 0, NULL); if (type == RTM_DELACTION) err = tcf_action_dump_1(skb, a, 0, 1); @@ -88,10 +89,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c err = tcf_action_dump_1(skb, a, 0, 0); if (err < 0) { index--; - skb_trim(skb, (u8*)r - skb->data); + nlmsg_trim(skb, r); goto done; } - r->rta_len = skb->tail - (u8*)r; + r->rta_len = skb_tail_pointer(skb) - (u8 *)r; n_i++; } } @@ -102,7 +103,7 @@ done: return n_i; rtattr_failure: - skb_trim(skb, (u8*)r - skb->data); + nlmsg_trim(skb, r); goto done; } #endif @@ -240,7 +241,7 @@ override: if (ret != ACT_P_CREATED) return ret; - PSCHED_GET_TIME(police->tcfp_t_c); + police->tcfp_t_c = psched_get_time(); police->tcf_index = parm->index ? parm->index : tcf_hash_new_index(&police_idx_gen, &police_hash_info); h = tcf_hash(police->tcf_index, POL_TAB_MASK); @@ -295,10 +296,9 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, return police->tcfp_result; } - PSCHED_GET_TIME(now); - - toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, - police->tcfp_burst); + now = psched_get_time(); + toks = psched_tdiff_bounded(now, police->tcfp_t_c, + police->tcfp_burst); if (police->tcfp_P_tab) { ptoks = toks + police->tcfp_ptoks; if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) @@ -326,7 +326,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = a->priv; struct tc_police opt; @@ -355,7 +355,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -494,7 +494,7 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) } if (police->tcfp_P_tab) police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); - PSCHED_GET_TIME(police->tcfp_t_c); + police->tcfp_t_c = psched_get_time(); police->tcf_index = parm->index ? parm->index : tcf_police_new_index(); police->tcf_action = parm->action; @@ -542,9 +542,9 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police) return police->tcfp_result; } - PSCHED_GET_TIME(now); - toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, - police->tcfp_burst); + now = psched_get_time(); + toks = psched_tdiff_bounded(now, police->tcfp_t_c, + police->tcfp_burst); if (police->tcfp_P_tab) { ptoks = toks + police->tcfp_ptoks; if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) @@ -572,7 +572,7 @@ EXPORT_SYMBOL(tcf_police); int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_police opt; opt.index = police->tcf_index; @@ -598,7 +598,7 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index c7971182af07..36e1edad5990 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -16,6 +16,7 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> +#include <net/netlink.h> #include <net/pkt_sched.h> #define TCA_ACT_SIMP 22 @@ -155,7 +156,7 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind) static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tcf_defact *d = a->priv; struct tc_defact opt; struct tcf_t t; @@ -173,7 +174,7 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5c6ffdb77d2d..ebf94edf0478 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -29,9 +29,10 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/skbuff.h> -#include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/kmod.h> +#include <linux/netlink.h> +#include <net/netlink.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -323,7 +324,7 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, { struct tcmsg *tcm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); @@ -340,12 +341,12 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh, if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0) goto rtattr_failure; } - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -399,7 +400,6 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) return skb->len; - read_lock(&qdisc_tree_lock); if (!tcm->tcm_parent) q = dev->qdisc_sleeping; else @@ -456,7 +456,6 @@ errout: if (cl) cops->put(q, cl); out: - read_unlock(&qdisc_tree_lock); dev_put(dev); return skb->len; } @@ -563,30 +562,30 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts, * to work with both old and new modes of entering * tc data even if iproute2 was newer - jhs */ - struct rtattr * p_rta = (struct rtattr*) skb->tail; + struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb); if (exts->action->type != TCA_OLD_COMPAT) { RTA_PUT(skb, map->action, 0, NULL); if (tcf_action_dump(skb, exts->action, 0, 0) < 0) goto rtattr_failure; - p_rta->rta_len = skb->tail - (u8*)p_rta; + p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; } else if (map->police) { RTA_PUT(skb, map->police, 0, NULL); if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0) goto rtattr_failure; - p_rta->rta_len = skb->tail - (u8*)p_rta; + p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; } } #elif defined CONFIG_NET_CLS_POLICE if (map->police && exts->police) { - struct rtattr * p_rta = (struct rtattr*) skb->tail; + struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, map->police, 0, NULL); if (tcf_police_dump(skb, exts->police) < 0) goto rtattr_failure; - p_rta->rta_len = skb->tail - (u8*)p_rta; + p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta; } #endif return 0; @@ -614,18 +613,11 @@ rtattr_failure: __attribute__ ((unused)) static int __init tc_filter_init(void) { - struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC]; + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL); + rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, + tc_dump_tfilter); - /* Setup rtnetlink links. It is made here to avoid - exporting large number of public symbols. - */ - - if (link_p) { - link_p[RTM_NEWTFILTER-RTM_BASE].doit = tc_ctl_tfilter; - link_p[RTM_DELTFILTER-RTM_BASE].doit = tc_ctl_tfilter; - link_p[RTM_GETTFILTER-RTM_BASE].doit = tc_ctl_tfilter; - link_p[RTM_GETTFILTER-RTM_BASE].dumpit = tc_dump_tfilter; - } return 0; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index fad08e521c24..c885412d79d5 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> +#include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> @@ -81,6 +82,13 @@ static void basic_put(struct tcf_proto *tp, unsigned long f) static int basic_init(struct tcf_proto *tp) { + struct basic_head *head; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + INIT_LIST_HEAD(&head->flist); + tp->root = head; return 0; } @@ -102,6 +110,7 @@ static void basic_destroy(struct tcf_proto *tp) list_del(&f->link); basic_delete_filter(tp, f); } + kfree(head); } static int basic_delete(struct tcf_proto *tp, unsigned long arg) @@ -176,15 +185,6 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle, } err = -ENOBUFS; - if (head == NULL) { - head = kzalloc(sizeof(*head), GFP_KERNEL); - if (head == NULL) - goto errout; - - INIT_LIST_HEAD(&head->flist); - tp->root = head; - } - f = kzalloc(sizeof(*f), GFP_KERNEL); if (f == NULL) goto errout; @@ -246,7 +246,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct basic_filter *f = (struct basic_filter *) fh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; if (f == NULL) @@ -264,11 +264,11 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh, tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto rtattr_failure; - rta->rta_len = (skb->tail - b); + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 5dbb9d451f73..bbec4a0d4dcb 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -38,6 +38,7 @@ #include <linux/notifier.h> #include <linux/netfilter.h> #include <net/ip.h> +#include <net/netlink.h> #include <net/route.h> #include <linux/skbuff.h> #include <net/sock.h> @@ -348,7 +349,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, { struct fw_head *head = (struct fw_head *)tp->root; struct fw_filter *f = (struct fw_filter*)fh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; if (f == NULL) @@ -374,7 +375,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0) goto rtattr_failure; @@ -382,7 +383,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index abc47cc48ad0..cc941d0ee3a5 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -28,6 +28,7 @@ #include <linux/etherdevice.h> #include <linux/notifier.h> #include <net/ip.h> +#include <net/netlink.h> #include <net/route.h> #include <linux/skbuff.h> #include <net/sock.h> @@ -88,9 +89,9 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif) static inline void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id) { - spin_lock_bh(&dev->queue_lock); + qdisc_lock_tree(dev); memset(head->fastmap, 0, sizeof(head->fastmap)); - spin_unlock_bh(&dev->queue_lock); + qdisc_unlock_tree(dev); } static inline void @@ -562,7 +563,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct route4_filter *f = (struct route4_filter*)fh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; u32 id; @@ -591,7 +592,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh, if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0) goto rtattr_failure; @@ -599,7 +600,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh, return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c index 1d4a1fb17608..0a683c07c648 100644 --- a/net/sched/cls_rsvp.c +++ b/net/sched/cls_rsvp.c @@ -31,6 +31,7 @@ #include <net/route.h> #include <linux/skbuff.h> #include <net/sock.h> +#include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 7853621a04cc..22f9ede70e8f 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -143,9 +143,9 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, u8 tunnelid = 0; u8 *xprt; #if RSVP_DST_LEN == 4 - struct ipv6hdr *nhptr = skb->nh.ipv6h; + struct ipv6hdr *nhptr = ipv6_hdr(skb); #else - struct iphdr *nhptr = skb->nh.iph; + struct iphdr *nhptr = ip_hdr(skb); #endif restart: @@ -160,7 +160,7 @@ restart: dst = &nhptr->daddr; protocol = nhptr->protocol; xprt = ((u8*)nhptr) + (nhptr->ihl<<2); - if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET)) + if (nhptr->frag_off & htons(IP_MF|IP_OFFSET)) return -1; #endif @@ -593,7 +593,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, { struct rsvp_filter *f = (struct rsvp_filter*)fh; struct rsvp_session *s; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; struct tc_rsvp_pinfo pinfo; @@ -623,14 +623,14 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0) goto rtattr_failure; return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c index a2979d89798f..93b6abed57db 100644 --- a/net/sched/cls_rsvp6.c +++ b/net/sched/cls_rsvp6.c @@ -34,6 +34,7 @@ #include <net/sock.h> #include <net/act_api.h> #include <net/pkt_cls.h> +#include <net/netlink.h> #define RSVP_DST_LEN 4 #define RSVP_ID "rsvp6" diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 040e2d2d281a..47ac0c556429 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -12,6 +12,7 @@ #include <linux/netdevice.h> #include <net/ip.h> #include <net/act_api.h> +#include <net/netlink.h> #include <net/pkt_cls.h> #include <net/route.h> @@ -245,9 +246,9 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, } if (tb[TCA_TCINDEX_SHIFT-1]) { - if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(u16)) + if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(int)) goto errout; - cp.shift = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]); + cp.shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]); } err = -EBUSY; @@ -448,7 +449,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, { struct tcindex_data *p = PRIV(tp); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n", @@ -463,7 +464,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift); RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through), &p->fall_through); - rta->rta_len = skb->tail-b; + rta->rta_len = skb_tail_pointer(skb) - b; } else { if (p->perfect) { t->tcm_handle = r-p->perfect; @@ -486,7 +487,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0) goto rtattr_failure; - rta->rta_len = skb->tail-b; + rta->rta_len = skb_tail_pointer(skb) - b; if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0) goto rtattr_failure; @@ -495,7 +496,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 0bcb16928d25..c7a347bd6d70 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -50,6 +50,7 @@ #include <linux/notifier.h> #include <linux/rtnetlink.h> #include <net/ip.h> +#include <net/netlink.h> #include <net/route.h> #include <linux/skbuff.h> #include <net/sock.h> @@ -119,7 +120,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re } stack[TC_U32_MAXDEPTH]; struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; - u8 *ptr = skb->nh.raw; + u8 *ptr = skb_network_header(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; @@ -213,7 +214,7 @@ check_terminal: off2 = 0; } - if (ptr < skb->tail) + if (ptr < skb_tail_pointer(skb)) goto next_ht; } @@ -435,7 +436,7 @@ static void u32_destroy(struct tcf_proto *tp) BUG_TRAP(ht->refcnt == 0); kfree(ht); - }; + } kfree(tp_c); } @@ -718,7 +719,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct tc_u_knode *n = (struct tc_u_knode*)fh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; if (n == NULL) @@ -765,14 +766,14 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, #endif } - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; if (TC_U32_KEY(n->handle)) if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0) goto rtattr_failure; return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index cd0600c67969..0a2a7fe08de3 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c @@ -22,7 +22,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em, struct tcf_pkt_info *info) { struct tc_u32_key *key = (struct tc_u32_key *) em->data; - unsigned char *ptr = skb->nh.raw; + const unsigned char *ptr = skb_network_header(skb); if (info) { if (info->ptr) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 959c306c5714..63146d339d81 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -418,17 +418,19 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree) int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) { int i; - struct rtattr * top_start = (struct rtattr*) skb->tail; - struct rtattr * list_start; + u8 *tail; + struct rtattr *top_start = (struct rtattr *)skb_tail_pointer(skb); + struct rtattr *list_start; RTA_PUT(skb, tlv, 0, NULL); RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr); - list_start = (struct rtattr *) skb->tail; + list_start = (struct rtattr *)skb_tail_pointer(skb); RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL); + tail = skb_tail_pointer(skb); for (i = 0; i < tree->hdr.nmatches; i++) { - struct rtattr *match_start = (struct rtattr*) skb->tail; + struct rtattr *match_start = (struct rtattr *)tail; struct tcf_ematch *em = tcf_em_get_match(tree, i); struct tcf_ematch_hdr em_hdr = { .kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER, @@ -447,11 +449,12 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) } else if (em->datalen > 0) RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data); - match_start->rta_len = skb->tail - (u8*) match_start; + tail = skb_tail_pointer(skb); + match_start->rta_len = tail - (u8 *)match_start; } - list_start->rta_len = skb->tail - (u8 *) list_start; - top_start->rta_len = skb->tail - (u8 *) top_start; + list_start->rta_len = tail - (u8 *)list_start; + top_start->rta_len = tail - (u8 *)top_start; return 0; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ecc988af4a9a..8699e7006d80 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -27,14 +27,15 @@ #include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/skbuff.h> -#include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/kmod.h> #include <linux/list.h> #include <linux/bitops.h> +#include <linux/hrtimer.h> +#include <net/netlink.h> #include <net/sock.h> #include <net/pkt_sched.h> @@ -190,7 +191,7 @@ int unregister_qdisc(struct Qdisc_ops *qops) (root qdisc, all its children, children of children etc.) */ -static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle) +struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; @@ -201,16 +202,6 @@ static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle) return NULL; } -struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) -{ - struct Qdisc *q; - - read_lock(&qdisc_tree_lock); - q = __qdisc_lookup(dev, handle); - read_unlock(&qdisc_tree_lock); - return q; -} - static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; @@ -291,6 +282,48 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) } } +static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) +{ + struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, + timer); + struct net_device *dev = wd->qdisc->dev; + + wd->qdisc->flags &= ~TCQ_F_THROTTLED; + smp_wmb(); + if (spin_trylock(&dev->queue_lock)) { + qdisc_run(dev); + spin_unlock(&dev->queue_lock); + } else + netif_schedule(dev); + + return HRTIMER_NORESTART; +} + +void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) +{ + hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + wd->timer.function = qdisc_watchdog; + wd->qdisc = qdisc; +} +EXPORT_SYMBOL(qdisc_watchdog_init); + +void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) +{ + ktime_t time; + + wd->qdisc->flags |= TCQ_F_THROTTLED; + time = ktime_set(0, 0); + time = ktime_add_ns(time, PSCHED_US2NS(expires)); + hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); +} +EXPORT_SYMBOL(qdisc_watchdog_schedule); + +void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) +{ + hrtimer_cancel(&wd->timer); + wd->qdisc->flags &= ~TCQ_F_THROTTLED; +} +EXPORT_SYMBOL(qdisc_watchdog_cancel); /* Allocate an unique handle from space managed by kernel */ @@ -362,7 +395,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) if (n == 0) return; while ((parentid = sch->parent)) { - sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); + sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid)); cops = sch->ops->cl_ops; if (cops->qlen_notify) { cl = cops->get(sch, parentid); @@ -467,12 +500,16 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) if (handle == TC_H_INGRESS) { sch->flags |= TCQ_F_INGRESS; + sch->stats_lock = &dev->ingress_lock; handle = TC_H_MAKE(TC_H_INGRESS, 0); - } else if (handle == 0) { - handle = qdisc_alloc_handle(dev); - err = -ENOMEM; - if (handle == 0) - goto err_out3; + } else { + sch->stats_lock = &dev->queue_lock; + if (handle == 0) { + handle = qdisc_alloc_handle(dev); + err = -ENOMEM; + if (handle == 0) + goto err_out3; + } } sch->handle = handle; @@ -621,9 +658,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return err; if (q) { qdisc_notify(skb, n, clid, q, NULL); - spin_lock_bh(&dev->queue_lock); + qdisc_lock_tree(dev); qdisc_destroy(q); - spin_unlock_bh(&dev->queue_lock); + qdisc_unlock_tree(dev); } } else { qdisc_notify(skb, n, clid, NULL, q); @@ -756,17 +793,17 @@ graft: err = qdisc_graft(dev, p, clid, q, &old_q); if (err) { if (q) { - spin_lock_bh(&dev->queue_lock); + qdisc_lock_tree(dev); qdisc_destroy(q); - spin_unlock_bh(&dev->queue_lock); + qdisc_unlock_tree(dev); } return err; } qdisc_notify(skb, n, clid, old_q, q); if (old_q) { - spin_lock_bh(&dev->queue_lock); + qdisc_lock_tree(dev); qdisc_destroy(old_q); - spin_unlock_bh(&dev->queue_lock); + qdisc_unlock_tree(dev); } } return 0; @@ -777,7 +814,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, { struct tcmsg *tcm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); @@ -811,12 +848,12 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, if (gnet_stats_finish_copy(&d) < 0) goto rtattr_failure; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -862,7 +899,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) continue; if (idx > s_idx) s_q_idx = 0; - read_lock(&qdisc_tree_lock); q_idx = 0; list_for_each_entry(q, &dev->qdisc_list, list) { if (q_idx < s_q_idx) { @@ -870,13 +906,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) continue; } if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) { - read_unlock(&qdisc_tree_lock); + cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; - } q_idx++; } - read_unlock(&qdisc_tree_lock); } done: @@ -1015,7 +1048,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, { struct tcmsg *tcm; struct nlmsghdr *nlh; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; @@ -1040,12 +1073,12 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, if (gnet_stats_finish_copy(&d) < 0) goto rtattr_failure; - nlh->nlmsg_len = skb->tail - b; + nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; nlmsg_failure: rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -1099,7 +1132,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - read_lock(&qdisc_tree_lock); list_for_each_entry(q, &dev->qdisc_list, list) { if (t < s_t || !q->ops->cl_ops || (tcm->tcm_parent && @@ -1121,7 +1153,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) break; t++; } - read_unlock(&qdisc_tree_lock); cb->args[0] = t; @@ -1146,7 +1177,7 @@ reclassify: for ( ; tp; tp = tp->next) { if ((tp->protocol == protocol || - tp->protocol == __constant_htons(ETH_P_ALL)) && + tp->protocol == htons(ETH_P_ALL)) && (err = tp->classify(skb, tp, res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT if ( TC_ACT_RECLASSIFY == err) { @@ -1175,15 +1206,31 @@ reclassify: return -1; } -static int psched_us_per_tick = 1; -static int psched_tick_per_us = 1; +void tcf_destroy(struct tcf_proto *tp) +{ + tp->ops->destroy(tp); + module_put(tp->ops->owner); + kfree(tp); +} + +void tcf_destroy_chain(struct tcf_proto *fl) +{ + struct tcf_proto *tp; + + while ((tp = fl) != NULL) { + fl = tp->next; + tcf_destroy(tp); + } +} +EXPORT_SYMBOL(tcf_destroy_chain); #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { seq_printf(seq, "%08x %08x %08x %08x\n", - psched_tick_per_us, psched_us_per_tick, - 1000000, HZ); + (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1), + 1000000, + (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES)); return 0; } @@ -1202,101 +1249,19 @@ static const struct file_operations psched_fops = { }; #endif -#ifdef CONFIG_NET_SCH_CLK_CPU -psched_tdiff_t psched_clock_per_hz; -int psched_clock_scale; -EXPORT_SYMBOL(psched_clock_per_hz); -EXPORT_SYMBOL(psched_clock_scale); - -psched_time_t psched_time_base; -cycles_t psched_time_mark; -EXPORT_SYMBOL(psched_time_mark); -EXPORT_SYMBOL(psched_time_base); - -/* - * Periodically adjust psched_time_base to avoid overflow - * with 32-bit get_cycles(). Safe up to 4GHz CPU. - */ -static void psched_tick(unsigned long); -static DEFINE_TIMER(psched_timer, psched_tick, 0, 0); - -static void psched_tick(unsigned long dummy) -{ - if (sizeof(cycles_t) == sizeof(u32)) { - psched_time_t dummy_stamp; - PSCHED_GET_TIME(dummy_stamp); - psched_timer.expires = jiffies + 1*HZ; - add_timer(&psched_timer); - } -} - -int __init psched_calibrate_clock(void) -{ - psched_time_t stamp, stamp1; - struct timeval tv, tv1; - psched_tdiff_t delay; - long rdelay; - unsigned long stop; - - psched_tick(0); - stop = jiffies + HZ/10; - PSCHED_GET_TIME(stamp); - do_gettimeofday(&tv); - while (time_before(jiffies, stop)) { - barrier(); - cpu_relax(); - } - PSCHED_GET_TIME(stamp1); - do_gettimeofday(&tv1); - - delay = PSCHED_TDIFF(stamp1, stamp); - rdelay = tv1.tv_usec - tv.tv_usec; - rdelay += (tv1.tv_sec - tv.tv_sec)*1000000; - if (rdelay > delay) - return -1; - delay /= rdelay; - psched_tick_per_us = delay; - while ((delay>>=1) != 0) - psched_clock_scale++; - psched_us_per_tick = 1<<psched_clock_scale; - psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale; - return 0; -} -#endif - static int __init pktsched_init(void) { - struct rtnetlink_link *link_p; - -#ifdef CONFIG_NET_SCH_CLK_CPU - if (psched_calibrate_clock() < 0) - return -1; -#elif defined(CONFIG_NET_SCH_CLK_JIFFIES) - psched_tick_per_us = HZ<<PSCHED_JSCALE; - psched_us_per_tick = 1000000; -#endif - - link_p = rtnetlink_links[PF_UNSPEC]; - - /* Setup rtnetlink links. It is made here to avoid - exporting large number of public symbols. - */ - - if (link_p) { - link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc; - link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc; - link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc; - link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc; - link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass; - link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass; - link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass; - link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass; - } - register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); proc_net_fops_create("psched", 0, &psched_fops); + rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); + rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); + rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc); + rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL); + rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL); + rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass); + return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index afb3bbd571f2..be7d299acd73 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -14,6 +14,7 @@ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/file.h> /* for fput */ +#include <net/netlink.h> #include <net/pkt_sched.h> #include <net/sock.h> @@ -157,19 +158,6 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch, return atm_tc_get(sch,classid); } - -static void destroy_filters(struct atm_flow_data *flow) -{ - struct tcf_proto *filter; - - while ((filter = flow->filter_list)) { - DPRINTK("destroy_filters: destroying filter %p\n",filter); - flow->filter_list = filter->next; - tcf_destroy(filter); - } -} - - /* * atm_tc_put handles all destructions, including the ones that are explicitly * requested (atm_tc_destroy, etc.). The assumption here is that we never drop @@ -194,7 +182,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) *prev = flow->next; DPRINTK("atm_tc_put: qdisc %p\n",flow->q); qdisc_destroy(flow->q); - destroy_filters(flow); + tcf_destroy_chain(flow->filter_list); if (flow->sock) { DPRINTK("atm_tc_put: f_count %d\n", file_count(flow->sock->file)); @@ -503,7 +491,7 @@ static void sch_atm_dequeue(unsigned long data) } D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow); /* remove any LL header somebody else has attached */ - skb_pull(skb,(char *) skb->nh.iph-(char *) skb->data); + skb_pull(skb, skb_network_offset(skb)); if (skb_headroom(skb) < flow->hdr_len) { struct sk_buff *new; @@ -513,7 +501,7 @@ static void sch_atm_dequeue(unsigned long data) skb = new; } D2PRINTK("sch_atm_dequeue: ip %p, data %p\n", - skb->nh.iph,skb->data); + skb_network_header(skb), skb->data); ATM_SKB(skb)->vcc = flow->vcc; memcpy(skb_push(skb,flow->hdr_len),flow->hdr, flow->hdr_len); @@ -610,7 +598,7 @@ static void atm_tc_destroy(struct Qdisc *sch) DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch,p); /* races ? */ while ((flow = p->flows)) { - destroy_filters(flow); + tcf_destroy_chain(flow->filter_list); if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow, flow->ref); @@ -631,7 +619,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, { struct atm_qdisc_data *p = PRIV(sch); struct atm_flow_data *flow = (struct atm_flow_data *) cl; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", @@ -661,11 +649,11 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero); } - rta->rta_len = skb->tail-b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: - skb_trim(skb,b-skb->data); + nlmsg_trim(skb, b); return -1; } static int diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 76c92e710a33..a294542cb8e4 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -29,6 +29,7 @@ #include <linux/etherdevice.h> #include <linux/notifier.h> #include <net/ip.h> +#include <net/netlink.h> #include <net/route.h> #include <linux/skbuff.h> #include <net/sock.h> @@ -112,7 +113,7 @@ struct cbq_class /* Overlimit strategy parameters */ void (*overlimit)(struct cbq_class *cl); - long penalty; + psched_tdiff_t penalty; /* General scheduler (WRR) parameters */ long allot; @@ -143,7 +144,7 @@ struct cbq_class psched_time_t undertime; long avgidle; long deficit; /* Saved deficit for WRR */ - unsigned long penalized; + psched_time_t penalized; struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; @@ -180,12 +181,12 @@ struct cbq_sched_data psched_time_t now_rt; /* Cached real time */ unsigned pmask; - struct timer_list delay_timer; - struct timer_list wd_timer; /* Watchdog timer, + struct hrtimer delay_timer; + struct qdisc_watchdog watchdog; /* Watchdog timer, started when CBQ has backlog, but cannot transmit just now */ - long wd_expires; + psched_tdiff_t wd_expires; int toplevel; u32 hgenerator; }; @@ -384,12 +385,12 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) psched_time_t now; psched_tdiff_t incr; - PSCHED_GET_TIME(now); - incr = PSCHED_TDIFF(now, q->now_rt); - PSCHED_TADD2(q->now, incr, now); + now = psched_get_time(); + incr = now - q->now_rt; + now = q->now + incr; do { - if (PSCHED_TLESS(cl->undertime, now)) { + if (cl->undertime < now) { q->toplevel = cl->level; return; } @@ -473,7 +474,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) static void cbq_ovl_classic(struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now); + psched_tdiff_t delay = cl->undertime - q->now; if (!cl->delayed) { delay += cl->offtime; @@ -491,7 +492,7 @@ static void cbq_ovl_classic(struct cbq_class *cl) cl->avgidle = cl->minidle; if (delay <= 0) delay = 1; - PSCHED_TADD2(q->now, delay, cl->undertime); + cl->undertime = q->now + delay; cl->xstats.overactions++; cl->delayed = 1; @@ -508,7 +509,7 @@ static void cbq_ovl_classic(struct cbq_class *cl) psched_tdiff_t base_delay = q->wd_expires; for (b = cl->borrow; b; b = b->borrow) { - delay = PSCHED_TDIFF(b->undertime, q->now); + delay = b->undertime - q->now; if (delay < base_delay) { if (delay <= 0) delay = 1; @@ -546,27 +547,32 @@ static void cbq_ovl_rclassic(struct cbq_class *cl) static void cbq_ovl_delay(struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now); + psched_tdiff_t delay = cl->undertime - q->now; if (!cl->delayed) { - unsigned long sched = jiffies; + psched_time_t sched = q->now; + ktime_t expires; delay += cl->offtime; if (cl->avgidle < 0) delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); if (cl->avgidle < cl->minidle) cl->avgidle = cl->minidle; - PSCHED_TADD2(q->now, delay, cl->undertime); + cl->undertime = q->now + delay; if (delay > 0) { - sched += PSCHED_US2JIFFIE(delay) + cl->penalty; + sched += delay + cl->penalty; cl->penalized = sched; cl->cpriority = TC_CBQ_MAXPRIO; q->pmask |= (1<<TC_CBQ_MAXPRIO); - if (del_timer(&q->delay_timer) && - (long)(q->delay_timer.expires - sched) > 0) - q->delay_timer.expires = sched; - add_timer(&q->delay_timer); + + expires = ktime_set(0, 0); + expires = ktime_add_ns(expires, PSCHED_US2NS(sched)); + if (hrtimer_try_to_cancel(&q->delay_timer) && + ktime_to_ns(ktime_sub(q->delay_timer.expires, + expires)) > 0) + q->delay_timer.expires = expires; + hrtimer_restart(&q->delay_timer); cl->delayed = 1; cl->xstats.overactions++; return; @@ -583,7 +589,7 @@ static void cbq_ovl_lowprio(struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - cl->penalized = jiffies + cl->penalty; + cl->penalized = q->now + cl->penalty; if (cl->cpriority != cl->priority2) { cl->cpriority = cl->priority2; @@ -604,27 +610,19 @@ static void cbq_ovl_drop(struct cbq_class *cl) cbq_ovl_classic(cl); } -static void cbq_watchdog(unsigned long arg) -{ - struct Qdisc *sch = (struct Qdisc*)arg; - - sch->flags &= ~TCQ_F_THROTTLED; - netif_schedule(sch->dev); -} - -static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio) +static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio, + psched_time_t now) { struct cbq_class *cl; struct cbq_class *cl_prev = q->active[prio]; - unsigned long now = jiffies; - unsigned long sched = now; + psched_time_t sched = now; if (cl_prev == NULL) - return now; + return 0; do { cl = cl_prev->next_alive; - if ((long)(now - cl->penalized) > 0) { + if (now - cl->penalized > 0) { cl_prev->next_alive = cl->next_alive; cl->next_alive = NULL; cl->cpriority = cl->priority; @@ -640,30 +638,34 @@ static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio) } cl = cl_prev->next_alive; - } else if ((long)(sched - cl->penalized) > 0) + } else if (sched - cl->penalized > 0) sched = cl->penalized; } while ((cl_prev = cl) != q->active[prio]); - return (long)(sched - now); + return sched - now; } -static void cbq_undelay(unsigned long arg) +static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) { - struct Qdisc *sch = (struct Qdisc*)arg; - struct cbq_sched_data *q = qdisc_priv(sch); - long delay = 0; + struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data, + delay_timer); + struct Qdisc *sch = q->watchdog.qdisc; + psched_time_t now; + psched_tdiff_t delay = 0; unsigned pmask; + now = psched_get_time(); + pmask = q->pmask; q->pmask = 0; while (pmask) { int prio = ffz(~pmask); - long tmp; + psched_tdiff_t tmp; pmask &= ~(1<<prio); - tmp = cbq_undelay_prio(q, prio); + tmp = cbq_undelay_prio(q, prio, now); if (tmp > 0) { q->pmask |= 1<<prio; if (tmp < delay || delay == 0) @@ -672,12 +674,16 @@ static void cbq_undelay(unsigned long arg) } if (delay) { - q->delay_timer.expires = jiffies + delay; - add_timer(&q->delay_timer); + ktime_t time; + + time = ktime_set(0, 0); + time = ktime_add_ns(time, PSCHED_US2NS(now + delay)); + hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); } sch->flags &= ~TCQ_F_THROTTLED; netif_schedule(sch->dev); + return HRTIMER_NORESTART; } @@ -732,7 +738,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, if (cl && q->toplevel >= borrowed->level) { if (cl->q->q.qlen > 1) { do { - if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) { + if (borrowed->undertime == PSCHED_PASTPERFECT) { q->toplevel = borrowed->level; return; } @@ -770,7 +776,7 @@ cbq_update(struct cbq_sched_data *q) idle = (now - last) - last_pktlen/rate */ - idle = PSCHED_TDIFF(q->now, cl->last); + idle = q->now - cl->last; if ((unsigned long)idle > 128*1024*1024) { avgidle = cl->maxidle; } else { @@ -814,13 +820,11 @@ cbq_update(struct cbq_sched_data *q) idle -= L2T(&q->link, len); idle += L2T(cl, len); - PSCHED_AUDIT_TDIFF(idle); - - PSCHED_TADD2(q->now, idle, cl->undertime); + cl->undertime = q->now + idle; } else { /* Underlimit */ - PSCHED_SET_PASTPERFECT(cl->undertime); + cl->undertime = PSCHED_PASTPERFECT; if (avgidle > cl->maxidle) cl->avgidle = cl->maxidle; else @@ -841,8 +845,7 @@ cbq_under_limit(struct cbq_class *cl) if (cl->tparent == NULL) return cl; - if (PSCHED_IS_PASTPERFECT(cl->undertime) || - !PSCHED_TLESS(q->now, cl->undertime)) { + if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) { cl->delayed = 0; return cl; } @@ -865,8 +868,7 @@ cbq_under_limit(struct cbq_class *cl) } if (cl->level > q->toplevel) return NULL; - } while (!PSCHED_IS_PASTPERFECT(cl->undertime) && - PSCHED_TLESS(q->now, cl->undertime)); + } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime); cl->delayed = 0; return cl; @@ -1001,8 +1003,8 @@ cbq_dequeue(struct Qdisc *sch) psched_time_t now; psched_tdiff_t incr; - PSCHED_GET_TIME(now); - incr = PSCHED_TDIFF(now, q->now_rt); + now = psched_get_time(); + incr = now - q->now_rt; if (q->tx_class) { psched_tdiff_t incr2; @@ -1014,12 +1016,12 @@ cbq_dequeue(struct Qdisc *sch) cbq_time = max(real_time, work); */ incr2 = L2T(&q->link, q->tx_len); - PSCHED_TADD(q->now, incr2); + q->now += incr2; cbq_update(q); if ((incr -= incr2) < 0) incr = 0; } - PSCHED_TADD(q->now, incr); + q->now += incr; q->now_rt = now; for (;;) { @@ -1051,11 +1053,11 @@ cbq_dequeue(struct Qdisc *sch) */ if (q->toplevel == TC_CBQ_MAXLEVEL && - PSCHED_IS_PASTPERFECT(q->link.undertime)) + q->link.undertime == PSCHED_PASTPERFECT) break; q->toplevel = TC_CBQ_MAXLEVEL; - PSCHED_SET_PASTPERFECT(q->link.undertime); + q->link.undertime = PSCHED_PASTPERFECT; } /* No packets in scheduler or nobody wants to give them to us :-( @@ -1063,13 +1065,9 @@ cbq_dequeue(struct Qdisc *sch) if (sch->q.qlen) { sch->qstats.overlimits++; - if (q->wd_expires) { - long delay = PSCHED_US2JIFFIE(q->wd_expires); - if (delay <= 0) - delay = 1; - mod_timer(&q->wd_timer, jiffies + delay); - sch->flags |= TCQ_F_THROTTLED; - } + if (q->wd_expires) + qdisc_watchdog_schedule(&q->watchdog, + now + q->wd_expires); } return NULL; } @@ -1276,10 +1274,10 @@ cbq_reset(struct Qdisc* sch) q->pmask = 0; q->tx_class = NULL; q->tx_borrowed = NULL; - del_timer(&q->wd_timer); - del_timer(&q->delay_timer); + qdisc_watchdog_cancel(&q->watchdog); + hrtimer_cancel(&q->delay_timer); q->toplevel = TC_CBQ_MAXLEVEL; - PSCHED_GET_TIME(q->now); + q->now = psched_get_time(); q->now_rt = q->now; for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) @@ -1290,7 +1288,7 @@ cbq_reset(struct Qdisc* sch) qdisc_reset(cl->q); cl->next_alive = NULL; - PSCHED_SET_PASTPERFECT(cl->undertime); + cl->undertime = PSCHED_PASTPERFECT; cl->avgidle = cl->maxidle; cl->deficit = cl->quantum; cl->cpriority = cl->priority; @@ -1379,7 +1377,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) default: return -EINVAL; } - cl->penalty = (ovl->penalty*HZ)/1000; + cl->penalty = ovl->penalty; return 0; } @@ -1446,14 +1444,11 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) q->link.minidle = -0x7FFFFFFF; q->link.stats_lock = &sch->dev->queue_lock; - init_timer(&q->wd_timer); - q->wd_timer.data = (unsigned long)sch; - q->wd_timer.function = cbq_watchdog; - init_timer(&q->delay_timer); - q->delay_timer.data = (unsigned long)sch; + qdisc_watchdog_init(&q->watchdog, sch); + hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); q->delay_timer.function = cbq_undelay; q->toplevel = TC_CBQ_MAXLEVEL; - PSCHED_GET_TIME(q->now); + q->now = psched_get_time(); q->now_rt = q->now; cbq_link_class(&q->link); @@ -1467,19 +1462,19 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate); return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_lssopt opt; opt.flags = 0; @@ -1498,13 +1493,13 @@ static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_wrropt opt; opt.flags = 0; @@ -1516,30 +1511,30 @@ static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_ovl opt; opt.strategy = cl->ovl_strategy; opt.priority2 = cl->priority2+1; opt.pad = 0; - opt.penalty = (cl->penalty*1000)/HZ; + opt.penalty = cl->penalty; RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_fopt opt; if (cl->split || cl->defmap) { @@ -1551,14 +1546,14 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } #ifdef CONFIG_NET_CLS_POLICE static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_police opt; if (cl->police) { @@ -1570,7 +1565,7 @@ static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } #endif @@ -1592,18 +1587,18 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct cbq_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; rta = (struct rtattr*)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); if (cbq_dump_attr(skb, &q->link) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -1621,7 +1616,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct cbq_class *cl = (struct cbq_class*)arg; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; if (cl->tparent) @@ -1635,11 +1630,11 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg, RTA_PUT(skb, TCA_OPTIONS, 0, NULL); if (cbq_dump_attr(skb, cl) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -1654,8 +1649,8 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, cl->xstats.avgidle = cl->avgidle; cl->xstats.undertime = 0; - if (!PSCHED_IS_PASTPERFECT(cl->undertime)) - cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now); + if (cl->undertime != PSCHED_PASTPERFECT) + cl->xstats.undertime = cl->undertime - q->now; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || #ifdef CONFIG_NET_ESTIMATOR @@ -1722,23 +1717,13 @@ static unsigned long cbq_get(struct Qdisc *sch, u32 classid) return 0; } -static void cbq_destroy_filters(struct cbq_class *cl) -{ - struct tcf_proto *tp; - - while ((tp = cl->filter_list) != NULL) { - cl->filter_list = tp->next; - tcf_destroy(tp); - } -} - static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(sch); BUG_TRAP(!cl->filters); - cbq_destroy_filters(cl); + tcf_destroy_chain(cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); #ifdef CONFIG_NET_ESTIMATOR @@ -1765,7 +1750,7 @@ cbq_destroy(struct Qdisc* sch) */ for (h = 0; h < 16; h++) for (cl = q->classes[h]; cl; cl = cl->next) - cbq_destroy_filters(cl); + tcf_destroy_chain(cl->filter_list); for (h = 0; h < 16; h++) { struct cbq_class *next; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 96324cf4e6a9..3c6fd181263f 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -216,17 +216,17 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) /* FIXME: Safe with non-linear skbs? --RR */ switch (skb->protocol) { case __constant_htons(ETH_P_IP): - skb->tc_index = ipv4_get_dsfield(skb->nh.iph) + skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK; break; case __constant_htons(ETH_P_IPV6): - skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h) + skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK; break; default: skb->tc_index = 0; break; - }; + } } if (TC_H_MAJ(skb->priority) == sch->handle) @@ -257,7 +257,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch) if (p->default_index != NO_DEFAULT_INDEX) skb->tc_index = p->default_index; break; - }; + } } err = p->q->enqueue(skb,p->q); @@ -292,11 +292,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) switch (skb->protocol) { case __constant_htons(ETH_P_IP): - ipv4_change_dsfield(skb->nh.iph, p->mask[index], + ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); break; case __constant_htons(ETH_P_IPV6): - ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index], + ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index], p->value[index]); break; default: @@ -310,7 +310,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) "unsupported protocol %d\n", ntohs(skb->protocol)); break; - }; + } return skb; } @@ -412,16 +412,10 @@ static void dsmark_reset(struct Qdisc *sch) static void dsmark_destroy(struct Qdisc *sch) { struct dsmark_qdisc_data *p = PRIV(sch); - struct tcf_proto *tp; DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); - while (p->filter_list) { - tp = p->filter_list; - p->filter_list = tp->next; - tcf_destroy(tp); - } - + tcf_destroy_chain(p->filter_list); qdisc_destroy(p->q); kfree(p->mask); } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 52eb3439d7c6..3385ee592541 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -36,34 +36,27 @@ /* Main transmission queue. */ -/* Main qdisc structure lock. - - However, modifications - to data, participating in scheduling must be additionally - protected with dev->queue_lock spinlock. - - The idea is the following: - - enqueue, dequeue are serialized via top level device - spinlock dev->queue_lock. - - tree walking is protected by read_lock(qdisc_tree_lock) - and this lock is used only in process context. - - updates to tree are made only under rtnl semaphore, - hence this lock may be made without local bh disabling. - - qdisc_tree_lock must be grabbed BEFORE dev->queue_lock! +/* Modifications to data participating in scheduling must be protected with + * dev->queue_lock spinlock. + * + * The idea is the following: + * - enqueue, dequeue are serialized via top level device + * spinlock dev->queue_lock. + * - ingress filtering is serialized via top level device + * spinlock dev->ingress_lock. + * - updates to tree and tree walking are only done under the rtnl mutex. */ -DEFINE_RWLOCK(qdisc_tree_lock); void qdisc_lock_tree(struct net_device *dev) { - write_lock(&qdisc_tree_lock); spin_lock_bh(&dev->queue_lock); + spin_lock(&dev->ingress_lock); } void qdisc_unlock_tree(struct net_device *dev) { + spin_unlock(&dev->ingress_lock); spin_unlock_bh(&dev->queue_lock); - write_unlock(&qdisc_tree_lock); } /* @@ -442,7 +435,6 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) sch->dequeue = ops->dequeue; sch->dev = dev; dev_hold(dev); - sch->stats_lock = &dev->queue_lock; atomic_set(&sch->refcnt, 1); return sch; @@ -458,6 +450,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, sch = qdisc_alloc(dev, ops); if (IS_ERR(sch)) goto errout; + sch->stats_lock = &dev->queue_lock; sch->parent = parentid; if (!ops->init || ops->init(sch, NULL) == 0) @@ -528,15 +521,11 @@ void dev_activate(struct net_device *dev) printk(KERN_INFO "%s: activation failed\n", dev->name); return; } - write_lock(&qdisc_tree_lock); list_add_tail(&qdisc->list, &dev->qdisc_list); - write_unlock(&qdisc_tree_lock); } else { qdisc = &noqueue_qdisc; } - write_lock(&qdisc_tree_lock); dev->qdisc_sleeping = qdisc; - write_unlock(&qdisc_tree_lock); } if (!netif_carrier_ok(dev)) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 396deb71480f..9d124c4ee3a7 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -59,13 +59,13 @@ #include <linux/skbuff.h> #include <linux/string.h> #include <linux/slab.h> -#include <linux/timer.h> #include <linux/list.h> #include <linux/rbtree.h> #include <linux/init.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/pkt_sched.h> +#include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <asm/system.h> @@ -192,23 +192,9 @@ struct hfsc_sched struct list_head droplist; /* active leaf class list (for dropping) */ struct sk_buff_head requeue; /* requeued packet */ - struct timer_list wd_timer; /* watchdog timer */ + struct qdisc_watchdog watchdog; /* watchdog timer */ }; -/* - * macros - */ -#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY -#include <linux/time.h> -#undef PSCHED_GET_TIME -#define PSCHED_GET_TIME(stamp) \ -do { \ - struct timeval tv; \ - do_gettimeofday(&tv); \ - (stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec; \ -} while (0) -#endif - #define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ @@ -394,28 +380,17 @@ cftree_update(struct hfsc_class *cl) * ism: (psched_us/byte) << ISM_SHIFT * dx: psched_us * - * Clock source resolution (CONFIG_NET_SCH_CLK_*) - * JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us. - * CPU: resolution is between 0.5us and 1us. - * GETTIMEOFDAY: resolution is exactly 1us. + * The clock source resolution with ktime is 1.024us. * * sm and ism are scaled in order to keep effective digits. * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective * digits in decimal using the following table. * - * Note: We can afford the additional accuracy (altq hfsc keeps at most - * 3 effective digits) thanks to the fact that linux clock is bounded - * much more tightly. - * * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps * ------------+------------------------------------------------------- - * bytes/0.5us 6.25e-3 62.5e-3 625e-3 6250e-e 62500e-3 - * bytes/us 12.5e-3 125e-3 1250e-3 12500e-3 125000e-3 - * bytes/1.27us 15.875e-3 158.75e-3 1587.5e-3 15875e-3 158750e-3 + * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3 * - * 0.5us/byte 160 16 1.6 0.16 0.016 - * us/byte 80 8 0.8 0.08 0.008 - * 1.27us/byte 63 6.3 0.63 0.063 0.0063 + * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125 */ #define SM_SHIFT 20 #define ISM_SHIFT 18 @@ -460,8 +435,8 @@ m2sm(u32 m) u64 sm; sm = ((u64)m << SM_SHIFT); - sm += PSCHED_JIFFIE2US(HZ) - 1; - do_div(sm, PSCHED_JIFFIE2US(HZ)); + sm += PSCHED_TICKS_PER_SEC - 1; + do_div(sm, PSCHED_TICKS_PER_SEC); return sm; } @@ -474,7 +449,7 @@ m2ism(u32 m) if (m == 0) ism = HT_INFINITY; else { - ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT); + ism = ((u64)PSCHED_TICKS_PER_SEC << ISM_SHIFT); ism += m - 1; do_div(ism, m); } @@ -487,7 +462,7 @@ d2dx(u32 d) { u64 dx; - dx = ((u64)d * PSCHED_JIFFIE2US(HZ)); + dx = ((u64)d * PSCHED_TICKS_PER_SEC); dx += USEC_PER_SEC - 1; do_div(dx, USEC_PER_SEC); return dx; @@ -499,7 +474,7 @@ sm2m(u64 sm) { u64 m; - m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT; + m = (sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT; return (u32)m; } @@ -510,7 +485,7 @@ dx2d(u64 dx) u64 d; d = dx * USEC_PER_SEC; - do_div(d, PSCHED_JIFFIE2US(HZ)); + do_div(d, PSCHED_TICKS_PER_SEC); return (u32)d; } @@ -654,9 +629,7 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) static void init_ed(struct hfsc_class *cl, unsigned int next_len) { - u64 cur_time; - - PSCHED_GET_TIME(cur_time); + u64 cur_time = psched_get_time(); /* update the deadline curve */ rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul); @@ -779,7 +752,7 @@ init_vf(struct hfsc_class *cl, unsigned int len) if (cl->cl_flags & HFSC_USC) { /* class has upper limit curve */ if (cur_time == 0) - PSCHED_GET_TIME(cur_time); + cur_time = psched_get_time(); /* update the ulimit curve */ rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time, @@ -1063,7 +1036,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl->cl_parent == NULL && parentid != TC_H_ROOT) return -EINVAL; } - PSCHED_GET_TIME(cur_time); + cur_time = psched_get_time(); sch_tree_lock(sch); if (rsc != NULL) @@ -1149,22 +1122,11 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } static void -hfsc_destroy_filters(struct tcf_proto **fl) -{ - struct tcf_proto *tp; - - while ((tp = *fl) != NULL) { - *fl = tp->next; - tcf_destroy(tp); - } -} - -static void hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) { struct hfsc_sched *q = qdisc_priv(sch); - hfsc_destroy_filters(&cl->filter_list); + tcf_destroy_chain(cl->filter_list); qdisc_destroy(cl->qdisc); #ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&cl->bstats, &cl->rate_est); @@ -1184,10 +1146,12 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - list_del(&cl->hlist); list_del(&cl->siblings); hfsc_adjust_levels(cl->cl_parent); + hfsc_purge_queue(sch, cl); + list_del(&cl->hlist); + if (--cl->refcnt == 0) hfsc_destroy_class(sch, cl); @@ -1387,7 +1351,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct hfsc_class *cl = (struct hfsc_class *)arg; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta = (struct rtattr *)b; tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT; @@ -1398,11 +1362,11 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, RTA_PUT(skb, TCA_OPTIONS, 0, NULL); if (hfsc_dump_curves(skb, cl) < 0) goto rtattr_failure; - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -1457,21 +1421,11 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) } static void -hfsc_watchdog(unsigned long arg) -{ - struct Qdisc *sch = (struct Qdisc *)arg; - - sch->flags &= ~TCQ_F_THROTTLED; - netif_schedule(sch->dev); -} - -static void -hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time) +hfsc_schedule_watchdog(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; u64 next_time = 0; - long delay; if ((cl = eltree_get_minel(q)) != NULL) next_time = cl->cl_e; @@ -1480,11 +1434,7 @@ hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time) next_time = q->root.cl_cfmin; } WARN_ON(next_time == 0); - delay = next_time - cur_time; - delay = PSCHED_US2JIFFIE(delay); - - sch->flags |= TCQ_F_THROTTLED; - mod_timer(&q->wd_timer, jiffies + delay); + qdisc_watchdog_schedule(&q->watchdog, next_time); } static int @@ -1521,9 +1471,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]); - init_timer(&q->wd_timer); - q->wd_timer.function = hfsc_watchdog; - q->wd_timer.data = (unsigned long)sch; + qdisc_watchdog_init(&q->watchdog, sch); return 0; } @@ -1593,8 +1541,7 @@ hfsc_reset_qdisc(struct Qdisc *sch) __skb_queue_purge(&q->requeue); q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); - del_timer(&q->wd_timer); - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_watchdog_cancel(&q->watchdog); sch->q.qlen = 0; } @@ -1610,14 +1557,14 @@ hfsc_destroy_qdisc(struct Qdisc *sch) hfsc_destroy_class(sch, cl); } __skb_queue_purge(&q->requeue); - del_timer(&q->wd_timer); + qdisc_watchdog_cancel(&q->watchdog); } static int hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) { struct hfsc_sched *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; qopt.defcls = q->defcls; @@ -1625,7 +1572,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -1679,7 +1626,7 @@ hfsc_dequeue(struct Qdisc *sch) if ((skb = __skb_dequeue(&q->requeue))) goto out; - PSCHED_GET_TIME(cur_time); + cur_time = psched_get_time(); /* * if there are eligible classes, use real-time criteria. @@ -1696,7 +1643,7 @@ hfsc_dequeue(struct Qdisc *sch) cl = vttree_get_minvt(&q->root, cur_time); if (cl == NULL) { sch->qstats.overlimits++; - hfsc_schedule_watchdog(sch, cur_time); + hfsc_schedule_watchdog(sch); return NULL; } } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 97cbb9aec946..99bcec8dd04c 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -50,6 +50,7 @@ #include <linux/skbuff.h> #include <linux/list.h> #include <linux/compiler.h> +#include <net/netlink.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <linux/rbtree.h> @@ -128,7 +129,7 @@ struct htb_class { } un; struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ struct rb_node pq_node; /* node for event queue */ - unsigned long pq_key; /* the same type as jiffies global */ + psched_time_t pq_key; int prio_activity; /* for which prios are we active */ enum htb_cmode cmode; /* current mode of the class */ @@ -179,10 +180,7 @@ struct htb_sched { struct rb_root wait_pq[TC_HTB_MAXDEPTH]; /* time of nearest event per level (row) */ - unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; - - /* cached value of jiffies in dequeue */ - unsigned long jiffies; + psched_time_t near_ev_cache[TC_HTB_MAXDEPTH]; /* whether we hit non-work conserving class during this dequeue; we use */ int nwc_hit; /* this to disable mindelay complaint in dequeue */ @@ -195,7 +193,7 @@ struct htb_sched { int rate2quantum; /* quant = rate / rate2quantum */ psched_time_t now; /* cached dequeue time */ - struct timer_list timer; /* send delay timer */ + struct qdisc_watchdog watchdog; #ifdef HTB_RATECM struct timer_list rttim; /* rate computer timer */ int recmp_bucket; /* which hash bucket to recompute next */ @@ -342,19 +340,19 @@ static void htb_add_to_wait_tree(struct htb_sched *q, { struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; - cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay); - if (cl->pq_key == q->jiffies) + cl->pq_key = q->now + delay; + if (cl->pq_key == q->now) cl->pq_key++; /* update the nearest event cache */ - if (time_after(q->near_ev_cache[cl->level], cl->pq_key)) + if (q->near_ev_cache[cl->level] > cl->pq_key) q->near_ev_cache[cl->level] = cl->pq_key; while (*p) { struct htb_class *c; parent = *p; c = rb_entry(parent, struct htb_class, pq_node); - if (time_after_eq(cl->pq_key, c->pq_key)) + if (cl->pq_key >= c->pq_key) p = &parent->rb_right; else p = &parent->rb_left; @@ -679,14 +677,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -static void htb_timer(unsigned long arg) -{ - struct Qdisc *sch = (struct Qdisc *)arg; - sch->flags &= ~TCQ_F_THROTTLED; - wmb(); - netif_schedule(sch->dev); -} - #ifdef HTB_RATECM #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 static void htb_rate_timer(unsigned long arg) @@ -739,7 +729,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, cl->T = toks while (cl) { - diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); + diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); if (cl->level >= level) { if (cl->level == level) cl->xstats.lends++; @@ -778,11 +768,11 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, /** * htb_do_events - make mode changes to classes at the level * - * Scans event queue for pending events and applies them. Returns jiffies to + * Scans event queue for pending events and applies them. Returns time of * next pending event (0 for no event in pq). - * Note: Aplied are events whose have cl->pq_key <= jiffies. + * Note: Applied are events whose have cl->pq_key <= q->now. */ -static long htb_do_events(struct htb_sched *q, int level) +static psched_time_t htb_do_events(struct htb_sched *q, int level) { int i; @@ -795,18 +785,18 @@ static long htb_do_events(struct htb_sched *q, int level) return 0; cl = rb_entry(p, struct htb_class, pq_node); - if (time_after(cl->pq_key, q->jiffies)) { - return cl->pq_key - q->jiffies; - } + if (cl->pq_key > q->now) + return cl->pq_key; + htb_safe_rb_erase(p, q->wait_pq + level); - diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); + diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); htb_change_class_mode(q, cl, &diff); if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } if (net_ratelimit()) printk(KERN_WARNING "htb: too many events !\n"); - return HZ / 10; + return q->now + PSCHED_TICKS_PER_SEC / 10; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL @@ -958,30 +948,12 @@ next: return skb; } -static void htb_delay_by(struct Qdisc *sch, long delay) -{ - struct htb_sched *q = qdisc_priv(sch); - if (delay <= 0) - delay = 1; - if (unlikely(delay > 5 * HZ)) { - if (net_ratelimit()) - printk(KERN_INFO "HTB delay %ld > 5sec\n", delay); - delay = 5 * HZ; - } - /* why don't use jiffies here ? because expires can be in past */ - mod_timer(&q->timer, q->jiffies + delay); - sch->flags |= TCQ_F_THROTTLED; - sch->qstats.overlimits++; -} - static struct sk_buff *htb_dequeue(struct Qdisc *sch) { struct sk_buff *skb = NULL; struct htb_sched *q = qdisc_priv(sch); int level; - long min_delay; - - q->jiffies = jiffies; + psched_time_t next_event; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ skb = __skb_dequeue(&q->direct_queue); @@ -993,23 +965,25 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) if (!sch->q.qlen) goto fin; - PSCHED_GET_TIME(q->now); + q->now = psched_get_time(); - min_delay = LONG_MAX; + next_event = q->now + 5 * PSCHED_TICKS_PER_SEC; q->nwc_hit = 0; for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ int m; - long delay; - if (time_after_eq(q->jiffies, q->near_ev_cache[level])) { - delay = htb_do_events(q, level); - q->near_ev_cache[level] = - q->jiffies + (delay ? delay : HZ); + psched_time_t event; + + if (q->now >= q->near_ev_cache[level]) { + event = htb_do_events(q, level); + q->near_ev_cache[level] = event ? event : + PSCHED_TICKS_PER_SEC; } else - delay = q->near_ev_cache[level] - q->jiffies; + event = q->near_ev_cache[level]; + + if (event && next_event > event) + next_event = event; - if (delay && min_delay > delay) - min_delay = delay; m = ~q->row_mask[level]; while (m != (int)(-1)) { int prio = ffz(m); @@ -1022,7 +996,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) } } } - htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay); + sch->qstats.overlimits++; + qdisc_watchdog_schedule(&q->watchdog, next_event); fin: return skb; } @@ -1075,8 +1050,7 @@ static void htb_reset(struct Qdisc *sch) } } - sch->flags &= ~TCQ_F_THROTTLED; - del_timer(&q->timer); + qdisc_watchdog_cancel(&q->watchdog); __skb_queue_purge(&q->direct_queue); sch->q.qlen = 0; memset(q->row, 0, sizeof(q->row)); @@ -1113,14 +1087,12 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); - init_timer(&q->timer); + qdisc_watchdog_init(&q->watchdog, sch); skb_queue_head_init(&q->direct_queue); q->direct_qlen = sch->dev->tx_queue_len; if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; - q->timer.function = htb_timer; - q->timer.data = (unsigned long)sch; #ifdef HTB_RATECM init_timer(&q->rttim); @@ -1139,7 +1111,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { struct htb_sched *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; struct tc_htb_glob gopt; spin_lock_bh(&sch->dev->queue_lock); @@ -1152,12 +1124,12 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) rta = (struct rtattr *)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; spin_unlock_bh(&sch->dev->queue_lock); return skb->len; rtattr_failure: spin_unlock_bh(&sch->dev->queue_lock); - skb_trim(skb, skb->tail - skb->data); + nlmsg_trim(skb, skb_tail_pointer(skb)); return -1; } @@ -1165,7 +1137,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; struct tc_htb_opt opt; @@ -1188,12 +1160,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.prio = cl->un.leaf.prio; opt.level = cl->level; RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; spin_unlock_bh(&sch->dev->queue_lock); return skb->len; rtattr_failure: spin_unlock_bh(&sch->dev->queue_lock); - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } @@ -1264,16 +1236,6 @@ static unsigned long htb_get(struct Qdisc *sch, u32 classid) return (unsigned long)cl; } -static void htb_destroy_filters(struct tcf_proto **fl) -{ - struct tcf_proto *tp; - - while ((tp = *fl) != NULL) { - *fl = tp->next; - tcf_destroy(tp); - } -} - static inline int htb_parent_last_child(struct htb_class *cl) { if (!cl->parent) @@ -1302,7 +1264,7 @@ static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q) parent->un.leaf.prio = parent->prio; parent->tokens = parent->buffer; parent->ctokens = parent->cbuffer; - PSCHED_GET_TIME(parent->t_c); + parent->t_c = psched_get_time(); parent->cmode = HTB_CAN_SEND; } @@ -1317,7 +1279,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); - htb_destroy_filters(&cl->filter_list); + tcf_destroy_chain(cl->filter_list); while (!list_empty(&cl->children)) htb_destroy_class(sch, list_entry(cl->children.next, @@ -1341,7 +1303,7 @@ static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - del_timer_sync(&q->timer); + qdisc_watchdog_cancel(&q->watchdog); #ifdef HTB_RATECM del_timer_sync(&q->rttim); #endif @@ -1349,7 +1311,7 @@ static void htb_destroy(struct Qdisc *sch) and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call unbind_filter on it (without Oops). */ - htb_destroy_filters(&q->filter_list); + tcf_destroy_chain(q->filter_list); while (!list_empty(&q->root)) htb_destroy_class(sch, list_entry(q->root.next, @@ -1380,15 +1342,15 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) sch_tree_lock(sch); - /* delete from hash and active; remainder in destroy_class */ - hlist_del_init(&cl->hlist); - if (!cl->level) { qlen = cl->un.leaf.q->q.qlen; qdisc_reset(cl->un.leaf.q); qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen); } + /* delete from hash and active; remainder in destroy_class */ + hlist_del_init(&cl->hlist); + if (cl->prio_activity) htb_deactivate(q, cl); @@ -1498,8 +1460,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* set class to be in HTB_CAN_SEND state */ cl->tokens = hopt->buffer; cl->ctokens = hopt->cbuffer; - cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */ - PSCHED_GET_TIME(cl->t_c); + cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */ + cl->t_c = psched_get_time(); cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index cfe070ee6ee3..f8b9f1cdf738 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -16,6 +16,7 @@ #include <linux/netfilter_ipv6.h> #include <linux/netfilter.h> #include <linux/smp.h> +#include <net/netlink.h> #include <net/pkt_sched.h> #include <asm/byteorder.h> #include <asm/uaccess.h> @@ -169,7 +170,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch) skb->tc_index = TC_H_MIN(res.classid); result = TC_ACT_OK; break; - }; + } /* backward compat */ #else #ifdef CONFIG_NET_CLS_POLICE @@ -186,7 +187,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch) sch->bstats.bytes += skb->len; result = NF_ACCEPT; break; - }; + } #else D2PRINTK("Overriding result to ACCEPT\n"); @@ -247,16 +248,11 @@ ing_hook(unsigned int hook, struct sk_buff **pskb, skb->dev ? (*pskb)->dev->name : "(no dev)", skb->len); -/* -revisit later: Use a private since lock dev->queue_lock is also -used on the egress (might slow things for an iota) -*/ - if (dev->qdisc_ingress) { - spin_lock(&dev->queue_lock); + spin_lock(&dev->ingress_lock); if ((q = dev->qdisc_ingress) != NULL) fwres = q->enqueue(skb, q); - spin_unlock(&dev->queue_lock); + spin_unlock(&dev->ingress_lock); } return fwres; @@ -345,14 +341,9 @@ static void ingress_reset(struct Qdisc *sch) static void ingress_destroy(struct Qdisc *sch) { struct ingress_qdisc_data *p = PRIV(sch); - struct tcf_proto *tp; DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p); - while (p->filter_list) { - tp = p->filter_list; - p->filter_list = tp->next; - tcf_destroy(tp); - } + tcf_destroy_chain(p->filter_list); #if 0 /* for future use */ qdisc_destroy(p->q); @@ -362,16 +353,16 @@ static void ingress_destroy(struct Qdisc *sch) static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) { - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; rta = (struct rtattr *) b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 1ccbfb55b0b8..5d9d8bc9cc3a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -22,6 +22,7 @@ #include <linux/skbuff.h> #include <linux/rtnetlink.h> +#include <net/netlink.h> #include <net/pkt_sched.h> #define VERSION "1.2" @@ -54,21 +55,22 @@ struct netem_sched_data { struct Qdisc *qdisc; - struct timer_list timer; + struct qdisc_watchdog watchdog; + + psched_tdiff_t latency; + psched_tdiff_t jitter; - u32 latency; u32 loss; u32 limit; u32 counter; u32 gap; - u32 jitter; u32 duplicate; u32 reorder; u32 corrupt; struct crndstate { - unsigned long last; - unsigned long rho; + u32 last; + u32 rho; } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; struct disttable { @@ -95,12 +97,12 @@ static void init_crandom(struct crndstate *state, unsigned long rho) * Next number depends on last value. * rho is scaled to avoid floating point. */ -static unsigned long get_crandom(struct crndstate *state) +static u32 get_crandom(struct crndstate *state) { u64 value, rho; unsigned long answer; - if (state->rho == 0) /* no correllation */ + if (state->rho == 0) /* no correlation */ return net_random(); value = net_random(); @@ -114,11 +116,13 @@ static unsigned long get_crandom(struct crndstate *state) * std deviation sigma. Uses table lookup to approximate the desired * distribution, and a uniformly-distributed pseudo-random source. */ -static long tabledist(unsigned long mu, long sigma, - struct crndstate *state, const struct disttable *dist) +static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, + struct crndstate *state, + const struct disttable *dist) { - long t, x; - unsigned long rnd; + psched_tdiff_t x; + long t; + u32 rnd; if (sigma == 0) return mu; @@ -213,8 +217,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) delay = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist); - PSCHED_GET_TIME(now); - PSCHED_TADD2(now, delay, cb->time_to_send); + now = psched_get_time(); + cb->time_to_send = now + delay; ++q->counter; ret = q->qdisc->enqueue(skb, q->qdisc); } else { @@ -222,7 +226,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) * Do re-ordering by putting one out of N packets at the front * of the queue. */ - PSCHED_GET_TIME(cb->time_to_send); + cb->time_to_send = psched_get_time(); q->counter = 0; ret = q->qdisc->ops->requeue(skb, q->qdisc); } @@ -269,55 +273,43 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; + smp_mb(); + if (sch->flags & TCQ_F_THROTTLED) + return NULL; + skb = q->qdisc->dequeue(q->qdisc); if (skb) { const struct netem_skb_cb *cb = (const struct netem_skb_cb *)skb->cb; - psched_time_t now; + psched_time_t now = psched_get_time(); /* if more time remaining? */ - PSCHED_GET_TIME(now); - - if (PSCHED_TLESS(cb->time_to_send, now)) { + if (cb->time_to_send <= now) { pr_debug("netem_dequeue: return skb=%p\n", skb); sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; return skb; - } else { - psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now); - - if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { - qdisc_tree_decrease_qlen(q->qdisc, 1); - sch->qstats.drops++; - printk(KERN_ERR "netem: queue discpline %s could not requeue\n", - q->qdisc->ops->id); - } + } - mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay)); - sch->flags |= TCQ_F_THROTTLED; + if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) { + qdisc_tree_decrease_qlen(q->qdisc, 1); + sch->qstats.drops++; + printk(KERN_ERR "netem: %s could not requeue\n", + q->qdisc->ops->id); } + + qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); } return NULL; } -static void netem_watchdog(unsigned long arg) -{ - struct Qdisc *sch = (struct Qdisc *)arg; - - pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen); - sch->flags &= ~TCQ_F_THROTTLED; - netif_schedule(sch->dev); -} - static void netem_reset(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); sch->q.qlen = 0; - sch->flags &= ~TCQ_F_THROTTLED; - del_timer_sync(&q->timer); + qdisc_watchdog_cancel(&q->watchdog); } /* Pass size change message down to embedded FIFO */ @@ -438,10 +430,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) q->loss = qopt->loss; q->duplicate = qopt->duplicate; - /* for compatiablity with earlier versions. - * if gap is set, need to assume 100% probablity + /* for compatibility with earlier versions. + * if gap is set, need to assume 100% probability */ - q->reorder = ~0; + if (q->gap) + q->reorder = ~0; /* Handle nested options after initial queue options. * Should have put all options in nested format but too late now. @@ -487,22 +480,28 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) */ struct fifo_sched_data { u32 limit; + psched_time_t oldest; }; static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct fifo_sched_data *q = qdisc_priv(sch); struct sk_buff_head *list = &sch->q; - const struct netem_skb_cb *ncb - = (const struct netem_skb_cb *)nskb->cb; + psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send; struct sk_buff *skb; if (likely(skb_queue_len(list) < q->limit)) { + /* Optimize for add at tail */ + if (likely(skb_queue_empty(list) || tnext >= q->oldest)) { + q->oldest = tnext; + return qdisc_enqueue_tail(nskb, sch); + } + skb_queue_reverse_walk(list, skb) { const struct netem_skb_cb *cb = (const struct netem_skb_cb *)skb->cb; - if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send)) + if (tnext >= cb->time_to_send) break; } @@ -515,7 +514,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } - return qdisc_drop(nskb, sch); + return qdisc_reshape_fail(nskb, sch); } static int tfifo_init(struct Qdisc *sch, struct rtattr *opt) @@ -531,6 +530,7 @@ static int tfifo_init(struct Qdisc *sch, struct rtattr *opt) } else q->limit = max_t(u32, sch->dev->tx_queue_len, 1); + q->oldest = PSCHED_PASTPERFECT; return 0; } @@ -567,9 +567,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) if (!opt) return -EINVAL; - init_timer(&q->timer); - q->timer.function = netem_watchdog; - q->timer.data = (unsigned long) sch; + qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); @@ -590,7 +588,7 @@ static void netem_destroy(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - del_timer_sync(&q->timer); + qdisc_watchdog_cancel(&q->watchdog); qdisc_destroy(q->qdisc); kfree(q->delay_dist); } @@ -598,7 +596,7 @@ static void netem_destroy(struct Qdisc *sch) static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) { const struct netem_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta = (struct rtattr *) b; struct tc_netem_qopt qopt; struct tc_netem_corr cor; @@ -626,12 +624,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) corrupt.correlation = q->corrupt_cor.rho; RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index de889f23f22a..269a6e17c6c4 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -32,6 +32,7 @@ #include <net/ip.h> #include <net/route.h> #include <linux/skbuff.h> +#include <net/netlink.h> #include <net/sock.h> #include <net/pkt_sched.h> @@ -61,7 +62,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS; case TC_ACT_SHOT: return NULL; - }; + } if (!q->filter_list ) { #else @@ -188,13 +189,8 @@ prio_destroy(struct Qdisc* sch) { int prio; struct prio_sched_data *q = qdisc_priv(sch); - struct tcf_proto *tp; - - while ((tp = q->filter_list) != NULL) { - q->filter_list = tp->next; - tcf_destroy(tp); - } + tcf_destroy_chain(q->filter_list); for (prio=0; prio<q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -271,7 +267,7 @@ static int prio_init(struct Qdisc *sch, struct rtattr *opt) static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_prio_qopt opt; opt.bands = q->bands; @@ -280,7 +276,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 66f32051a99b..96dfdf78d32c 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -30,6 +30,7 @@ #include <linux/notifier.h> #include <linux/init.h> #include <net/ip.h> +#include <net/netlink.h> #include <linux/ipv6.h> #include <net/route.h> #include <linux/skbuff.h> @@ -137,7 +138,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) switch (skb->protocol) { case __constant_htons(ETH_P_IP): { - struct iphdr *iph = skb->nh.iph; + const struct iphdr *iph = ip_hdr(skb); h = iph->daddr; h2 = iph->saddr^iph->protocol; if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && @@ -152,7 +153,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) } case __constant_htons(ETH_P_IPV6): { - struct ipv6hdr *iph = skb->nh.ipv6h; + struct ipv6hdr *iph = ipv6_hdr(skb); h = iph->daddr.s6_addr32[3]; h2 = iph->saddr.s6_addr32[3]^iph->nexthdr; if (iph->nexthdr == IPPROTO_TCP || @@ -461,7 +462,7 @@ static void sfq_destroy(struct Qdisc *sch) static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct sfq_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct tc_sfq_qopt opt; opt.quantum = q->quantum; @@ -476,7 +477,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 85da8daa61d2..53862953baaf 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -32,6 +32,7 @@ #include <linux/etherdevice.h> #include <linux/notifier.h> #include <net/ip.h> +#include <net/netlink.h> #include <net/route.h> #include <linux/skbuff.h> #include <net/sock.h> @@ -127,8 +128,8 @@ struct tbf_sched_data long tokens; /* Current number of B tokens */ long ptokens; /* Current number of P tokens */ psched_time_t t_c; /* Time check-point */ - struct timer_list wd_timer; /* Watchdog timer */ struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */ + struct qdisc_watchdog watchdog; /* Watchdog timer */ }; #define L2T(q,L) ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log]) @@ -185,14 +186,6 @@ static unsigned int tbf_drop(struct Qdisc* sch) return len; } -static void tbf_watchdog(unsigned long arg) -{ - struct Qdisc *sch = (struct Qdisc*)arg; - - sch->flags &= ~TCQ_F_THROTTLED; - netif_schedule(sch->dev); -} - static struct sk_buff *tbf_dequeue(struct Qdisc* sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -202,13 +195,12 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) if (skb) { psched_time_t now; - long toks, delay; + long toks; long ptoks = 0; unsigned int len = skb->len; - PSCHED_GET_TIME(now); - - toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->buffer); + now = psched_get_time(); + toks = psched_tdiff_bounded(now, q->t_c, q->buffer); if (q->P_tab) { ptoks = toks + q->ptokens; @@ -230,12 +222,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) return skb; } - delay = PSCHED_US2JIFFIE(max_t(long, -toks, -ptoks)); - - if (delay == 0) - delay = 1; - - mod_timer(&q->wd_timer, jiffies+delay); + qdisc_watchdog_schedule(&q->watchdog, + now + max_t(long, -toks, -ptoks)); /* Maybe we have a shorter packet in the queue, which can be sent now. It sounds cool, @@ -254,7 +242,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) sch->qstats.drops++; } - sch->flags |= TCQ_F_THROTTLED; sch->qstats.overlimits++; } return NULL; @@ -266,11 +253,10 @@ static void tbf_reset(struct Qdisc* sch) qdisc_reset(q->qdisc); sch->q.qlen = 0; - PSCHED_GET_TIME(q->t_c); + q->t_c = psched_get_time(); q->tokens = q->buffer; q->ptokens = q->mtu; - sch->flags &= ~TCQ_F_THROTTLED; - del_timer(&q->wd_timer); + qdisc_watchdog_cancel(&q->watchdog); } static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit) @@ -377,11 +363,8 @@ static int tbf_init(struct Qdisc* sch, struct rtattr *opt) if (opt == NULL) return -EINVAL; - PSCHED_GET_TIME(q->t_c); - init_timer(&q->wd_timer); - q->wd_timer.function = tbf_watchdog; - q->wd_timer.data = (unsigned long)sch; - + q->t_c = psched_get_time(); + qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; return tbf_change(sch, opt); @@ -391,7 +374,7 @@ static void tbf_destroy(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); - del_timer(&q->wd_timer); + qdisc_watchdog_cancel(&q->watchdog); if (q->P_tab) qdisc_put_rtab(q->P_tab); @@ -404,7 +387,7 @@ static void tbf_destroy(struct Qdisc *sch) static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) { struct tbf_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb_tail_pointer(skb); struct rtattr *rta; struct tc_tbf_qopt opt; @@ -420,12 +403,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) opt.mtu = q->mtu; opt.buffer = q->buffer; RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt); - rta->rta_len = skb->tail - b; + rta->rta_len = skb_tail_pointer(skb) - b; return skb->len; rtattr_failure: - skb_trim(skb, b - skb->data); + nlmsg_trim(skb, b); return -1; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 587123c61af9..d24914db7861 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -323,7 +323,7 @@ restart: nores = 1; break; } - __skb_pull(skb, skb->nh.raw - skb->data); + __skb_pull(skb, skb_network_offset(skb)); } while ((q = NEXT_SLAVE(q)) != start); if (nores && skb_res == NULL) { |