diff options
Diffstat (limited to 'net/sched')
32 files changed, 958 insertions, 373 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 87956a768d1b..403790cce7d2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -650,6 +650,18 @@ config NET_ACT_MIRRED To compile this code as a module, choose M here: the module will be called act_mirred. +config NET_ACT_SAMPLE + tristate "Traffic Sampling" + depends on NET_CLS_ACT + select PSAMPLE + ---help--- + Say Y here to allow packet sampling tc action. The packet sample + action consists of statistically choosing packets and sampling + them using the psample module. + + To compile this code as a module, choose M here: the + module will be called act_sample. + config NET_ACT_IPT tristate "IPtables targets" depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES @@ -707,6 +719,7 @@ config NET_ACT_SKBEDIT config NET_ACT_CSUM tristate "Checksum Updating" depends on NET_CLS_ACT && INET + select LIBCRC32C ---help--- Say Y here to update some common checksum after some direct packet alterations. @@ -763,6 +776,7 @@ config NET_ACT_SKBMOD config NET_ACT_IFE tristate "Inter-FE action based on IETF ForCES InterFE LFB" depends on NET_CLS_ACT + select NET_IFE ---help--- Say Y here to allow for sourcing and terminating metadata For details refer to netdev01 paper: diff --git a/net/sched/Makefile b/net/sched/Makefile index 4bdda3634e0b..7b915d226de7 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_NET_CLS_ACT) += act_api.o obj-$(CONFIG_NET_ACT_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o +obj-$(CONFIG_NET_ACT_SAMPLE) += act_sample.o obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index e10456ef6f7a..b70aa57319ea 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -24,6 +24,7 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <net/sch_generic.h> +#include <net/pkt_cls.h> #include <net/act_api.h> #include <net/netlink.h> @@ -33,6 +34,12 @@ static void free_tcf(struct rcu_head *head) free_percpu(p->cpu_bstats); free_percpu(p->cpu_qstats); + + if (p->act_cookie) { + kfree(p->act_cookie->data); + kfree(p->act_cookie); + } + kfree(p); } @@ -426,11 +433,9 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, { int ret = -1, i; - if (skb->tc_verd & TC_NCLS) { - skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - ret = TC_ACT_OK; - goto exec_done; - } + if (skb_skip_tc_classify(skb)) + return TC_ACT_OK; + for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; @@ -439,9 +444,8 @@ repeat: if (ret == TC_ACT_REPEAT) goto repeat; /* we need a ttl - JHS */ if (ret != TC_ACT_PIPE) - goto exec_done; + break; } -exec_done: return ret; } EXPORT_SYMBOL(tcf_action_exec); @@ -478,6 +482,12 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) goto nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) goto nla_put_failure; + if (a->act_cookie) { + if (nla_put(skb, TCA_ACT_COOKIE, a->act_cookie->len, + a->act_cookie->data)) + goto nla_put_failure; + } + nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; @@ -519,6 +529,22 @@ errout: return err; } +static int nla_memdup_cookie(struct tc_action *a, struct nlattr **tb) +{ + a->act_cookie = kzalloc(sizeof(*a->act_cookie), GFP_KERNEL); + if (!a->act_cookie) + return -ENOMEM; + + a->act_cookie->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL); + if (!a->act_cookie->data) { + kfree(a->act_cookie); + return -ENOMEM; + } + a->act_cookie->len = nla_len(tb[TCA_ACT_COOKIE]); + + return 0; +} + struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind) @@ -578,6 +604,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla, if (err < 0) goto err_mod; + if (tb[TCA_ACT_COOKIE]) { + int cklen = nla_len(tb[TCA_ACT_COOKIE]); + + if (cklen > TC_COOKIE_MAX_SIZE) { + err = -EINVAL; + tcf_hash_release(a, bind); + goto err_mod; + } + + if (nla_memdup_cookie(a, tb) < 0) { + err = -ENOMEM; + tcf_hash_release(a, bind); + goto err_mod; + } + } + /* module count goes up only when brand new policy is created * if it exists and is only bound to in a_o->init() then * ACT_P_CREATED is not returned (a zero is). @@ -817,10 +859,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, goto out_module_put; err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops); - if (err < 0) + if (err <= 0) goto out_module_put; - if (err == 0) - goto noflush_out; nla_nest_end(skb, nest); @@ -837,7 +877,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, out_module_put: module_put(ops->owner); err_out: -noflush_out: kfree_skb(skb); return err; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index a0edd80a44db..e978ccd4402c 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -30,6 +30,7 @@ #include <net/tcp.h> #include <net/udp.h> #include <net/ip6_checksum.h> +#include <net/sctp/checksum.h> #include <net/act_api.h> @@ -322,6 +323,25 @@ ignore_obscure_skb: return 1; } +static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) +{ + struct sctphdr *sctph; + + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) + return 1; + + sctph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*sctph)); + if (!sctph) + return 0; + + sctph->checksum = sctp_compute_cksum(skb, + skb_network_offset(skb) + ihl); + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) { const struct iphdr *iph; @@ -365,6 +385,11 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) ntohs(iph->tot_len), 1)) goto fail; break; + case IPPROTO_SCTP: + if ((update_flags & TCA_CSUM_UPDATE_FLAG_SCTP) && + !tcf_csum_sctp(skb, iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; } if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { @@ -481,6 +506,11 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) pl + sizeof(*ip6h), 1)) goto fail; goto done; + case IPPROTO_SCTP: + if ((update_flags & TCA_CSUM_UPDATE_FLAG_SCTP) && + !tcf_csum_sctp(skb, hl, pl + sizeof(*ip6h))) + goto fail; + goto done; default: goto ignore_skb; } diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 80b848d3f096..71e7ff22f7c9 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -32,6 +32,7 @@ #include <uapi/linux/tc_act/tc_ife.h> #include <net/tc_act/tc_ife.h> #include <linux/etherdevice.h> +#include <net/ife.h> #define IFE_TAB_MASK 15 @@ -46,23 +47,6 @@ static const struct nla_policy ife_policy[TCA_IFE_MAX + 1] = { [TCA_IFE_TYPE] = { .type = NLA_U16}, }; -/* Caller takes care of presenting data in network order -*/ -int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) -{ - u32 *tlv = (u32 *)(skbdata); - u16 totlen = nla_total_size(dlen); /*alignment + hdr */ - char *dptr = (char *)tlv + NLA_HDRLEN; - u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN); - - *tlv = htonl(htlv); - memset(dptr, 0, totlen - NLA_HDRLEN); - memcpy(dptr, dval, dlen); - - return totlen; -} -EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); - int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi) { u16 edata = 0; @@ -637,69 +621,59 @@ int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife, return 0; } -struct ifeheadr { - __be16 metalen; - u8 tlv_data[]; -}; - -struct meta_tlvhdr { - __be16 type; - __be16 len; -}; - static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; - struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data; - int ifehdrln = (int)ifehdr->metalen; - struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data); + u8 *ifehdr_end; + u8 *tlv_data; + u16 metalen; spin_lock(&ife->tcf_lock); bstats_update(&ife->tcf_bstats, skb); tcf_lastuse_update(&ife->tcf_tm); spin_unlock(&ife->tcf_lock); - ifehdrln = ntohs(ifehdrln); - if (unlikely(!pskb_may_pull(skb, ifehdrln))) { + if (skb_at_tc_ingress(skb)) + skb_push(skb, skb->dev->hard_header_len); + + tlv_data = ife_decode(skb, &metalen); + if (unlikely(!tlv_data)) { spin_lock(&ife->tcf_lock); ife->tcf_qstats.drops++; spin_unlock(&ife->tcf_lock); return TC_ACT_SHOT; } - skb_set_mac_header(skb, ifehdrln); - __skb_pull(skb, ifehdrln); - skb->protocol = eth_type_trans(skb, skb->dev); - ifehdrln -= IFE_METAHDRLEN; - - while (ifehdrln > 0) { - u8 *tlvdata = (u8 *)tlv; - u16 mtype = tlv->type; - u16 mlen = tlv->len; - u16 alen; + ifehdr_end = tlv_data + metalen; + for (; tlv_data < ifehdr_end; tlv_data = ife_tlv_meta_next(tlv_data)) { + u8 *curr_data; + u16 mtype; + u16 dlen; - mtype = ntohs(mtype); - mlen = ntohs(mlen); - alen = NLA_ALIGN(mlen); + curr_data = ife_tlv_meta_decode(tlv_data, &mtype, &dlen, NULL); - if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN), - (void *)(tlvdata + NLA_HDRLEN))) { + if (find_decode_metaid(skb, ife, mtype, dlen, curr_data)) { /* abuse overlimits to count when we receive metadata * but dont have an ops for it */ - pr_info_ratelimited("Unknown metaid %d alnlen %d\n", - mtype, mlen); + pr_info_ratelimited("Unknown metaid %d dlen %d\n", + mtype, dlen); ife->tcf_qstats.overlimits++; } + } - tlvdata += alen; - ifehdrln -= alen; - tlv = (struct meta_tlvhdr *)tlvdata; + if (WARN_ON(tlv_data != ifehdr_end)) { + spin_lock(&ife->tcf_lock); + ife->tcf_qstats.drops++; + spin_unlock(&ife->tcf_lock); + return TC_ACT_SHOT; } + skb->protocol = eth_type_trans(skb, skb->dev); skb_reset_network_header(skb); + return action; } @@ -727,7 +701,6 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; struct ethhdr *oethh; /* outer ether header */ - struct ethhdr *iethh; /* inner eth header */ struct tcf_meta_info *e; /* OUTERHDR:TOTMETALEN:{TLVHDR:Metadatum:TLVHDR..}:ORIGDATA @@ -735,13 +708,13 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, */ u16 metalen = ife_get_sz(skb, ife); int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN; - unsigned int skboff = skb->dev->hard_header_len; - u32 at = G_TC_AT(skb->tc_verd); + unsigned int skboff = 0; int new_len = skb->len + hdrm; bool exceed_mtu = false; - int err; + void *ife_meta; + int err = 0; - if (at & AT_EGRESS) { + if (!skb_at_tc_ingress(skb)) { if (new_len > skb->dev->mtu) exceed_mtu = true; } @@ -766,27 +739,10 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, return TC_ACT_SHOT; } - err = skb_cow_head(skb, hdrm); - if (unlikely(err)) { - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); - return TC_ACT_SHOT; - } - - if (!(at & AT_EGRESS)) + if (skb_at_tc_ingress(skb)) skb_push(skb, skb->dev->hard_header_len); - iethh = (struct ethhdr *)skb->data; - __skb_push(skb, hdrm); - memcpy(skb->data, iethh, skb->mac_len); - skb_reset_mac_header(skb); - oethh = eth_hdr(skb); - - /*total metadata length */ - metalen += IFE_METAHDRLEN; - metalen = htons(metalen); - memcpy((skb->data + skboff), &metalen, IFE_METAHDRLEN); - skboff += IFE_METAHDRLEN; + ife_meta = ife_encode(skb, metalen); /* XXX: we dont have a clever way of telling encode to * not repeat some of the computations that are done by @@ -794,7 +750,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, */ list_for_each_entry(e, &ife->metalist, metalist) { if (e->ops->encode) { - err = e->ops->encode(skb, (void *)(skb->data + skboff), + err = e->ops->encode(skb, (void *)(ife_meta + skboff), e); } if (err < 0) { @@ -805,18 +761,15 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, } skboff += err; } + oethh = (struct ethhdr *)skb->data; if (!is_zero_ether_addr(ife->eth_src)) ether_addr_copy(oethh->h_source, ife->eth_src); - else - ether_addr_copy(oethh->h_source, iethh->h_source); if (!is_zero_ether_addr(ife->eth_dst)) ether_addr_copy(oethh->h_dest, ife->eth_dst); - else - ether_addr_copy(oethh->h_dest, iethh->h_dest); oethh->h_proto = htons(ife->eth_type); - if (!(at & AT_EGRESS)) + if (skb_at_tc_ingress(skb)) skb_pull(skb, skb->dev->hard_header_len); spin_unlock(&ife->tcf_lock); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2d9fa6e0a1b4..af49c7dca860 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -28,8 +28,6 @@ #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> -#include <linux/if_arp.h> - #define MIRRED_TAB_MASK 7 static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); @@ -39,15 +37,15 @@ static bool tcf_mirred_is_act_redirect(int action) return action == TCA_EGRESS_REDIR || action == TCA_INGRESS_REDIR; } -static u32 tcf_mirred_act_direction(int action) +static bool tcf_mirred_act_wants_ingress(int action) { switch (action) { case TCA_EGRESS_REDIR: case TCA_EGRESS_MIRROR: - return AT_EGRESS; + return false; case TCA_INGRESS_REDIR: case TCA_INGRESS_MIRROR: - return AT_INGRESS; + return true; default: BUG(); } @@ -170,7 +168,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, int retval, err = 0; int m_eaction; int mac_len; - u32 at; tcf_lastuse_update(&m->tcf_tm); bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb); @@ -191,7 +188,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, goto out; } - at = G_TC_AT(skb->tc_verd); skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) goto out; @@ -200,8 +196,9 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, * and devices expect a mac header on xmit, then mac push/pull is * needed. */ - if (at != tcf_mirred_act_direction(m_eaction) && m_mac_header_xmit) { - if (at & AT_EGRESS) { + if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) && + m_mac_header_xmit) { + if (!skb_at_tc_ingress(skb)) { /* caught at egress, act ingress: pull mac */ mac_len = skb_network_header(skb) - skb_mac_header(skb); skb_pull_rcsum(skb2, mac_len); @@ -212,12 +209,14 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, } /* mirror is always swallowed */ - if (tcf_mirred_is_act_redirect(m_eaction)) - skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); + if (tcf_mirred_is_act_redirect(m_eaction)) { + skb2->tc_redirected = 1; + skb2->tc_from_ingress = skb2->tc_at_ingress; + } skb2->skb_iif = skb->dev->ifindex; skb2->dev = dev; - if (tcf_mirred_act_direction(m_eaction) & AT_EGRESS) + if (!tcf_mirred_act_wants_ingress(m_eaction)) err = dev_queue_xmit(skb2); else err = netif_receive_skb(skb2); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b27c4daec88f..c1310472f620 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -22,6 +22,7 @@ #include <net/pkt_sched.h> #include <linux/tc_act/tc_pedit.h> #include <net/tc_act/tc_pedit.h> +#include <uapi/linux/tc_act/tc_pedit.h> #define PEDIT_TAB_MASK 15 @@ -30,18 +31,117 @@ static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, + [TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED }, }; +static const struct nla_policy pedit_key_ex_policy[TCA_PEDIT_KEY_EX_MAX + 1] = { + [TCA_PEDIT_KEY_EX_HTYPE] = { .type = NLA_U16 }, + [TCA_PEDIT_KEY_EX_CMD] = { .type = NLA_U16 }, +}; + +static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, + u8 n) +{ + struct tcf_pedit_key_ex *keys_ex; + struct tcf_pedit_key_ex *k; + const struct nlattr *ka; + int err = -EINVAL; + int rem; + + if (!nla || !n) + return NULL; + + keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL); + if (!keys_ex) + return ERR_PTR(-ENOMEM); + + k = keys_ex; + + nla_for_each_nested(ka, nla, rem) { + struct nlattr *tb[TCA_PEDIT_KEY_EX_MAX + 1]; + + if (!n) { + err = -EINVAL; + goto err_out; + } + n--; + + if (nla_type(ka) != TCA_PEDIT_KEY_EX) { + err = -EINVAL; + goto err_out; + } + + err = nla_parse_nested(tb, TCA_PEDIT_KEY_EX_MAX, ka, + pedit_key_ex_policy); + if (err) + goto err_out; + + if (!tb[TCA_PEDIT_KEY_EX_HTYPE] || + !tb[TCA_PEDIT_KEY_EX_CMD]) { + err = -EINVAL; + goto err_out; + } + + k->htype = nla_get_u16(tb[TCA_PEDIT_KEY_EX_HTYPE]); + k->cmd = nla_get_u16(tb[TCA_PEDIT_KEY_EX_CMD]); + + if (k->htype > TCA_PEDIT_HDR_TYPE_MAX || + k->cmd > TCA_PEDIT_CMD_MAX) { + err = -EINVAL; + goto err_out; + } + + k++; + } + + if (n) + goto err_out; + + return keys_ex; + +err_out: + kfree(keys_ex); + return ERR_PTR(err); +} + +static int tcf_pedit_key_ex_dump(struct sk_buff *skb, + struct tcf_pedit_key_ex *keys_ex, int n) +{ + struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX); + + for (; n > 0; n--) { + struct nlattr *key_start; + + key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX); + + if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype) || + nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd)) { + nlmsg_trim(skb, keys_start); + return -EINVAL; + } + + nla_nest_end(skb, key_start); + + keys_ex++; + } + + nla_nest_end(skb, keys_start); + + return 0; +} + static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; + struct nlattr *pattr; struct tc_pedit *parm; int ret = 0, err; struct tcf_pedit *p; struct tc_pedit_key *keys = NULL; + struct tcf_pedit_key_ex *keys_ex; int ksize; if (nla == NULL) @@ -51,13 +151,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (err < 0) return err; - if (tb[TCA_PEDIT_PARMS] == NULL) + pattr = tb[TCA_PEDIT_PARMS]; + if (!pattr) + pattr = tb[TCA_PEDIT_PARMS_EX]; + if (!pattr) return -EINVAL; - parm = nla_data(tb[TCA_PEDIT_PARMS]); + + parm = nla_data(pattr); ksize = parm->nkeys * sizeof(struct tc_pedit_key); - if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize) + if (nla_len(pattr) < sizeof(*parm) + ksize) return -EINVAL; + keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys); + if (IS_ERR(keys_ex)) + return PTR_ERR(keys_ex); + if (!tcf_hash_check(tn, parm->index, a, bind)) { if (!parm->nkeys) return -EINVAL; @@ -69,6 +177,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { tcf_hash_cleanup(*a, est); + kfree(keys_ex); return -ENOMEM; } ret = ACT_P_CREATED; @@ -81,8 +190,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p = to_pedit(*a); if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); - if (keys == NULL) + if (!keys) { + kfree(keys_ex); return -ENOMEM; + } } } @@ -95,6 +206,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p->tcfp_nkeys = parm->nkeys; } memcpy(p->tcfp_keys, parm->keys, ksize); + + kfree(p->tcfp_keys_ex); + p->tcfp_keys_ex = keys_ex; + spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(tn, *a); @@ -106,6 +221,7 @@ static void tcf_pedit_cleanup(struct tc_action *a, int bind) struct tcf_pedit *p = to_pedit(a); struct tc_pedit_key *keys = p->tcfp_keys; kfree(keys); + kfree(p->tcfp_keys_ex); } static bool offset_valid(struct sk_buff *skb, int offset) @@ -119,38 +235,88 @@ static bool offset_valid(struct sk_buff *skb, int offset) return true; } +static int pedit_skb_hdr_offset(struct sk_buff *skb, + enum pedit_header_type htype, int *hoffset) +{ + int ret = -EINVAL; + + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + if (skb_mac_header_was_set(skb)) { + *hoffset = skb_mac_offset(skb); + ret = 0; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK: + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + *hoffset = skb_network_offset(skb); + ret = 0; + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + if (skb_transport_header_was_set(skb)) { + *hoffset = skb_transport_offset(skb); + ret = 0; + } + break; + default: + ret = -EINVAL; + break; + }; + + return ret; +} + static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); int i; - unsigned int off; if (skb_unclone(skb, GFP_ATOMIC)) return p->tcf_action; - off = skb_network_offset(skb); - spin_lock(&p->tcf_lock); tcf_lastuse_update(&p->tcf_tm); if (p->tcfp_nkeys > 0) { struct tc_pedit_key *tkey = p->tcfp_keys; + struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex; + enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK; + enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET; for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { u32 *ptr, _data; int offset = tkey->off; + int hoffset; + u32 val; + int rc; + + if (tkey_ex) { + htype = tkey_ex->htype; + cmd = tkey_ex->cmd; + + tkey_ex++; + } + + rc = pedit_skb_hdr_offset(skb, htype, &hoffset); + if (rc) { + pr_info("tc filter pedit bad header type specified (0x%x)\n", + htype); + goto bad; + } if (tkey->offmask) { char *d, _d; - if (!offset_valid(skb, off + tkey->at)) { + if (!offset_valid(skb, hoffset + tkey->at)) { pr_info("tc filter pedit 'at' offset %d out of bounds\n", - off + tkey->at); + hoffset + tkey->at); goto bad; } - d = skb_header_pointer(skb, off + tkey->at, 1, + d = skb_header_pointer(skb, hoffset + tkey->at, 1, &_d); if (!d) goto bad; @@ -163,19 +329,32 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a, goto bad; } - if (!offset_valid(skb, off + offset)) { + if (!offset_valid(skb, hoffset + offset)) { pr_info("tc filter pedit offset %d out of bounds\n", - offset); + hoffset + offset); goto bad; } - ptr = skb_header_pointer(skb, off + offset, 4, &_data); + ptr = skb_header_pointer(skb, hoffset + offset, 4, &_data); if (!ptr) goto bad; /* just do it, baby */ - *ptr = ((*ptr & tkey->mask) ^ tkey->val); + switch (cmd) { + case TCA_PEDIT_KEY_EX_CMD_SET: + val = tkey->val; + break; + case TCA_PEDIT_KEY_EX_CMD_ADD: + val = (*ptr + tkey->val) & ~tkey->mask; + break; + default: + pr_info("tc filter pedit bad command (%d)\n", + cmd); + goto bad; + } + + *ptr = ((*ptr & tkey->mask) ^ val); if (ptr == &_data) - skb_store_bits(skb, off + offset, ptr, 4); + skb_store_bits(skb, hoffset + offset, ptr, 4); } goto done; @@ -215,8 +394,15 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, opt->refcnt = p->tcf_refcnt - ref; opt->bindcnt = p->tcf_bindcnt - bind; - if (nla_put(skb, TCA_PEDIT_PARMS, s, opt)) - goto nla_put_failure; + if (p->tcfp_keys_ex) { + tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys); + + if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt)) + goto nla_put_failure; + } else { + if (nla_put(skb, TCA_PEDIT_PARMS, s, opt)) + goto nla_put_failure; + } tcf_tm_dump(&t, &p->tcf_tm); if (nla_put_64bit(skb, TCA_PEDIT_TM, sizeof(t), &t, TCA_PEDIT_PAD)) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c new file mode 100644 index 000000000000..0b8217b4763f --- /dev/null +++ b/net/sched/act_sample.c @@ -0,0 +1,276 @@ +/* + * net/sched/act_sample.c - Packet sampling tc action + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/gfp.h> +#include <net/net_namespace.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <linux/tc_act/tc_sample.h> +#include <net/tc_act/tc_sample.h> +#include <net/psample.h> + +#include <linux/if_arp.h> + +#define SAMPLE_TAB_MASK 7 +static unsigned int sample_net_id; +static struct tc_action_ops act_sample_ops; + +static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { + [TCA_SAMPLE_PARMS] = { .len = sizeof(struct tc_sample) }, + [TCA_SAMPLE_RATE] = { .type = NLA_U32 }, + [TCA_SAMPLE_TRUNC_SIZE] = { .type = NLA_U32 }, + [TCA_SAMPLE_PSAMPLE_GROUP] = { .type = NLA_U32 }, +}; + +static int tcf_sample_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, int ovr, + int bind) +{ + struct tc_action_net *tn = net_generic(net, sample_net_id); + struct nlattr *tb[TCA_SAMPLE_MAX + 1]; + struct psample_group *psample_group; + struct tc_sample *parm; + struct tcf_sample *s; + bool exists = false; + int ret; + + if (!nla) + return -EINVAL; + ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy); + if (ret < 0) + return ret; + if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] || + !tb[TCA_SAMPLE_PSAMPLE_GROUP]) + return -EINVAL; + + parm = nla_data(tb[TCA_SAMPLE_PARMS]); + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_sample_ops, bind, false); + if (ret) + return ret; + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + s = to_sample(*a); + + s->tcf_action = parm->action; + s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); + psample_group = psample_group_get(net, s->psample_group_num); + if (!psample_group) { + if (ret == ACT_P_CREATED) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + RCU_INIT_POINTER(s->psample_group, psample_group); + + if (tb[TCA_SAMPLE_TRUNC_SIZE]) { + s->truncate = true; + s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]); + } + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + return ret; +} + +static void tcf_sample_cleanup_rcu(struct rcu_head *rcu) +{ + struct tcf_sample *s = container_of(rcu, struct tcf_sample, rcu); + struct psample_group *psample_group; + + psample_group = rcu_dereference_protected(s->psample_group, 1); + RCU_INIT_POINTER(s->psample_group, NULL); + psample_group_put(psample_group); +} + +static void tcf_sample_cleanup(struct tc_action *a, int bind) +{ + struct tcf_sample *s = to_sample(a); + + call_rcu(&s->rcu, tcf_sample_cleanup_rcu); +} + +static bool tcf_sample_dev_ok_push(struct net_device *dev) +{ + switch (dev->type) { + case ARPHRD_TUNNEL: + case ARPHRD_TUNNEL6: + case ARPHRD_SIT: + case ARPHRD_IPGRE: + case ARPHRD_VOID: + case ARPHRD_NONE: + return false; + default: + return true; + } +} + +static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_sample *s = to_sample(a); + struct psample_group *psample_group; + int retval; + int size; + int iif; + int oif; + + tcf_lastuse_update(&s->tcf_tm); + bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb); + retval = READ_ONCE(s->tcf_action); + + rcu_read_lock(); + psample_group = rcu_dereference(s->psample_group); + + /* randomly sample packets according to rate */ + if (psample_group && (prandom_u32() % s->rate == 0)) { + if (!skb_at_tc_ingress(skb)) { + iif = skb->skb_iif; + oif = skb->dev->ifindex; + } else { + iif = skb->dev->ifindex; + oif = 0; + } + + /* on ingress, the mac header gets popped, so push it back */ + if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev)) + skb_push(skb, skb->mac_len); + + size = s->truncate ? s->trunc_size : skb->len; + psample_sample_packet(psample_group, skb, size, iif, oif, + s->rate); + + if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev)) + skb_pull(skb, skb->mac_len); + } + + rcu_read_unlock(); + return retval; +} + +static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_sample *s = to_sample(a); + struct tc_sample opt = { + .index = s->tcf_index, + .action = s->tcf_action, + .refcnt = s->tcf_refcnt - ref, + .bindcnt = s->tcf_bindcnt - bind, + }; + struct tcf_t t; + + if (nla_put(skb, TCA_SAMPLE_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + tcf_tm_dump(&t, &s->tcf_tm); + if (nla_put_64bit(skb, TCA_SAMPLE_TM, sizeof(t), &t, TCA_SAMPLE_PAD)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_SAMPLE_RATE, s->rate)) + goto nla_put_failure; + + if (s->truncate) + if (nla_put_u32(skb, TCA_SAMPLE_TRUNC_SIZE, s->trunc_size)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_SAMPLE_PSAMPLE_GROUP, s->psample_group_num)) + goto nla_put_failure; + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int tcf_sample_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, sample_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, sample_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_sample_ops = { + .kind = "sample", + .type = TCA_ACT_SAMPLE, + .owner = THIS_MODULE, + .act = tcf_sample_act, + .dump = tcf_sample_dump, + .init = tcf_sample_init, + .cleanup = tcf_sample_cleanup, + .walk = tcf_sample_walker, + .lookup = tcf_sample_search, + .size = sizeof(struct tcf_sample), +}; + +static __net_init int sample_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, sample_net_id); + + return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK); +} + +static void __net_exit sample_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, sample_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations sample_net_ops = { + .init = sample_init_net, + .exit = sample_exit_net, + .id = &sample_net_id, + .size = sizeof(struct tc_action_net), +}; + +static int __init sample_init_module(void) +{ + return tcf_register_action(&act_sample_ops, &sample_net_ops); +} + +static void __exit sample_cleanup_module(void) +{ + tcf_unregister_action(&act_sample_ops, &sample_net_ops); +} + +module_init(sample_init_module); +module_exit(sample_cleanup_module); + +MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>"); +MODULE_DESCRIPTION("Packet sampling action"); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1ecdf809b5fa..732f7cae459d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> +#include <linux/err.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> @@ -38,14 +39,14 @@ static DEFINE_RWLOCK(cls_mod_lock); /* Find classifier type by string name */ -static const struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind) +static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind) { const struct tcf_proto_ops *t, *res = NULL; if (kind) { read_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { - if (nla_strcmp(kind, t->kind) == 0) { + if (strcmp(kind, t->kind) == 0) { if (try_module_get(t->owner)) res = t; break; @@ -127,6 +128,77 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) return first; } +static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, + u32 prio, u32 parent, struct Qdisc *q) +{ + struct tcf_proto *tp; + int err; + + tp = kzalloc(sizeof(*tp), GFP_KERNEL); + if (!tp) + return ERR_PTR(-ENOBUFS); + + err = -ENOENT; + tp->ops = tcf_proto_lookup_ops(kind); + if (!tp->ops) { +#ifdef CONFIG_MODULES + rtnl_unlock(); + request_module("cls_%s", kind); + rtnl_lock(); + tp->ops = tcf_proto_lookup_ops(kind); + /* We dropped the RTNL semaphore in order to perform + * the module load. So, even if we succeeded in loading + * the module we have to replay the request. We indicate + * this using -EAGAIN. + */ + if (tp->ops) { + module_put(tp->ops->owner); + err = -EAGAIN; + } else { + err = -ENOENT; + } + goto errout; +#endif + } + tp->classify = tp->ops->classify; + tp->protocol = protocol; + tp->prio = prio; + tp->classid = parent; + tp->q = q; + + err = tp->ops->init(tp); + if (err) { + module_put(tp->ops->owner); + goto errout; + } + return tp; + +errout: + kfree(tp); + return ERR_PTR(err); +} + +static bool tcf_proto_destroy(struct tcf_proto *tp, bool force) +{ + if (tp->ops->destroy(tp, force)) { + module_put(tp->ops->owner); + kfree_rcu(tp, rcu); + return true; + } + return false; +} + +void tcf_destroy_chain(struct tcf_proto __rcu **fl) +{ + struct tcf_proto *tp; + + while ((tp = rtnl_dereference(*fl)) != NULL) { + RCU_INIT_POINTER(*fl, tp->next); + tcf_proto_destroy(tp, true); + } +} +EXPORT_SYMBOL(tcf_destroy_chain); + /* Add/change/delete/get a filter node */ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) @@ -142,8 +214,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) struct Qdisc *q; struct tcf_proto __rcu **back; struct tcf_proto __rcu **chain; + struct tcf_proto *next; struct tcf_proto *tp; - const struct tcf_proto_ops *tp_ops; const struct Qdisc_class_ops *cops; unsigned long cl; unsigned long fh; @@ -222,9 +294,10 @@ replay: /* And the last stroke */ chain = cops->tcf_chain(q, cl); - err = -EINVAL; - if (chain == NULL) + if (chain == NULL) { + err = -EINVAL; goto errout; + } if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER); tcf_destroy_chain(chain); @@ -239,10 +312,13 @@ replay: if (tp->prio >= prio) { if (tp->prio == prio) { if (!nprio || - (tp->protocol != protocol && protocol)) + (tp->protocol != protocol && protocol)) { + err = -EINVAL; goto errout; - } else + } + } else { tp = NULL; + } break; } } @@ -250,109 +326,69 @@ replay: if (tp == NULL) { /* Proto-tcf does not exist, create new one */ - if (tca[TCA_KIND] == NULL || !protocol) + if (tca[TCA_KIND] == NULL || !protocol) { + err = -EINVAL; goto errout; + } - err = -ENOENT; if (n->nlmsg_type != RTM_NEWTFILTER || - !(n->nlmsg_flags & NLM_F_CREATE)) + !(n->nlmsg_flags & NLM_F_CREATE)) { + err = -ENOENT; goto errout; + } + if (!nprio) + nprio = TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back))); - /* Create new proto tcf */ - - err = -ENOBUFS; - tp = kzalloc(sizeof(*tp), GFP_KERNEL); - if (tp == NULL) - goto errout; - err = -ENOENT; - tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); - if (tp_ops == NULL) { -#ifdef CONFIG_MODULES - struct nlattr *kind = tca[TCA_KIND]; - char name[IFNAMSIZ]; - - if (kind != NULL && - nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { - rtnl_unlock(); - request_module("cls_%s", name); - rtnl_lock(); - tp_ops = tcf_proto_lookup_ops(kind); - /* We dropped the RTNL semaphore in order to - * perform the module load. So, even if we - * succeeded in loading the module we have to - * replay the request. We indicate this using - * -EAGAIN. - */ - if (tp_ops != NULL) { - module_put(tp_ops->owner); - err = -EAGAIN; - } - } -#endif - kfree(tp); - goto errout; - } - tp->ops = tp_ops; - tp->protocol = protocol; - tp->prio = nprio ? : - TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back))); - tp->q = q; - tp->classify = tp_ops->classify; - tp->classid = parent; - - err = tp_ops->init(tp); - if (err != 0) { - module_put(tp_ops->owner); - kfree(tp); + tp = tcf_proto_create(nla_data(tca[TCA_KIND]), + protocol, nprio, parent, q); + if (IS_ERR(tp)) { + err = PTR_ERR(tp); goto errout; } - tp_created = 1; - - } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) + } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { + err = -EINVAL; goto errout; + } fh = tp->ops->get(tp, t->tcm_handle); if (fh == 0) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { - struct tcf_proto *next = rtnl_dereference(tp->next); - + next = rtnl_dereference(tp->next); RCU_INIT_POINTER(*back, next); - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER, false); - tcf_destroy(tp, true); + tcf_proto_destroy(tp, true); err = 0; goto errout; } - err = -ENOENT; if (n->nlmsg_type != RTM_NEWTFILTER || - !(n->nlmsg_flags & NLM_F_CREATE)) + !(n->nlmsg_flags & NLM_F_CREATE)) { + err = -ENOENT; goto errout; + } } else { switch (n->nlmsg_type) { case RTM_NEWTFILTER: - err = -EEXIST; if (n->nlmsg_flags & NLM_F_EXCL) { if (tp_created) - tcf_destroy(tp, true); + tcf_proto_destroy(tp, true); + err = -EEXIST; goto errout; } break; case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); - if (err == 0) { - struct tcf_proto *next = rtnl_dereference(tp->next); - - tfilter_notify(net, skb, n, tp, - t->tcm_handle, - RTM_DELTFILTER, false); - if (tcf_destroy(tp, false)) - RCU_INIT_POINTER(*back, next); - } + if (err) + goto errout; + next = rtnl_dereference(tp->next); + tfilter_notify(net, skb, n, tp, t->tcm_handle, + RTM_DELTFILTER, false); + if (tcf_proto_destroy(tp, false)) + RCU_INIT_POINTER(*back, next); goto errout; case RTM_GETTFILTER: err = tfilter_notify(net, skb, n, tp, fh, @@ -374,7 +410,7 @@ replay: tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); } else { if (tp_created) - tcf_destroy(tp, true); + tcf_proto_destroy(tp, true); } errout: diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index d9c97018317d..80f688436dd7 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -148,6 +148,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_bpf_offload bpf_offload = {}; struct tc_to_netdev offload; + int err; offload.type = TC_SETUP_CLSBPF; offload.cls_bpf = &bpf_offload; @@ -159,8 +160,13 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, bpf_offload.exts_integrated = prog->exts_integrated; bpf_offload.gen_flags = prog->gen_flags; - return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + + if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) + prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + + return err; } static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, @@ -511,6 +517,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, return ret; } + if (!tc_in_hw(prog->gen_flags)) + prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; + if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 6575aba87630..3d6b9286c203 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -129,7 +129,7 @@ static u32 flow_get_mark(const struct sk_buff *skb) static u32 flow_get_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) - return addr_fold(skb->nfct); + return addr_fold(skb_nfct(skb)); #else return 0; #endif diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 970db7a41684..9d0c99d2e9fb 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -40,6 +40,7 @@ struct fl_flow_key { }; struct flow_dissector_key_ports tp; struct flow_dissector_key_icmp icmp; + struct flow_dissector_key_arp arp; struct flow_dissector_key_keyid enc_key_id; union { struct flow_dissector_key_ipv4_addrs enc_ipv4; @@ -133,6 +134,14 @@ static void fl_clear_masked_range(struct fl_flow_key *key, memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask)); } +static struct cls_fl_filter *fl_lookup(struct cls_fl_head *head, + struct fl_flow_key *mkey) +{ + return rhashtable_lookup_fast(&head->ht, + fl_key_get_start(mkey, &head->mask), + head->ht_params); +} + static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { @@ -180,9 +189,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, fl_set_masked_key(&skb_mkey, &skb_key, &head->mask); - f = rhashtable_lookup_fast(&head->ht, - fl_key_get_start(&skb_mkey, &head->mask), - head->ht_params); + f = fl_lookup(head, &skb_mkey); if (f && !tc_skip_sw(f->flags)) { *res = f->res; return tcf_exts_exec(skb, &f->exts, res); @@ -222,6 +229,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) return; offload.command = TC_CLSFLOWER_DESTROY; + offload.prio = tp->prio; offload.cookie = (unsigned long)f; tc->type = TC_SETUP_CLSFLOWER; @@ -253,6 +261,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, } offload.command = TC_CLSFLOWER_REPLACE; + offload.prio = tp->prio; offload.cookie = (unsigned long)f; offload.dissector = dissector; offload.mask = mask; @@ -264,6 +273,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc); + if (!err) + f->flags |= TCA_CLS_FLAGS_IN_HW; if (tc_skip_sw(f->flags)) return err; @@ -280,6 +291,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) return; offload.command = TC_CLSFLOWER_STATS; + offload.prio = tp->prio; offload.cookie = (unsigned long)f; offload.exts = &f->exts; @@ -401,6 +413,16 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ICMPV6_CODE] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ARP_SIP] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ARP_SIP_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ARP_TIP] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ARP_TIP_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ARP_OP] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ARP_OP_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_ARP_SHA] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ARP_SHA_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ARP_THA] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ARP_THA_MASK] = { .len = ETH_ALEN }, }; static void fl_set_key_val(struct nlattr **tb, @@ -568,10 +590,27 @@ static int fl_set_key(struct net *net, struct nlattr **tb, &mask->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE_MASK, sizeof(key->icmp.type)); - fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE, + fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code, - TCA_FLOWER_KEY_ICMPV4_CODE_MASK, + TCA_FLOWER_KEY_ICMPV6_CODE_MASK, sizeof(key->icmp.code)); + } else if (key->basic.n_proto == htons(ETH_P_ARP) || + key->basic.n_proto == htons(ETH_P_RARP)) { + fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP, + &mask->arp.sip, TCA_FLOWER_KEY_ARP_SIP_MASK, + sizeof(key->arp.sip)); + fl_set_key_val(tb, &key->arp.tip, TCA_FLOWER_KEY_ARP_TIP, + &mask->arp.tip, TCA_FLOWER_KEY_ARP_TIP_MASK, + sizeof(key->arp.tip)); + fl_set_key_val(tb, &key->arp.op, TCA_FLOWER_KEY_ARP_OP, + &mask->arp.op, TCA_FLOWER_KEY_ARP_OP_MASK, + sizeof(key->arp.op)); + fl_set_key_val(tb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA, + mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK, + sizeof(key->arp.sha)); + fl_set_key_val(tb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA, + mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK, + sizeof(key->arp.tha)); } if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || @@ -689,6 +728,8 @@ static void fl_init_dissector(struct cls_fl_head *head, FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_ICMP, icmp); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ARP, arp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_VLAN, vlan); FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); @@ -796,23 +837,31 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg; struct cls_fl_filter *fnew; - struct nlattr *tb[TCA_FLOWER_MAX + 1]; + struct nlattr **tb; struct fl_flow_mask mask = {}; int err; if (!tca[TCA_OPTIONS]) return -EINVAL; + tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); + if (!tb) + return -ENOBUFS; + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy); if (err < 0) - return err; + goto errout_tb; - if (fold && handle && fold->handle != handle) - return -EINVAL; + if (fold && handle && fold->handle != handle) { + err = -EINVAL; + goto errout_tb; + } fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); - if (!fnew) - return -ENOBUFS; + if (!fnew) { + err = -ENOBUFS; + goto errout_tb; + } err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); if (err < 0) @@ -845,6 +894,11 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout; if (!tc_skip_sw(fnew->flags)) { + if (!fold && fl_lookup(head, &fnew->mkey)) { + err = -EEXIST; + goto errout; + } + err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, head->ht_params); if (err) @@ -860,6 +914,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout; } + if (!tc_in_hw(fnew->flags)) + fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + if (fold) { if (!tc_skip_sw(fold->flags)) rhashtable_remove_fast(&head->ht, &fold->ht_node, @@ -878,11 +935,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, list_add_tail_rcu(&fnew->list, &head->filters); } + kfree(tb); return 0; errout: tcf_exts_destroy(&fnew->exts); kfree(fnew); +errout_tb: + kfree(tb); return err; } @@ -1112,6 +1172,27 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, TCA_FLOWER_KEY_ICMPV6_CODE_MASK, sizeof(key->icmp.code)))) goto nla_put_failure; + else if ((key->basic.n_proto == htons(ETH_P_ARP) || + key->basic.n_proto == htons(ETH_P_RARP)) && + (fl_dump_key_val(skb, &key->arp.sip, + TCA_FLOWER_KEY_ARP_SIP, &mask->arp.sip, + TCA_FLOWER_KEY_ARP_SIP_MASK, + sizeof(key->arp.sip)) || + fl_dump_key_val(skb, &key->arp.tip, + TCA_FLOWER_KEY_ARP_TIP, &mask->arp.tip, + TCA_FLOWER_KEY_ARP_TIP_MASK, + sizeof(key->arp.tip)) || + fl_dump_key_val(skb, &key->arp.op, + TCA_FLOWER_KEY_ARP_OP, &mask->arp.op, + TCA_FLOWER_KEY_ARP_OP_MASK, + sizeof(key->arp.op)) || + fl_dump_key_val(skb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA, + mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK, + sizeof(key->arp.sha)) || + fl_dump_key_val(skb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA, + mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK, + sizeof(key->arp.tha)))) + goto nla_put_failure; if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && (fl_dump_key_val(skb, &key->enc_ipv4.src, @@ -1153,7 +1234,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) goto nla_put_failure; - nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); + if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags)) + goto nla_put_failure; if (tcf_exts_dump(skb, &f->exts)) goto nla_put_failure; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index f935429bd5ef..224eb2c14346 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -16,16 +16,11 @@ #include <net/sch_generic.h> #include <net/pkt_cls.h> -struct cls_mall_filter { +struct cls_mall_head { struct tcf_exts exts; struct tcf_result res; u32 handle; - struct rcu_head rcu; u32 flags; -}; - -struct cls_mall_head { - struct cls_mall_filter *filter; struct rcu_head rcu; }; @@ -33,56 +28,52 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_mall_head *head = rcu_dereference_bh(tp->root); - struct cls_mall_filter *f = head->filter; - if (tc_skip_sw(f->flags)) + if (tc_skip_sw(head->flags)) return -1; - return tcf_exts_exec(skb, &f->exts, res); + return tcf_exts_exec(skb, &head->exts, res); } static int mall_init(struct tcf_proto *tp) { - struct cls_mall_head *head; - - head = kzalloc(sizeof(*head), GFP_KERNEL); - if (!head) - return -ENOBUFS; - - rcu_assign_pointer(tp->root, head); - return 0; } -static void mall_destroy_filter(struct rcu_head *head) +static void mall_destroy_rcu(struct rcu_head *rcu) { - struct cls_mall_filter *f = container_of(head, struct cls_mall_filter, rcu); + struct cls_mall_head *head = container_of(rcu, struct cls_mall_head, + rcu); - tcf_exts_destroy(&f->exts); - - kfree(f); + tcf_exts_destroy(&head->exts); + kfree(head); } static int mall_replace_hw_filter(struct tcf_proto *tp, - struct cls_mall_filter *f, + struct cls_mall_head *head, unsigned long cookie) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_to_netdev offload; struct tc_cls_matchall_offload mall_offload = {0}; + int err; offload.type = TC_SETUP_MATCHALL; offload.cls_mall = &mall_offload; offload.cls_mall->command = TC_CLSMATCHALL_REPLACE; - offload.cls_mall->exts = &f->exts; + offload.cls_mall->exts = &head->exts; offload.cls_mall->cookie = cookie; - return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, - &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, + &offload); + if (!err) + head->flags |= TCA_CLS_FLAGS_IN_HW; + + return err; } static void mall_destroy_hw_filter(struct tcf_proto *tp, - struct cls_mall_filter *f, + struct cls_mall_head *head, unsigned long cookie) { struct net_device *dev = tp->q->dev_queue->dev; @@ -103,29 +94,20 @@ static bool mall_destroy(struct tcf_proto *tp, bool force) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct net_device *dev = tp->q->dev_queue->dev; - struct cls_mall_filter *f = head->filter; - if (!force && f) - return false; + if (!head) + return true; - if (f) { - if (tc_should_offload(dev, tp, f->flags)) - mall_destroy_hw_filter(tp, f, (unsigned long) f); + if (tc_should_offload(dev, tp, head->flags)) + mall_destroy_hw_filter(tp, head, (unsigned long) head); - call_rcu(&f->rcu, mall_destroy_filter); - } - kfree_rcu(head, rcu); + call_rcu(&head->rcu, mall_destroy_rcu); return true; } static unsigned long mall_get(struct tcf_proto *tp, u32 handle) { - struct cls_mall_head *head = rtnl_dereference(tp->root); - struct cls_mall_filter *f = head->filter; - - if (f && f->handle == handle) - return (unsigned long) f; - return 0; + return 0UL; } static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { @@ -134,26 +116,31 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { }; static int mall_set_parms(struct net *net, struct tcf_proto *tp, - struct cls_mall_filter *f, + struct cls_mall_head *head, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { struct tcf_exts e; int err; - tcf_exts_init(&e, TCA_MATCHALL_ACT, 0); + err = tcf_exts_init(&e, TCA_MATCHALL_ACT, 0); + if (err) + return err; err = tcf_exts_validate(net, tp, tb, est, &e, ovr); if (err < 0) - return err; + goto errout; if (tb[TCA_MATCHALL_CLASSID]) { - f->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); - tcf_bind_filter(tp, &f->res, base); + head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); + tcf_bind_filter(tp, &head->res, base); } - tcf_exts_change(tp, &f->exts, &e); + tcf_exts_change(tp, &head->exts, &e); return 0; +errout: + tcf_exts_destroy(&e); + return err; } static int mall_change(struct net *net, struct sk_buff *in_skb, @@ -162,21 +149,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, unsigned long *arg, bool ovr) { struct cls_mall_head *head = rtnl_dereference(tp->root); - struct cls_mall_filter *fold = (struct cls_mall_filter *) *arg; struct net_device *dev = tp->q->dev_queue->dev; - struct cls_mall_filter *f; struct nlattr *tb[TCA_MATCHALL_MAX + 1]; + struct cls_mall_head *new; u32 flags = 0; int err; if (!tca[TCA_OPTIONS]) return -EINVAL; - if (head->filter) - return -EBUSY; - - if (fold) - return -EINVAL; + if (head) + return -EEXIST; err = nla_parse_nested(tb, TCA_MATCHALL_MAX, tca[TCA_OPTIONS], mall_policy); @@ -189,64 +172,62 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) return -ENOBUFS; - tcf_exts_init(&f->exts, TCA_MATCHALL_ACT, 0); + err = tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0); + if (err) + goto err_exts_init; if (!handle) handle = 1; - f->handle = handle; - f->flags = flags; + new->handle = handle; + new->flags = flags; - err = mall_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); + err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr); if (err) - goto errout; + goto err_set_parms; if (tc_should_offload(dev, tp, flags)) { - err = mall_replace_hw_filter(tp, f, (unsigned long) f); + err = mall_replace_hw_filter(tp, new, (unsigned long) new); if (err) { if (tc_skip_sw(flags)) - goto errout; + goto err_replace_hw_filter; else err = 0; } } - *arg = (unsigned long) f; - rcu_assign_pointer(head->filter, f); + if (!tc_in_hw(new->flags)) + new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + *arg = (unsigned long) head; + rcu_assign_pointer(tp->root, new); + if (head) + call_rcu(&head->rcu, mall_destroy_rcu); return 0; -errout: - kfree(f); +err_replace_hw_filter: +err_set_parms: + tcf_exts_destroy(&new->exts); +err_exts_init: + kfree(new); return err; } static int mall_delete(struct tcf_proto *tp, unsigned long arg) { - struct cls_mall_head *head = rtnl_dereference(tp->root); - struct cls_mall_filter *f = (struct cls_mall_filter *) arg; - struct net_device *dev = tp->q->dev_queue->dev; - - if (tc_should_offload(dev, tp, f->flags)) - mall_destroy_hw_filter(tp, f, (unsigned long) f); - - RCU_INIT_POINTER(head->filter, NULL); - tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, mall_destroy_filter); - return 0; + return -EOPNOTSUPP; } static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg) { struct cls_mall_head *head = rtnl_dereference(tp->root); - struct cls_mall_filter *f = head->filter; if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) f, arg) < 0) + if (arg->fn(tp, (unsigned long) head, arg) < 0) arg->stop = 1; skip: arg->count++; @@ -255,28 +236,31 @@ skip: static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct cls_mall_filter *f = (struct cls_mall_filter *) fh; + struct cls_mall_head *head = (struct cls_mall_head *) fh; struct nlattr *nest; - if (!f) + if (!head) return skb->len; - t->tcm_handle = f->handle; + t->tcm_handle = head->handle; nest = nla_nest_start(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; - if (f->res.classid && - nla_put_u32(skb, TCA_MATCHALL_CLASSID, f->res.classid)) + if (head->res.classid && + nla_put_u32(skb, TCA_MATCHALL_CLASSID, head->res.classid)) + goto nla_put_failure; + + if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags)) goto nla_put_failure; - if (tcf_exts_dump(skb, &f->exts)) + if (tcf_exts_dump(skb, &head->exts)) goto nla_put_failure; nla_nest_end(skb, nest); - if (tcf_exts_dump_stats(skb, &f->exts) < 0) + if (tcf_exts_dump_stats(skb, &head->exts) < 0) goto nla_put_failure; return skb->len; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ae83c3aec308..4dbe0c680fe6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -334,7 +334,6 @@ static int u32_init(struct tcf_proto *tp) if (root_ht == NULL) return -ENOBUFS; - root_ht->divisor = 0; root_ht->refcnt++; root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; root_ht->prio = tp->prio; @@ -524,6 +523,10 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); + + if (!err) + n->flags |= TCA_CLS_FLAGS_IN_HW; + if (tc_skip_sw(flags)) return err; @@ -896,6 +899,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; } + if (!tc_in_hw(new->flags)) + new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); call_rcu(&n->rcu, u32_delete_key_rcu); @@ -1015,6 +1021,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (err) goto errhw; + if (!tc_in_hw(n->flags)) + n->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + ins = &ht->ht[TC_U32_HASH(handle)]; for (pins = rtnl_dereference(*ins); pins; ins = &pins->next, pins = rtnl_dereference(*ins)) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d7b93429f0cc..bcf49cd22786 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -440,7 +440,6 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) EXPORT_SYMBOL(qdisc_put_rtab); static LIST_HEAD(qdisc_stab_list); -static DEFINE_SPINLOCK(qdisc_stab_lock); static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, @@ -474,20 +473,15 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize != s->tsize || (!tab && tsize > 0)) return ERR_PTR(-EINVAL); - spin_lock(&qdisc_stab_lock); - list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) continue; if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) continue; stab->refcnt++; - spin_unlock(&qdisc_stab_lock); return stab; } - spin_unlock(&qdisc_stab_lock); - stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); if (!stab) return ERR_PTR(-ENOMEM); @@ -497,9 +491,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) if (tsize > 0) memcpy(stab->data, tab, tsize * sizeof(u16)); - spin_lock(&qdisc_stab_lock); list_add_tail(&stab->list, &qdisc_stab_list); - spin_unlock(&qdisc_stab_lock); return stab; } @@ -514,14 +506,10 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (!tab) return; - spin_lock(&qdisc_stab_lock); - if (--tab->refcnt == 0) { list_del(&tab->list); call_rcu_bh(&tab->rcu, stab_kfree_rcu); } - - spin_unlock(&qdisc_stab_lock); } EXPORT_SYMBOL(qdisc_put_stab); @@ -1019,6 +1007,8 @@ static struct Qdisc *qdisc_create(struct net_device *dev, return sch; } + /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ + ops->destroy(sch); err_out3: dev_put(dev); kfree((char *) sch - sch->padded); @@ -1861,6 +1851,7 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, { __be16 protocol = tc_skb_protocol(skb); #ifdef CONFIG_NET_CLS_ACT + const int max_reclassify_loop = 4; const struct tcf_proto *old_tp = tp; int limit = 0; @@ -1885,7 +1876,7 @@ reclassify: return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT reset: - if (unlikely(limit++ >= MAX_REC_LOOP)) { + if (unlikely(limit++ >= max_reclassify_loop)) { net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n", tp->q->ops->id, tp->prio & 0xffff, ntohs(tp->protocol)); @@ -1899,28 +1890,6 @@ reset: } EXPORT_SYMBOL(tc_classify); -bool tcf_destroy(struct tcf_proto *tp, bool force) -{ - if (tp->ops->destroy(tp, force)) { - module_put(tp->ops->owner); - kfree_rcu(tp, rcu); - return true; - } - - return false; -} - -void tcf_destroy_chain(struct tcf_proto __rcu **fl) -{ - struct tcf_proto *tp; - - while ((tp = rtnl_dereference(*fl)) != NULL) { - RCU_INIT_POINTER(*fl, tp->next); - tcf_destroy(tp, true); - } -} -EXPORT_SYMBOL(tcf_destroy_chain); - #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 481e4f12aeb4..2209c2ddacbf 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -15,6 +15,7 @@ #include <linux/file.h> /* for fput */ #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> /* * The ATM queuing discipline provides a framework for invoking classifiers diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index f1207582cbf3..d6ca18dc04c3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -19,6 +19,7 @@ #include <linux/skbuff.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> /* Class-Based Queueing (CBQ) algorithm. diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 3b6d5bd69101..3b86a97bc67c 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -16,6 +16,7 @@ #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/inet_ecn.h> #include <net/red.h> #include <net/flow_dissector.h> diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 1308bbf460f7..802ac7c2e5e8 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -13,6 +13,7 @@ #include <linux/rtnetlink.h> #include <linux/bitops.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/dsfield.h> #include <net/inet_ecn.h> #include <asm/byteorder.h> diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index a5ea0e9b6be4..9f3a884d1590 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -23,6 +23,7 @@ #include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/codel.h> #include <net/codel_impl.h> #include <net/codel_qdisc.h> @@ -57,7 +58,6 @@ struct fq_codel_sched_data { struct fq_codel_flow *flows; /* Flows table [flows_cnt] */ u32 *backlogs; /* backlog table [flows_cnt] */ u32 flows_cnt; /* number of flows */ - u32 perturbation; /* hash perturbation */ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ u32 drop_batch_size; u32 memory_limit; @@ -75,9 +75,7 @@ struct fq_codel_sched_data { static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q, struct sk_buff *skb) { - u32 hash = skb_get_hash_perturb(skb, q->perturbation); - - return reciprocal_scale(hash, q->flows_cnt); + return reciprocal_scale(skb_get_hash(skb), q->flows_cnt); } static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, @@ -482,7 +480,6 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) q->memory_limit = 32 << 20; /* 32 MBytes */ q->drop_batch_size = 64; q->quantum = psched_mtu(qdisc_dev(sch)); - q->perturbation = prandom_u32(); INIT_LIST_HEAD(&q->new_flows); INIT_LIST_HEAD(&q->old_flows); codel_params_init(&q->cparams); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6eb9c8e88519..b052b27a984e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -247,7 +247,7 @@ static inline int qdisc_restart(struct Qdisc *q, int *packets) void __qdisc_run(struct Qdisc *q) { - int quota = weight_p; + int quota = dev_tx_weight; int packets; while (qdisc_restart(q, &packets)) { diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index e3d0458af17b..2fae8b5f1b80 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -627,7 +627,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN * sizeof(u32)); if (!q->hhf_arrays[i]) { - hhf_destroy(sch); + /* Note: hhf_destroy() will be called + * by our caller. + */ return -ENOMEM; } } @@ -638,7 +640,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN / BITS_PER_BYTE); if (!q->hhf_valid_bits[i]) { - hhf_destroy(sch); + /* Note: hhf_destroy() will be called + * by our caller. + */ return -ENOMEM; } } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 760f39e7caee..4cd5fb134bc9 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -40,6 +40,7 @@ #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> /* HTB algorithm. Author: devik@cdi.cz diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 8fe6999b642a..3bab5f66c392 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -16,6 +16,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) { diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 2bc8d7f8df16..20b7f1646f69 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -52,7 +52,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) /* pre-allocate qdiscs, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); - if (priv->qdiscs == NULL) + if (!priv->qdiscs) return -ENOMEM; for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { @@ -60,18 +60,14 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx), TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1))); - if (qdisc == NULL) - goto err; + if (!qdisc) + return -ENOMEM; priv->qdiscs[ntx] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; return 0; - -err: - mq_destroy(sch); - return -ENOMEM; } static void mq_attach(struct Qdisc *sch) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index b5c502c78143..922683418e53 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -118,10 +118,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) /* pre-allocate qdisc, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); - if (priv->qdiscs == NULL) { - err = -ENOMEM; - goto err; - } + if (!priv->qdiscs) + return -ENOMEM; for (i = 0; i < dev->num_tx_queues; i++) { dev_queue = netdev_get_tx_queue(dev, i); @@ -129,10 +127,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) get_default_qdisc_ops(dev, i), TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(i + 1))); - if (qdisc == NULL) { - err = -ENOMEM; - goto err; - } + if (!qdisc) + return -ENOMEM; + priv->qdiscs[i] = qdisc; qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } @@ -148,7 +145,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) priv->hw_owned = 1; err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); if (err) - goto err; + return err; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -162,10 +159,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) sch->flags |= TCQ_F_MQROOT; return 0; - -err: - mqprio_destroy(sch); - return err; } static void mqprio_attach(struct Qdisc *sch) diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 9ffbb025b37e..e7839a0d0eaa 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -25,7 +25,7 @@ #include <linux/skbuff.h> #include <net/netlink.h> #include <net/pkt_sched.h> - +#include <net/pkt_cls.h> struct multiq_sched_data { u16 bands; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bcfadfdea8e0..c8bb62a1e744 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -626,7 +626,7 @@ deliver: * If it's at ingress let's pretend the delay is * from the network (tstamp will be updated). */ - if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) + if (skb->tc_redirected && skb->tc_from_ingress) skb->tstamp = 0; #endif diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 8f575899adfa..d4d7db267b6e 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -20,7 +20,7 @@ #include <linux/skbuff.h> #include <net/netlink.h> #include <net/pkt_sched.h> - +#include <net/pkt_cls.h> struct prio_sched_data { int bands; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 20a350bd1b1d..fe6963d21519 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -25,6 +25,7 @@ #include <linux/jhash.h> #include <net/ip.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/inet_ecn.h> /* diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 7f195ed4d568..42e8c8615e65 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -23,6 +23,7 @@ #include <linux/vmalloc.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/red.h> @@ -742,9 +743,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor); q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows); if (!q->ht || !q->slots) { - sfq_destroy(sch); + /* Note: sfq_destroy() will be called by our caller */ return -ENOMEM; } + for (i = 0; i < q->divisor; i++) q->ht[i] = SFQ_EMPTY_SLOT; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index b0196366d58d..9fe6b427afed 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -401,8 +401,8 @@ static int teql_master_close(struct net_device *dev) return 0; } -static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static void teql_master_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct teql_master *m = netdev_priv(dev); @@ -410,7 +410,6 @@ static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev, stats->tx_bytes = m->tx_bytes; stats->tx_errors = m->tx_errors; stats->tx_dropped = m->tx_dropped; - return stats; } static int teql_master_mtu(struct net_device *dev, int new_mtu) |