summaryrefslogtreecommitdiffstats
path: root/net/sched/act_bpf.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-11 05:01:30 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-11 05:01:30 +0100
commitc5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb (patch)
tree9830baf38832769e1cf621708889111bbe3c93df /net/sched/act_bpf.c
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jik... (diff)
parentcrypto: fix af_alg_make_sg() conversion to iov_iter (diff)
downloadlinux-c5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb.tar.xz
linux-c5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) More iov_iter conversion work from Al Viro. [ The "crypto: switch af_alg_make_sg() to iov_iter" commit was wrong, and this pull actually adds an extra commit on top of the branch I'm pulling to fix that up, so that the pre-merge state is ok. - Linus ] 2) Various optimizations to the ipv4 forwarding information base trie lookup implementation. From Alexander Duyck. 3) Remove sock_iocb altogether, from CHristoph Hellwig. 4) Allow congestion control algorithm selection via routing metrics. From Daniel Borkmann. 5) Make ipv4 uncached route list per-cpu, from Eric Dumazet. 6) Handle rfs hash collisions more gracefully, also from Eric Dumazet. 7) Add xmit_more support to r8169, e1000, and e1000e drivers. From Florian Westphal. 8) Transparent Ethernet Bridging support for GRO, from Jesse Gross. 9) Add BPF packet actions to packet scheduler, from Jiri Pirko. 10) Add support for uniqu flow IDs to openvswitch, from Joe Stringer. 11) New NetCP ethernet driver, from Muralidharan Karicheri and Wingman Kwok. 12) More sanely handle out-of-window dupacks, which can result in serious ACK storms. From Neal Cardwell. 13) Various rhashtable bug fixes and enhancements, from Herbert Xu, Patrick McHardy, and Thomas Graf. 14) Support xmit_more in be2net, from Sathya Perla. 15) Group Policy extensions for vxlan, from Thomas Graf. 16) Remove Checksum Offload support for vxlan, from Tom Herbert. 17) Like ipv4, support lockless transmit over ipv6 UDP sockets. From Vlad Yasevich. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1494+1 commits) crypto: fix af_alg_make_sg() conversion to iov_iter ipv4: Namespecify TCP PMTU mechanism i40e: Fix for stats init function call in Rx setup tcp: don't include Fast Open option in SYN-ACK on pure SYN-data openvswitch: Only set TUNNEL_VXLAN_OPT if VXLAN-GBP metadata is set ipv6: Make __ipv6_select_ident static ipv6: Fix fragment id assignment on LE arches. bridge: Fix inability to add non-vlan fdb entry net: Mellanox: Delete unnecessary checks before the function call "vunmap" cxgb4: Add support in cxgb4 to get expansion rom version via ethtool ethtool: rename reserved1 memeber in ethtool_drvinfo for expansion ROM version net: dsa: Remove redundant phy_attach() IB/mlx4: Reset flow support for IB kernel ULPs IB/mlx4: Always use the correct port for mirrored multicast attachments net/bonding: Fix potential bad memory access during bonding events tipc: remove tipc_snprintf tipc: nl compat add noop and remove legacy nl framework tipc: convert legacy nl stats show to nl compat tipc: convert legacy nl net id get to nl compat tipc: convert legacy nl net id set to nl compat ...
Diffstat (limited to 'net/sched/act_bpf.c')
-rw-r--r--net/sched/act_bpf.c208
1 files changed, 208 insertions, 0 deletions
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
new file mode 100644
index 000000000000..82c5d7fc1988
--- /dev/null
+++ b/net/sched/act_bpf.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/filter.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_bpf.h>
+#include <net/tc_act/tc_bpf.h>
+
+#define BPF_TAB_MASK 15
+
+static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_bpf *b = a->priv;
+ int action;
+ int filter_res;
+
+ spin_lock(&b->tcf_lock);
+ b->tcf_tm.lastuse = jiffies;
+ bstats_update(&b->tcf_bstats, skb);
+ action = b->tcf_action;
+
+ filter_res = BPF_PROG_RUN(b->filter, skb);
+ if (filter_res == 0) {
+ /* Return code 0 from the BPF program
+ * is being interpreted as a drop here.
+ */
+ action = TC_ACT_SHOT;
+ b->tcf_qstats.drops++;
+ }
+
+ spin_unlock(&b->tcf_lock);
+ return action;
+}
+
+static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *tp = skb_tail_pointer(skb);
+ struct tcf_bpf *b = a->priv;
+ struct tc_act_bpf opt = {
+ .index = b->tcf_index,
+ .refcnt = b->tcf_refcnt - ref,
+ .bindcnt = b->tcf_bindcnt - bind,
+ .action = b->tcf_action,
+ };
+ struct tcf_t t;
+ struct nlattr *nla;
+
+ if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
+
+ if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops))
+ goto nla_put_failure;
+
+ nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops *
+ sizeof(struct sock_filter));
+ if (!nla)
+ goto nla_put_failure;
+
+ memcpy(nla_data(nla), b->bpf_ops, nla_len(nla));
+
+ t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(b->tcf_tm.expires);
+ if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t))
+ goto nla_put_failure;
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, tp);
+ return -1;
+}
+
+static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
+ [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) },
+ [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 },
+ [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY,
+ .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
+};
+
+static int tcf_bpf_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action *a,
+ int ovr, int bind)
+{
+ struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+ struct tc_act_bpf *parm;
+ struct tcf_bpf *b;
+ u16 bpf_size, bpf_num_ops;
+ struct sock_filter *bpf_ops;
+ struct sock_fprog_kern tmp;
+ struct bpf_prog *fp;
+ int ret;
+
+ if (!nla)
+ return -EINVAL;
+
+ ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[TCA_ACT_BPF_PARMS] ||
+ !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS])
+ return -EINVAL;
+ parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
+
+ bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
+ if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
+ return -EINVAL;
+
+ bpf_size = bpf_num_ops * sizeof(*bpf_ops);
+ if (bpf_size != nla_len(tb[TCA_ACT_BPF_OPS]))
+ return -EINVAL;
+
+ bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
+ if (!bpf_ops)
+ return -ENOMEM;
+
+ memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
+
+ tmp.len = bpf_num_ops;
+ tmp.filter = bpf_ops;
+
+ ret = bpf_prog_create(&fp, &tmp);
+ if (ret)
+ goto free_bpf_ops;
+
+ if (!tcf_hash_check(parm->index, a, bind)) {
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind);
+ if (ret)
+ goto destroy_fp;
+
+ ret = ACT_P_CREATED;
+ } else {
+ if (bind)
+ goto destroy_fp;
+ tcf_hash_release(a, bind);
+ if (!ovr) {
+ ret = -EEXIST;
+ goto destroy_fp;
+ }
+ }
+
+ b = to_bpf(a);
+ spin_lock_bh(&b->tcf_lock);
+ b->tcf_action = parm->action;
+ b->bpf_num_ops = bpf_num_ops;
+ b->bpf_ops = bpf_ops;
+ b->filter = fp;
+ spin_unlock_bh(&b->tcf_lock);
+
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(a);
+ return ret;
+
+destroy_fp:
+ bpf_prog_destroy(fp);
+free_bpf_ops:
+ kfree(bpf_ops);
+ return ret;
+}
+
+static void tcf_bpf_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_bpf *b = a->priv;
+
+ bpf_prog_destroy(b->filter);
+}
+
+static struct tc_action_ops act_bpf_ops = {
+ .kind = "bpf",
+ .type = TCA_ACT_BPF,
+ .owner = THIS_MODULE,
+ .act = tcf_bpf,
+ .dump = tcf_bpf_dump,
+ .cleanup = tcf_bpf_cleanup,
+ .init = tcf_bpf_init,
+};
+
+static int __init bpf_init_module(void)
+{
+ return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK);
+}
+
+static void __exit bpf_cleanup_module(void)
+{
+ tcf_unregister_action(&act_bpf_ops);
+}
+
+module_init(bpf_init_module);
+module_exit(bpf_cleanup_module);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("TC BPF based action");
+MODULE_LICENSE("GPL v2");