diff options
author | David S. Miller <davem@davemloft.net> | 2015-09-18 06:09:07 +0200 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-09-18 06:09:07 +0200 |
commit | 41a9802fd87316ac736182e2debc579e987c8311 (patch) | |
tree | 4a74088c6a959041fd267fcf03466ca7eea355e1 | |
parent | net: only check perm protocol when register proto (diff) | |
parent | bpf: add bpf_redirect() helper (diff) | |
download | linux-41a9802fd87316ac736182e2debc579e987c8311.tar.xz linux-41a9802fd87316ac736182e2debc579e987c8311.zip |
Merge branch 'bpf_avoid_clone'
Alexei Starovoitov says:
====================
bpf: performance improvements
v1->v2: dropped redundant iff_up check in patch 2
At plumbers we discussed different options on how to get rid of skb_clone
from bpf_clone_redirect(), the patch 2 implements the best option.
Patch 1 adds 'integrated exts' to cls_bpf to improve performance by
combining simple actions into bpf classifier.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/sch_generic.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 9 | ||||
-rw-r--r-- | include/uapi/linux/pkt_cls.h | 4 | ||||
-rw-r--r-- | net/core/dev.c | 8 | ||||
-rw-r--r-- | net/core/filter.c | 58 | ||||
-rw-r--r-- | net/sched/act_bpf.c | 1 | ||||
-rw-r--r-- | net/sched/cls_bpf.c | 61 | ||||
-rw-r--r-- | samples/bpf/bpf_helpers.h | 4 | ||||
-rw-r--r-- | samples/bpf/tcbpf1_kern.c | 24 |
9 files changed, 159 insertions, 13 deletions
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 444faa89a55f..4c79ce8c1f92 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -251,7 +251,7 @@ struct tcf_proto { struct qdisc_skb_cb { unsigned int pkt_len; u16 slave_dev_queue_mapping; - u16 _pad; + u16 tc_classid; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; }; @@ -402,6 +402,7 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); bool tcf_destroy(struct tcf_proto *tp, bool force); void tcf_destroy_chain(struct tcf_proto __rcu **fl); +int skb_do_redirect(struct sk_buff *); /* Reset all TX qdiscs greater then index of a device. */ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 92a48e2d5461..4ec0b5488294 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -272,6 +272,14 @@ enum bpf_func_id { BPF_FUNC_skb_get_tunnel_key, BPF_FUNC_skb_set_tunnel_key, BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ + /** + * bpf_redirect(ifindex, flags) - redirect to another netdev + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: TC_ACT_REDIRECT + */ + BPF_FUNC_redirect, __BPF_FUNC_MAX_ID, }; @@ -293,6 +301,7 @@ struct __sk_buff { __u32 tc_index; __u32 cb[5]; __u32 hash; + __u32 tc_classid; }; struct bpf_tunnel_key { diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 4f0d1bc3647d..439873775d49 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -87,6 +87,7 @@ enum { #define TC_ACT_STOLEN 4 #define TC_ACT_QUEUED 5 #define TC_ACT_REPEAT 6 +#define TC_ACT_REDIRECT 7 #define TC_ACT_JUMP 0x10000000 /* Action type identifiers*/ @@ -373,6 +374,8 @@ enum { /* BPF classifier */ +#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0) + enum { TCA_BPF_UNSPEC, TCA_BPF_ACT, @@ -382,6 +385,7 @@ enum { TCA_BPF_OPS, TCA_BPF_FD, TCA_BPF_NAME, + TCA_BPF_FLAGS, __TCA_BPF_MAX, }; diff --git a/net/core/dev.c b/net/core/dev.c index 00dccfac8939..ee0d6286f934 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3670,6 +3670,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, case TC_ACT_QUEUED: kfree_skb(skb); return NULL; + case TC_ACT_REDIRECT: + /* skb_mac_header check was done by cls/act_bpf, so + * we can safely push the L2 header back before + * redirecting to another netdev + */ + __skb_push(skb, skb->mac_len); + skb_do_redirect(skb); + return NULL; default: break; } diff --git a/net/core/filter.c b/net/core/filter.c index 13079f03902e..da3f3d94d6e9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1427,6 +1427,48 @@ const struct bpf_func_proto bpf_clone_redirect_proto = { .arg3_type = ARG_ANYTHING, }; +struct redirect_info { + u32 ifindex; + u32 flags; +}; + +static DEFINE_PER_CPU(struct redirect_info, redirect_info); +static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + ri->ifindex = ifindex; + ri->flags = flags; + return TC_ACT_REDIRECT; +} + +int skb_do_redirect(struct sk_buff *skb) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct net_device *dev; + + dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); + ri->ifindex = 0; + if (unlikely(!dev)) { + kfree_skb(skb); + return -EINVAL; + } + + if (BPF_IS_REDIRECT_INGRESS(ri->flags)) + return dev_forward_skb(dev, skb); + + skb->dev = dev; + return dev_queue_xmit(skb); +} + +const struct bpf_func_proto bpf_redirect_proto = { + .func = bpf_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { return task_get_classid((struct sk_buff *) (unsigned long) r1); @@ -1607,6 +1649,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: return bpf_get_skb_set_tunnel_key_proto(); + case BPF_FUNC_redirect: + return &bpf_redirect_proto; default: return sk_filter_func_proto(func_id); } @@ -1632,6 +1676,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, cb[0]) ... @@ -1648,6 +1695,9 @@ static bool sk_filter_is_valid_access(int off, int size, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return type == BPF_WRITE ? true : false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, mark): @@ -1760,6 +1810,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); break; + case offsetof(struct __sk_buff, tc_classid): + ctx_off -= offsetof(struct __sk_buff, tc_classid); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); + WARN_ON(type != BPF_WRITE); + *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); + break; + case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 559bfa011bda..0bc6f912f870 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -72,6 +72,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, case TC_ACT_PIPE: case TC_ACT_RECLASSIFY: case TC_ACT_OK: + case TC_ACT_REDIRECT: action = filter_res; break; case TC_ACT_SHOT: diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index e5168f8b9640..0590816ab7b0 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -38,6 +38,7 @@ struct cls_bpf_prog { struct bpf_prog *filter; struct list_head link; struct tcf_result res; + bool exts_integrated; struct tcf_exts exts; u32 handle; union { @@ -52,6 +53,7 @@ struct cls_bpf_prog { static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, + [TCA_BPF_FLAGS] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, @@ -59,6 +61,23 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, }; +static int cls_bpf_exec_opcode(int code) +{ + switch (code) { + case TC_ACT_OK: + case TC_ACT_RECLASSIFY: + case TC_ACT_SHOT: + case TC_ACT_PIPE: + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + case TC_ACT_REDIRECT: + case TC_ACT_UNSPEC: + return code; + default: + return TC_ACT_UNSPEC; + } +} + static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { @@ -79,6 +98,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, list_for_each_entry_rcu(prog, &head->plist, link) { int filter_res; + qdisc_skb_cb(skb)->tc_classid = prog->res.classid; + if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); @@ -88,6 +109,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, filter_res = BPF_PROG_RUN(prog->filter, skb); } + if (prog->exts_integrated) { + res->class = prog->res.class; + res->classid = qdisc_skb_cb(skb)->tc_classid; + + ret = cls_bpf_exec_opcode(filter_res); + if (ret == TC_ACT_UNSPEC) + continue; + break; + } + if (filter_res == 0) continue; @@ -195,8 +226,7 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) return ret; } -static int cls_bpf_prog_from_ops(struct nlattr **tb, - struct cls_bpf_prog *prog, u32 classid) +static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) { struct sock_filter *bpf_ops; struct sock_fprog_kern fprog_tmp; @@ -230,15 +260,13 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, prog->bpf_ops = bpf_ops; prog->bpf_num_ops = bpf_num_ops; prog->bpf_name = NULL; - prog->filter = fp; - prog->res.classid = classid; return 0; } -static int cls_bpf_prog_from_efd(struct nlattr **tb, - struct cls_bpf_prog *prog, u32 classid) +static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, + const struct tcf_proto *tp) { struct bpf_prog *fp; char *name = NULL; @@ -268,9 +296,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, prog->bpf_ops = NULL; prog->bpf_fd = bpf_fd; prog->bpf_name = name; - prog->filter = fp; - prog->res.classid = classid; return 0; } @@ -280,8 +306,8 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { + bool is_bpf, is_ebpf, have_exts = false; struct tcf_exts exts; - bool is_bpf, is_ebpf; u32 classid; int ret; @@ -298,9 +324,22 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, return ret; classid = nla_get_u32(tb[TCA_BPF_CLASSID]); + if (tb[TCA_BPF_FLAGS]) { + u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); + + if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { + tcf_exts_destroy(&exts); + return -EINVAL; + } + + have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; + } + + prog->res.classid = classid; + prog->exts_integrated = have_exts; - ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) : - cls_bpf_prog_from_efd(tb, prog, classid); + ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : + cls_bpf_prog_from_efd(tb, prog, tp); if (ret < 0) { tcf_exts_destroy(&exts); return ret; diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 3a44d3a272af..21aa1b44c30c 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -33,6 +33,10 @@ static int (*bpf_get_current_comm)(void *buf, int buf_size) = (void *) BPF_FUNC_get_current_comm; static int (*bpf_perf_event_read)(void *map, int index) = (void *) BPF_FUNC_perf_event_read; +static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = + (void *) BPF_FUNC_clone_redirect; +static int (*bpf_redirect)(int ifindex, int flags) = + (void *) BPF_FUNC_redirect; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c index 9bfb2eb34563..fa051b3d53ee 100644 --- a/samples/bpf/tcbpf1_kern.c +++ b/samples/bpf/tcbpf1_kern.c @@ -5,7 +5,7 @@ #include <uapi/linux/in.h> #include <uapi/linux/tcp.h> #include <uapi/linux/filter.h> - +#include <uapi/linux/pkt_cls.h> #include "bpf_helpers.h" /* compiler workaround */ @@ -64,4 +64,26 @@ int bpf_prog1(struct __sk_buff *skb) return 0; } +SEC("redirect_xmit") +int _redirect_xmit(struct __sk_buff *skb) +{ + return bpf_redirect(skb->ifindex + 1, 0); +} +SEC("redirect_recv") +int _redirect_recv(struct __sk_buff *skb) +{ + return bpf_redirect(skb->ifindex + 1, 1); +} +SEC("clone_redirect_xmit") +int _clone_redirect_xmit(struct __sk_buff *skb) +{ + bpf_clone_redirect(skb, skb->ifindex + 1, 0); + return TC_ACT_SHOT; +} +SEC("clone_redirect_recv") +int _clone_redirect_recv(struct __sk_buff *skb) +{ + bpf_clone_redirect(skb, skb->ifindex + 1, 1); + return TC_ACT_SHOT; +} char _license[] SEC("license") = "GPL"; |