summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2018-05-11 00:10:58 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2018-05-11 00:10:59 +0200
commitff1f56d9877f82d1fdac64e32827a570a54143bd (patch)
tree2bab4dd9bff8b85508549c97886d92d82d7c22e1
parentbpf, doc: clarification for the meaning of 'id' (diff)
parentsamples/bpf: Add example of ipv4 and ipv6 forwarding in XDP (diff)
downloadlinux-ff1f56d9877f82d1fdac64e32827a570a54143bd.tar.xz
linux-ff1f56d9877f82d1fdac64e32827a570a54143bd.zip
Merge branch 'bpf-fib-lookup-helper'
David Ahern says: ==================== Provide a helper for doing a FIB and neighbor lookup in the kernel tables from an XDP program. The helper provides a fastpath for forwarding packets. If the packet is a local delivery or for any reason is not a simple lookup and forward, the packet is expected to continue up the stack for full processing. The response from a FIB and neighbor lookup is either the egress index with the bpf_fib_lookup struct filled in with dmac and gateway or 0 meaning the packet should continue up the stack. In time we can revisit this to return the FIB lookup result errno if it is one of the special RTN_'s such as RTN_BLACKHOLE (-EINVAL) so that the XDP programs can do an early drop if desired. Patches 1-6 do some more refactoring to IPv6 with the end goal of extracting a FIB lookup function that aligns with fib_lookup for IPv4, basically returning a fib6_info without creating a dst based entry. Patch 7 adds lookup functions to the ipv6 stub. These are needed since bpf is built into the kernel and ipv6 may not be built or loaded. Patch 8 adds the bpf helper and 9 adds a sample program. v3 - remove ETH_ALEN and in6_addr from uapi header v2 - removed pkt_access from bpf_func_proto as noticed by Daniel - added check in that IPv6 forwarding is enabled - added DaveM's ack on patches 1-7 and 9 based on v1 response and fact that no changes were made to them in v2 v1 - updated commit messages and cover letter - added comment to sample program noting lack of verification on egress device supporting XDP RFC v2 - fixed use of foward helper from cls_act as noted by Daniel - in patch 1 rename fib6_lookup_1 as well for consistency ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--include/net/addrconf.h14
-rw-r--r--include/net/ip6_fib.h21
-rw-r--r--include/trace/events/fib6.h14
-rw-r--r--include/uapi/linux/bpf.h81
-rw-r--r--net/core/filter.c267
-rw-r--r--net/ipv6/addrconf_core.c33
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/fib6_rules.c138
-rw-r--r--net/ipv6/ip6_fib.c21
-rw-r--r--net/ipv6/route.c76
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/xdp_fwd_kern.c115
-rw-r--r--samples/bpf/xdp_fwd_user.c136
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h3
14 files changed, 854 insertions, 75 deletions
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 8312cc25a3af..ff766ab207e0 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -223,6 +223,20 @@ struct ipv6_stub {
const struct in6_addr *addr);
int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
struct dst_entry **dst, struct flowi6 *fl6);
+
+ struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
+ struct fib6_info *(*fib6_lookup)(struct net *net, int oif,
+ struct flowi6 *fl6, int flags);
+ struct fib6_info *(*fib6_table_lookup)(struct net *net,
+ struct fib6_table *table,
+ int oif, struct flowi6 *fl6,
+ int flags);
+ struct fib6_info *(*fib6_multipath_select)(const struct net *net,
+ struct fib6_info *f6i,
+ struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
+ int strict);
+
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index a3ec08d05756..cc70f6da8462 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -376,9 +376,24 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
const struct sk_buff *skb,
int flags, pol_lookup_t lookup);
-struct fib6_node *fib6_lookup(struct fib6_node *root,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr);
+/* called with rcu lock held; can return error pointer
+ * caller needs to select path
+ */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ int flags);
+
+/* called with rcu lock held; caller needs to select path */
+struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, int strict);
+
+struct fib6_info *fib6_multipath_select(const struct net *net,
+ struct fib6_info *match,
+ struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb, int strict);
+
+struct fib6_node *fib6_node_lookup(struct fib6_node *root,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index 7e8d48a81b91..1b8d951e3c12 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -12,10 +12,10 @@
TRACE_EVENT(fib6_table_lookup,
- TP_PROTO(const struct net *net, const struct rt6_info *rt,
+ TP_PROTO(const struct net *net, const struct fib6_info *f6i,
struct fib6_table *table, const struct flowi6 *flp),
- TP_ARGS(net, rt, table, flp),
+ TP_ARGS(net, f6i, table, flp),
TP_STRUCT__entry(
__field( u32, tb_id )
@@ -48,20 +48,20 @@ TRACE_EVENT(fib6_table_lookup,
in6 = (struct in6_addr *)__entry->dst;
*in6 = flp->daddr;
- if (rt->rt6i_idev) {
- __assign_str(name, rt->rt6i_idev->dev->name);
+ if (f6i->fib6_nh.nh_dev) {
+ __assign_str(name, f6i->fib6_nh.nh_dev);
} else {
__assign_str(name, "");
}
- if (rt == net->ipv6.ip6_null_entry) {
+ if (f6i == net->ipv6.fib6_null_entry) {
struct in6_addr in6_zero = {};
in6 = (struct in6_addr *)__entry->gw;
*in6 = in6_zero;
- } else if (rt) {
+ } else if (f6i) {
in6 = (struct in6_addr *)__entry->gw;
- *in6 = rt->rt6i_gateway;
+ *in6 = f6i->fib6_nh.nh_gw;
}
),
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d615c777b573..02e4112510f8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1828,6 +1828,33 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * Description
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ * If successful (ie., FIB lookup shows forwarding and nexthop
+ * is resolved), the nexthop address is returned in ipv4_dst,
+ * ipv6_dst or mpls_out based on family, smac is set to mac
+ * address of egress device, dmac is set to nexthop mac address,
+ * rt_metric is set to metric from route.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be one or more BPF_FIB_LOOKUP_ flags:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT** means do a direct table lookup vs
+ * full lookup using FIB rules
+ * **BPF_FIB_LOOKUP_OUTPUT** means do lookup from an egress
+ * perspective (default is ingress)
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+ *
+ * Return
+ * Egress device index on success, 0 if packet needs to continue
+ * up the stack for further processing or a negative error in case
+ * of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -1898,7 +1925,8 @@ union bpf_attr {
FN(xdp_adjust_tail), \
FN(skb_get_xfrm_state), \
FN(get_stack), \
- FN(skb_load_bytes_relative),
+ FN(skb_load_bytes_relative), \
+ FN(fib_lookup),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2321,4 +2349,55 @@ struct bpf_raw_tracepoint_args {
__u64 args[0];
};
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
+
+struct bpf_fib_lookup {
+ /* input */
+ __u8 family; /* network family, AF_INET, AF_INET6, AF_MPLS */
+
+ /* set if lookup is to consider L4 data - e.g., FIB rules */
+ __u8 l4_protocol;
+ __be16 sport;
+ __be16 dport;
+
+ /* total length of packet from network header - used for MTU check */
+ __u16 tot_len;
+ __u32 ifindex; /* L3 device index for lookup */
+
+ union {
+ /* inputs to lookup */
+ __u8 tos; /* AF_INET */
+ __be32 flowlabel; /* AF_INET6 */
+
+ /* output: metric of fib result */
+ __u32 rt_metric;
+ };
+
+ union {
+ __be32 mpls_in;
+ __be32 ipv4_src;
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ };
+
+ /* input to bpf_fib_lookup, *dst is destination address.
+ * output: bpf_fib_lookup sets to gateway address
+ */
+ union {
+ /* return for MPLS lookups */
+ __be32 mpls_out[4]; /* support up to 4 labels */
+ __be32 ipv4_dst;
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/core/filter.c b/net/core/filter.c
index 0baa715e4699..ca60d2872da4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -60,6 +60,10 @@
#include <net/xfrm.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
+#include <linux/inetdevice.h>
+#include <net/ip_fib.h>
+#include <net/flow.h>
+#include <net/arp.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -4032,6 +4036,265 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
};
#endif
+#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
+static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
+ const struct neighbour *neigh,
+ const struct net_device *dev)
+{
+ memcpy(params->dmac, neigh->ha, ETH_ALEN);
+ memcpy(params->smac, dev->dev_addr, ETH_ALEN);
+ params->h_vlan_TCI = 0;
+ params->h_vlan_proto = 0;
+
+ return dev->ifindex;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_INET)
+static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+ u32 flags)
+{
+ struct in_device *in_dev;
+ struct neighbour *neigh;
+ struct net_device *dev;
+ struct fib_result res;
+ struct fib_nh *nh;
+ struct flowi4 fl4;
+ int err;
+
+ dev = dev_get_by_index_rcu(net, params->ifindex);
+ if (unlikely(!dev))
+ return -ENODEV;
+
+ /* verify forwarding is enabled on this interface */
+ in_dev = __in_dev_get_rcu(dev);
+ if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
+ return 0;
+
+ if (flags & BPF_FIB_LOOKUP_OUTPUT) {
+ fl4.flowi4_iif = 1;
+ fl4.flowi4_oif = params->ifindex;
+ } else {
+ fl4.flowi4_iif = params->ifindex;
+ fl4.flowi4_oif = 0;
+ }
+ fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
+ fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+ fl4.flowi4_flags = 0;
+
+ fl4.flowi4_proto = params->l4_protocol;
+ fl4.daddr = params->ipv4_dst;
+ fl4.saddr = params->ipv4_src;
+ fl4.fl4_sport = params->sport;
+ fl4.fl4_dport = params->dport;
+
+ if (flags & BPF_FIB_LOOKUP_DIRECT) {
+ u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+ struct fib_table *tb;
+
+ tb = fib_get_table(net, tbid);
+ if (unlikely(!tb))
+ return 0;
+
+ err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
+ } else {
+ fl4.flowi4_mark = 0;
+ fl4.flowi4_secid = 0;
+ fl4.flowi4_tun_key.tun_id = 0;
+ fl4.flowi4_uid = sock_net_uid(net, NULL);
+
+ err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
+ }
+
+ if (err || res.type != RTN_UNICAST)
+ return 0;
+
+ if (res.fi->fib_nhs > 1)
+ fib_select_path(net, &res, &fl4, NULL);
+
+ nh = &res.fi->fib_nh[res.nh_sel];
+
+ /* do not handle lwt encaps right now */
+ if (nh->nh_lwtstate)
+ return 0;
+
+ dev = nh->nh_dev;
+ if (unlikely(!dev))
+ return 0;
+
+ if (nh->nh_gw)
+ params->ipv4_dst = nh->nh_gw;
+
+ params->rt_metric = res.fi->fib_priority;
+
+ /* xdp and cls_bpf programs are run in RCU-bh so
+ * rcu_read_lock_bh is not needed here
+ */
+ neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
+ if (neigh)
+ return bpf_fib_set_fwd_params(params, neigh, dev);
+
+ return 0;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
+ u32 flags)
+{
+ struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
+ struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
+ struct neighbour *neigh;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct fib6_info *f6i;
+ struct flowi6 fl6;
+ int strict = 0;
+ int oif;
+
+ /* link local addresses are never forwarded */
+ if (rt6_need_strict(dst) || rt6_need_strict(src))
+ return 0;
+
+ dev = dev_get_by_index_rcu(net, params->ifindex);
+ if (unlikely(!dev))
+ return -ENODEV;
+
+ idev = __in6_dev_get_safely(dev);
+ if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
+ return 0;
+
+ if (flags & BPF_FIB_LOOKUP_OUTPUT) {
+ fl6.flowi6_iif = 1;
+ oif = fl6.flowi6_oif = params->ifindex;
+ } else {
+ oif = fl6.flowi6_iif = params->ifindex;
+ fl6.flowi6_oif = 0;
+ strict = RT6_LOOKUP_F_HAS_SADDR;
+ }
+ fl6.flowlabel = params->flowlabel;
+ fl6.flowi6_scope = 0;
+ fl6.flowi6_flags = 0;
+ fl6.mp_hash = 0;
+
+ fl6.flowi6_proto = params->l4_protocol;
+ fl6.daddr = *dst;
+ fl6.saddr = *src;
+ fl6.fl6_sport = params->sport;
+ fl6.fl6_dport = params->dport;
+
+ if (flags & BPF_FIB_LOOKUP_DIRECT) {
+ u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
+ struct fib6_table *tb;
+
+ tb = ipv6_stub->fib6_get_table(net, tbid);
+ if (unlikely(!tb))
+ return 0;
+
+ f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
+ } else {
+ fl6.flowi6_mark = 0;
+ fl6.flowi6_secid = 0;
+ fl6.flowi6_tun_key.tun_id = 0;
+ fl6.flowi6_uid = sock_net_uid(net, NULL);
+
+ f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
+ }
+
+ if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
+ return 0;
+
+ if (unlikely(f6i->fib6_flags & RTF_REJECT ||
+ f6i->fib6_type != RTN_UNICAST))
+ return 0;
+
+ if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
+ f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
+ fl6.flowi6_oif, NULL,
+ strict);
+
+ if (f6i->fib6_nh.nh_lwtstate)
+ return 0;
+
+ if (f6i->fib6_flags & RTF_GATEWAY)
+ *dst = f6i->fib6_nh.nh_gw;
+
+ dev = f6i->fib6_nh.nh_dev;
+ params->rt_metric = f6i->fib6_metric;
+
+ /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
+ * not needed here. Can not use __ipv6_neigh_lookup_noref here
+ * because we need to get nd_tbl via the stub
+ */
+ neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
+ ndisc_hashfn, dst, dev);
+ if (neigh)
+ return bpf_fib_set_fwd_params(params, neigh, dev);
+
+ return 0;
+}
+#endif
+
+BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
+ struct bpf_fib_lookup *, params, int, plen, u32, flags)
+{
+ if (plen < sizeof(*params))
+ return -EINVAL;
+
+ switch (params->family) {
+#if IS_ENABLED(CONFIG_INET)
+ case AF_INET:
+ return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
+ flags);
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
+ flags);
+#endif
+ }
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
+ .func = bpf_xdp_fib_lookup,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
+ struct bpf_fib_lookup *, params, int, plen, u32, flags)
+{
+ if (plen < sizeof(*params))
+ return -EINVAL;
+
+ switch (params->family) {
+#if IS_ENABLED(CONFIG_INET)
+ case AF_INET:
+ return bpf_ipv4_fib_lookup(dev_net(skb->dev), params, flags);
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ return bpf_ipv6_fib_lookup(dev_net(skb->dev), params, flags);
+#endif
+ }
+ return -ENOTSUPP;
+}
+
+static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
+ .func = bpf_skb_fib_lookup,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -4181,6 +4444,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_get_xfrm_state:
return &bpf_skb_get_xfrm_state_proto;
#endif
+ case BPF_FUNC_fib_lookup:
+ return &bpf_skb_fib_lookup_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -4206,6 +4471,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_redirect_map_proto;
case BPF_FUNC_xdp_adjust_tail:
return &bpf_xdp_adjust_tail_proto;
+ case BPF_FUNC_fib_lookup:
+ return &bpf_xdp_fib_lookup_proto;
default:
return bpf_base_func_proto(func_id);
}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 32b564dfd02a..2fe754fd4f5e 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -134,8 +134,39 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
return -EAFNOSUPPORT;
}
+static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
+{
+ return NULL;
+}
+
+static struct fib6_info *
+eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, int flags)
+{
+ return NULL;
+}
+
+static struct fib6_info *
+eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ int flags)
+{
+ return NULL;
+}
+
+static struct fib6_info *
+eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
+ struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb, int strict)
+{
+ return f6i;
+}
+
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
- .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
+ .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
+ .fib6_get_table = eafnosupport_fib6_get_table,
+ .fib6_table_lookup = eafnosupport_fib6_table_lookup,
+ .fib6_lookup = eafnosupport_fib6_lookup,
+ .fib6_multipath_select = eafnosupport_fib6_multipath_select,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d0af96e0d109..50de8b0d4f70 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -889,7 +889,11 @@ static struct pernet_operations inet6_net_ops = {
static const struct ipv6_stub ipv6_stub_impl = {
.ipv6_sock_mc_join = ipv6_sock_mc_join,
.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
- .ipv6_dst_lookup = ip6_dst_lookup,
+ .ipv6_dst_lookup = ip6_dst_lookup,
+ .fib6_get_table = fib6_get_table,
+ .fib6_table_lookup = fib6_table_lookup,
+ .fib6_lookup = fib6_lookup,
+ .fib6_multipath_select = fib6_multipath_select,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 6547fc6491a6..f590446595d8 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -60,6 +60,39 @@ unsigned int fib6_rules_seq_read(struct net *net)
return fib_rules_seq_read(net, AF_INET6);
}
+/* called with rcu lock held; no reference taken on fib6_info */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ int flags)
+{
+ struct fib6_info *f6i;
+ int err;
+
+ if (net->ipv6.fib6_has_custom_rules) {
+ struct fib_lookup_arg arg = {
+ .lookup_ptr = fib6_table_lookup,
+ .lookup_data = &oif,
+ .flags = FIB_LOOKUP_NOREF,
+ };
+
+ l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
+ err = fib_rules_lookup(net->ipv6.fib6_rules_ops,
+ flowi6_to_flowi(fl6), flags, &arg);
+ if (err)
+ return ERR_PTR(err);
+
+ f6i = arg.result ? : net->ipv6.fib6_null_entry;
+ } else {
+ f6i = fib6_table_lookup(net, net->ipv6.fib6_local_tbl,
+ oif, fl6, flags);
+ if (!f6i || f6i == net->ipv6.fib6_null_entry)
+ f6i = fib6_table_lookup(net, net->ipv6.fib6_main_tbl,
+ oif, fl6, flags);
+ }
+
+ return f6i;
+}
+
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
@@ -96,8 +129,73 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
return &net->ipv6.ip6_null_entry->dst;
}
-static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
- int flags, struct fib_lookup_arg *arg)
+static int fib6_rule_saddr(struct net *net, struct fib_rule *rule, int flags,
+ struct flowi6 *flp6, const struct net_device *dev)
+{
+ struct fib6_rule *r = (struct fib6_rule *)rule;
+
+ /* If we need to find a source address for this traffic,
+ * we check the result if it meets requirement of the rule.
+ */
+ if ((rule->flags & FIB_RULE_FIND_SADDR) &&
+ r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
+ struct in6_addr saddr;
+
+ if (ipv6_dev_get_saddr(net, dev, &flp6->daddr,
+ rt6_flags2srcprefs(flags), &saddr))
+ return -EAGAIN;
+
+ if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen))
+ return -EAGAIN;
+
+ flp6->saddr = saddr;
+ }
+
+ return 0;
+}
+
+static int fib6_rule_action_alt(struct fib_rule *rule, struct flowi *flp,
+ int flags, struct fib_lookup_arg *arg)
+{
+ struct flowi6 *flp6 = &flp->u.ip6;
+ struct net *net = rule->fr_net;
+ struct fib6_table *table;
+ struct fib6_info *f6i;
+ int err = -EAGAIN, *oif;
+ u32 tb_id;
+
+ switch (rule->action) {
+ case FR_ACT_TO_TBL:
+ break;
+ case FR_ACT_UNREACHABLE:
+ return -ENETUNREACH;
+ case FR_ACT_PROHIBIT:
+ return -EACCES;
+ case FR_ACT_BLACKHOLE:
+ default:
+ return -EINVAL;
+ }
+
+ tb_id = fib_rule_get_table(rule, arg);
+ table = fib6_get_table(net, tb_id);
+ if (!table)
+ return -EAGAIN;
+
+ oif = (int *)arg->lookup_data;
+ f6i = fib6_table_lookup(net, table, *oif, flp6, flags);
+ if (f6i != net->ipv6.fib6_null_entry) {
+ err = fib6_rule_saddr(net, rule, flags, flp6,
+ fib6_info_nh_dev(f6i));
+
+ if (likely(!err))
+ arg->result = f6i;
+ }
+
+ return err;
+}
+
+static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+ int flags, struct fib_lookup_arg *arg)
{
struct flowi6 *flp6 = &flp->u.ip6;
struct rt6_info *rt = NULL;
@@ -134,27 +232,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
- struct fib6_rule *r = (struct fib6_rule *)rule;
-
- /*
- * If we need to find a source address for this traffic,
- * we check the result if it meets requirement of the rule.
- */
- if ((rule->flags & FIB_RULE_FIND_SADDR) &&
- r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
- struct in6_addr saddr;
-
- if (ipv6_dev_get_saddr(net,
- ip6_dst_idev(&rt->dst)->dev,
- &flp6->daddr,
- rt6_flags2srcprefs(flags),
- &saddr))
- goto again;
- if (!ipv6_prefix_equal(&saddr, &r->src.addr,
- r->src.plen))
- goto again;
- flp6->saddr = saddr;
- }
+ err = fib6_rule_saddr(net, rule, flags, flp6,
+ ip6_dst_idev(&rt->dst)->dev);
+
+ if (err == -EAGAIN)
+ goto again;
+
err = rt->dst.error;
if (err != -EAGAIN)
goto out;
@@ -172,6 +255,15 @@ out:
return err;
}
+static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
+ int flags, struct fib_lookup_arg *arg)
+{
+ if (arg->lookup_ptr == fib6_table_lookup)
+ return fib6_rule_action_alt(rule, flp, flags, arg);
+
+ return __fib6_rule_action(rule, flp, flags, arg);
+}
+
static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
{
struct rt6_info *rt = (struct rt6_info *) arg->result;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f0a4262a4789..d1dc6017f5a6 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -354,6 +354,13 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
return &rt->dst;
}
+/* called with rcu lock held; no reference taken on fib6_info */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ int flags)
+{
+ return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags);
+}
+
static void __net_init fib6_tables_init(struct net *net)
{
fib6_link_table(net, net->ipv6.fib6_main_tbl);
@@ -1354,8 +1361,8 @@ struct lookup_args {
const struct in6_addr *addr; /* search key */
};
-static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
- struct lookup_args *args)
+static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root,
+ struct lookup_args *args)
{
struct fib6_node *fn;
__be32 dir;
@@ -1400,7 +1407,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
#ifdef CONFIG_IPV6_SUBTREES
if (subtree) {
struct fib6_node *sfn;
- sfn = fib6_lookup_1(subtree, args + 1);
+ sfn = fib6_node_lookup_1(subtree,
+ args + 1);
if (!sfn)
goto backtrack;
fn = sfn;
@@ -1422,8 +1430,9 @@ backtrack:
/* called with rcu_read_lock() held
*/
-struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
- const struct in6_addr *saddr)
+struct fib6_node *fib6_node_lookup(struct fib6_node *root,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
{
struct fib6_node *fn;
struct lookup_args args[] = {
@@ -1442,7 +1451,7 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
}
};
- fn = fib6_lookup_1(root, daddr ? args : args + 1);
+ fn = fib6_node_lookup_1(root, daddr ? args : args + 1);
if (!fn || fn->fn_flags & RTN_TL_ROOT)
fn = root;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index daa3662da0ee..73f9c29a5878 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -419,11 +419,11 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-static struct fib6_info *rt6_multipath_select(const struct net *net,
- struct fib6_info *match,
- struct flowi6 *fl6, int oif,
- const struct sk_buff *skb,
- int strict)
+struct fib6_info *fib6_multipath_select(const struct net *net,
+ struct fib6_info *match,
+ struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
+ int strict)
{
struct fib6_info *sibling, *next_sibling;
@@ -1006,7 +1006,7 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
pn = rcu_dereference(fn->parent);
sn = FIB6_SUBTREE(pn);
if (sn && sn != fn)
- fn = fib6_lookup(sn, NULL, saddr);
+ fn = fib6_node_lookup(sn, NULL, saddr);
else
fn = pn;
if (fn->fn_flags & RTN_RTINFO)
@@ -1059,7 +1059,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
flags &= ~RT6_LOOKUP_F_IFACE;
rcu_read_lock();
- fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+ fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
f6i = rcu_dereference(fn->leaf);
if (!f6i) {
@@ -1068,8 +1068,9 @@ restart:
f6i = rt6_device_match(net, f6i, &fl6->saddr,
fl6->flowi6_oif, flags);
if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
- f6i = rt6_multipath_select(net, f6i, fl6,
- fl6->flowi6_oif, skb, flags);
+ f6i = fib6_multipath_select(net, f6i, fl6,
+ fl6->flowi6_oif, skb,
+ flags);
}
if (f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
@@ -1077,6 +1078,8 @@ restart:
goto restart;
}
+ trace_fib6_table_lookup(net, f6i, table, fl6);
+
/* Search through exception table */
rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
if (rt) {
@@ -1095,8 +1098,6 @@ restart:
rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table, fl6);
-
return rt;
}
@@ -1799,23 +1800,14 @@ void rt6_age_exceptions(struct fib6_info *rt,
rcu_read_unlock_bh();
}
-struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6,
- const struct sk_buff *skb, int flags)
+/* must be called with rcu lock held */
+struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, int strict)
{
struct fib6_node *fn, *saved_fn;
struct fib6_info *f6i;
- struct rt6_info *rt;
- int strict = 0;
- strict |= flags & RT6_LOOKUP_F_IFACE;
- strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
- if (net->ipv6.devconf_all->forwarding == 0)
- strict |= RT6_LOOKUP_F_REACHABLE;
-
- rcu_read_lock();
-
- fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+ fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
@@ -1823,8 +1815,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
redo_rt6_select:
f6i = rt6_select(net, fn, oif, strict);
- if (f6i->fib6_nsiblings)
- f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
if (f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
@@ -1837,11 +1827,34 @@ redo_rt6_select:
}
}
+ trace_fib6_table_lookup(net, f6i, table, fl6);
+
+ return f6i;
+}
+
+struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags)
+{
+ struct fib6_info *f6i;
+ struct rt6_info *rt;
+ int strict = 0;
+
+ strict |= flags & RT6_LOOKUP_F_IFACE;
+ strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
+ if (net->ipv6.devconf_all->forwarding == 0)
+ strict |= RT6_LOOKUP_F_REACHABLE;
+
+ rcu_read_lock();
+
+ f6i = fib6_table_lookup(net, table, oif, fl6, strict);
+ if (f6i->fib6_nsiblings)
+ f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
+
if (f6i == net->ipv6.fib6_null_entry) {
rt = net->ipv6.ip6_null_entry;
rcu_read_unlock();
dst_hold(&rt->dst);
- trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
}
@@ -1852,7 +1865,6 @@ redo_rt6_select:
dst_use_noref(&rt->dst, jiffies);
rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
!(f6i->fib6_flags & RTF_GATEWAY))) {
@@ -1878,9 +1890,7 @@ redo_rt6_select:
dst_hold(&uncached_rt->dst);
}
- trace_fib6_table_lookup(net, uncached_rt, table, fl6);
return uncached_rt;
-
} else {
/* Get a percpu copy */
@@ -1894,7 +1904,7 @@ redo_rt6_select:
local_bh_enable();
rcu_read_unlock();
- trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
+
return pcpu_rt;
}
}
@@ -2425,7 +2435,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
*/
rcu_read_lock();
- fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+ fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
for_each_fib6_node_rt_rcu(fn) {
if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
@@ -2479,7 +2489,7 @@ out:
rcu_read_unlock();
- trace_fib6_table_lookup(net, ret, table, fl6);
+ trace_fib6_table_lookup(net, rt, table, fl6);
return ret;
};
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 8e0c7fb6d7cc..28513d6be1bf 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -46,6 +46,7 @@ hostprogs-y += syscall_tp
hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock
+hostprogs-y += xdp_fwd
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
@@ -100,6 +101,7 @@ syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o
xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o $(LIBBPF) xdpsock_user.o
+xdp_fwd-objs := bpf_load.o $(LIBBPF) xdp_fwd_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -154,6 +156,7 @@ always += syscall_tp_kern.o
always += cpustat_kern.o
always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
+always += xdp_fwd_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -201,6 +204,7 @@ HOSTLOADLIBES_syscall_tp += -lelf
HOSTLOADLIBES_cpustat += -lelf
HOSTLOADLIBES_xdp_adjust_tail += -lelf
HOSTLOADLIBES_xdpsock += -lelf -pthread
+HOSTLOADLIBES_xdp_fwd += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
new file mode 100644
index 000000000000..cdf4fc383cc9
--- /dev/null
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include "bpf_helpers.h"
+
+#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
+
+struct bpf_map_def SEC("maps") tx_port = {
+ .type = BPF_MAP_TYPE_DEVMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 64,
+};
+
+static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct bpf_fib_lookup fib_params;
+ struct ethhdr *eth = data;
+ int out_index;
+ u16 h_proto;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ __builtin_memset(&fib_params, 0, sizeof(fib_params));
+
+ h_proto = eth->h_proto;
+ if (h_proto == htons(ETH_P_IP)) {
+ struct iphdr *iph = data + nh_off;
+
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+
+ fib_params.family = AF_INET;
+ fib_params.tos = iph->tos;
+ fib_params.l4_protocol = iph->protocol;
+ fib_params.sport = 0;
+ fib_params.dport = 0;
+ fib_params.tot_len = ntohs(iph->tot_len);
+ fib_params.ipv4_src = iph->saddr;
+ fib_params.ipv4_dst = iph->daddr;
+ } else if (h_proto == htons(ETH_P_IPV6)) {
+ struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src;
+ struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst;
+ struct ipv6hdr *iph = data + nh_off;
+
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+
+ fib_params.family = AF_INET6;
+ fib_params.flowlabel = *(__be32 *)iph & IPV6_FLOWINFO_MASK;
+ fib_params.l4_protocol = iph->nexthdr;
+ fib_params.sport = 0;
+ fib_params.dport = 0;
+ fib_params.tot_len = ntohs(iph->payload_len);
+ *src = iph->saddr;
+ *dst = iph->daddr;
+ } else {
+ return XDP_PASS;
+ }
+
+ fib_params.ifindex = ctx->ingress_ifindex;
+
+ out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
+
+ /* verify egress index has xdp support
+ * TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with
+ * cannot pass map_type 14 into func bpf_map_lookup_elem#1:
+ * NOTE: without verification that egress index supports XDP
+ * forwarding packets are dropped.
+ */
+ if (out_index > 0) {
+ memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+ memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+ return bpf_redirect_map(&tx_port, out_index, 0);
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp_fwd")
+int xdp_fwd_prog(struct xdp_md *ctx)
+{
+ return xdp_fwd_flags(ctx, 0);
+}
+
+SEC("xdp_fwd_direct")
+int xdp_fwd_direct_prog(struct xdp_md *ctx)
+{
+ return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
new file mode 100644
index 000000000000..9c6606f57126
--- /dev/null
+++ b/samples/bpf/xdp_fwd_user.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <linux/limits.h>
+#include <net/if.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <libgen.h>
+
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "libbpf.h"
+
+
+static int do_attach(int idx, int fd, const char *name)
+{
+ int err;
+
+ err = bpf_set_link_xdp_fd(idx, fd, 0);
+ if (err < 0)
+ printf("ERROR: failed to attach program to %s\n", name);
+
+ return err;
+}
+
+static int do_detach(int idx, const char *name)
+{
+ int err;
+
+ err = bpf_set_link_xdp_fd(idx, -1, 0);
+ if (err < 0)
+ printf("ERROR: failed to detach program from %s\n", name);
+
+ return err;
+}
+
+static void usage(const char *prog)
+{
+ fprintf(stderr,
+ "usage: %s [OPTS] interface-list\n"
+ "\nOPTS:\n"
+ " -d detach program\n"
+ " -D direct table lookups (skip fib rules)\n",
+ prog);
+}
+
+int main(int argc, char **argv)
+{
+ char filename[PATH_MAX];
+ int opt, i, idx, err;
+ int prog_id = 0;
+ int attach = 1;
+ int ret = 0;
+
+ while ((opt = getopt(argc, argv, ":dD")) != -1) {
+ switch (opt) {
+ case 'd':
+ attach = 0;
+ break;
+ case 'D':
+ prog_id = 1;
+ break;
+ default:
+ usage(basename(argv[0]));
+ return 1;
+ }
+ }
+
+ if (optind == argc) {
+ usage(basename(argv[0]));
+ return 1;
+ }
+
+ if (attach) {
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (access(filename, O_RDONLY) < 0) {
+ printf("error accessing file %s: %s\n",
+ filename, strerror(errno));
+ return 1;
+ }
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ if (!prog_fd[prog_id]) {
+ printf("load_bpf_file: %s\n", strerror(errno));
+ return 1;
+ }
+ }
+ if (attach) {
+ for (i = 1; i < 64; ++i)
+ bpf_map_update_elem(map_fd[0], &i, &i, 0);
+ }
+
+ for (i = optind; i < argc; ++i) {
+ idx = if_nametoindex(argv[i]);
+ if (!idx)
+ idx = strtoul(argv[i], NULL, 0);
+
+ if (!idx) {
+ fprintf(stderr, "Invalid arg\n");
+ return 1;
+ }
+ if (!attach) {
+ err = do_detach(idx, argv[i]);
+ if (err)
+ ret = err;
+ } else {
+ err = do_attach(idx, prog_fd[prog_id], argv[i]);
+ if (err)
+ ret = err;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 265f8e0e8ada..2375d06c706b 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -103,6 +103,9 @@ static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
(void *) BPF_FUNC_skb_get_xfrm_state;
static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
(void *) BPF_FUNC_get_stack;
+static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
+ int plen, __u32 flags) =
+ (void *) BPF_FUNC_fib_lookup;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions