diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/bpf/test_run.c | 1 | ||||
-rw-r--r-- | net/core/dev.c | 31 | ||||
-rw-r--r-- | net/core/filter.c | 77 | ||||
-rw-r--r-- | net/core/skbuff.c | 2 |
4 files changed, 102 insertions, 9 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index df672517b4fd..a86e6687026e 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -162,6 +162,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, xdp.data_hard_start = data; xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN; + xdp.data_meta = xdp.data; xdp.data_end = xdp.data + size; retval = bpf_test_run(prog, &xdp, repeat, &duration); diff --git a/net/core/dev.c b/net/core/dev.c index 97abddd9039a..e350c768d4b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3864,8 +3864,8 @@ drop: static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct bpf_prog *xdp_prog) { + u32 metalen, act = XDP_DROP; struct xdp_buff xdp; - u32 act = XDP_DROP; void *orig_data; int hlen, off; u32 mac_len; @@ -3876,8 +3876,25 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, if (skb_cloned(skb)) return XDP_PASS; - if (skb_linearize(skb)) - goto do_drop; + /* XDP packets must be linear and must have sufficient headroom + * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also + * native XDP provides, thus we need to do it here as well. + */ + if (skb_is_nonlinear(skb) || + skb_headroom(skb) < XDP_PACKET_HEADROOM) { + int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); + int troom = skb->tail + skb->data_len - skb->end; + + /* In case we have to go down the path and also linearize, + * then lets do the pskb_expand_head() work just once here. + */ + if (pskb_expand_head(skb, + hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, + troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) + goto do_drop; + if (troom > 0 && __skb_linearize(skb)) + goto do_drop; + } /* The XDP program wants to see the packet starting at the MAC * header. @@ -3885,6 +3902,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, mac_len = skb->data - skb_mac_header(skb); hlen = skb_headlen(skb) + mac_len; xdp.data = skb->data - mac_len; + xdp.data_meta = xdp.data; xdp.data_end = xdp.data + hlen; xdp.data_hard_start = skb->data - skb_headroom(skb); orig_data = xdp.data; @@ -3902,10 +3920,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, case XDP_REDIRECT: case XDP_TX: __skb_push(skb, mac_len); - /* fall through */ + break; case XDP_PASS: + metalen = xdp.data - xdp.data_meta; + if (metalen) + skb_metadata_set(skb, metalen); break; - default: bpf_warn_invalid_xdp_action(act); /* fall through */ @@ -4695,6 +4715,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; diffs |= skb_metadata_dst_cmp(p, skb); + diffs |= skb_metadata_differs(p, skb); if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), skb_mac_header(skb)); diff --git a/net/core/filter.c b/net/core/filter.c index c468e7cfad19..9b6e7e84aafd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2447,14 +2447,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = { .arg3_type = ARG_ANYTHING, }; +static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) +{ + return xdp_data_meta_unsupported(xdp) ? 0 : + xdp->data - xdp->data_meta; +} + BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) { + unsigned long metalen = xdp_get_metalen(xdp); + void *data_start = xdp->data_hard_start + metalen; void *data = xdp->data + offset; - if (unlikely(data < xdp->data_hard_start || + if (unlikely(data < data_start || data > xdp->data_end - ETH_HLEN)) return -EINVAL; + if (metalen) + memmove(xdp->data_meta + offset, + xdp->data_meta, metalen); + xdp->data_meta += offset; xdp->data = data; return 0; @@ -2468,6 +2480,33 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) +{ + void *meta = xdp->data_meta + offset; + unsigned long metalen = xdp->data - meta; + + if (xdp_data_meta_unsupported(xdp)) + return -ENOTSUPP; + if (unlikely(meta < xdp->data_hard_start || + meta > xdp->data)) + return -EINVAL; + if (unlikely((metalen & (sizeof(__u32) - 1)) || + (metalen > 32))) + return -EACCES; + + xdp->data_meta = meta; + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { + .func = bpf_xdp_adjust_meta, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static int __bpf_tx_xdp(struct net_device *dev, struct bpf_map *map, struct xdp_buff *xdp, @@ -2692,7 +2731,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_clone_redirect || func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || - func == bpf_xdp_adjust_head) + func == bpf_xdp_adjust_head || + func == bpf_xdp_adjust_meta) return true; return false; @@ -3288,6 +3328,8 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; + case BPF_FUNC_xdp_adjust_meta: + return &bpf_xdp_adjust_meta_proto; case BPF_FUNC_redirect: return &bpf_xdp_redirect_proto; case BPF_FUNC_redirect_map: @@ -3418,6 +3460,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): if (size != size_default) return false; @@ -3444,6 +3487,7 @@ static bool sk_filter_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; @@ -3468,6 +3512,7 @@ static bool lwt_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, data_meta): return false; } @@ -3586,6 +3631,9 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; + case bpf_ctx_range(struct __sk_buff, data_meta): + info->reg_type = PTR_TO_PACKET_META; + break; case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; @@ -3619,6 +3667,9 @@ static bool xdp_is_valid_access(int off, int size, case offsetof(struct xdp_md, data): info->reg_type = PTR_TO_PACKET; break; + case offsetof(struct xdp_md, data_meta): + info->reg_type = PTR_TO_PACKET_META; + break; case offsetof(struct xdp_md, data_end): info->reg_type = PTR_TO_PACKET_END; break; @@ -3677,6 +3728,12 @@ static bool sk_skb_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + return false; + } + if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, mark): @@ -3689,8 +3746,6 @@ static bool sk_skb_is_valid_access(int off, int size, } switch (off) { - case bpf_ctx_range(struct __sk_buff, tc_classid): - return false; case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; @@ -3847,6 +3902,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, offsetof(struct sk_buff, data)); break; + case offsetof(struct __sk_buff, data_meta): + off = si->off; + off -= offsetof(struct __sk_buff, data_meta); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct bpf_skb_data_end, data_meta); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, + si->src_reg, off); + break; + case offsetof(struct __sk_buff, data_end): off = si->off; off -= offsetof(struct __sk_buff, data_end); @@ -4095,6 +4159,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data)); break; + case offsetof(struct xdp_md, data_meta): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta), + si->dst_reg, si->src_reg, + offsetof(struct xdp_buff, data_meta)); + break; case offsetof(struct xdp_md, data_end): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), si->dst_reg, si->src_reg, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 000ce735fa8d..d98c2e3ce2bf 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1509,6 +1509,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); + skb_metadata_clear(skb); + /* It is not generally safe to change skb->truesize. * For the moment, we really care of rx path, or * when skb is orphaned (not attached to a socket). |