diff options
author | David S. Miller <davem@davemloft.net> | 2018-01-11 19:59:41 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-01-11 19:59:41 +0100 |
commit | 8c2e6c904fd8701a8d02d2bdb86871dc3ec4e85b (patch) | |
tree | 8f6c64f4799f193673c3788b45f3960910d64174 | |
parent | Merge branch 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff) | |
parent | samples/bpf: xdp2skb_meta shows transferring info from XDP to SKB (diff) | |
download | linux-8c2e6c904fd8701a8d02d2bdb86871dc3ec4e85b.tar.xz linux-8c2e6c904fd8701a8d02d2bdb86871dc3ec4e85b.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2018-01-11
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) Various BPF related improvements and fixes to nfp driver: i) do
not register XDP RXQ structure to control queues, ii) round up
program stack size to word size for nfp, iii) restrict MTU changes
when BPF offload is active, iv) add more fully featured relocation
support to JIT, v) add support for signed compare instructions to
the nfp JIT, vi) export and reuse verfier log routine for nfp, and
many more, from Jakub, Quentin and Nic.
2) Fix a syzkaller reported GPF in BPF's copy_verifier_state() when
we hit kmalloc failure path, from Alexei.
3) Add two follow-up fixes for the recent XDP RXQ series: i) kvzalloc()
allocated memory was only kfree()'ed, and ii) fix a memory leak where
RX queue was not freed in netif_free_rx_queues(), from Jakub.
4) Add a sample for transferring XDP meta data into the skb, here it
is used for setting skb->mark with the buffer from XDP, from Jesper.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/jit.c | 248 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/main.c | 38 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/main.h | 44 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/offload.c | 65 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 30 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_app.h | 60 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_asm.c | 30 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_asm.h | 5 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 27 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 111 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_net_repr.h | 1 | ||||
-rw-r--r-- | include/linux/bpf_verifier.h | 3 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 20 | ||||
-rw-r--r-- | net/core/dev.c | 9 | ||||
-rw-r--r-- | samples/bpf/Makefile | 1 | ||||
-rwxr-xr-x | samples/bpf/xdp2skb_meta.sh | 220 | ||||
-rw-r--r-- | samples/bpf/xdp2skb_meta_kern.c | 103 |
18 files changed, 760 insertions, 263 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 0de59f04da84..47c5224f8d6f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -85,7 +85,7 @@ static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) { - return nfp_prog->start_off + nfp_prog->prog_len; + return nfp_prog->prog_len; } static bool @@ -100,12 +100,6 @@ nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off) return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off); } -static unsigned int -nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) -{ - return offset - nfp_prog->start_off; -} - /* --- Emitters --- */ static void __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, @@ -195,22 +189,28 @@ __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, nfp_prog_push(nfp_prog, insn); } -static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer) +static void +emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer, + enum nfp_relo_type relo) { - if (defer > 2) { + if (mask == BR_UNC && defer > 2) { pr_err("BUG: branch defer out of bounds %d\n", defer); nfp_prog->error = -EFAULT; return; } - __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer); + + __emit_br(nfp_prog, mask, + mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, + BR_CSS_NONE, addr, defer); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_RELO_TYPE, relo); } static void emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) { - __emit_br(nfp_prog, mask, - mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, - BR_CSS_NONE, addr, defer); + emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL); } static void @@ -515,16 +515,6 @@ static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) emit_nop(nfp_prog); } -static void -wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, - enum br_special special) -{ - emit_br(nfp_prog, mask, 0, 0); - - nfp_prog->prog[nfp_prog->prog_len - 1] |= - FIELD_PREP(OP_BR_SPECIAL, special); -} - static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src) { emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src); @@ -749,7 +739,7 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); - wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); /* Load data */ return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); @@ -762,7 +752,7 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) /* Check packet length */ tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); - wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); /* Load data */ tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); @@ -1269,7 +1259,7 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0); /* Skip over the -EINVAL ret code (defer 2) */ - emit_br_def(nfp_prog, end, 2); + emit_br(nfp_prog, BR_UNC, end, 2); emit_alu(nfp_prog, plen_reg(nfp_prog), plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); @@ -1924,6 +1914,26 @@ static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); } +static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true); +} + +static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false); +} + +static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false); +} + +static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true); +} + static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -2013,6 +2023,26 @@ static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); } +static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true); +} + +static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false); +} + +static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false); +} + +static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true); +} + static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); @@ -2036,7 +2066,7 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT); return 0; } @@ -2097,6 +2127,10 @@ static const instr_cb_t instr_cb[256] = { [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, [BPF_JMP | BPF_JLT | BPF_K] = jlt_imm, [BPF_JMP | BPF_JLE | BPF_K] = jle_imm, + [BPF_JMP | BPF_JSGT | BPF_K] = jsgt_imm, + [BPF_JMP | BPF_JSGE | BPF_K] = jsge_imm, + [BPF_JMP | BPF_JSLT | BPF_K] = jslt_imm, + [BPF_JMP | BPF_JSLE | BPF_K] = jsle_imm, [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, @@ -2104,24 +2138,16 @@ static const instr_cb_t instr_cb[256] = { [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, [BPF_JMP | BPF_JLT | BPF_X] = jlt_reg, [BPF_JMP | BPF_JLE | BPF_X] = jle_reg, + [BPF_JMP | BPF_JSGT | BPF_X] = jsgt_reg, + [BPF_JMP | BPF_JSGE | BPF_X] = jsge_reg, + [BPF_JMP | BPF_JSLT | BPF_X] = jslt_reg, + [BPF_JMP | BPF_JSLE | BPF_X] = jsle_reg, [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, [BPF_JMP | BPF_CALL] = call, [BPF_JMP | BPF_EXIT] = goto_out, }; -/* --- Misc code --- */ -static void br_set_offset(u64 *instr, u16 offset) -{ - u16 addr_lo, addr_hi; - - addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); - addr_hi = offset != addr_lo; - *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); - *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); - *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); -} - /* --- Assembler logic --- */ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) { @@ -2137,11 +2163,9 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) continue; if (list_is_last(&meta->l, &nfp_prog->insns)) - idx = nfp_prog->last_bpf_off; + br_idx = nfp_prog->last_bpf_off; else - idx = list_next_entry(meta, l)->off - 1; - - br_idx = nfp_prog_offset_to_index(nfp_prog, idx); + br_idx = list_next_entry(meta, l)->off - 1; if (!nfp_is_br(nfp_prog->prog[br_idx])) { pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", @@ -2149,7 +2173,8 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) return -ELOOP; } /* Leave special branches for later */ - if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx])) + if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != + RELO_BR_REL) continue; if (!meta->jmp_dst) { @@ -2164,38 +2189,13 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) return -ELOOP; } - for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off); - idx <= br_idx; idx++) { + for (idx = meta->off; idx <= br_idx; idx++) { if (!nfp_is_br(nfp_prog->prog[idx])) continue; br_set_offset(&nfp_prog->prog[idx], jmp_dst->off); } } - /* Fixup 'goto out's separately, they can be scattered around */ - for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) { - enum br_special special; - - if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE) - continue; - - special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]); - switch (special) { - case OP_BR_NORMAL: - break; - case OP_BR_GO_OUT: - br_set_offset(&nfp_prog->prog[br_idx], - nfp_prog->tgt_out); - break; - case OP_BR_GO_ABORT: - br_set_offset(&nfp_prog->prog[br_idx], - nfp_prog->tgt_abort); - break; - } - - nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL; - } - return 0; } @@ -2223,7 +2223,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); @@ -2250,7 +2250,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) emit_shf(nfp_prog, reg_b(2), reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); emit_shf(nfp_prog, reg_b(2), reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); @@ -2269,7 +2269,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); @@ -2290,7 +2290,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) emit_shf(nfp_prog, reg_b(2), reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); @@ -2706,25 +2706,38 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) return 0; } -static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) +static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) { + __le64 *ustore = (__force __le64 *)prog; int i; - for (i = 0; i < nfp_prog->prog_len; i++) { + for (i = 0; i < len; i++) { int err; - err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]); + err = nfp_ustore_check_valid_no_ecc(prog[i]); if (err) return err; - nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]); - - ustore[i] = cpu_to_le64(nfp_prog->prog[i]); + ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i])); } return 0; } +static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog) +{ + void *prog; + + prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL); + if (!prog) + return; + + nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64); + memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len); + kvfree(nfp_prog->prog); + nfp_prog->prog = prog; +} + int nfp_bpf_jit(struct nfp_prog *nfp_prog) { int ret; @@ -2740,5 +2753,78 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog) return -EINVAL; } - return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog); + nfp_bpf_prog_trim(nfp_prog); + + return ret; +} + +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) +{ + struct nfp_insn_meta *meta; + + /* Another pass to record jump information. */ + list_for_each_entry(meta, &nfp_prog->insns, l) { + u64 code = meta->insn.code; + + if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && + BPF_OP(code) != BPF_CALL) { + struct nfp_insn_meta *dst_meta; + unsigned short dst_indx; + + dst_indx = meta->n + 1 + meta->insn.off; + dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, + cnt); + + meta->jmp_dst = dst_meta; + dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; + } + } +} + +void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) +{ + unsigned int i; + u64 *prog; + int err; + + prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64), + GFP_KERNEL); + if (!prog) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < nfp_prog->prog_len; i++) { + enum nfp_relo_type special; + + special = FIELD_GET(OP_RELO_TYPE, prog[i]); + switch (special) { + case RELO_NONE: + continue; + case RELO_BR_REL: + br_add_offset(&prog[i], bv->start_off); + break; + case RELO_BR_GO_OUT: + br_set_offset(&prog[i], + nfp_prog->tgt_out + bv->start_off); + break; + case RELO_BR_GO_ABORT: + br_set_offset(&prog[i], + nfp_prog->tgt_abort + bv->start_off); + break; + case RELO_BR_NEXT_PKT: + br_set_offset(&prog[i], bv->tgt_done); + break; + } + + prog[i] &= ~OP_RELO_TYPE; + } + + err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len); + if (err) + goto err_free_prog; + + return prog; + +err_free_prog: + kfree(prog); + return ERR_PTR(err); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 4b63167906ca..e8cfe300c8c4 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -87,16 +87,21 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) static int nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) { + struct nfp_bpf_vnic *bv; int err; - nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL); - if (!nn->app_priv) + bv = kzalloc(sizeof(*bv), GFP_KERNEL); + if (!bv) return -ENOMEM; + nn->app_priv = bv; err = nfp_app_nic_vnic_alloc(app, nn, id); if (err) goto err_free_priv; + bv->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + bv->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + return 0; err_free_priv: kfree(nn->app_priv); @@ -191,7 +196,27 @@ static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn) { - return nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; + struct nfp_bpf_vnic *bv = nn->app_priv; + + return !!bv->tc_prog; +} + +static int +nfp_bpf_change_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int max_mtu; + + if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + return 0; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (new_mtu > max_mtu) { + nn_info(nn, "BPF offload active, MTU over %u not supported\n", + max_mtu); + return -EBUSY; + } + return 0; } static int @@ -311,6 +336,8 @@ const struct nfp_app_type app_bpf = { .init = nfp_bpf_init, .clean = nfp_bpf_clean, + .change_mtu = nfp_bpf_change_mtu, + .extra_cap = nfp_bpf_extra_cap, .vnic_alloc = nfp_bpf_vnic_alloc, @@ -318,9 +345,6 @@ const struct nfp_app_type app_bpf = { .setup_tc = nfp_bpf_setup_tc, .tc_busy = nfp_bpf_tc_busy, + .bpf = nfp_ndo_bpf, .xdp_offload = nfp_bpf_xdp_offload, - - .bpf_verifier_prep = nfp_bpf_verifier_prep, - .bpf_translate = nfp_bpf_translate, - .bpf_destroy = nfp_bpf_destroy, }; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 89a9b6393882..66381afee2a9 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -42,17 +42,28 @@ #include "../nfp_asm.h" -/* For branch fixup logic use up-most byte of branch instruction as scratch +/* For relocation logic use up-most byte of branch instruction as scratch * area. Remember to clear this before sending instructions to HW! */ -#define OP_BR_SPECIAL 0xff00000000000000ULL - -enum br_special { - OP_BR_NORMAL = 0, - OP_BR_GO_OUT, - OP_BR_GO_ABORT, +#define OP_RELO_TYPE 0xff00000000000000ULL + +enum nfp_relo_type { + RELO_NONE = 0, + /* standard internal jumps */ + RELO_BR_REL, + /* internal jumps to parts of the outro */ + RELO_BR_GO_OUT, + RELO_BR_GO_ABORT, + /* external jumps to fixed addresses */ + RELO_BR_NEXT_PKT, }; +/* To make absolute relocated branches (branches other than RELO_BR_REL) + * distinguishable in user space dumps from normal jumps, add a large offset + * to them. + */ +#define BR_OFF_RELO 15000 + enum static_regs { STATIC_REG_IMM = 21, /* Bank AB */ STATIC_REG_STACK = 22, /* Bank A */ @@ -191,11 +202,9 @@ static inline bool is_mbpf_store(const struct nfp_insn_meta *meta) * @__prog_alloc_len: alloc size of @prog array * @verifier_meta: temporary storage for verifier's insn meta * @type: BPF program type - * @start_off: address of the first instruction in the memory * @last_bpf_off: address of the last instruction translated from BPF * @tgt_out: jump target for normal exit * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) - * @tgt_done: jump target to get the next packet * @n_translated: number of successfully translated instructions (for errors) * @error: error code if something went wrong * @stack_depth: max stack depth from the verifier @@ -213,11 +222,9 @@ struct nfp_prog { enum bpf_prog_type type; - unsigned int start_off; unsigned int last_bpf_off; unsigned int tgt_out; unsigned int tgt_abort; - unsigned int tgt_done; unsigned int n_translated; int error; @@ -231,11 +238,16 @@ struct nfp_prog { /** * struct nfp_bpf_vnic - per-vNIC BPF priv structure * @tc_prog: currently loaded cls_bpf program + * @start_off: address of the first instruction in the memory + * @tgt_done: jump target to get the next packet */ struct nfp_bpf_vnic { struct bpf_prog *tc_prog; + unsigned int start_off; + unsigned int tgt_done; }; +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt); int nfp_bpf_jit(struct nfp_prog *prog); extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops; @@ -244,16 +256,14 @@ struct netdev_bpf; struct nfp_app; struct nfp_net; +int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, bool old_prog); -int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf); -int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); -int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns); + +void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index fa2905e67b07..320b2250d29a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -42,6 +42,7 @@ #include <linux/jiffies.h> #include <linux/timer.h> #include <linux/list.h> +#include <linux/mm.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> @@ -70,23 +71,7 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, list_add_tail(&meta->l, &nfp_prog->insns); } - /* Another pass to record jump information. */ - list_for_each_entry(meta, &nfp_prog->insns, l) { - u64 code = meta->insn.code; - - if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && - BPF_OP(code) != BPF_CALL) { - struct nfp_insn_meta *dst_meta; - unsigned short dst_indx; - - dst_indx = meta->n + 1 + meta->insn.off; - dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, - cnt); - - meta->jmp_dst = dst_meta; - dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; - } - } + nfp_bpf_jit_prepare(nfp_prog, cnt); return 0; } @@ -102,8 +87,9 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) kfree(nfp_prog); } -int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf) +static int +nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) { struct bpf_prog *prog = bpf->verifier.prog; struct nfp_prog *nfp_prog; @@ -133,8 +119,7 @@ err_free: return ret; } -int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) +static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; unsigned int stack_size; @@ -146,37 +131,48 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, prog->aux->stack_depth, stack_size); return -EOPNOTSUPP; } - - nfp_prog->stack_depth = prog->aux->stack_depth; - nfp_prog->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); - nfp_prog->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + nfp_prog->stack_depth = round_up(prog->aux->stack_depth, 4); max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); - nfp_prog->prog = kmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); + nfp_prog->prog = kvmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); if (!nfp_prog->prog) return -ENOMEM; return nfp_bpf_jit(nfp_prog); } -int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) +static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - kfree(nfp_prog->prog); + kvfree(nfp_prog->prog); nfp_prog_free(nfp_prog); return 0; } +int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) +{ + switch (bpf->command) { + case BPF_OFFLOAD_VERIFIER_PREP: + return nfp_bpf_verifier_prep(app, nn, bpf); + case BPF_OFFLOAD_TRANSLATE: + return nfp_bpf_translate(nn, bpf->offload.prog); + case BPF_OFFLOAD_DESTROY: + return nfp_bpf_destroy(nn, bpf->offload.prog); + default: + return -EINVAL; + } +} + static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; unsigned int max_mtu; dma_addr_t dma_addr; + void *img; int err; max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; @@ -185,11 +181,17 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) return -EOPNOTSUPP; } - dma_addr = dma_map_single(nn->dp.dev, nfp_prog->prog, + img = nfp_bpf_relo_for_vnic(nfp_prog, nn->app_priv); + if (IS_ERR(img)) + return PTR_ERR(img); + + dma_addr = dma_map_single(nn->dp.dev, img, nfp_prog->prog_len * sizeof(u64), DMA_TO_DEVICE); - if (dma_mapping_error(nn->dp.dev, dma_addr)) + if (dma_mapping_error(nn->dp.dev, dma_addr)) { + kfree(img); return -ENOMEM; + } nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len); nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); @@ -201,6 +203,7 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64), DMA_TO_DEVICE); + kfree(img); return err; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index d8870c2f11f3..7890d95d4018 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -31,8 +31,6 @@ * SOFTWARE. */ -#define pr_fmt(fmt) "NFP net bpf: " fmt - #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/kernel.h> @@ -41,6 +39,9 @@ #include "fw.h" #include "main.h" +#define pr_vlog(env, fmt, ...) \ + bpf_verifier_log_write(env, "[nfp] " fmt, ##__VA_ARGS__) + struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns) @@ -116,18 +117,18 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env, switch (func_id) { case BPF_FUNC_xdp_adjust_head: if (!bpf->adjust_head.off_max) { - pr_warn("adjust_head not supported by FW\n"); + pr_vlog(env, "adjust_head not supported by FW\n"); return -EOPNOTSUPP; } if (!(bpf->adjust_head.flags & NFP_BPF_ADJUST_HEAD_NO_META)) { - pr_warn("adjust_head: FW requires shifting metadata, not supported by the driver\n"); + pr_vlog(env, "adjust_head: FW requires shifting metadata, not supported by the driver\n"); return -EOPNOTSUPP; } nfp_record_adjust_head(bpf, nfp_prog, meta, reg2); break; default: - pr_warn("unsupported function id: %d\n", func_id); + pr_vlog(env, "unsupported function id: %d\n", func_id); return -EOPNOTSUPP; } @@ -150,7 +151,7 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg0->var_off); - pr_info("unsupported exit state: %d, var_off: %s\n", + pr_vlog(env, "unsupported exit state: %d, var_off: %s\n", reg0->type, tn_buf); return -EINVAL; } @@ -160,7 +161,7 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, imm <= TC_ACT_REDIRECT && imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN && imm != TC_ACT_QUEUED) { - pr_info("unsupported exit state: %d, imm: %llx\n", + pr_vlog(env, "unsupported exit state: %d, imm: %llx\n", reg0->type, imm); return -EINVAL; } @@ -171,12 +172,13 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, static int nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - const struct bpf_reg_state *reg) + const struct bpf_reg_state *reg, + struct bpf_verifier_env *env) { s32 old_off, new_off; if (!tnum_is_const(reg->var_off)) { - pr_info("variable ptr stack access\n"); + pr_vlog(env, "variable ptr stack access\n"); return -EINVAL; } @@ -194,7 +196,7 @@ nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, if (old_off % 4 == new_off % 4) return 0; - pr_info("stack access changed location was:%d is:%d\n", + pr_vlog(env, "stack access changed location was:%d is:%d\n", old_off, new_off); return -EINVAL; } @@ -209,18 +211,18 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, if (reg->type != PTR_TO_CTX && reg->type != PTR_TO_STACK && reg->type != PTR_TO_PACKET) { - pr_info("unsupported ptr type: %d\n", reg->type); + pr_vlog(env, "unsupported ptr type: %d\n", reg->type); return -EINVAL; } if (reg->type == PTR_TO_STACK) { - err = nfp_bpf_check_stack_access(nfp_prog, meta, reg); + err = nfp_bpf_check_stack_access(nfp_prog, meta, reg, env); if (err) return err; } if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { - pr_info("ptr type changed for instruction %d -> %d\n", + pr_vlog(env, "ptr type changed for instruction %d -> %d\n", meta->ptr.type, reg->type); return -EINVAL; } @@ -241,7 +243,7 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) if (meta->insn.src_reg >= MAX_BPF_REG || meta->insn.dst_reg >= MAX_BPF_REG) { - pr_err("program uses extended registers - jit hardening?\n"); + pr_vlog(env, "program uses extended registers - jit hardening?\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 3af1943a8521..32ff46a00f70 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -82,15 +82,15 @@ extern const struct nfp_app_type app_flower; * @repr_clean: representor about to be unregistered * @repr_open: representor netdev open callback * @repr_stop: representor netdev stop callback + * @change_mtu: MTU change on a netdev has been requested (veto-only, change + * is not guaranteed to be committed) * @start: start application logic * @stop: stop application logic * @ctrl_msg_rx: control message handler * @setup_tc: setup TC ndo * @tc_busy: TC HW offload busy (rules loaded) + * @bpf: BPF ndo offload-related calls * @xdp_offload: offload an XDP program - * @bpf_verifier_prep: verifier prep for dev-specific BPF programs - * @bpf_translate: translate call for dev-specific BPF programs - * @bpf_destroy: destroy for dev-specific BPF programs * @eswitch_mode_get: get SR-IOV eswitch mode * @sriov_enable: app-specific sriov initialisation * @sriov_disable: app-specific sriov clean-up @@ -120,6 +120,9 @@ struct nfp_app_type { int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr); int (*repr_stop)(struct nfp_app *app, struct nfp_repr *repr); + int (*change_mtu)(struct nfp_app *app, struct net_device *netdev, + int new_mtu); + int (*start)(struct nfp_app *app); void (*stop)(struct nfp_app *app); @@ -128,14 +131,10 @@ struct nfp_app_type { int (*setup_tc)(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data); bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn); + int (*bpf)(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *xdp); int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog); - int (*bpf_verifier_prep)(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf); - int (*bpf_translate)(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); - int (*bpf_destroy)(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); int (*sriov_enable)(struct nfp_app *app, int num_vfs); void (*sriov_disable)(struct nfp_app *app); @@ -242,6 +241,14 @@ nfp_app_repr_clean(struct nfp_app *app, struct net_device *netdev) app->type->repr_clean(app, netdev); } +static inline int +nfp_app_change_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) +{ + if (!app || !app->type->change_mtu) + return 0; + return app->type->change_mtu(app, netdev, new_mtu); +} + static inline int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl) { app->ctrl = ctrl; @@ -303,6 +310,14 @@ static inline int nfp_app_setup_tc(struct nfp_app *app, return app->type->setup_tc(app, netdev, type, type_data); } +static inline int nfp_app_bpf(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) +{ + if (!app || !app->type->bpf) + return -EINVAL; + return app->type->bpf(app, nn, bpf); +} + static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog) { @@ -311,33 +326,6 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, return app->type->xdp_offload(app, nn, prog); } -static inline int -nfp_app_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf) -{ - if (!app || !app->type->bpf_verifier_prep) - return -EOPNOTSUPP; - return app->type->bpf_verifier_prep(app, nn, bpf); -} - -static inline int -nfp_app_bpf_translate(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) -{ - if (!app || !app->type->bpf_translate) - return -EOPNOTSUPP; - return app->type->bpf_translate(app, nn, prog); -} - -static inline int -nfp_app_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) -{ - if (!app || !app->type->bpf_destroy) - return -EOPNOTSUPP; - return app->type->bpf_destroy(app, nn, prog); -} - static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) { trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index d3610987fb07..9ee3a3f60cc7 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -50,6 +50,36 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, }; +u16 br_get_offset(u64 instr) +{ + u16 addr_lo, addr_hi; + + addr_lo = FIELD_GET(OP_BR_ADDR_LO, instr); + addr_hi = FIELD_GET(OP_BR_ADDR_HI, instr); + + return (addr_hi * ((OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)) + 1)) | + addr_lo; +} + +void br_set_offset(u64 *instr, u16 offset) +{ + u16 addr_lo, addr_hi; + + addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = offset != addr_lo; + *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); + *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); +} + +void br_add_offset(u64 *instr, u16 offset) +{ + u16 addr; + + addr = br_get_offset(*instr); + br_set_offset(instr, addr + offset); +} + static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) { bool lm_id, lm_dec = false; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index a24daeab1a77..20e51cb60e69 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -81,6 +81,7 @@ enum br_mask { BR_BHS = 0x04, BR_BLO = 0x05, BR_BGE = 0x08, + BR_BLT = 0x09, BR_UNC = 0x18, }; @@ -93,6 +94,10 @@ enum br_ctx_signal_state { BR_CSS_NONE = 2, }; +u16 br_get_offset(u64 instr); +void br_set_offset(u64 *instr, u16 offset); +void br_add_offset(u64 *instr, u16 offset); + #define OP_BBYTE_BASE 0x0c800000000ULL #define OP_BB_A_SRC 0x000000000ffULL #define OP_BB_BYTE 0x00000000300ULL diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 05e071b3dc5b..caee147fce04 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2253,7 +2253,8 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) struct nfp_net_r_vector *r_vec = rx_ring->r_vec; struct nfp_net_dp *dp = &r_vec->nfp_net->dp; - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + if (dp->netdev) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); kfree(rx_ring->rxbufs); if (rx_ring->rxds) @@ -2279,9 +2280,12 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) { int sz, err; - err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, rx_ring->idx); - if (err < 0) - return err; + if (dp->netdev) { + err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, + rx_ring->idx); + if (err < 0) + return err; + } rx_ring->cnt = dp->rxd_cnt; rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; @@ -3045,6 +3049,11 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) { struct nfp_net *nn = netdev_priv(netdev); struct nfp_net_dp *dp; + int err; + + err = nfp_app_change_mtu(nn->app, netdev, new_mtu); + if (err) + return err; dp = nfp_net_clone_dp(nn); if (!dp) @@ -3405,16 +3414,8 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0; xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0; return 0; - case BPF_OFFLOAD_VERIFIER_PREP: - return nfp_app_bpf_verifier_prep(nn->app, nn, xdp); - case BPF_OFFLOAD_TRANSLATE: - return nfp_app_bpf_translate(nn->app, nn, - xdp->offload.prog); - case BPF_OFFLOAD_DESTROY: - return nfp_app_bpf_destroy(nn->app, nn, - xdp->offload.prog); default: - return -EINVAL; + return nfp_app_bpf(nn->app, nn, xdp); } } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 782d452e0fc2..25c36001bffa 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -91,23 +91,24 @@ #define NFP_NET_RSS_IPV6_EX_UDP 9 /** - * @NFP_NET_TXR_MAX: Maximum number of TX rings - * @NFP_NET_RXR_MAX: Maximum number of RX rings + * Ring counts + * %NFP_NET_TXR_MAX: Maximum number of TX rings + * %NFP_NET_RXR_MAX: Maximum number of RX rings */ #define NFP_NET_TXR_MAX 64 #define NFP_NET_RXR_MAX 64 /** * Read/Write config words (0x0000 - 0x002c) - * @NFP_NET_CFG_CTRL: Global control - * @NFP_NET_CFG_UPDATE: Indicate which fields are updated - * @NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings - * @NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings - * @NFP_NET_CFG_MTU: Set MTU size - * @NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) - * @NFP_NET_CFG_EXN: MSI-X table entry for exceptions - * @NFP_NET_CFG_LSC: MSI-X table entry for link state changes - * @NFP_NET_CFG_MACADDR: MAC address + * %NFP_NET_CFG_CTRL: Global control + * %NFP_NET_CFG_UPDATE: Indicate which fields are updated + * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings + * %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings + * %NFP_NET_CFG_MTU: Set MTU size + * %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) + * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions + * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes + * %NFP_NET_CFG_MACADDR: MAC address * * TODO: * - define Error details in UPDATE @@ -176,14 +177,14 @@ /** * Read-only words (0x0030 - 0x0050): - * @NFP_NET_CFG_VERSION: Firmware version number - * @NFP_NET_CFG_STS: Status - * @NFP_NET_CFG_CAP: Capabilities (same bits as @NFP_NET_CFG_CTRL) - * @NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings - * @NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings - * @NFP_NET_CFG_MAX_MTU: Maximum support MTU - * @NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) - * @NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) + * %NFP_NET_CFG_VERSION: Firmware version number + * %NFP_NET_CFG_STS: Status + * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) + * %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings + * %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings + * %NFP_NET_CFG_MAX_MTU: Maximum support MTU + * %NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) + * %NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) * * TODO: * - define more STS bits @@ -228,31 +229,31 @@ /** * RSS capabilities - * @NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as - * @NFP_NET_CFG_RSS_HFUNC) + * %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as + * %NFP_NET_CFG_RSS_HFUNC) */ #define NFP_NET_CFG_RSS_CAP 0x0054 #define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000 /** * VXLAN/UDP encap configuration - * @NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports - * @NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes + * %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports + * %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes */ #define NFP_NET_CFG_VXLAN_PORT 0x0060 #define NFP_NET_CFG_VXLAN_SZ 0x0008 /** * BPF section - * @NFP_NET_CFG_BPF_ABI: BPF ABI version - * @NFP_NET_CFG_BPF_CAP: BPF capabilities - * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes - * @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded - * @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit - * @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks - * @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks - * @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions - * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code + * %NFP_NET_CFG_BPF_ABI: BPF ABI version + * %NFP_NET_CFG_BPF_CAP: BPF capabilities + * %NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes + * %NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded + * %NFP_NET_CFG_BPF_DONE: Offset to jump to on exit + * %NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks + * %NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks + * %NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions + * %NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code */ #define NFP_NET_CFG_BPF_ABI 0x0080 #define NFP_NET_BPF_ABI 2 @@ -278,9 +279,9 @@ /** * RSS configuration (0x0100 - 0x01ac): * Used only when NFP_NET_CFG_CTRL_RSS is enabled - * @NFP_NET_CFG_RSS_CFG: RSS configuration word - * @NFP_NET_CFG_RSS_KEY: RSS "secret" key - * @NFP_NET_CFG_RSS_ITBL: RSS indirection table + * %NFP_NET_CFG_RSS_CFG: RSS configuration word + * %NFP_NET_CFG_RSS_KEY: RSS "secret" key + * %NFP_NET_CFG_RSS_ITBL: RSS indirection table */ #define NFP_NET_CFG_RSS_BASE 0x0100 #define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE @@ -305,13 +306,13 @@ /** * TX ring configuration (0x200 - 0x800) - * @NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration - * @NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) - * @NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) - * @NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries) - * @NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries) - * @NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) - * @NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet + * %NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration + * %NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) + * %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) + * %NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries) + * %NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries) + * %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) + * %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet */ #define NFP_NET_CFG_TXR_BASE 0x0200 #define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) @@ -325,12 +326,12 @@ /** * RX ring configuration (0x0800 - 0x0c00) - * @NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration - * @NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) - * @NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) - * @NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries) - * @NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) - * @NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) + * %NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration + * %NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) + * %NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) + * %NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries) + * %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) + * %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) */ #define NFP_NET_CFG_RXR_BASE 0x0800 #define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) @@ -343,7 +344,7 @@ /** * Interrupt Control/Cause registers (0x0c00 - 0x0d00) * These registers are only used when MSI-X auto-masking is not - * enabled (@NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index + * enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index * by MSI-X entry and are 1B in size. If an entry is zero, the * corresponding entry is enabled. If the FW generates an interrupt, * it writes a cause into the corresponding field. This also masks @@ -393,8 +394,8 @@ /** * Per ring stats (0x1000 - 0x1800) * options, 64bit per entry - * @NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) - * @NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) + * %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) + * %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) */ #define NFP_NET_CFG_TXR_STATS_BASE 0x1000 #define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ @@ -418,10 +419,10 @@ /** * VLAN filtering using general use mailbox - * @NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox - * @NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter - * @NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter - * @NFP_NET_CFG_VXLAN_SZ: Size of the VLAN filter mailbox in bytes + * %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox + * %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter + * %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter + * %NFP_NET_CFG_VXLAN_SZ: Size of the VLAN filter mailbox in bytes */ #define NFP_NET_CFG_VLAN_FILTER NFP_NET_CFG_MBOX_VAL #define NFP_NET_CFG_VLAN_FILTER_VID NFP_NET_CFG_VLAN_FILTER diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index f50aa119570a..317f87cc3cc6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -186,6 +186,13 @@ nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev, return -EINVAL; } +static int nfp_repr_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nfp_repr *repr = netdev_priv(netdev); + + return nfp_app_change_mtu(repr->app, netdev, new_mtu); +} + static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) { struct nfp_repr *repr = netdev_priv(netdev); @@ -240,6 +247,7 @@ const struct net_device_ops nfp_repr_netdev_ops = { .ndo_open = nfp_repr_open, .ndo_stop = nfp_repr_stop, .ndo_start_xmit = nfp_repr_xmit, + .ndo_change_mtu = nfp_repr_change_mtu, .ndo_get_stats64 = nfp_repr_get_stats64, .ndo_has_offload_stats = nfp_repr_has_offload_stats, .ndo_get_offload_stats = nfp_repr_get_offload_stats, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h index 5d4d897bc9c6..cbc7badf40a0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h @@ -89,6 +89,7 @@ struct nfp_repr { * @NFP_REPR_TYPE_PHYS_PORT: external NIC port * @NFP_REPR_TYPE_PF: physical function * @NFP_REPR_TYPE_VF: virtual function + * @__NFP_REPR_TYPE_MAX: number of representor types */ enum nfp_repr_type { NFP_REPR_TYPE_PHYS_PORT, diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2feb218c001d..6b66cd1aa0b9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -192,6 +192,9 @@ struct bpf_verifier_env { u32 subprog_cnt; }; +__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, + const char *fmt, ...); + static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a2b211262c25..3b2b47666180 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -169,11 +169,11 @@ struct bpf_call_arg_meta { static DEFINE_MUTEX(bpf_verifier_lock); /* log_level controls verbosity level of eBPF verifier. - * verbose() is used to dump the verification trace to the log, so the user - * can figure out what's wrong with the program + * bpf_verifier_log_write() is used to dump the verification trace to the log, + * so the user can figure out what's wrong with the program */ -static __printf(2, 3) void verbose(struct bpf_verifier_env *env, - const char *fmt, ...) +__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, + const char *fmt, ...) { struct bpf_verifer_log *log = &env->log; unsigned int n; @@ -197,6 +197,14 @@ static __printf(2, 3) void verbose(struct bpf_verifier_env *env, else log->ubuf = NULL; } +EXPORT_SYMBOL_GPL(bpf_verifier_log_write); +/* Historically bpf_verifier_log_write was called verbose, but the name was too + * generic for symbol export. The function was renamed, but not the calls in + * the verifier to avoid complicating backports. Hence the alias below. + */ +static __printf(2, 3) void verbose(struct bpf_verifier_env *env, + const char *fmt, ...) + __attribute__((alias("bpf_verifier_log_write"))); static bool type_is_pkt_pointer(enum bpf_reg_type type) { @@ -375,6 +383,8 @@ static int realloc_func_state(struct bpf_func_state *state, int size, static void free_func_state(struct bpf_func_state *state) { + if (!state) + return; kfree(state->stack); kfree(state); } @@ -487,6 +497,8 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, } return &elem->st; err: + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; /* pop all elements and return */ while (!pop_stack(env, NULL, NULL)); return NULL; diff --git a/net/core/dev.c b/net/core/dev.c index 5cb782f074d7..3d24d9a59086 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7657,7 +7657,7 @@ err_rxq_info: /* Rollback successful reg's and free other resources */ while (i--) xdp_rxq_info_unreg(&rx[i].xdp_rxq); - kfree(dev->_rx); + kvfree(dev->_rx); dev->_rx = NULL; return err; } @@ -7665,16 +7665,15 @@ err_rxq_info: static void netif_free_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; - struct netdev_rx_queue *rx; /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */ if (!dev->_rx) return; - rx = dev->_rx; - for (i = 0; i < count; i++) - xdp_rxq_info_unreg(&rx[i].xdp_rxq); + xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq); + + kvfree(dev->_rx); } static void netdev_init_one_queue(struct net_device *dev, diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3ff7a05bea9a..7f61a3d57fa7 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -142,6 +142,7 @@ always += xdp_redirect_map_kern.o always += xdp_redirect_cpu_kern.o always += xdp_monitor_kern.o always += xdp_rxq_info_kern.o +always += xdp2skb_meta_kern.o always += syscall_tp_kern.o HOSTCFLAGS += -I$(objtree)/usr/include diff --git a/samples/bpf/xdp2skb_meta.sh b/samples/bpf/xdp2skb_meta.sh new file mode 100755 index 000000000000..b9c9549c4c27 --- /dev/null +++ b/samples/bpf/xdp2skb_meta.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. +# +# Bash-shell example on using iproute2 tools 'tc' and 'ip' to load +# eBPF programs, both for XDP and clsbpf. Shell script function +# wrappers and even long options parsing is illustrated, for ease of +# use. +# +# Related to sample/bpf/xdp2skb_meta_kern.c, which contains BPF-progs +# that need to collaborate between XDP and TC hooks. Thus, it is +# convenient that the same tool load both programs that need to work +# together. +# +BPF_FILE=xdp2skb_meta_kern.o +DIR=$(dirname $0) + +export TC=/usr/sbin/tc +export IP=/usr/sbin/ip + +function usage() { + echo "" + echo "Usage: $0 [-vfh] --dev ethX" + echo " -d | --dev : Network device (required)" + echo " --flush : Cleanup flush TC and XDP progs" + echo " --list : (\$LIST) List TC and XDP progs" + echo " -v | --verbose : (\$VERBOSE) Verbose" + echo " --dry-run : (\$DRYRUN) Dry-run only (echo commands)" + echo "" +} + +## -- General shell logging cmds -- +function err() { + local exitcode=$1 + shift + echo "ERROR: $@" >&2 + exit $exitcode +} + +function info() { + if [[ -n "$VERBOSE" ]]; then + echo "# $@" + fi +} + +## -- Helper function calls -- + +# Wrapper call for TC and IP +# - Will display the offending command on failure +function _call_cmd() { + local cmd="$1" + local allow_fail="$2" + shift 2 + if [[ -n "$VERBOSE" ]]; then + echo "$(basename $cmd) $@" + fi + if [[ -n "$DRYRUN" ]]; then + return + fi + $cmd "$@" + local status=$? + if (( $status != 0 )); then + if [[ "$allow_fail" == "" ]]; then + err 2 "Exec error($status) occurred cmd: \"$cmd $@\"" + fi + fi +} +function call_tc() { + _call_cmd "$TC" "" "$@" +} +function call_tc_allow_fail() { + _call_cmd "$TC" "allow_fail" "$@" +} +function call_ip() { + _call_cmd "$IP" "" "$@" +} + +## --- Parse command line arguments / parameters --- +# Using external program "getopt" to get --long-options +OPTIONS=$(getopt -o vfhd: \ + --long verbose,flush,help,list,dev:,dry-run -- "$@") +if (( $? != 0 )); then + err 4 "Error calling getopt" +fi +eval set -- "$OPTIONS" + +unset DEV +unset FLUSH +while true; do + case "$1" in + -d | --dev ) # device + DEV=$2 + info "Device set to: DEV=$DEV" >&2 + shift 2 + ;; + -v | --verbose) + VERBOSE=yes + # info "Verbose mode: VERBOSE=$VERBOSE" >&2 + shift + ;; + --dry-run ) + DRYRUN=yes + VERBOSE=yes + info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2 + shift + ;; + -f | --flush ) + FLUSH=yes + shift + ;; + --list ) + LIST=yes + shift + ;; + -- ) + shift + break + ;; + -h | --help ) + usage; + exit 0 + ;; + * ) + shift + break + ;; + esac +done + +FILE="$DIR/$BPF_FILE" +if [[ ! -e $FILE ]]; then + err 3 "Missing BPF object file ($FILE)" +fi + +if [[ -z $DEV ]]; then + usage + err 2 "Please specify network device -- required option --dev" +fi + +## -- Function calls -- + +function list_tc() +{ + local device="$1" + shift + info "Listing current TC ingress rules" + call_tc filter show dev $device ingress +} + +function list_xdp() +{ + local device="$1" + shift + info "Listing current XDP device($device) setting" + call_ip link show dev $device | grep --color=auto xdp +} + +function flush_tc() +{ + local device="$1" + shift + info "Flush TC on device: $device" + call_tc_allow_fail filter del dev $device ingress + call_tc_allow_fail qdisc del dev $device clsact +} + +function flush_xdp() +{ + local device="$1" + shift + info "Flush XDP on device: $device" + call_ip link set dev $device xdp off +} + +function attach_tc_mark() +{ + local device="$1" + local file="$2" + local prog="tc_mark" + shift 2 + + # Re-attach clsact to clear/flush existing role + call_tc_allow_fail qdisc del dev $device clsact 2> /dev/null + call_tc qdisc add dev $device clsact + + # Attach BPF prog + call_tc filter add dev $device ingress \ + prio 1 handle 1 bpf da obj $file sec $prog +} + +function attach_xdp_mark() +{ + local device="$1" + local file="$2" + local prog="xdp_mark" + shift 2 + + # Remove XDP prog in-case it's already loaded + # TODO: Need ip-link option to override/replace existing XDP prog + flush_xdp $device + + # Attach XDP/BPF prog + call_ip link set dev $device xdp obj $file sec $prog +} + +if [[ -n $FLUSH ]]; then + flush_tc $DEV + flush_xdp $DEV + exit 0 +fi + +if [[ -n $LIST ]]; then + list_tc $DEV + list_xdp $DEV + exit 0 +fi + +attach_tc_mark $DEV $FILE +attach_xdp_mark $DEV $FILE diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c new file mode 100644 index 000000000000..12e1024069c2 --- /dev/null +++ b/samples/bpf/xdp2skb_meta_kern.c @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. + * + * Example howto transfer info from XDP to SKB, e.g. skb->mark + * ----------------------------------------------------------- + * This uses the XDP data_meta infrastructure, and is a cooperation + * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook. + * + * Notice: This example does not use the BPF C-loader (bpf_load.c), + * but instead rely on the iproute2 TC tool for loading BPF-objects. + */ +#include <uapi/linux/bpf.h> +#include <uapi/linux/pkt_cls.h> + +#include "bpf_helpers.h" + +/* + * This struct is stored in the XDP 'data_meta' area, which is located + * just in-front-of the raw packet payload data. The meaning is + * specific to these two BPF programs that use it as a communication + * channel. XDP adjust/increase the area via a bpf-helper, and TC use + * boundary checks to see if data have been provided. + * + * The struct must be 4 byte aligned, which here is enforced by the + * struct __attribute__((aligned(4))). + */ +struct meta_info { + __u32 mark; +} __attribute__((aligned(4))); + +SEC("xdp_mark") +int _xdp_mark(struct xdp_md *ctx) +{ + struct meta_info *meta; + void *data, *data_end; + int ret; + + /* Reserve space in-front data pointer for our meta info. + * (Notice drivers not supporting data_meta will fail here!) + */ + ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)); + if (ret < 0) + return XDP_ABORTED; + + /* For some unknown reason, these ctx pointers must be read + * after bpf_xdp_adjust_meta, else verifier will reject prog. + */ + data = (void *)(unsigned long)ctx->data; + + /* Check data_meta have room for meta_info struct */ + meta = (void *)(unsigned long)ctx->data_meta; + if (meta + 1 > data) + return XDP_ABORTED; + + meta->mark = 42; + + return XDP_PASS; +} + +SEC("tc_mark") +int _tc_mark(struct __sk_buff *ctx) +{ + void *data = (void *)(unsigned long)ctx->data; + void *data_end = (void *)(unsigned long)ctx->data_end; + void *data_meta = (void *)(unsigned long)ctx->data_meta; + struct meta_info *meta = data_meta; + + /* Check XDP gave us some data_meta */ + if (meta + 1 > data) { + ctx->mark = 41; + /* Skip "accept" if no data_meta is avail */ + return TC_ACT_OK; + } + + /* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */ + ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */ + + return TC_ACT_OK; +} + +/* Manually attaching these programs: +export DEV=ixgbe2 +export FILE=xdp2skb_meta_kern.o + +# via TC command +tc qdisc del dev $DEV clsact 2> /dev/null +tc qdisc add dev $DEV clsact +tc filter add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark +tc filter show dev $DEV ingress + +# XDP via IP command: +ip link set dev $DEV xdp off +ip link set dev $DEV xdp obj $FILE sec xdp_mark + +# Use iptable to "see" if SKBs are marked +iptables -I INPUT -p icmp -m mark --mark 41 # == 0x29 +iptables -I INPUT -p icmp -m mark --mark 42 # == 0x2a + +# Hint: catch XDP_ABORTED errors via +perf record -e xdp:* +perf script + +*/ |