summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJiong Wang <jiong.wang@netronome.com>2018-07-07 00:13:23 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2018-07-07 01:45:31 +0200
commit9fb410a89e8fa92f8ebc7aa95563442a14da21eb (patch)
tree2e8fd0bfafbfffba139074dfbf7e8f0406caee31
parentnfp: bpf: support u32 divide using reciprocal_div.h (diff)
downloadlinux-9fb410a89e8fa92f8ebc7aa95563442a14da21eb.tar.xz
linux-9fb410a89e8fa92f8ebc7aa95563442a14da21eb.zip
nfp: bpf: migrate to advanced reciprocal divide in reciprocal_div.h
As we are doing JIT, we would want to use the advanced version of the reciprocal divide (reciprocal_value_adv) to trade performance with host. We could reduce the required ALU instructions from 4 to 2 or 1. Signed-off-by: Jiong Wang <jiong.wang@netronome.com> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c64
1 files changed, 53 insertions, 11 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 7c9ee3d85907..1d9e36835404 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1497,8 +1497,8 @@ wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
{
swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
- struct reciprocal_value rvalue;
- swreg tmp_b = imm_b(nfp_prog);
+ struct reciprocal_value_adv rvalue;
+ u8 pre_shift, exp;
swreg magic;
if (imm > U32_MAX) {
@@ -1506,16 +1506,58 @@ static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
return 0;
}
- rvalue = reciprocal_value(imm);
+ /* NOTE: because we are using "reciprocal_value_adv" which doesn't
+ * support "divisor > (1u << 31)", we need to JIT separate NFP sequence
+ * to handle such case which actually equals to the result of unsigned
+ * comparison "dst >= imm" which could be calculated using the following
+ * NFP sequence:
+ *
+ * alu[--, dst, -, imm]
+ * immed[imm, 0]
+ * alu[dst, imm, +carry, 0]
+ *
+ */
+ if (imm > 1U << 31) {
+ swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+
+ emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
+ wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
+ emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
+ reg_imm(0));
+ return 0;
+ }
+
+ rvalue = reciprocal_value_adv(imm, 32);
+ exp = rvalue.exp;
+ if (rvalue.is_wide_m && !(imm & 1)) {
+ pre_shift = fls(imm & -imm) - 1;
+ rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
+ } else {
+ pre_shift = 0;
+ }
magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
- wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a, magic,
- true);
- emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB, tmp_b);
- emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
- SHF_SC_R_SHF, rvalue.sh1);
- emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD, tmp_b);
- emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
- SHF_SC_R_SHF, rvalue.sh2);
+ if (imm == 1U << exp) {
+ emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+ SHF_SC_R_SHF, exp);
+ } else if (rvalue.is_wide_m) {
+ wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
+ magic, true);
+ emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
+ imm_b(nfp_prog));
+ emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+ SHF_SC_R_SHF, 1);
+ emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
+ imm_b(nfp_prog));
+ emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
+ SHF_SC_R_SHF, rvalue.sh - 1);
+ } else {
+ if (pre_shift)
+ emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+ dst_b, SHF_SC_R_SHF, pre_shift);
+ wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
+ emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
+ dst_b, SHF_SC_R_SHF, rvalue.sh);
+ }
return 0;
}