diff options
280 files changed, 11791 insertions, 5879 deletions
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst index 1bfe4072f5fc..9ad4218a751f 100644 --- a/Documentation/bpf/btf.rst +++ b/Documentation/bpf/btf.rst @@ -85,7 +85,7 @@ sequentially and type id is assigned to each recognized type starting from id #define BTF_KIND_VAR 14 /* Variable */ #define BTF_KIND_DATASEC 15 /* Section */ #define BTF_KIND_FLOAT 16 /* Floating point */ - #define BTF_KIND_TAG 17 /* Tag */ + #define BTF_KIND_DECL_TAG 17 /* Decl Tag */ Note that the type section encodes debug info, not just pure types. ``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram. @@ -107,7 +107,7 @@ Each type contains the following common data:: * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO and TAG. + * FUNC, FUNC_PROTO and DECL_TAG. * "type" is a type_id referring to another type. */ union { @@ -466,30 +466,30 @@ map definition. No additional type data follow ``btf_type``. -2.2.17 BTF_KIND_TAG -~~~~~~~~~~~~~~~~~~~ +2.2.17 BTF_KIND_DECL_TAG +~~~~~~~~~~~~~~~~~~~~~~~~ ``struct btf_type`` encoding requirement: * ``name_off``: offset to a non-empty string * ``info.kind_flag``: 0 - * ``info.kind``: BTF_KIND_TAG + * ``info.kind``: BTF_KIND_DECL_TAG * ``info.vlen``: 0 - * ``type``: ``struct``, ``union``, ``func`` or ``var`` + * ``type``: ``struct``, ``union``, ``func``, ``var`` or ``typedef`` -``btf_type`` is followed by ``struct btf_tag``.:: +``btf_type`` is followed by ``struct btf_decl_tag``.:: - struct btf_tag { + struct btf_decl_tag { __u32 component_idx; }; -The ``name_off`` encodes btf_tag attribute string. -The ``type`` should be ``struct``, ``union``, ``func`` or ``var``. -For ``var`` type, ``btf_tag.component_idx`` must be ``-1``. -For the other three types, if the btf_tag attribute is +The ``name_off`` encodes btf_decl_tag attribute string. +The ``type`` should be ``struct``, ``union``, ``func``, ``var`` or ``typedef``. 
+For ``var`` or ``typedef`` type, ``btf_decl_tag.component_idx`` must be ``-1``. +For the other three types, if the btf_decl_tag attribute is applied to the ``struct``, ``union`` or ``func`` itself, -``btf_tag.component_idx`` must be ``-1``. Otherwise, +``btf_decl_tag.component_idx`` must be ``-1``. Otherwise, the attribute is applied to a ``struct``/``union`` member or -a ``func`` argument, and ``btf_tag.component_idx`` should be a +a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a valid index (starting from 0) pointing to a member or an argument. 3. BTF Kernel API diff --git a/Documentation/bpf/libbpf/libbpf_naming_convention.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst index 9c68d5014ff1..f86360f734a8 100644 --- a/Documentation/bpf/libbpf/libbpf_naming_convention.rst +++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst @@ -150,6 +150,46 @@ mirror of the mainline's version of libbpf for a stand-alone build. However, all changes to libbpf's code base must be upstreamed through the mainline kernel tree. + +API documentation convention +============================ + +The libbpf API is documented via comments above definitions in +header files. These comments can be rendered by doxygen and sphinx +for well organized html output. This section describes the +convention in which these comments should be formatted. + +Here is an example from btf.h: + +.. code-block:: c + + /** + * @brief **btf__new()** creates a new instance of a BTF object from the raw + * bytes of an ELF's BTF section + * @param data raw bytes + * @param size number of bytes passed in `data` + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract + * error code from such a pointer `libbpf_get_error()` should be used. If + * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is + * returned on error instead. 
In both cases thread-local `errno` variable is + * always set to error code as well. + */ + +The comment must start with a block comment of the form '/\*\*'. + +The documentation always starts with a @brief directive. This line is a short +description about this API. It starts with the name of the API, denoted in bold +like so: **api_name**. Please include an open and close parenthesis if this is a +function. Follow with the short description of the API. A longer form description +can be added below the last directive, at the bottom of the comment. + +Parameters are denoted with the @param directive, there should be one for each +parameter. If this is a function with a non-void return, use the @return directive +to document it. + License ------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 917f360b3ece..9a2491ce4c77 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3442,6 +3442,7 @@ S: Supported F: arch/arm64/net/ BPF JIT for MIPS (32-BIT AND 64-BIT) +M: Johan Almbladh <johan.almbladh@anyfinetworks.com> M: Paul Burton <paulburton@kernel.org> L: netdev@vger.kernel.org L: bpf@vger.kernel.org @@ -480,6 +480,8 @@ LZ4 = lz4c XZ = xz ZSTD = zstd +PAHOLE_FLAGS = $(shell PAHOLE=$(PAHOLE) $(srctree)/scripts/pahole-flags.sh) + CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) NOSTDINC_FLAGS := @@ -534,6 +536,7 @@ export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL +export PAHOLE_FLAGS # Files to ignore in find ... 
statements diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a903b26cde40..eeb6dc0ecf46 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1882,11 +1882,6 @@ static int validate_code(struct jit_ctx *ctx) return 0; } -void bpf_jit_compile(struct bpf_prog *prog) -{ - /* Nothing to do here. We support Internal BPF. */ -} - bool bpf_jit_needs_zext(void) { return true; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 6b8f591c5054..e8976c7f6c89 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -57,7 +57,6 @@ config MIPS select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES select HAVE_ASM_MODVERSIONS - select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS select HAVE_CONTEXT_TRACKING select HAVE_TIF_NOHZ select HAVE_C_RECORDMCOUNT @@ -65,7 +64,10 @@ config MIPS select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE - select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 + select HAVE_EBPF_JIT if !CPU_MICROMIPS && \ + !CPU_DADDI_WORKAROUNDS && \ + !CPU_R4000_WORKAROUNDS && \ + !CPU_R4400_WORKAROUNDS select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD @@ -1212,15 +1214,6 @@ config SYS_SUPPORTS_RELOCATABLE The platform must provide plat_get_fdt() if it selects CONFIG_USE_OF to allow access to command line and entropy sources. -config MIPS_CBPF_JIT - def_bool y - depends on BPF_JIT && HAVE_CBPF_JIT - -config MIPS_EBPF_JIT - def_bool y - depends on BPF_JIT && HAVE_EBPF_JIT - - # # Endianness selection. Sufficiently obscure so many users don't know what to # answer,so we try hard to limit the available choices. 
Also the use of a diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h index f7effca791a5..296bcf31abb5 100644 --- a/arch/mips/include/asm/uasm.h +++ b/arch/mips/include/asm/uasm.h @@ -145,6 +145,7 @@ Ip_u1(_mtlo); Ip_u3u1u2(_mul); Ip_u1u2(_multu); Ip_u3u1u2(_mulu); +Ip_u3u1u2(_muhu); Ip_u3u1u2(_nor); Ip_u3u1u2(_or); Ip_u2u1u3(_ori); @@ -248,7 +249,11 @@ static inline void uasm_l##lb(struct uasm_label **lab, u32 *addr) \ #define uasm_i_bnezl(buf, rs, off) uasm_i_bnel(buf, rs, 0, off) #define uasm_i_ehb(buf) uasm_i_sll(buf, 0, 0, 3) #define uasm_i_move(buf, a, b) UASM_i_ADDU(buf, a, 0, b) +#ifdef CONFIG_CPU_NOP_WORKAROUNDS +#define uasm_i_nop(buf) uasm_i_or(buf, 1, 1, 0) +#else #define uasm_i_nop(buf) uasm_i_sll(buf, 0, 0, 0) +#endif #define uasm_i_ssnop(buf) uasm_i_sll(buf, 0, 0, 1) static inline void uasm_i_drotr_safe(u32 **p, unsigned int a1, diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c index 7154a1d99aad..e15c6700cd08 100644 --- a/arch/mips/mm/uasm-mips.c +++ b/arch/mips/mm/uasm-mips.c @@ -90,7 +90,7 @@ static const struct insn insn_table[insn_invalid] = { RS | RT | RD}, [insn_dmtc0] = {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET}, [insn_dmultu] = {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT}, - [insn_dmulu] = {M(spec_op, 0, 0, 0, dmult_dmul_op, dmultu_op), + [insn_dmulu] = {M(spec_op, 0, 0, 0, dmultu_dmulu_op, dmultu_op), RS | RT | RD}, [insn_drotr] = {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE}, [insn_drotr32] = {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE}, @@ -150,6 +150,8 @@ static const struct insn insn_table[insn_invalid] = { [insn_mtlo] = {M(spec_op, 0, 0, 0, 0, mtlo_op), RS}, [insn_mulu] = {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op), RS | RT | RD}, + [insn_muhu] = {M(spec_op, 0, 0, 0, multu_muhu_op, multu_op), + RS | RT | RD}, #ifndef CONFIG_CPU_MIPSR6 [insn_mul] = {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD}, #else diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c index 
81dd226d6b6b..125140979d62 100644 --- a/arch/mips/mm/uasm.c +++ b/arch/mips/mm/uasm.c @@ -59,7 +59,7 @@ enum opcode { insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld, insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi, insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0, - insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_nor, + insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_muhu, insn_nor, insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc, insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll, insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra, @@ -344,6 +344,7 @@ I_u1(_mtlo) I_u3u1u2(_mul) I_u1u2(_multu) I_u3u1u2(_mulu) +I_u3u1u2(_muhu) I_u3u1u2(_nor) I_u3u1u2(_or) I_u2u1u3(_ori) diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index d55912349039..e3e6ae6514e8 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -1,5 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only # MIPS networking code -obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o -obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o + +ifeq ($(CONFIG_32BIT),y) + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o +else + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o +endif diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c deleted file mode 100644 index cb6d22439f71..000000000000 --- a/arch/mips/net/bpf_jit.c +++ /dev/null @@ -1,1299 +0,0 @@ -/* - * Just-In-Time compiler for BPF filters on MIPS - * - * Copyright (c) 2014 Imagination Technologies Ltd. - * Author: Markos Chandras <markos.chandras@imgtec.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. 
- */ - -#include <linux/bitops.h> -#include <linux/compiler.h> -#include <linux/errno.h> -#include <linux/filter.h> -#include <linux/if_vlan.h> -#include <linux/moduleloader.h> -#include <linux/netdevice.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/types.h> -#include <asm/asm.h> -#include <asm/bitops.h> -#include <asm/cacheflush.h> -#include <asm/cpu-features.h> -#include <asm/uasm.h> - -#include "bpf_jit.h" - -/* ABI - * r_skb_hl SKB header length - * r_data SKB data pointer - * r_off Offset - * r_A BPF register A - * r_X BPF register X - * r_skb *skb - * r_M *scratch memory - * r_skb_len SKB length - * - * On entry (*bpf_func)(*skb, *filter) - * a0 = MIPS_R_A0 = skb; - * a1 = MIPS_R_A1 = filter; - * - * Stack - * ... - * M[15] - * M[14] - * M[13] - * ... - * M[0] <-- r_M - * saved reg k-1 - * saved reg k-2 - * ... - * saved reg 0 <-- r_sp - * <no argument area> - * - * Packet layout - * - * <--------------------- len ------------------------> - * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------> - * ---------------------------------------------------- - * | skb->data | - * ---------------------------------------------------- - */ - -#define ptr typeof(unsigned long) - -#define SCRATCH_OFF(k) (4 * (k)) - -/* JIT flags */ -#define SEEN_CALL (1 << BPF_MEMWORDS) -#define SEEN_SREG_SFT (BPF_MEMWORDS + 1) -#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT) -#define SEEN_SREG(x) (SEEN_SREG_BASE << (x)) -#define SEEN_OFF SEEN_SREG(2) -#define SEEN_A SEEN_SREG(3) -#define SEEN_X SEEN_SREG(4) -#define SEEN_SKB SEEN_SREG(5) -#define SEEN_MEM SEEN_SREG(6) -/* SEEN_SK_DATA also implies skb_hl an skb_len */ -#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0)) - -/* Arguments used by JIT */ -#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ - -#define SBIT(x) (1 << (x)) /* Signed version of BIT() */ - -/** - * struct jit_ctx - JIT context - * @skf: The sk_filter - * @prologue_bytes: Number of bytes for prologue - * @idx: 
Instruction index - * @flags: JIT flags - * @offsets: Instruction offsets - * @target: Memory location for the compiled filter - */ -struct jit_ctx { - const struct bpf_prog *skf; - unsigned int prologue_bytes; - u32 idx; - u32 flags; - u32 *offsets; - u32 *target; -}; - - -static inline int optimize_div(u32 *k) -{ - /* power of 2 divides can be implemented with right shift */ - if (!(*k & (*k-1))) { - *k = ilog2(*k); - return 1; - } - - return 0; -} - -static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx); - -/* Simply emit the instruction if the JIT memory space has been allocated */ -#define emit_instr(ctx, func, ...) \ -do { \ - if ((ctx)->target != NULL) { \ - u32 *p = &(ctx)->target[ctx->idx]; \ - uasm_i_##func(&p, ##__VA_ARGS__); \ - } \ - (ctx)->idx++; \ -} while (0) - -/* - * Similar to emit_instr but it must be used when we need to emit - * 32-bit or 64-bit instructions - */ -#define emit_long_instr(ctx, func, ...) \ -do { \ - if ((ctx)->target != NULL) { \ - u32 *p = &(ctx)->target[ctx->idx]; \ - UASM_i_##func(&p, ##__VA_ARGS__); \ - } \ - (ctx)->idx++; \ -} while (0) - -/* Determine if immediate is within the 16-bit signed range */ -static inline bool is_range16(s32 imm) -{ - return !(imm >= SBIT(15) || imm < -SBIT(15)); -} - -static inline void emit_addu(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, addu, dst, src1, src2); -} - -static inline void emit_nop(struct jit_ctx *ctx) -{ - emit_instr(ctx, nop); -} - -/* Load a u32 immediate to a register */ -static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - /* addiu can only handle s16 */ - if (!is_range16(imm)) { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16); - p = &ctx->target[ctx->idx + 1]; - uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff); - } else { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_addiu(&p, dst, r_zero, imm); - } - } - 
ctx->idx++; - - if (!is_range16(imm)) - ctx->idx++; -} - -static inline void emit_or(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, or, dst, src1, src2); -} - -static inline void emit_ori(unsigned int dst, unsigned src, u32 imm, - struct jit_ctx *ctx) -{ - if (imm >= BIT(16)) { - emit_load_imm(r_tmp, imm, ctx); - emit_or(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, ori, dst, src, imm); - } -} - -static inline void emit_daddiu(unsigned int dst, unsigned int src, - int imm, struct jit_ctx *ctx) -{ - /* - * Only used for stack, so the imm is relatively small - * and it fits in 15-bits - */ - emit_instr(ctx, daddiu, dst, src, imm); -} - -static inline void emit_addiu(unsigned int dst, unsigned int src, - u32 imm, struct jit_ctx *ctx) -{ - if (!is_range16(imm)) { - emit_load_imm(r_tmp, imm, ctx); - emit_addu(dst, r_tmp, src, ctx); - } else { - emit_instr(ctx, addiu, dst, src, imm); - } -} - -static inline void emit_and(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, and, dst, src1, src2); -} - -static inline void emit_andi(unsigned int dst, unsigned int src, - u32 imm, struct jit_ctx *ctx) -{ - /* If imm does not fit in u16 then load it to register */ - if (imm >= BIT(16)) { - emit_load_imm(r_tmp, imm, ctx); - emit_and(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, andi, dst, src, imm); - } -} - -static inline void emit_xor(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, xor, dst, src1, src2); -} - -static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx) -{ - /* If imm does not fit in u16 then load it to register */ - if (imm >= BIT(16)) { - emit_load_imm(r_tmp, imm, ctx); - emit_xor(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, xori, dst, src, imm); - } -} - -static inline void emit_stack_offset(int offset, struct jit_ctx *ctx) -{ - emit_long_instr(ctx, ADDIU, r_sp, r_sp, 
offset); -} - -static inline void emit_subu(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, subu, dst, src1, src2); -} - -static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx) -{ - emit_subu(reg, r_zero, reg, ctx); -} - -static inline void emit_sllv(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, sllv, dst, src, sa); -} - -static inline void emit_sll(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - /* sa is 5-bits long */ - if (sa >= BIT(5)) - /* Shifting >= 32 results in zero */ - emit_jit_reg_move(dst, r_zero, ctx); - else - emit_instr(ctx, sll, dst, src, sa); -} - -static inline void emit_srlv(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, srlv, dst, src, sa); -} - -static inline void emit_srl(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - /* sa is 5-bits long */ - if (sa >= BIT(5)) - /* Shifting >= 32 results in zero */ - emit_jit_reg_move(dst, r_zero, ctx); - else - emit_instr(ctx, srl, dst, src, sa); -} - -static inline void emit_slt(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, slt, dst, src1, src2); -} - -static inline void emit_sltu(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, sltu, dst, src1, src2); -} - -static inline void emit_sltiu(unsigned dst, unsigned int src, - unsigned int imm, struct jit_ctx *ctx) -{ - /* 16 bit immediate */ - if (!is_range16((s32)imm)) { - emit_load_imm(r_tmp, imm, ctx); - emit_sltu(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, sltiu, dst, src, imm); - } - -} - -/* Store register on the stack */ -static inline void emit_store_stack_reg(ptr reg, ptr base, - unsigned int offset, - struct jit_ctx *ctx) -{ - emit_long_instr(ctx, SW, reg, offset, base); -} - -static inline void emit_store(ptr 
reg, ptr base, unsigned int offset, - struct jit_ctx *ctx) -{ - emit_instr(ctx, sw, reg, offset, base); -} - -static inline void emit_load_stack_reg(ptr reg, ptr base, - unsigned int offset, - struct jit_ctx *ctx) -{ - emit_long_instr(ctx, LW, reg, offset, base); -} - -static inline void emit_load(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lw, reg, offset, base); -} - -static inline void emit_load_byte(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lb, reg, offset, base); -} - -static inline void emit_half_load(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lh, reg, offset, base); -} - -static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lhu, reg, offset, base); -} - -static inline void emit_mul(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, mul, dst, src1, src2); -} - -static inline void emit_div(unsigned int dst, unsigned int src, - struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_divu(&p, dst, src); - p = &ctx->target[ctx->idx + 1]; - uasm_i_mflo(&p, dst); - } - ctx->idx += 2; /* 2 insts */ -} - -static inline void emit_mod(unsigned int dst, unsigned int src, - struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_divu(&p, dst, src); - p = &ctx->target[ctx->idx + 1]; - uasm_i_mfhi(&p, dst); - } - ctx->idx += 2; /* 2 insts */ -} - -static inline void emit_dsll(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, dsll, dst, src, sa); -} - -static inline void emit_dsrl32(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, dsrl32, dst, src, sa); -} - -static inline void 
emit_wsbh(unsigned int dst, unsigned int src, - struct jit_ctx *ctx) -{ - emit_instr(ctx, wsbh, dst, src); -} - -/* load pointer to register */ -static inline void emit_load_ptr(unsigned int dst, unsigned int src, - int imm, struct jit_ctx *ctx) -{ - /* src contains the base addr of the 32/64-pointer */ - emit_long_instr(ctx, LW, dst, imm, src); -} - -/* load a function pointer to register */ -static inline void emit_load_func(unsigned int reg, ptr imm, - struct jit_ctx *ctx) -{ - if (IS_ENABLED(CONFIG_64BIT)) { - /* At this point imm is always 64-bit */ - emit_load_imm(r_tmp, (u64)imm >> 32, ctx); - emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ - emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx); - emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ - emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx); - } else { - emit_load_imm(reg, imm, ctx); - } -} - -/* Move to real MIPS register */ -static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) -{ - emit_long_instr(ctx, ADDU, dst, src, r_zero); -} - -/* Move to JIT (32-bit) register */ -static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) -{ - emit_addu(dst, src, r_zero, ctx); -} - -/* Compute the immediate value for PC-relative branches. */ -static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) -{ - if (ctx->target == NULL) - return 0; - - /* - * We want a pc-relative branch. We only do forward branches - * so tgt is always after pc. tgt is the instruction offset - * we want to jump to. - - * Branch on MIPS: - * I: target_offset <- sign_extend(offset) - * I+1: PC += target_offset (delay slot) - * - * ctx->idx currently points to the branch instruction - * but the offset is added to the delay slot so we need - * to subtract 4. 
- */ - return ctx->offsets[tgt] - - (ctx->idx * 4 - ctx->prologue_bytes) - 4; -} - -static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2, - unsigned int imm, struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - u32 *p = &ctx->target[ctx->idx]; - - switch (cond) { - case MIPS_COND_EQ: - uasm_i_beq(&p, reg1, reg2, imm); - break; - case MIPS_COND_NE: - uasm_i_bne(&p, reg1, reg2, imm); - break; - case MIPS_COND_ALL: - uasm_i_b(&p, imm); - break; - default: - pr_warn("%s: Unhandled branch conditional: %d\n", - __func__, cond); - } - } - ctx->idx++; -} - -static inline void emit_b(unsigned int imm, struct jit_ctx *ctx) -{ - emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx); -} - -static inline void emit_jalr(unsigned int link, unsigned int reg, - struct jit_ctx *ctx) -{ - emit_instr(ctx, jalr, link, reg); -} - -static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx) -{ - emit_instr(ctx, jr, reg); -} - -static inline u16 align_sp(unsigned int num) -{ - /* Double word alignment for 32-bit, quadword for 64-bit */ - unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 
16 : 8; - num = (num + (align - 1)) & -align; - return num; -} - -static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) -{ - int i = 0, real_off = 0; - u32 sflags, tmp_flags; - - /* Adjust the stack pointer */ - if (offset) - emit_stack_offset(-align_sp(offset), ctx); - - tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; - /* sflags is essentially a bitmap */ - while (tmp_flags) { - if ((sflags >> i) & 0x1) { - emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off, - ctx); - real_off += SZREG; - } - i++; - tmp_flags >>= 1; - } - - /* save return address */ - if (ctx->flags & SEEN_CALL) { - emit_store_stack_reg(r_ra, r_sp, real_off, ctx); - real_off += SZREG; - } - - /* Setup r_M leaving the alignment gap if necessary */ - if (ctx->flags & SEEN_MEM) { - if (real_off % (SZREG * 2)) - real_off += SZREG; - emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off); - } -} - -static void restore_bpf_jit_regs(struct jit_ctx *ctx, - unsigned int offset) -{ - int i, real_off = 0; - u32 sflags, tmp_flags; - - tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; - /* sflags is a bitmap */ - i = 0; - while (tmp_flags) { - if ((sflags >> i) & 0x1) { - emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off, - ctx); - real_off += SZREG; - } - i++; - tmp_flags >>= 1; - } - - /* restore return address */ - if (ctx->flags & SEEN_CALL) - emit_load_stack_reg(r_ra, r_sp, real_off, ctx); - - /* Restore the sp and discard the scrach memory */ - if (offset) - emit_stack_offset(align_sp(offset), ctx); -} - -static unsigned int get_stack_depth(struct jit_ctx *ctx) -{ - int sp_off = 0; - - - /* How may s* regs do we need to preserved? 
*/ - sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG; - - if (ctx->flags & SEEN_MEM) - sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ - - if (ctx->flags & SEEN_CALL) - sp_off += SZREG; /* Space for our ra register */ - - return sp_off; -} - -static void build_prologue(struct jit_ctx *ctx) -{ - int sp_off; - - /* Calculate the total offset for the stack pointer */ - sp_off = get_stack_depth(ctx); - save_bpf_jit_regs(ctx, sp_off); - - if (ctx->flags & SEEN_SKB) - emit_reg_move(r_skb, MIPS_R_A0, ctx); - - if (ctx->flags & SEEN_SKB_DATA) { - /* Load packet length */ - emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len), - ctx); - emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len), - ctx); - /* Load the data pointer */ - emit_load_ptr(r_skb_data, r_skb, - offsetof(struct sk_buff, data), ctx); - /* Load the header length */ - emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx); - } - - if (ctx->flags & SEEN_X) - emit_jit_reg_move(r_X, r_zero, ctx); - - /* - * Do not leak kernel data to userspace, we only need to clear - * r_A if it is ever used. In fact if it is never used, we - * will not save/restore it, so clearing it in this case would - * corrupt the state of the caller. - */ - if (bpf_needs_clear_a(&ctx->skf->insns[0]) && - (ctx->flags & SEEN_A)) - emit_jit_reg_move(r_A, r_zero, ctx); -} - -static void build_epilogue(struct jit_ctx *ctx) -{ - unsigned int sp_off; - - /* Calculate the total offset for the stack pointer */ - - sp_off = get_stack_depth(ctx); - restore_bpf_jit_regs(ctx, sp_off); - - /* Return */ - emit_jr(r_ra, ctx); - emit_nop(ctx); -} - -#define CHOOSE_LOAD_FUNC(K, func) \ - ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? 
func##_negative : func) : \ - func##_positive) - -static bool is_bad_offset(int b_off) -{ - return b_off > 0x1ffff || b_off < -0x20000; -} - -static int build_body(struct jit_ctx *ctx) -{ - const struct bpf_prog *prog = ctx->skf; - const struct sock_filter *inst; - unsigned int i, off, condt; - u32 k, b_off __maybe_unused; - u8 (*sk_load_func)(unsigned long *skb, int offset); - - for (i = 0; i < prog->len; i++) { - u16 code; - - inst = &(prog->insns[i]); - pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n", - __func__, inst->code, inst->jt, inst->jf, inst->k); - k = inst->k; - code = bpf_anc_helper(inst); - - if (ctx->target == NULL) - ctx->offsets[i] = ctx->idx * 4; - - switch (code) { - case BPF_LD | BPF_IMM: - /* A <- k ==> li r_A, k */ - ctx->flags |= SEEN_A; - emit_load_imm(r_A, k, ctx); - break; - case BPF_LD | BPF_W | BPF_LEN: - BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4); - /* A <- len ==> lw r_A, offset(skb) */ - ctx->flags |= SEEN_SKB | SEEN_A; - off = offsetof(struct sk_buff, len); - emit_load(r_A, r_skb, off, ctx); - break; - case BPF_LD | BPF_MEM: - /* A <- M[k] ==> lw r_A, offset(M) */ - ctx->flags |= SEEN_MEM | SEEN_A; - emit_load(r_A, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_LD | BPF_W | BPF_ABS: - /* A <- P[k:4] */ - sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word); - goto load; - case BPF_LD | BPF_H | BPF_ABS: - /* A <- P[k:2] */ - sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half); - goto load; - case BPF_LD | BPF_B | BPF_ABS: - /* A <- P[k:1] */ - sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte); -load: - emit_load_imm(r_off, k, ctx); -load_common: - ctx->flags |= SEEN_CALL | SEEN_OFF | - SEEN_SKB | SEEN_A | SEEN_SKB_DATA; - - emit_load_func(r_s0, (ptr)sk_load_func, ctx); - emit_reg_move(MIPS_R_A0, r_skb, ctx); - emit_jalr(MIPS_R_RA, r_s0, ctx); - /* Load second argument to delay slot */ - emit_reg_move(MIPS_R_A1, r_off, ctx); - /* Check the error value */ - emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx), - ctx); - /* 
Load return register on DS for failures */ - emit_reg_move(r_ret, r_zero, ctx); - /* Return with error */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_b(b_off, ctx); - emit_nop(ctx); - break; - case BPF_LD | BPF_W | BPF_IND: - /* A <- P[X + k:4] */ - sk_load_func = sk_load_word; - goto load_ind; - case BPF_LD | BPF_H | BPF_IND: - /* A <- P[X + k:2] */ - sk_load_func = sk_load_half; - goto load_ind; - case BPF_LD | BPF_B | BPF_IND: - /* A <- P[X + k:1] */ - sk_load_func = sk_load_byte; -load_ind: - ctx->flags |= SEEN_OFF | SEEN_X; - emit_addiu(r_off, r_X, k, ctx); - goto load_common; - case BPF_LDX | BPF_IMM: - /* X <- k */ - ctx->flags |= SEEN_X; - emit_load_imm(r_X, k, ctx); - break; - case BPF_LDX | BPF_MEM: - /* X <- M[k] */ - ctx->flags |= SEEN_X | SEEN_MEM; - emit_load(r_X, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_LDX | BPF_W | BPF_LEN: - /* X <- len */ - ctx->flags |= SEEN_X | SEEN_SKB; - off = offsetof(struct sk_buff, len); - emit_load(r_X, r_skb, off, ctx); - break; - case BPF_LDX | BPF_B | BPF_MSH: - /* X <- 4 * (P[k:1] & 0xf) */ - ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB; - /* Load offset to a1 */ - emit_load_func(r_s0, (ptr)sk_load_byte, ctx); - /* - * This may emit two instructions so it may not fit - * in the delay slot. So use a0 in the delay slot. 
- */ - emit_load_imm(MIPS_R_A1, k, ctx); - emit_jalr(MIPS_R_RA, r_s0, ctx); - emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ - /* Check the error value */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx); - emit_reg_move(r_ret, r_zero, ctx); - /* We are good */ - /* X <- P[1:K] & 0xf */ - emit_andi(r_X, r_A, 0xf, ctx); - /* X << 2 */ - emit_b(b_imm(i + 1, ctx), ctx); - emit_sll(r_X, r_X, 2, ctx); /* delay slot */ - break; - case BPF_ST: - /* M[k] <- A */ - ctx->flags |= SEEN_MEM | SEEN_A; - emit_store(r_A, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_STX: - /* M[k] <- X */ - ctx->flags |= SEEN_MEM | SEEN_X; - emit_store(r_X, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_ALU | BPF_ADD | BPF_K: - /* A += K */ - ctx->flags |= SEEN_A; - emit_addiu(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_ADD | BPF_X: - /* A += X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_addu(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_SUB | BPF_K: - /* A -= K */ - ctx->flags |= SEEN_A; - emit_addiu(r_A, r_A, -k, ctx); - break; - case BPF_ALU | BPF_SUB | BPF_X: - /* A -= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_subu(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_MUL | BPF_K: - /* A *= K */ - /* Load K to scratch register before MUL */ - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - emit_mul(r_A, r_A, r_s0, ctx); - break; - case BPF_ALU | BPF_MUL | BPF_X: - /* A *= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_mul(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_DIV | BPF_K: - /* A /= k */ - if (k == 1) - break; - if (optimize_div(&k)) { - ctx->flags |= SEEN_A; - emit_srl(r_A, r_A, k, ctx); - break; - } - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - emit_div(r_A, r_s0, ctx); - break; - case BPF_ALU | BPF_MOD | BPF_K: - /* A %= k */ - if (k == 1) { - ctx->flags |= SEEN_A; - emit_jit_reg_move(r_A, r_zero, ctx); - } else { - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); 
- emit_mod(r_A, r_s0, ctx); - } - break; - case BPF_ALU | BPF_DIV | BPF_X: - /* A /= X */ - ctx->flags |= SEEN_X | SEEN_A; - /* Check if r_X is zero */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); - emit_load_imm(r_ret, 0, ctx); /* delay slot */ - emit_div(r_A, r_X, ctx); - break; - case BPF_ALU | BPF_MOD | BPF_X: - /* A %= X */ - ctx->flags |= SEEN_X | SEEN_A; - /* Check if r_X is zero */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); - emit_load_imm(r_ret, 0, ctx); /* delay slot */ - emit_mod(r_A, r_X, ctx); - break; - case BPF_ALU | BPF_OR | BPF_K: - /* A |= K */ - ctx->flags |= SEEN_A; - emit_ori(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_OR | BPF_X: - /* A |= X */ - ctx->flags |= SEEN_A; - emit_ori(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_XOR | BPF_K: - /* A ^= k */ - ctx->flags |= SEEN_A; - emit_xori(r_A, r_A, k, ctx); - break; - case BPF_ANC | SKF_AD_ALU_XOR_X: - case BPF_ALU | BPF_XOR | BPF_X: - /* A ^= X */ - ctx->flags |= SEEN_A; - emit_xor(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_AND | BPF_K: - /* A &= K */ - ctx->flags |= SEEN_A; - emit_andi(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_AND | BPF_X: - /* A &= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_and(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_LSH | BPF_K: - /* A <<= K */ - ctx->flags |= SEEN_A; - emit_sll(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_LSH | BPF_X: - /* A <<= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_sllv(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_RSH | BPF_K: - /* A >>= K */ - ctx->flags |= SEEN_A; - emit_srl(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_RSH | BPF_X: - ctx->flags |= SEEN_A | SEEN_X; - emit_srlv(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_NEG: - /* A = -A */ - ctx->flags |= SEEN_A; - emit_neg(r_A, ctx); - break; - case BPF_JMP | BPF_JA: - 
/* pc += K */ - b_off = b_imm(i + k + 1, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_b(b_off, ctx); - emit_nop(ctx); - break; - case BPF_JMP | BPF_JEQ | BPF_K: - /* pc += ( A == K ) ? pc->jt : pc->jf */ - condt = MIPS_COND_EQ | MIPS_COND_K; - goto jmp_cmp; - case BPF_JMP | BPF_JEQ | BPF_X: - ctx->flags |= SEEN_X; - /* pc += ( A == X ) ? pc->jt : pc->jf */ - condt = MIPS_COND_EQ | MIPS_COND_X; - goto jmp_cmp; - case BPF_JMP | BPF_JGE | BPF_K: - /* pc += ( A >= K ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GE | MIPS_COND_K; - goto jmp_cmp; - case BPF_JMP | BPF_JGE | BPF_X: - ctx->flags |= SEEN_X; - /* pc += ( A >= X ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GE | MIPS_COND_X; - goto jmp_cmp; - case BPF_JMP | BPF_JGT | BPF_K: - /* pc += ( A > K ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GT | MIPS_COND_K; - goto jmp_cmp; - case BPF_JMP | BPF_JGT | BPF_X: - ctx->flags |= SEEN_X; - /* pc += ( A > X ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GT | MIPS_COND_X; -jmp_cmp: - /* Greater or Equal */ - if ((condt & MIPS_COND_GE) || - (condt & MIPS_COND_GT)) { - if (condt & MIPS_COND_K) { /* K */ - ctx->flags |= SEEN_A; - emit_sltiu(r_s0, r_A, k, ctx); - } else { /* X */ - ctx->flags |= SEEN_A | - SEEN_X; - emit_sltu(r_s0, r_A, r_X, ctx); - } - /* A < (K|X) ? r_scrach = 1 */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, - ctx); - emit_nop(ctx); - /* A > (K|X) ? 
scratch = 0 */ - if (condt & MIPS_COND_GT) { - /* Checking for equality */ - ctx->flags |= SEEN_A | SEEN_X; - if (condt & MIPS_COND_K) - emit_load_imm(r_s0, k, ctx); - else - emit_jit_reg_move(r_s0, r_X, - ctx); - b_off = b_imm(i + inst->jf + 1, ctx); - emit_bcond(MIPS_COND_EQ, r_A, r_s0, - b_off, ctx); - emit_nop(ctx); - /* Finally, A > K|X */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - } else { - /* A >= (K|X) so jump */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - } - } else { - /* A == K|X */ - if (condt & MIPS_COND_K) { /* K */ - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - /* jump true */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_bcond(MIPS_COND_EQ, r_A, r_s0, - b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, - ctx); - emit_bcond(MIPS_COND_NE, r_A, r_s0, - b_off, ctx); - emit_nop(ctx); - } else { /* X */ - /* jump true */ - ctx->flags |= SEEN_A | SEEN_X; - b_off = b_imm(i + inst->jt + 1, - ctx); - emit_bcond(MIPS_COND_EQ, r_A, r_X, - b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_bcond(MIPS_COND_NE, r_A, r_X, - b_off, ctx); - emit_nop(ctx); - } - } - break; - case BPF_JMP | BPF_JSET | BPF_K: - ctx->flags |= SEEN_A; - /* pc += (A & K) ? pc -> jt : pc -> jf */ - emit_load_imm(r_s1, k, ctx); - emit_and(r_s0, r_A, r_s1, ctx); - /* jump true */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - break; - case BPF_JMP | BPF_JSET | BPF_X: - ctx->flags |= SEEN_X | SEEN_A; - /* pc += (A & X) ? 
pc -> jt : pc -> jf */ - emit_and(r_s0, r_A, r_X, ctx); - /* jump true */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - break; - case BPF_RET | BPF_A: - ctx->flags |= SEEN_A; - if (i != prog->len - 1) { - /* - * If this is not the last instruction - * then jump to the epilogue - */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_b(b_off, ctx); - } - emit_reg_move(r_ret, r_A, ctx); /* delay slot */ - break; - case BPF_RET | BPF_K: - /* - * It can emit two instructions so it does not fit on - * the delay slot. - */ - emit_load_imm(r_ret, k, ctx); - if (i != prog->len - 1) { - /* - * If this is not the last instruction - * then jump to the epilogue - */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_b(b_off, ctx); - emit_nop(ctx); - } - break; - case BPF_MISC | BPF_TAX: - /* X = A */ - ctx->flags |= SEEN_X | SEEN_A; - emit_jit_reg_move(r_X, r_A, ctx); - break; - case BPF_MISC | BPF_TXA: - /* A = X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_jit_reg_move(r_A, r_X, ctx); - break; - /* AUX */ - case BPF_ANC | SKF_AD_PROTOCOL: - /* A = ntohs(skb->protocol */ - ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A; - BUILD_BUG_ON(sizeof_field(struct sk_buff, - protocol) != 2); - off = offsetof(struct sk_buff, protocol); - emit_half_load(r_A, r_skb, off, ctx); -#ifdef CONFIG_CPU_LITTLE_ENDIAN - /* This needs little endian fixup */ - if (cpu_has_wsbh) { - /* R2 and later have the wsbh instruction */ - emit_wsbh(r_A, r_A, ctx); - } else { - /* Get first byte */ - emit_andi(r_tmp_imm, r_A, 0xff, ctx); - /* Shift it */ - emit_sll(r_tmp, r_tmp_imm, 8, ctx); - /* Get second byte */ - emit_srl(r_tmp_imm, r_A, 8, ctx); - emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx); - /* Put everyting together in r_A */ - emit_or(r_A, r_tmp, r_tmp_imm, ctx); - } -#endif - 
break; - case BPF_ANC | SKF_AD_CPU: - ctx->flags |= SEEN_A | SEEN_OFF; - /* A = current_thread_info()->cpu */ - BUILD_BUG_ON(sizeof_field(struct thread_info, - cpu) != 4); - off = offsetof(struct thread_info, cpu); - /* $28/gp points to the thread_info struct */ - emit_load(r_A, 28, off, ctx); - break; - case BPF_ANC | SKF_AD_IFINDEX: - /* A = skb->dev->ifindex */ - case BPF_ANC | SKF_AD_HATYPE: - /* A = skb->dev->type */ - ctx->flags |= SEEN_SKB | SEEN_A; - off = offsetof(struct sk_buff, dev); - /* Load *dev pointer */ - emit_load_ptr(r_s0, r_skb, off, ctx); - /* error (0) in the delay slot */ - b_off = b_imm(prog->len, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx); - emit_reg_move(r_ret, r_zero, ctx); - if (code == (BPF_ANC | SKF_AD_IFINDEX)) { - BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4); - off = offsetof(struct net_device, ifindex); - emit_load(r_A, r_s0, off, ctx); - } else { /* (code == (BPF_ANC | SKF_AD_HATYPE) */ - BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2); - off = offsetof(struct net_device, type); - emit_half_load_unsigned(r_A, r_s0, off, ctx); - } - break; - case BPF_ANC | SKF_AD_MARK: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4); - off = offsetof(struct sk_buff, mark); - emit_load(r_A, r_skb, off, ctx); - break; - case BPF_ANC | SKF_AD_RXHASH: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4); - off = offsetof(struct sk_buff, hash); - emit_load(r_A, r_skb, off, ctx); - break; - case BPF_ANC | SKF_AD_VLAN_TAG: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(sizeof_field(struct sk_buff, - vlan_tci) != 2); - off = offsetof(struct sk_buff, vlan_tci); - emit_half_load_unsigned(r_A, r_skb, off, ctx); - break; - case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - ctx->flags |= SEEN_SKB | SEEN_A; - emit_load_byte(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET(), ctx); - if (PKT_VLAN_PRESENT_BIT) - 
emit_srl(r_A, r_A, PKT_VLAN_PRESENT_BIT, ctx); - if (PKT_VLAN_PRESENT_BIT < 7) - emit_andi(r_A, r_A, 1, ctx); - break; - case BPF_ANC | SKF_AD_PKTTYPE: - ctx->flags |= SEEN_SKB; - - emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); - /* Keep only the last 3 bits */ - emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); -#ifdef __BIG_ENDIAN_BITFIELD - /* Get the actual packet type to the lower 3 bits */ - emit_srl(r_A, r_A, 5, ctx); -#endif - break; - case BPF_ANC | SKF_AD_QUEUE: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(sizeof_field(struct sk_buff, - queue_mapping) != 2); - BUILD_BUG_ON(offsetof(struct sk_buff, - queue_mapping) > 0xff); - off = offsetof(struct sk_buff, queue_mapping); - emit_half_load_unsigned(r_A, r_skb, off, ctx); - break; - default: - pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__, - inst->code); - return -1; - } - } - - /* compute offsets only during the first pass */ - if (ctx->target == NULL) - ctx->offsets[i] = ctx->idx * 4; - - return 0; -} - -void bpf_jit_compile(struct bpf_prog *fp) -{ - struct jit_ctx ctx; - unsigned int alloc_size, tmp_idx; - - if (!bpf_jit_enable) - return; - - memset(&ctx, 0, sizeof(ctx)); - - ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); - if (ctx.offsets == NULL) - return; - - ctx.skf = fp; - - if (build_body(&ctx)) - goto out; - - tmp_idx = ctx.idx; - build_prologue(&ctx); - ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; - /* just to complete the ctx.idx count */ - build_epilogue(&ctx); - - alloc_size = 4 * ctx.idx; - ctx.target = module_alloc(alloc_size); - if (ctx.target == NULL) - goto out; - - /* Clean it */ - memset(ctx.target, 0, alloc_size); - - ctx.idx = 0; - - /* Generate the actual JIT code */ - build_prologue(&ctx); - if (build_body(&ctx)) { - module_memfree(ctx.target); - goto out; - } - build_epilogue(&ctx); - - /* Update the icache */ - flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx)); - - if (bpf_jit_enable > 1) - /* Dump JIT code */ - 
bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); - - fp->bpf_func = (void *)ctx.target; - fp->jited = 1; - -out: - kfree(ctx.offsets); -} - -void bpf_jit_free(struct bpf_prog *fp) -{ - if (fp->jited) - module_memfree(fp->bpf_func); - - bpf_prog_unlock_free(fp); -} diff --git a/arch/mips/net/bpf_jit.h b/arch/mips/net/bpf_jit.h deleted file mode 100644 index 166ca06c9da9..000000000000 --- a/arch/mips/net/bpf_jit.h +++ /dev/null @@ -1,81 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Just-In-Time compiler for BPF filters on MIPS - * - * Copyright (c) 2014 Imagination Technologies Ltd. - * Author: Markos Chandras <markos.chandras@imgtec.com> - */ - -#ifndef BPF_JIT_MIPS_OP_H -#define BPF_JIT_MIPS_OP_H - -/* Registers used by JIT */ -#define MIPS_R_ZERO 0 -#define MIPS_R_V0 2 -#define MIPS_R_A0 4 -#define MIPS_R_A1 5 -#define MIPS_R_T4 12 -#define MIPS_R_T5 13 -#define MIPS_R_T6 14 -#define MIPS_R_T7 15 -#define MIPS_R_S0 16 -#define MIPS_R_S1 17 -#define MIPS_R_S2 18 -#define MIPS_R_S3 19 -#define MIPS_R_S4 20 -#define MIPS_R_S5 21 -#define MIPS_R_S6 22 -#define MIPS_R_S7 23 -#define MIPS_R_SP 29 -#define MIPS_R_RA 31 - -/* Conditional codes */ -#define MIPS_COND_EQ 0x1 -#define MIPS_COND_GE (0x1 << 1) -#define MIPS_COND_GT (0x1 << 2) -#define MIPS_COND_NE (0x1 << 3) -#define MIPS_COND_ALL (0x1 << 4) -/* Conditionals on X register or K immediate */ -#define MIPS_COND_X (0x1 << 5) -#define MIPS_COND_K (0x1 << 6) - -#define r_ret MIPS_R_V0 - -/* - * Use 2 scratch registers to avoid pipeline interlocks. - * There is no overhead during epilogue and prologue since - * any of the $s0-$s6 registers will only be preserved if - * they are going to actually be used. 
- */ -#define r_skb_hl MIPS_R_S0 /* skb header length */ -#define r_skb_data MIPS_R_S1 /* skb actual data */ -#define r_off MIPS_R_S2 -#define r_A MIPS_R_S3 -#define r_X MIPS_R_S4 -#define r_skb MIPS_R_S5 -#define r_M MIPS_R_S6 -#define r_skb_len MIPS_R_S7 -#define r_s0 MIPS_R_T4 /* scratch reg 1 */ -#define r_s1 MIPS_R_T5 /* scratch reg 2 */ -#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */ -#define r_tmp MIPS_R_T7 /* No need to preserve this */ -#define r_zero MIPS_R_ZERO -#define r_sp MIPS_R_SP -#define r_ra MIPS_R_RA - -#ifndef __ASSEMBLY__ - -/* Declare ASM helpers */ - -#define DECLARE_LOAD_FUNC(func) \ - extern u8 func(unsigned long *skb, int offset); \ - extern u8 func##_negative(unsigned long *skb, int offset); \ - extern u8 func##_positive(unsigned long *skb, int offset) - -DECLARE_LOAD_FUNC(sk_load_word); -DECLARE_LOAD_FUNC(sk_load_half); -DECLARE_LOAD_FUNC(sk_load_byte); - -#endif - -#endif /* BPF_JIT_MIPS_OP_H */ diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S deleted file mode 100644 index 57154c5883b6..000000000000 --- a/arch/mips/net/bpf_jit_asm.S +++ /dev/null @@ -1,285 +0,0 @@ -/* - * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF - * compiler. - * - * Copyright (C) 2015 Imagination Technologies Ltd. - * Author: Markos Chandras <markos.chandras@imgtec.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. 
- */ - -#include <asm/asm.h> -#include <asm/isa-rev.h> -#include <asm/regdef.h> -#include "bpf_jit.h" - -/* ABI - * - * r_skb_hl skb header length - * r_skb_data skb data - * r_off(a1) offset register - * r_A BPF register A - * r_X PF register X - * r_skb(a0) *skb - * r_M *scratch memory - * r_skb_le skb length - * r_s0 Scratch register 0 - * r_s1 Scratch register 1 - * - * On entry: - * a0: *skb - * a1: offset (imm or imm + X) - * - * All non-BPF-ABI registers are free for use. On return, we only - * care about r_ret. The BPF-ABI registers are assumed to remain - * unmodified during the entire filter operation. - */ - -#define skb a0 -#define offset a1 -#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */ - - /* We know better :) so prevent assembler reordering etc */ - .set noreorder - -#define is_offset_negative(TYPE) \ - /* If offset is negative we have more work to do */ \ - slti t0, offset, 0; \ - bgtz t0, bpf_slow_path_##TYPE##_neg; \ - /* Be careful what follows in DS. */ - -#define is_offset_in_header(SIZE, TYPE) \ - /* Reading from header? 
*/ \ - addiu $r_s0, $r_skb_hl, -SIZE; \ - slt t0, $r_s0, offset; \ - bgtz t0, bpf_slow_path_##TYPE; \ - -LEAF(sk_load_word) - is_offset_negative(word) -FEXPORT(sk_load_word_positive) - is_offset_in_header(4, word) - /* Offset within header boundaries */ - PTR_ADDU t1, $r_skb_data, offset - .set reorder - lw $r_A, 0(t1) - .set noreorder -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - wsbh t0, $r_A - rotr $r_A, t0, 16 -# else - sll t0, $r_A, 24 - srl t1, $r_A, 24 - srl t2, $r_A, 8 - or t0, t0, t1 - andi t2, t2, 0xff00 - andi t1, $r_A, 0xff00 - or t0, t0, t2 - sll t1, t1, 8 - or $r_A, t0, t1 -# endif -#endif - jr $r_ra - move $r_ret, zero - END(sk_load_word) - -LEAF(sk_load_half) - is_offset_negative(half) -FEXPORT(sk_load_half_positive) - is_offset_in_header(2, half) - /* Offset within header boundaries */ - PTR_ADDU t1, $r_skb_data, offset - lhu $r_A, 0(t1) -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - wsbh $r_A, $r_A -# else - sll t0, $r_A, 8 - srl t1, $r_A, 8 - andi t0, t0, 0xff00 - or $r_A, t0, t1 -# endif -#endif - jr $r_ra - move $r_ret, zero - END(sk_load_half) - -LEAF(sk_load_byte) - is_offset_negative(byte) -FEXPORT(sk_load_byte_positive) - is_offset_in_header(1, byte) - /* Offset within header boundaries */ - PTR_ADDU t1, $r_skb_data, offset - lbu $r_A, 0(t1) - jr $r_ra - move $r_ret, zero - END(sk_load_byte) - -/* - * call skb_copy_bits: - * (prototype in linux/skbuff.h) - * - * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len) - * - * o32 mandates we leave 4 spaces for argument registers in case - * the callee needs to use them. Even though we don't care about - * the argument registers ourselves, we need to allocate that space - * to remain ABI compliant since the callee may want to use that space. - * We also allocate 2 more spaces for $r_ra and our return register (*to). - * - * n64 is a bit different. The *caller* will allocate the space to preserve - * the arguments. 
So in 64-bit kernels, we allocate the 4-arg space for no - * good reason but it does not matter that much really. - * - * (void *to) is returned in r_s0 - * - */ -#ifdef CONFIG_CPU_LITTLE_ENDIAN -#define DS_OFFSET(SIZE) (4 * SZREG) -#else -#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE)) -#endif -#define bpf_slow_path_common(SIZE) \ - /* Quick check. Are we within reasonable boundaries? */ \ - LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \ - sltu $r_s0, offset, $r_s1; \ - beqz $r_s0, fault; \ - /* Load 4th argument in DS */ \ - LONG_ADDIU a3, zero, SIZE; \ - PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ - PTR_LA t0, skb_copy_bits; \ - PTR_S $r_ra, (5 * SZREG)($r_sp); \ - /* Assign low slot to a2 */ \ - PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \ - jalr t0; \ - /* Reset our destination slot (DS but it's ok) */ \ - INT_S zero, (4 * SZREG)($r_sp); \ - /* \ - * skb_copy_bits returns 0 on success and -EFAULT \ - * on error. Our data live in a2. Do not bother with \ - * our data if an error has been returned. 
\ - */ \ - /* Restore our frame */ \ - PTR_L $r_ra, (5 * SZREG)($r_sp); \ - INT_L $r_s0, (4 * SZREG)($r_sp); \ - bltz v0, fault; \ - PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ - move $r_ret, zero; \ - -NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) - bpf_slow_path_common(4) -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - wsbh t0, $r_s0 - jr $r_ra - rotr $r_A, t0, 16 -# else - sll t0, $r_s0, 24 - srl t1, $r_s0, 24 - srl t2, $r_s0, 8 - or t0, t0, t1 - andi t2, t2, 0xff00 - andi t1, $r_s0, 0xff00 - or t0, t0, t2 - sll t1, t1, 8 - jr $r_ra - or $r_A, t0, t1 -# endif -#else - jr $r_ra - move $r_A, $r_s0 -#endif - - END(bpf_slow_path_word) - -NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) - bpf_slow_path_common(2) -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - jr $r_ra - wsbh $r_A, $r_s0 -# else - sll t0, $r_s0, 8 - andi t1, $r_s0, 0xff00 - andi t0, t0, 0xff00 - srl t1, t1, 8 - jr $r_ra - or $r_A, t0, t1 -# endif -#else - jr $r_ra - move $r_A, $r_s0 -#endif - - END(bpf_slow_path_half) - -NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp) - bpf_slow_path_common(1) - jr $r_ra - move $r_A, $r_s0 - - END(bpf_slow_path_byte) - -/* - * Negative entry points - */ - .macro bpf_is_end_of_data - li t0, SKF_LL_OFF - /* Reading link layer data? */ - slt t1, offset, t0 - bgtz t1, fault - /* Be careful what follows in DS. 
*/ - .endm -/* - * call skb_copy_bits: - * (prototype in linux/filter.h) - * - * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, - * int k, unsigned int size) - * - * see above (bpf_slow_path_common) for ABI restrictions - */ -#define bpf_negative_common(SIZE) \ - PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ - PTR_LA t0, bpf_internal_load_pointer_neg_helper; \ - PTR_S $r_ra, (5 * SZREG)($r_sp); \ - jalr t0; \ - li a2, SIZE; \ - PTR_L $r_ra, (5 * SZREG)($r_sp); \ - /* Check return pointer */ \ - beqz v0, fault; \ - PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ - /* Preserve our pointer */ \ - move $r_s0, v0; \ - /* Set return value */ \ - move $r_ret, zero; \ - -bpf_slow_path_word_neg: - bpf_is_end_of_data -NESTED(sk_load_word_negative, (6 * SZREG), $r_sp) - bpf_negative_common(4) - jr $r_ra - lw $r_A, 0($r_s0) - END(sk_load_word_negative) - -bpf_slow_path_half_neg: - bpf_is_end_of_data -NESTED(sk_load_half_negative, (6 * SZREG), $r_sp) - bpf_negative_common(2) - jr $r_ra - lhu $r_A, 0($r_s0) - END(sk_load_half_negative) - -bpf_slow_path_byte_neg: - bpf_is_end_of_data -NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp) - bpf_negative_common(1) - jr $r_ra - lbu $r_A, 0($r_s0) - END(sk_load_byte_negative) - -fault: - jr $r_ra - addiu $r_ret, zero, 1 diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c new file mode 100644 index 000000000000..b17130d510d4 --- /dev/null +++ b/arch/mips/net/bpf_jit_comp.c @@ -0,0 +1,1034 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Just-In-Time compiler for eBPF bytecode on MIPS. + * Implementation of JIT functions common to 32-bit and 64-bit CPUs. + * + * Copyright (c) 2021 Anyfi Networks AB. + * Author: Johan Almbladh <johan.almbladh@gmail.com> + * + * Based on code and ideas from + * Copyright (c) 2017 Cavium, Inc. 
+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> + * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> + */ + +/* + * Code overview + * ============= + * + * - bpf_jit_comp.h + * Common definitions and utilities. + * + * - bpf_jit_comp.c + * Implementation of JIT top-level logic and exported JIT API functions. + * Implementation of internal operations shared by 32-bit and 64-bit code. + * JMP and ALU JIT control code, register control code, shared ALU and + * JMP/JMP32 JIT operations. + * + * - bpf_jit_comp32.c + * Implementation of functions to JIT prologue, epilogue and a single eBPF + * instruction for 32-bit MIPS CPUs. The functions use shared operations + * where possible, and implement the rest for 32-bit MIPS such as ALU64 + * operations. + * + * - bpf_jit_comp64.c + * Ditto, for 64-bit MIPS CPUs. + * + * Zero and sign extension + * ======================== + * 32-bit MIPS instructions on 64-bit MIPS registers use sign extension, + * but the eBPF instruction set mandates zero extension. We let the verifier + * insert explicit zero-extensions after 32-bit ALU operations, both for + * 32-bit and 64-bit MIPS JITs. Conditional JMP32 operations on 64-bit MIPS + * are JITed with sign extensions inserted when so expected. + * + * ALU operations + * ============== + * ALU operations on 32/64-bit MIPS and ALU64 operations on 64-bit MIPS are + * JITed in the following steps. ALU64 operations on 32-bit MIPS are more + * complicated and therefore only processed by special implementations in + * step (3). + * + * 1) valid_alu_i: + * Determine if an immediate operation can be emitted as such, or if + * we must fall back to the register version. + * + * 2) rewrite_alu_i: + * Convert BPF operation and immediate value to a canonical form for + * JITing. In some degenerate cases this form may be a no-op. + * + * 3) emit_alu_{i,i64,r,r64}: + * Emit instructions for an ALU or ALU64 immediate or register operation. 
+ * + * JMP operations + * ============== + * JMP and JMP32 operations require a JIT instruction offset table for + * translating the jump offset. This table is computed by dry-running the + * JIT without actually emitting anything. However, the computed PC-relative + * offset may overflow the 18-bit offset field width of the native MIPS + * branch instruction. In such cases, the long jump is converted into the + * following sequence. + * + * <branch> !<cond> +2 Inverted PC-relative branch + * nop Delay slot + * j <offset> Unconditional absolute long jump + * nop Delay slot + * + * Since this converted sequence alters the offset table, all offsets must + * be re-calculated. This may in turn trigger new branch conversions, so + * the process is repeated until no further changes are made. Normally it + * completes in 1-2 iterations. If JIT_MAX_ITERATIONS should be reached, we + * fall back to converting every remaining jump operation. The branch + * conversion is independent of how the JMP or JMP32 condition is JITed. + * + * JMP32 and JMP operations are JITed as follows. + * + * 1) setup_jmp_{i,r}: + * Convert jump conditional and offset into a form that can be JITed. + * This form may be a no-op, a canonical form, or an inverted PC-relative + * jump if branch conversion is necessary. + * + * 2) valid_jmp_i: + * Determine if an immediate operation can be emitted as such, or if + * we must fall back to the register version. Applies to JMP32 for 32-bit + * MIPS, and both JMP and JMP32 for 64-bit MIPS. + * + * 3) emit_jmp_{i,i64,r,r64}: + * Emit instructions for a JMP or JMP32 immediate or register operation. + * + * 4) finish_jmp_{i,r}: + * Emit any instructions needed to finish the jump. This includes a nop + * for the delay slot if a branch was emitted, and a long absolute jump + * if the branch was converted. 
+ */ + +#include <linux/limits.h> +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/filter.h> +#include <linux/bpf.h> +#include <linux/slab.h> +#include <asm/bitops.h> +#include <asm/cacheflush.h> +#include <asm/cpu-features.h> +#include <asm/isa-rev.h> +#include <asm/uasm.h> + +#include "bpf_jit_comp.h" + +/* Convenience macros for descriptor access */ +#define CONVERTED(desc) ((desc) & JIT_DESC_CONVERT) +#define INDEX(desc) ((desc) & ~JIT_DESC_CONVERT) + +/* + * Push registers on the stack, starting at a given depth from the stack + * pointer and increasing. The next depth to be written is returned. + */ +int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth) +{ + int reg; + + for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++) + if (mask & BIT(reg)) { + if ((excl & BIT(reg)) == 0) { + if (sizeof(long) == 4) + emit(ctx, sw, reg, depth, MIPS_R_SP); + else /* sizeof(long) == 8 */ + emit(ctx, sd, reg, depth, MIPS_R_SP); + } + depth += sizeof(long); + } + + ctx->stack_used = max((int)ctx->stack_used, depth); + return depth; +} + +/* + * Pop registers from the stack, starting at a given depth from the stack + * pointer and increasing. The next depth to be read is returned. 
+ */ +int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth) +{ + int reg; + + for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++) + if (mask & BIT(reg)) { + if ((excl & BIT(reg)) == 0) { + if (sizeof(long) == 4) + emit(ctx, lw, reg, depth, MIPS_R_SP); + else /* sizeof(long) == 8 */ + emit(ctx, ld, reg, depth, MIPS_R_SP); + } + depth += sizeof(long); + } + + return depth; +} + +/* Compute the 28-bit jump target address from a BPF program location */ +int get_target(struct jit_context *ctx, u32 loc) +{ + u32 index = INDEX(ctx->descriptors[loc]); + unsigned long pc = (unsigned long)&ctx->target[ctx->jit_index]; + unsigned long addr = (unsigned long)&ctx->target[index]; + + if (!ctx->target) + return 0; + + if ((addr ^ pc) & ~MIPS_JMP_MASK) + return -1; + + return addr & MIPS_JMP_MASK; +} + +/* Compute the PC-relative offset to relative BPF program offset */ +int get_offset(const struct jit_context *ctx, int off) +{ + return (INDEX(ctx->descriptors[ctx->bpf_index + off]) - + ctx->jit_index - 1) * sizeof(u32); +} + +/* dst = imm (register width) */ +void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm) +{ + if (imm >= -0x8000 && imm <= 0x7fff) { + emit(ctx, addiu, dst, MIPS_R_ZERO, imm); + } else { + emit(ctx, lui, dst, (s16)((u32)imm >> 16)); + emit(ctx, ori, dst, dst, (u16)(imm & 0xffff)); + } + clobber_reg(ctx, dst); +} + +/* dst = src (register width) */ +void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src) +{ + emit(ctx, ori, dst, src, 0); + clobber_reg(ctx, dst); +} + +/* Validate ALU immediate range */ +bool valid_alu_i(u8 op, s32 imm) +{ + switch (BPF_OP(op)) { + case BPF_NEG: + case BPF_LSH: + case BPF_RSH: + case BPF_ARSH: + /* All legal eBPF values are valid */ + return true; + case BPF_ADD: + /* imm must be 16 bits */ + return imm >= -0x8000 && imm <= 0x7fff; + case BPF_SUB: + /* -imm must be 16 bits */ + return imm >= -0x7fff && imm <= 0x8000; + case BPF_AND: + case BPF_OR: + case BPF_XOR: + /* imm must be 16 bits unsigned */ + 
return imm >= 0 && imm <= 0xffff; + case BPF_MUL: + /* imm must be zero or a positive power of two */ + return imm == 0 || (imm > 0 && is_power_of_2(imm)); + case BPF_DIV: + case BPF_MOD: + /* imm must be an 17-bit power of two */ + return (u32)imm <= 0x10000 && is_power_of_2((u32)imm); + } + return false; +} + +/* Rewrite ALU immediate operation */ +bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val) +{ + bool act = true; + + switch (BPF_OP(op)) { + case BPF_LSH: + case BPF_RSH: + case BPF_ARSH: + case BPF_ADD: + case BPF_SUB: + case BPF_OR: + case BPF_XOR: + /* imm == 0 is a no-op */ + act = imm != 0; + break; + case BPF_MUL: + if (imm == 1) { + /* dst * 1 is a no-op */ + act = false; + } else if (imm == 0) { + /* dst * 0 is dst & 0 */ + op = BPF_AND; + } else { + /* dst * (1 << n) is dst << n */ + op = BPF_LSH; + imm = ilog2(abs(imm)); + } + break; + case BPF_DIV: + if (imm == 1) { + /* dst / 1 is a no-op */ + act = false; + } else { + /* dst / (1 << n) is dst >> n */ + op = BPF_RSH; + imm = ilog2(imm); + } + break; + case BPF_MOD: + /* dst % (1 << n) is dst & ((1 << n) - 1) */ + op = BPF_AND; + imm--; + break; + } + + *alu = op; + *val = imm; + return act; +} + +/* ALU immediate operation (32-bit) */ +void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op) +{ + switch (BPF_OP(op)) { + /* dst = -dst */ + case BPF_NEG: + emit(ctx, subu, dst, MIPS_R_ZERO, dst); + break; + /* dst = dst & imm */ + case BPF_AND: + emit(ctx, andi, dst, dst, (u16)imm); + break; + /* dst = dst | imm */ + case BPF_OR: + emit(ctx, ori, dst, dst, (u16)imm); + break; + /* dst = dst ^ imm */ + case BPF_XOR: + emit(ctx, xori, dst, dst, (u16)imm); + break; + /* dst = dst << imm */ + case BPF_LSH: + emit(ctx, sll, dst, dst, imm); + break; + /* dst = dst >> imm */ + case BPF_RSH: + emit(ctx, srl, dst, dst, imm); + break; + /* dst = dst >> imm (arithmetic) */ + case BPF_ARSH: + emit(ctx, sra, dst, dst, imm); + break; + /* dst = dst + imm */ + case BPF_ADD: + emit(ctx, addiu, dst, dst, 
imm); + break; + /* dst = dst - imm */ + case BPF_SUB: + emit(ctx, addiu, dst, dst, -imm); + break; + } + clobber_reg(ctx, dst); +} + +/* ALU register operation (32-bit) */ +void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op) +{ + switch (BPF_OP(op)) { + /* dst = dst & src */ + case BPF_AND: + emit(ctx, and, dst, dst, src); + break; + /* dst = dst | src */ + case BPF_OR: + emit(ctx, or, dst, dst, src); + break; + /* dst = dst ^ src */ + case BPF_XOR: + emit(ctx, xor, dst, dst, src); + break; + /* dst = dst << src */ + case BPF_LSH: + emit(ctx, sllv, dst, dst, src); + break; + /* dst = dst >> src */ + case BPF_RSH: + emit(ctx, srlv, dst, dst, src); + break; + /* dst = dst >> src (arithmetic) */ + case BPF_ARSH: + emit(ctx, srav, dst, dst, src); + break; + /* dst = dst + src */ + case BPF_ADD: + emit(ctx, addu, dst, dst, src); + break; + /* dst = dst - src */ + case BPF_SUB: + emit(ctx, subu, dst, dst, src); + break; + /* dst = dst * src */ + case BPF_MUL: + if (cpu_has_mips32r1 || cpu_has_mips32r6) { + emit(ctx, mul, dst, dst, src); + } else { + emit(ctx, multu, dst, src); + emit(ctx, mflo, dst); + } + break; + /* dst = dst / src */ + case BPF_DIV: + if (cpu_has_mips32r6) { + emit(ctx, divu_r6, dst, dst, src); + } else { + emit(ctx, divu, dst, src); + emit(ctx, mflo, dst); + } + break; + /* dst = dst % src */ + case BPF_MOD: + if (cpu_has_mips32r6) { + emit(ctx, modu, dst, dst, src); + } else { + emit(ctx, divu, dst, src); + emit(ctx, mfhi, dst); + } + break; + } + clobber_reg(ctx, dst); +} + +/* Atomic read-modify-write (32-bit) */ +void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code) +{ + LLSC_sync(ctx); + emit(ctx, ll, MIPS_R_T9, off, dst); + switch (code) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + emit(ctx, addu, MIPS_R_T8, MIPS_R_T9, src); + break; + case BPF_AND: + case BPF_AND | BPF_FETCH: + emit(ctx, and, MIPS_R_T8, MIPS_R_T9, src); + break; + case BPF_OR: + case BPF_OR | BPF_FETCH: + emit(ctx, or, MIPS_R_T8, 
/*
 * Atomic compare-and-exchange (32-bit).
 *
 * Emits an ll/sc sequence on the word at address dst + off:
 * the current value is loaded into t9 and compared with res. If they
 * are equal, src is stored to memory. In both cases res finally
 * receives the value that was loaded, and is marked as clobbered.
 *
 * The branch offsets are hard-coded byte counts that depend on the
 * exact instruction sequence below, so instructions must not be added,
 * removed or reordered without adjusting them.
 */
void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off)
{
	/* Expands to a sync instruction or to nothing, see LLSC_sync */
	LLSC_sync(ctx);
	/* t9 = load-linked word at off(dst) */
	emit(ctx, ll, MIPS_R_T9, off, dst);
	/*
	 * Branch forward past the store if the loaded value differs from res.
	 * NOTE(review): offset 12 appears to land on the final move below,
	 * assuming offsets are relative to the delay slot - confirm.
	 */
	emit(ctx, bne, MIPS_R_T9, res, 12);
	emit(ctx, move, MIPS_R_T8, src);      /* Delay slot */
	/* Store-conditional of t8 (= src) to off(dst) */
	emit(ctx, sc, MIPS_R_T8, off, dst);
	/*
	 * Branch backwards when t8 is zero after the sc, to retry.
	 * LLSC_offset accounts for the extra instruction of LLSC_sync.
	 */
	emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset);
	emit(ctx, move, res, MIPS_R_T9);      /* Delay slot */
	clobber_reg(ctx, res);
}
emit(ctx, srl, tmp, tmp, 8); /* t = t >> 8 */ + emit(ctx, andi, dst, dst, 0x00ff); /* d = d & 0x00ff */ + emit(ctx, sll, dst, dst, 8); /* d = d << 8 */ + emit(ctx, or, dst, dst, tmp); /* d = d | t */ + } + break; + } + clobber_reg(ctx, dst); +} + +/* Validate jump immediate range */ +bool valid_jmp_i(u8 op, s32 imm) +{ + switch (op) { + case JIT_JNOP: + /* Immediate value not used */ + return true; + case BPF_JEQ: + case BPF_JNE: + /* No immediate operation */ + return false; + case BPF_JSET: + case JIT_JNSET: + /* imm must be 16 bits unsigned */ + return imm >= 0 && imm <= 0xffff; + case BPF_JGE: + case BPF_JLT: + case BPF_JSGE: + case BPF_JSLT: + /* imm must be 16 bits */ + return imm >= -0x8000 && imm <= 0x7fff; + case BPF_JGT: + case BPF_JLE: + case BPF_JSGT: + case BPF_JSLE: + /* imm + 1 must be 16 bits */ + return imm >= -0x8001 && imm <= 0x7ffe; + } + return false; +} + +/* Invert a conditional jump operation */ +static u8 invert_jmp(u8 op) +{ + switch (op) { + case BPF_JA: return JIT_JNOP; + case BPF_JEQ: return BPF_JNE; + case BPF_JNE: return BPF_JEQ; + case BPF_JSET: return JIT_JNSET; + case BPF_JGT: return BPF_JLE; + case BPF_JGE: return BPF_JLT; + case BPF_JLT: return BPF_JGE; + case BPF_JLE: return BPF_JGT; + case BPF_JSGT: return BPF_JSLE; + case BPF_JSGE: return BPF_JSLT; + case BPF_JSLT: return BPF_JSGE; + case BPF_JSLE: return BPF_JSGT; + } + return 0; +} + +/* Prepare a PC-relative jump operation */ +static void setup_jmp(struct jit_context *ctx, u8 bpf_op, + s16 bpf_off, u8 *jit_op, s32 *jit_off) +{ + u32 *descp = &ctx->descriptors[ctx->bpf_index]; + int op = bpf_op; + int offset = 0; + + /* Do not compute offsets on the first pass */ + if (INDEX(*descp) == 0) + goto done; + + /* Skip jumps never taken */ + if (bpf_op == JIT_JNOP) + goto done; + + /* Convert jumps always taken */ + if (bpf_op == BPF_JA) + *descp |= JIT_DESC_CONVERT; + + /* + * Current ctx->jit_index points to the start of the branch preamble. 
+ * Since the preamble differs among different branch conditionals, + * the current index cannot be used to compute the branch offset. + * Instead, we use the offset table value for the next instruction, + * which gives the index immediately after the branch delay slot. + */ + if (!CONVERTED(*descp)) { + int target = ctx->bpf_index + bpf_off + 1; + int origin = ctx->bpf_index + 1; + + offset = (INDEX(ctx->descriptors[target]) - + INDEX(ctx->descriptors[origin]) + 1) * sizeof(u32); + } + + /* + * The PC-relative branch offset field on MIPS is 18 bits signed, + * so if the computed offset is larger than this we generate a an + * absolute jump that we skip with an inverted conditional branch. + */ + if (CONVERTED(*descp) || offset < -0x20000 || offset > 0x1ffff) { + offset = 3 * sizeof(u32); + op = invert_jmp(bpf_op); + ctx->changes += !CONVERTED(*descp); + *descp |= JIT_DESC_CONVERT; + } + +done: + *jit_off = offset; + *jit_op = op; +} + +/* Prepare a PC-relative jump operation with immediate conditional */ +void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width, + u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off) +{ + bool always = false; + bool never = false; + + switch (bpf_op) { + case BPF_JEQ: + case BPF_JNE: + break; + case BPF_JSET: + case BPF_JLT: + never = imm == 0; + break; + case BPF_JGE: + always = imm == 0; + break; + case BPF_JGT: + never = (u32)imm == U32_MAX; + break; + case BPF_JLE: + always = (u32)imm == U32_MAX; + break; + case BPF_JSGT: + never = imm == S32_MAX && width == 32; + break; + case BPF_JSGE: + always = imm == S32_MIN && width == 32; + break; + case BPF_JSLT: + never = imm == S32_MIN && width == 32; + break; + case BPF_JSLE: + always = imm == S32_MAX && width == 32; + break; + } + + if (never) + bpf_op = JIT_JNOP; + if (always) + bpf_op = BPF_JA; + setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off); +} + +/* Prepare a PC-relative jump operation with register conditional */ +void setup_jmp_r(struct jit_context *ctx, bool same_reg, + u8 
bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off) +{ + switch (bpf_op) { + case BPF_JSET: + break; + case BPF_JEQ: + case BPF_JGE: + case BPF_JLE: + case BPF_JSGE: + case BPF_JSLE: + if (same_reg) + bpf_op = BPF_JA; + break; + case BPF_JNE: + case BPF_JLT: + case BPF_JGT: + case BPF_JSGT: + case BPF_JSLT: + if (same_reg) + bpf_op = JIT_JNOP; + break; + } + setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off); +} + +/* Finish a PC-relative jump operation */ +int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off) +{ + /* Emit conditional branch delay slot */ + if (jit_op != JIT_JNOP) + emit(ctx, nop); + /* + * Emit an absolute long jump with delay slot, + * if the PC-relative branch was converted. + */ + if (CONVERTED(ctx->descriptors[ctx->bpf_index])) { + int target = get_target(ctx, ctx->bpf_index + bpf_off + 1); + + if (target < 0) + return -1; + emit(ctx, j, target); + emit(ctx, nop); + } + return 0; +} + +/* Jump immediate (32-bit) */ +void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op) +{ + switch (op) { + /* No-op, used internally for branch optimization */ + case JIT_JNOP: + break; + /* PC += off if dst & imm */ + case BPF_JSET: + emit(ctx, andi, MIPS_R_T9, dst, (u16)imm); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */ + case JIT_JNSET: + emit(ctx, andi, MIPS_R_T9, dst, (u16)imm); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst > imm */ + case BPF_JGT: + emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst >= imm */ + case BPF_JGE: + emit(ctx, sltiu, MIPS_R_T9, dst, imm); + emit(ctx, beqz, MIPS_R_T9, off); + break; + /* PC += off if dst < imm */ + case BPF_JLT: + emit(ctx, sltiu, MIPS_R_T9, dst, imm); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off if dst <= imm */ + case BPF_JLE: + emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1); + emit(ctx, bnez, MIPS_R_T9, off); + break; + /* PC += off 
/*
 * Jump register (32-bit).
 *
 * Emits a conditional branch by off, comparing dst with src according
 * to op. JEQ and JNE map to a single branch instruction. All other
 * conditions first compute a flag into t9, and then branch on that
 * flag being zero or non-zero. The operand order of sltu/slt and the
 * choice of beqz/bnez together select the comparison, so they must be
 * changed in pairs. No delay slot is emitted here.
 */
void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op)
{
	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst == src */
	case BPF_JEQ:
		emit(ctx, beq, dst, src, off);
		break;
	/* PC += off if dst != src */
	case BPF_JNE:
		emit(ctx, bne, dst, src, off);
		break;
	/* PC += off if dst & src */
	case BPF_JSET:
		emit(ctx, and, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
	case JIT_JNSET:
		emit(ctx, and, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > src, i.e. if src < dst */
	case BPF_JGT:
		emit(ctx, sltu, MIPS_R_T9, src, dst);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= src, i.e. if not dst < src */
	case BPF_JGE:
		emit(ctx, sltu, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < src */
	case BPF_JLT:
		emit(ctx, sltu, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= src, i.e. if not src < dst */
	case BPF_JLE:
		emit(ctx, sltu, MIPS_R_T9, src, dst);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > src (signed) */
	case BPF_JSGT:
		emit(ctx, slt, MIPS_R_T9, src, dst);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= src (signed) */
	case BPF_JSGE:
		emit(ctx, slt, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < src (signed) */
	case BPF_JSLT:
		emit(ctx, slt, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= src (signed) */
	case BPF_JSLE:
		emit(ctx, slt, MIPS_R_T9, src, dst);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	}
}
/*
 * JIT-compile an eBPF program.
 *
 * The program body is built several times: once to discover the used
 * resources, repeatedly to make the instruction offset table converge,
 * and finally once more to write the instructions into the allocated
 * image.
 *
 * Returns the program that now carries the JITed image (this is the
 * constant-blinded clone if blinding produced one), or the original
 * program unchanged if the JIT was not requested or any step failed.
 */
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header = NULL;
	struct jit_context ctx;
	bool tmp_blinded = false;
	unsigned int tmp_idx;
	unsigned int image_size;
	u8 *image_ptr;
	int tries;

	/*
	 * If BPF JIT was not enabled then we must fall back to
	 * the interpreter.
	 */
	if (!prog->jit_requested)
		return orig_prog;
	/*
	 * If constant blinding was enabled and we failed during blinding
	 * then we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	tmp = bpf_jit_blind_constants(prog);
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	/* ctx.target stays NULL until the image is allocated below */
	memset(&ctx, 0, sizeof(ctx));
	ctx.program = prog;

	/*
	 * If we are not able to allocate memory for descriptors[], then
	 * we must fall back to the interpreter. One extra entry is
	 * allocated for the end offset, where the epilogue begins.
	 */
	ctx.descriptors = kcalloc(prog->len + 1, sizeof(*ctx.descriptors),
				  GFP_KERNEL);
	if (ctx.descriptors == NULL)
		goto out_err;

	/* First pass discovers used resources */
	if (build_body(&ctx) < 0)
		goto out_err;
	/*
	 * Second pass computes instruction offsets.
	 * If any PC-relative branches are out of range, a sequence of
	 * a PC-relative branch + a jump is generated, and we have to
	 * try again from the beginning to generate the new offsets.
	 * This is done until no additional conversions are necessary.
	 * The last two iterations are done with all branches being
	 * converted, to guarantee offset table convergence within a
	 * fixed number of iterations.
	 */
	ctx.jit_index = 0;
	build_prologue(&ctx);
	/* Remember where the body starts, each iteration restarts there */
	tmp_idx = ctx.jit_index;

	tries = JIT_MAX_ITERATIONS;
	do {
		ctx.jit_index = tmp_idx;
		ctx.changes = 0;
		if (tries == 2)
			set_convert_flag(&ctx, true);
		if (build_body(&ctx) < 0)
			goto out_err;
	} while (ctx.changes > 0 && --tries > 0);

	if (WARN_ONCE(ctx.changes > 0, "JIT offsets failed to converge"))
		goto out_err;

	build_epilogue(&ctx, MIPS_R_RA);

	/* Now we know the size of the image, one u32 per instruction */
	image_size = sizeof(u32) * ctx.jit_index;
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	/*
	 * If we are not able to allocate memory for the image, then
	 * we must fall back to the interpreter.
	 */
	if (header == NULL)
		goto out_err;

	/* Actual pass to generate final JIT code */
	ctx.target = (u32 *)image_ptr;
	ctx.jit_index = 0;

	/*
	 * If building the JITed code fails somehow,
	 * we fall back to the interpreter.
	 */
	build_prologue(&ctx);
	if (build_body(&ctx) < 0)
		goto out_err;
	build_epilogue(&ctx, MIPS_R_RA);

	/*
	 * Populate line info meta data. The conversion flags are cleared
	 * first, so that the descriptors hold plain instruction indices.
	 */
	set_convert_flag(&ctx, false);
	bpf_prog_fill_jited_linfo(prog, &ctx.descriptors[1]);

	/* Set as read-only exec and flush instruction cache */
	bpf_jit_binary_lock_ro(header);
	flush_icache_range((unsigned long)header,
			   (unsigned long)&ctx.target[ctx.jit_index]);

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, image_size, 2, ctx.target);

	prog->bpf_func = (void *)ctx.target;
	prog->jited = 1;
	prog->jited_len = image_size;

out:
	/* Release whichever of the two programs is not being returned */
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	kfree(ctx.descriptors);
	return prog;

out_err:
	/* Fall back to the original program and drop any allocated image */
	prog = orig_prog;
	if (header)
		bpf_jit_binary_free(header);
	goto out;
}
/*
 * Emit the instruction if the JIT memory space has been allocated.
 *
 * ctx  - pointer to the JIT context. The argument is evaluated more than
 *        once, so it must not have side effects.
 * func - instruction name, pasted onto uasm_i_ to select the assembler
 *        function that writes the instruction.
 * ...  - operands passed on to the assembler function.
 *
 * If ctx->target is NULL, nothing is written. ctx->jit_index is advanced
 * by one in either case.
 *
 * Every use of the ctx argument is parenthesized, so that an expression
 * such as &context can be passed as well as a plain pointer variable.
 */
#define __emit(ctx, func, ...)					\
do {								\
	if ((ctx)->target != NULL) {				\
		u32 *p = &(ctx)->target[(ctx)->jit_index];	\
		uasm_i_##func(&p, ##__VA_ARGS__);		\
	}							\
	(ctx)->jit_index++;					\
} while (0)
__emit(__VA_ARGS__) + +/* Workaround for R10000 ll/sc errata */ +#ifdef CONFIG_WAR_R10000 +#define LLSC_beqz beqzl +#else +#define LLSC_beqz beqz +#endif + +/* Workaround for Loongson-3 ll/sc errata */ +#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS +#define LLSC_sync(ctx) emit(ctx, sync, 0) +#define LLSC_offset 4 +#else +#define LLSC_sync(ctx) +#define LLSC_offset 0 +#endif + +/* Workaround for Loongson-2F jump errata */ +#ifdef CONFIG_CPU_JUMP_WORKAROUNDS +#define JALR_MASK 0xffffffffcfffffffULL +#else +#define JALR_MASK (~0ULL) +#endif + +/* + * Mark a BPF register as accessed, it needs to be + * initialized by the program if expected, e.g. FP. + */ +static inline void access_reg(struct jit_context *ctx, u8 reg) +{ + ctx->accessed |= BIT(reg); +} + +/* + * Mark a CPU register as clobbered, it needs to be + * saved/restored by the program if callee-saved. + */ +static inline void clobber_reg(struct jit_context *ctx, u8 reg) +{ + ctx->clobbered |= BIT(reg); +} + +/* + * Push registers on the stack, starting at a given depth from the stack + * pointer and increasing. The next depth to be written is returned. + */ +int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth); + +/* + * Pop registers from the stack, starting at a given depth from the stack + * pointer and increasing. The next depth to be read is returned. 
+ */ +int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth); + +/* Compute the 28-bit jump target address from a BPF program location */ +int get_target(struct jit_context *ctx, u32 loc); + +/* Compute the PC-relative offset to relative BPF program offset */ +int get_offset(const struct jit_context *ctx, int off); + +/* dst = imm (32-bit) */ +void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm); + +/* dst = src (32-bit) */ +void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src); + +/* Validate ALU/ALU64 immediate range */ +bool valid_alu_i(u8 op, s32 imm); + +/* Rewrite ALU/ALU64 immediate operation */ +bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val); + +/* ALU immediate operation (32-bit) */ +void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op); + +/* ALU register operation (32-bit) */ +void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op); + +/* Atomic read-modify-write (32-bit) */ +void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code); + +/* Atomic compare-and-exchange (32-bit) */ +void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off); + +/* Swap bytes and truncate a register word or half word */ +void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width); + +/* Validate JMP/JMP32 immediate range */ +bool valid_jmp_i(u8 op, s32 imm); + +/* Prepare a PC-relative jump operation with immediate conditional */ +void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width, + u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off); + +/* Prepare a PC-relative jump operation with register conditional */ +void setup_jmp_r(struct jit_context *ctx, bool same_reg, + u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off); + +/* Finish a PC-relative jump operation */ +int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off); + +/* Conditional JMP/JMP32 immediate */ +void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op); + +/* Conditional JMP/JMP32 register */ 
+void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op); + +/* Jump always */ +int emit_ja(struct jit_context *ctx, s16 off); + +/* Jump to epilogue */ +int emit_exit(struct jit_context *ctx); + +/* + * Build program prologue to set up the stack and registers. + * This function is implemented separately for 32-bit and 64-bit JITs. + */ +void build_prologue(struct jit_context *ctx); + +/* + * Build the program epilogue to restore the stack and registers. + * This function is implemented separately for 32-bit and 64-bit JITs. + */ +void build_epilogue(struct jit_context *ctx, int dest_reg); + +/* + * Convert an eBPF instruction to native instruction, i.e + * JITs an eBPF instruction. + * Returns : + * 0 - Successfully JITed an 8-byte eBPF instruction + * >0 - Successfully JITed a 16-byte eBPF instruction + * <0 - Failed to JIT. + * This function is implemented separately for 32-bit and 64-bit JITs. + */ +int build_insn(const struct bpf_insn *insn, struct jit_context *ctx); + +#endif /* _BPF_JIT_COMP_H */ diff --git a/arch/mips/net/bpf_jit_comp32.c b/arch/mips/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..bd996ede12f8 --- /dev/null +++ b/arch/mips/net/bpf_jit_comp32.c @@ -0,0 +1,1899 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Just-In-Time compiler for eBPF bytecode on MIPS. + * Implementation of JIT functions for 32-bit CPUs. + * + * Copyright (c) 2021 Anyfi Networks AB. + * Author: Johan Almbladh <johan.almbladh@gmail.com> + * + * Based on code and ideas from + * Copyright (c) 2017 Cavium, Inc. 
+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> + * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> + */ + +#include <linux/math64.h> +#include <linux/errno.h> +#include <linux/filter.h> +#include <linux/bpf.h> +#include <asm/cpu-features.h> +#include <asm/isa-rev.h> +#include <asm/uasm.h> + +#include "bpf_jit_comp.h" + +/* MIPS a4-a7 are not available in the o32 ABI */ +#undef MIPS_R_A4 +#undef MIPS_R_A5 +#undef MIPS_R_A6 +#undef MIPS_R_A7 + +/* Stack is 8-byte aligned in o32 ABI */ +#define MIPS_STACK_ALIGNMENT 8 + +/* + * The top 16 bytes of a stack frame is reserved for the callee in O32 ABI. + * This corresponds to stack space for register arguments a0-a3. + */ +#define JIT_RESERVED_STACK 16 + +/* Temporary 64-bit register used by JIT */ +#define JIT_REG_TMP MAX_BPF_JIT_REG + +/* + * Number of prologue bytes to skip when doing a tail call. + * Tail call count (TCC) initialization (8 bytes) always, plus + * R0-to-v0 assignment (4 bytes) if big endian. + */ +#ifdef __BIG_ENDIAN +#define JIT_TCALL_SKIP 12 +#else +#define JIT_TCALL_SKIP 8 +#endif + +/* CPU registers holding the callee return value */ +#define JIT_RETURN_REGS \ + (BIT(MIPS_R_V0) | \ + BIT(MIPS_R_V1)) + +/* CPU registers arguments passed to callee directly */ +#define JIT_ARG_REGS \ + (BIT(MIPS_R_A0) | \ + BIT(MIPS_R_A1) | \ + BIT(MIPS_R_A2) | \ + BIT(MIPS_R_A3)) + +/* CPU register arguments passed to callee on stack */ +#define JIT_STACK_REGS \ + (BIT(MIPS_R_T0) | \ + BIT(MIPS_R_T1) | \ + BIT(MIPS_R_T2) | \ + BIT(MIPS_R_T3) | \ + BIT(MIPS_R_T4) | \ + BIT(MIPS_R_T5)) + +/* Caller-saved CPU registers */ +#define JIT_CALLER_REGS \ + (JIT_RETURN_REGS | \ + JIT_ARG_REGS | \ + JIT_STACK_REGS) + +/* Callee-saved CPU registers */ +#define JIT_CALLEE_REGS \ + (BIT(MIPS_R_S0) | \ + BIT(MIPS_R_S1) | \ + BIT(MIPS_R_S2) | \ + BIT(MIPS_R_S3) | \ + BIT(MIPS_R_S4) | \ + BIT(MIPS_R_S5) | \ + BIT(MIPS_R_S6) | \ + BIT(MIPS_R_S7) | \ + BIT(MIPS_R_GP) | \ + BIT(MIPS_R_FP) | \ + BIT(MIPS_R_RA)) 
+ +/* + * Mapping of 64-bit eBPF registers to 32-bit native MIPS registers. + * + * 1) Native register pairs are ordered according to CPU endianness, following + * the MIPS convention for passing 64-bit arguments and return values. + * 2) The eBPF return value, arguments and callee-saved registers are mapped + * to their native MIPS equivalents. + * 3) Since the 32 highest bits in the eBPF FP register are always zero, + * only one general-purpose register is actually needed for the mapping. + * We use the fp register for this purpose, and map the highest bits to + * the MIPS register r0 (zero). + * 4) We use the MIPS gp and at registers as internal temporary registers + * for constant blinding. The gp register is callee-saved. + * 5) One 64-bit temporary register is mapped for use when sign-extending + * immediate operands. MIPS registers t6-t9 are available to the JIT + * as temporaries when implementing complex 64-bit operations. + * + * With this scheme all eBPF registers are mapped to native MIPS + * registers without having to use any stack scratch space. The direct + * register mapping (2) simplifies the handling of function calls. 
+ */ +static const u8 bpf2mips32[][2] = { + /* Return value from in-kernel function, and exit value from eBPF */ + [BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0}, + /* Arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0}, + [BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2}, + /* Remaining arguments, to be passed on the stack per O32 ABI */ + [BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0}, + [BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2}, + [BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4}, + /* Callee-saved registers that in-kernel function will preserve */ + [BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0}, + [BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2}, + [BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4}, + [BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6}, + /* Read-only frame pointer to access the eBPF stack */ +#ifdef __BIG_ENDIAN + [BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO}, +#else + [BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP}, +#endif + /* Temporary register for blinding constants */ + [BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT}, + /* Temporary register for internal JIT use */ + [JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6}, +}; + +/* Get low CPU register for a 64-bit eBPF register mapping */ +static inline u8 lo(const u8 reg[]) +{ +#ifdef __BIG_ENDIAN + return reg[0]; +#else + return reg[1]; +#endif +} + +/* Get high CPU register for a 64-bit eBPF register mapping */ +static inline u8 hi(const u8 reg[]) +{ +#ifdef __BIG_ENDIAN + return reg[1]; +#else + return reg[0]; +#endif +} + +/* + * Mark a 64-bit CPU register pair as clobbered, it needs to be + * saved/restored by the program if callee-saved. 
+ */ +static void clobber_reg64(struct jit_context *ctx, const u8 reg[]) +{ + clobber_reg(ctx, reg[0]); + clobber_reg(ctx, reg[1]); +} + +/* dst = imm (sign-extended) */ +static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm) +{ + emit_mov_i(ctx, lo(dst), imm); + if (imm < 0) + emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1); + else + emit(ctx, move, hi(dst), MIPS_R_ZERO); + clobber_reg64(ctx, dst); +} + +/* Zero extension, if verifier does not do it for us */ +static void emit_zext_ver(struct jit_context *ctx, const u8 dst[]) +{ + if (!ctx->program->aux->verifier_zext) { + emit(ctx, move, hi(dst), MIPS_R_ZERO); + clobber_reg(ctx, hi(dst)); + } +} + +/* Load delay slot, if ISA mandates it */ +static void emit_load_delay(struct jit_context *ctx) +{ + if (!cpu_has_mips_2_3_4_5_r) + emit(ctx, nop); +} + +/* ALU immediate operation (64-bit) */ +static void emit_alu_i64(struct jit_context *ctx, + const u8 dst[], s32 imm, u8 op) +{ + u8 src = MIPS_R_T6; + + /* + * ADD/SUB with all but the max negative imm can be handled by + * inverting the operation and the imm value, saving one insn. 
+ */ + if (imm > S32_MIN && imm < 0) + switch (op) { + case BPF_ADD: + op = BPF_SUB; + imm = -imm; + break; + case BPF_SUB: + op = BPF_ADD; + imm = -imm; + break; + } + + /* Move immediate to temporary register */ + emit_mov_i(ctx, src, imm); + + switch (op) { + /* dst = dst + imm */ + case BPF_ADD: + emit(ctx, addu, lo(dst), lo(dst), src); + emit(ctx, sltu, MIPS_R_T9, lo(dst), src); + emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9); + if (imm < 0) + emit(ctx, addiu, hi(dst), hi(dst), -1); + break; + /* dst = dst - imm */ + case BPF_SUB: + emit(ctx, sltu, MIPS_R_T9, lo(dst), src); + emit(ctx, subu, lo(dst), lo(dst), src); + emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9); + if (imm < 0) + emit(ctx, addiu, hi(dst), hi(dst), 1); + break; + /* dst = dst | imm */ + case BPF_OR: + emit(ctx, or, lo(dst), lo(dst), src); + if (imm < 0) + emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1); + break; + /* dst = dst & imm */ + case BPF_AND: + emit(ctx, and, lo(dst), lo(dst), src); + if (imm >= 0) + emit(ctx, move, hi(dst), MIPS_R_ZERO); + break; + /* dst = dst ^ imm */ + case BPF_XOR: + emit(ctx, xor, lo(dst), lo(dst), src); + if (imm < 0) { + emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst)); + emit(ctx, addiu, hi(dst), hi(dst), -1); + } + break; + } + clobber_reg64(ctx, dst); +} + +/* ALU register operation (64-bit) */ +static void emit_alu_r64(struct jit_context *ctx, + const u8 dst[], const u8 src[], u8 op) +{ + switch (BPF_OP(op)) { + /* dst = dst + src */ + case BPF_ADD: + if (src == dst) { + emit(ctx, srl, MIPS_R_T9, lo(dst), 31); + emit(ctx, addu, lo(dst), lo(dst), lo(dst)); + } else { + emit(ctx, addu, lo(dst), lo(dst), lo(src)); + emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src)); + } + emit(ctx, addu, hi(dst), hi(dst), hi(src)); + emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9); + break; + /* dst = dst - src */ + case BPF_SUB: + emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src)); + emit(ctx, subu, lo(dst), lo(dst), lo(src)); + emit(ctx, subu, hi(dst), hi(dst), hi(src)); + emit(ctx, subu, hi(dst), 
hi(dst), MIPS_R_T9); + break; + /* dst = dst | src */ + case BPF_OR: + emit(ctx, or, lo(dst), lo(dst), lo(src)); + emit(ctx, or, hi(dst), hi(dst), hi(src)); + break; + /* dst = dst & src */ + case BPF_AND: + emit(ctx, and, lo(dst), lo(dst), lo(src)); + emit(ctx, and, hi(dst), hi(dst), hi(src)); + break; + /* dst = dst ^ src */ + case BPF_XOR: + emit(ctx, xor, lo(dst), lo(dst), lo(src)); + emit(ctx, xor, hi(dst), hi(dst), hi(src)); + break; + } + clobber_reg64(ctx, dst); +} + +/* ALU invert (64-bit) */ +static void emit_neg_i64(struct jit_context *ctx, const u8 dst[]) +{ + emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst)); + emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst)); + emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst)); + emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9); + + clobber_reg64(ctx, dst); +} + +/* ALU shift immediate (64-bit) */ +static void emit_shift_i64(struct jit_context *ctx, + const u8 dst[], u32 imm, u8 op) +{ + switch (BPF_OP(op)) { + /* dst = dst << imm */ + case BPF_LSH: + if (imm < 32) { + emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm); + emit(ctx, sll, lo(dst), lo(dst), imm); + emit(ctx, sll, hi(dst), hi(dst), imm); + emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9); + } else { + emit(ctx, sll, hi(dst), lo(dst), imm - 32); + emit(ctx, move, lo(dst), MIPS_R_ZERO); + } + break; + /* dst = dst >> imm */ + case BPF_RSH: + if (imm < 32) { + emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm); + emit(ctx, srl, lo(dst), lo(dst), imm); + emit(ctx, srl, hi(dst), hi(dst), imm); + emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9); + } else { + emit(ctx, srl, lo(dst), hi(dst), imm - 32); + emit(ctx, move, hi(dst), MIPS_R_ZERO); + } + break; + /* dst = dst >> imm (arithmetic) */ + case BPF_ARSH: + if (imm < 32) { + emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm); + emit(ctx, srl, lo(dst), lo(dst), imm); + emit(ctx, sra, hi(dst), hi(dst), imm); + emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9); + } else { + emit(ctx, sra, lo(dst), hi(dst), imm - 32); + emit(ctx, sra, hi(dst), hi(dst), 31); 
+ } + break; + } + clobber_reg64(ctx, dst); +} + +/* ALU shift register (64-bit) */ +static void emit_shift_r64(struct jit_context *ctx, + const u8 dst[], u8 src, u8 op) +{ + u8 t1 = MIPS_R_T8; + u8 t2 = MIPS_R_T9; + + emit(ctx, andi, t1, src, 32); /* t1 = src & 32 */ + emit(ctx, beqz, t1, 16); /* PC += 16 if t1 == 0 */ + emit(ctx, nor, t2, src, MIPS_R_ZERO); /* t2 = ~src (delay slot) */ + + switch (BPF_OP(op)) { + /* dst = dst << src */ + case BPF_LSH: + /* Next: shift >= 32 */ + emit(ctx, sllv, hi(dst), lo(dst), src); /* dh = dl << src */ + emit(ctx, move, lo(dst), MIPS_R_ZERO); /* dl = 0 */ + emit(ctx, b, 20); /* PC += 20 */ + /* +16: shift < 32 */ + emit(ctx, srl, t1, lo(dst), 1); /* t1 = dl >> 1 */ + emit(ctx, srlv, t1, t1, t2); /* t1 = t1 >> t2 */ + emit(ctx, sllv, lo(dst), lo(dst), src); /* dl = dl << src */ + emit(ctx, sllv, hi(dst), hi(dst), src); /* dh = dh << src */ + emit(ctx, or, hi(dst), hi(dst), t1); /* dh = dh | t1 */ + break; + /* dst = dst >> src */ + case BPF_RSH: + /* Next: shift >= 32 */ + emit(ctx, srlv, lo(dst), hi(dst), src); /* dl = dh >> src */ + emit(ctx, move, hi(dst), MIPS_R_ZERO); /* dh = 0 */ + emit(ctx, b, 20); /* PC += 20 */ + /* +16: shift < 32 */ + emit(ctx, sll, t1, hi(dst), 1); /* t1 = dh << 1 */ + emit(ctx, sllv, t1, t1, t2); /* t1 = t1 << t2 */ + emit(ctx, srlv, lo(dst), lo(dst), src); /* dl = dl >> src */ + emit(ctx, srlv, hi(dst), hi(dst), src); /* dh = dh >> src */ + emit(ctx, or, lo(dst), lo(dst), t1); /* dl = dl | t1 */ + break; + /* dst = dst >> src (arithmetic) */ + case BPF_ARSH: + /* Next: shift >= 32 */ + emit(ctx, srav, lo(dst), hi(dst), src); /* dl = dh >>a src */ + emit(ctx, sra, hi(dst), hi(dst), 31); /* dh = dh >>a 31 */ + emit(ctx, b, 20); /* PC += 20 */ + /* +16: shift < 32 */ + emit(ctx, sll, t1, hi(dst), 1); /* t1 = dh << 1 */ + emit(ctx, sllv, t1, t1, t2); /* t1 = t1 << t2 */ + emit(ctx, srlv, lo(dst), lo(dst), src); /* dl = dl >> src (logical) */ + emit(ctx, srav, hi(dst), hi(dst), src); /* dh = dh >>a src */ + 
emit(ctx, or, lo(dst), lo(dst), t1); /* dl = dl | t1 */ + break; + } + + /* +20: Done */ + clobber_reg64(ctx, dst); +} + +/* ALU mul immediate (64x32-bit) */ +static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm) +{ + u8 src = MIPS_R_T6; + u8 tmp = MIPS_R_T9; + + switch (imm) { + /* dst = dst * 1 is a no-op */ + case 1: + break; + /* dst = dst * -1 */ + case -1: + emit_neg_i64(ctx, dst); + break; + case 0: + emit_mov_r(ctx, lo(dst), MIPS_R_ZERO); + emit_mov_r(ctx, hi(dst), MIPS_R_ZERO); + break; + /* Full 64x32 multiply */ + default: + /* hi(dst) = hi(dst) * src(imm) */ + emit_mov_i(ctx, src, imm); + if (cpu_has_mips32r1 || cpu_has_mips32r6) { + emit(ctx, mul, hi(dst), hi(dst), src); + } else { + emit(ctx, multu, hi(dst), src); + emit(ctx, mflo, hi(dst)); + } + + /* hi(dst) = hi(dst) - lo(dst) */ + if (imm < 0) + emit(ctx, subu, hi(dst), hi(dst), lo(dst)); + + /* tmp = lo(dst) * src(imm) >> 32 */ + /* lo(dst) = lo(dst) * src(imm) */ + if (cpu_has_mips32r6) { + emit(ctx, muhu, tmp, lo(dst), src); + emit(ctx, mulu, lo(dst), lo(dst), src); + } else { + emit(ctx, multu, lo(dst), src); + emit(ctx, mflo, lo(dst)); + emit(ctx, mfhi, tmp); + } + + /* hi(dst) += tmp */ + emit(ctx, addu, hi(dst), hi(dst), tmp); + clobber_reg64(ctx, dst); + break; + } +} + +/* ALU mul register (64x64-bit) */ +static void emit_mul_r64(struct jit_context *ctx, + const u8 dst[], const u8 src[]) +{ + u8 acc = MIPS_R_T8; + u8 tmp = MIPS_R_T9; + + /* acc = hi(dst) * lo(src) */ + if (cpu_has_mips32r1 || cpu_has_mips32r6) { + emit(ctx, mul, acc, hi(dst), lo(src)); + } else { + emit(ctx, multu, hi(dst), lo(src)); + emit(ctx, mflo, acc); + } + + /* tmp = lo(dst) * hi(src) */ + if (cpu_has_mips32r1 || cpu_has_mips32r6) { + emit(ctx, mul, tmp, lo(dst), hi(src)); + } else { + emit(ctx, multu, lo(dst), hi(src)); + emit(ctx, mflo, tmp); + } + + /* acc += tmp */ + emit(ctx, addu, acc, acc, tmp); + + /* tmp = lo(dst) * lo(src) >> 32 */ + /* lo(dst) = lo(dst) * lo(src) */ + if 
(cpu_has_mips32r6) { + emit(ctx, muhu, tmp, lo(dst), lo(src)); + emit(ctx, mulu, lo(dst), lo(dst), lo(src)); + } else { + emit(ctx, multu, lo(dst), lo(src)); + emit(ctx, mflo, lo(dst)); + emit(ctx, mfhi, tmp); + } + + /* hi(dst) = acc + tmp */ + emit(ctx, addu, hi(dst), acc, tmp); + clobber_reg64(ctx, dst); +} + +/* Helper function for 64-bit modulo */ +static u64 jit_mod64(u64 a, u64 b) +{ + u64 rem; + + div64_u64_rem(a, b, &rem); + return rem; +} + +/* ALU div/mod register (64-bit) */ +static void emit_divmod_r64(struct jit_context *ctx, + const u8 dst[], const u8 src[], u8 op) +{ + const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */ + const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */ + const u8 *r2 = bpf2mips32[BPF_REG_2]; /* Mapped to a2-a3 */ + int exclude, k; + u32 addr = 0; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + 0, JIT_RESERVED_STACK); + + /* Put 64-bit arguments 1 and 2 in registers a0-a3 */ + for (k = 0; k < 2; k++) { + emit(ctx, move, MIPS_R_T9, src[k]); + emit(ctx, move, r1[k], dst[k]); + emit(ctx, move, r2[k], MIPS_R_T9); + } + + /* Emit function call */ + switch (BPF_OP(op)) { + /* dst = dst / src */ + case BPF_DIV: + addr = (u32)&div64_u64; + break; + /* dst = dst % src */ + case BPF_MOD: + addr = (u32)&jit_mod64; + break; + } + emit_mov_i(ctx, MIPS_R_T9, addr); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + + /* Store the 64-bit result in dst */ + emit(ctx, move, dst[0], r0[0]); + emit(ctx, move, dst[1], r0[1]); + + /* Restore caller-saved registers, excluding the computed result */ + exclude = BIT(lo(dst)) | BIT(hi(dst)); + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + exclude, JIT_RESERVED_STACK); + emit_load_delay(ctx); + + clobber_reg64(ctx, dst); + clobber_reg(ctx, MIPS_R_V0); + clobber_reg(ctx, MIPS_R_V1); + clobber_reg(ctx, MIPS_R_RA); +} + +/* Swap bytes in a register word */ +static void emit_swap8_r(struct jit_context *ctx, u8 
dst, u8 src, u8 mask) +{ + u8 tmp = MIPS_R_T9; + + emit(ctx, and, tmp, src, mask); /* tmp = src & 0x00ff00ff */ + emit(ctx, sll, tmp, tmp, 8); /* tmp = tmp << 8 */ + emit(ctx, srl, dst, src, 8); /* dst = src >> 8 */ + emit(ctx, and, dst, dst, mask); /* dst = dst & 0x00ff00ff */ + emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ +} + +/* Swap half words in a register word */ +static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src) +{ + u8 tmp = MIPS_R_T9; + + emit(ctx, sll, tmp, src, 16); /* tmp = src << 16 */ + emit(ctx, srl, dst, src, 16); /* dst = src >> 16 */ + emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ +} + +/* Swap bytes and truncate a register double word, word or half word */ +static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width) +{ + u8 tmp = MIPS_R_T8; + + switch (width) { + /* Swap bytes in a double word */ + case 64: + if (cpu_has_mips32r2 || cpu_has_mips32r6) { + emit(ctx, rotr, tmp, hi(dst), 16); + emit(ctx, rotr, hi(dst), lo(dst), 16); + emit(ctx, wsbh, lo(dst), tmp); + emit(ctx, wsbh, hi(dst), hi(dst)); + } else { + emit_swap16_r(ctx, tmp, lo(dst)); + emit_swap16_r(ctx, lo(dst), hi(dst)); + emit(ctx, move, hi(dst), tmp); + + emit(ctx, lui, tmp, 0xff); /* tmp = 0x00ff0000 */ + emit(ctx, ori, tmp, tmp, 0xff); /* tmp = 0x00ff00ff */ + emit_swap8_r(ctx, lo(dst), lo(dst), tmp); + emit_swap8_r(ctx, hi(dst), hi(dst), tmp); + } + break; + /* Swap bytes in a word */ + /* Swap bytes in a half word */ + case 32: + case 16: + emit_bswap_r(ctx, lo(dst), width); + emit(ctx, move, hi(dst), MIPS_R_ZERO); + break; + } + clobber_reg64(ctx, dst); +} + +/* Truncate a register double word, word or half word */ +static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width) +{ + switch (width) { + case 64: + break; + /* Zero-extend a word */ + case 32: + emit(ctx, move, hi(dst), MIPS_R_ZERO); + clobber_reg(ctx, hi(dst)); + break; + /* Zero-extend a half word */ + case 16: + emit(ctx, move, hi(dst), 
MIPS_R_ZERO); + emit(ctx, andi, lo(dst), lo(dst), 0xffff); + clobber_reg64(ctx, dst); + break; + } +} + +/* Load operation: dst = *(size*)(src + off) */ +static void emit_ldx(struct jit_context *ctx, + const u8 dst[], u8 src, s16 off, u8 size) +{ + switch (size) { + /* Load a byte */ + case BPF_B: + emit(ctx, lbu, lo(dst), off, src); + emit(ctx, move, hi(dst), MIPS_R_ZERO); + break; + /* Load a half word */ + case BPF_H: + emit(ctx, lhu, lo(dst), off, src); + emit(ctx, move, hi(dst), MIPS_R_ZERO); + break; + /* Load a word */ + case BPF_W: + emit(ctx, lw, lo(dst), off, src); + emit(ctx, move, hi(dst), MIPS_R_ZERO); + break; + /* Load a double word */ + case BPF_DW: + if (dst[1] == src) { + emit(ctx, lw, dst[0], off + 4, src); + emit(ctx, lw, dst[1], off, src); + } else { + emit(ctx, lw, dst[1], off, src); + emit(ctx, lw, dst[0], off + 4, src); + } + emit_load_delay(ctx); + break; + } + clobber_reg64(ctx, dst); +} + +/* Store operation: *(size *)(dst + off) = src */ +static void emit_stx(struct jit_context *ctx, + const u8 dst, const u8 src[], s16 off, u8 size) +{ + switch (size) { + /* Store a byte */ + case BPF_B: + emit(ctx, sb, lo(src), off, dst); + break; + /* Store a half word */ + case BPF_H: + emit(ctx, sh, lo(src), off, dst); + break; + /* Store a word */ + case BPF_W: + emit(ctx, sw, lo(src), off, dst); + break; + /* Store a double word */ + case BPF_DW: + emit(ctx, sw, src[1], off, dst); + emit(ctx, sw, src[0], off + 4, dst); + break; + } +} + +/* Atomic read-modify-write (32-bit, non-ll/sc fallback) */ +static void emit_atomic_r32(struct jit_context *ctx, + u8 dst, u8 src, s16 off, u8 code) +{ + u32 exclude = 0; + u32 addr = 0; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + 0, JIT_RESERVED_STACK); + /* + * Argument 1: dst+off if xchg, otherwise src, passed in register a0 + * Argument 2: src if xchg, otherwise dst+off, passed in register a1 + */ + emit(ctx, move, MIPS_R_T9, dst); + if (code == BPF_XCHG) 
{ + emit(ctx, move, MIPS_R_A1, src); + emit(ctx, addiu, MIPS_R_A0, MIPS_R_T9, off); + } else { + emit(ctx, move, MIPS_R_A0, src); + emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off); + } + + /* Emit function call */ + switch (code) { + case BPF_ADD: + addr = (u32)&atomic_add; + break; + case BPF_ADD | BPF_FETCH: + addr = (u32)&atomic_fetch_add; + break; + case BPF_SUB: + addr = (u32)&atomic_sub; + break; + case BPF_SUB | BPF_FETCH: + addr = (u32)&atomic_fetch_sub; + break; + case BPF_OR: + addr = (u32)&atomic_or; + break; + case BPF_OR | BPF_FETCH: + addr = (u32)&atomic_fetch_or; + break; + case BPF_AND: + addr = (u32)&atomic_and; + break; + case BPF_AND | BPF_FETCH: + addr = (u32)&atomic_fetch_and; + break; + case BPF_XOR: + addr = (u32)&atomic_xor; + break; + case BPF_XOR | BPF_FETCH: + addr = (u32)&atomic_fetch_xor; + break; + case BPF_XCHG: + addr = (u32)&atomic_xchg; + break; + } + emit_mov_i(ctx, MIPS_R_T9, addr); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + + /* Update src register with old value, if specified */ + if (code & BPF_FETCH) { + emit(ctx, move, src, MIPS_R_V0); + exclude = BIT(src); + clobber_reg(ctx, src); + } + + /* Restore caller-saved registers, except any fetched value */ + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + exclude, JIT_RESERVED_STACK); + emit_load_delay(ctx); + clobber_reg(ctx, MIPS_R_RA); +} + +/* Helper function for 64-bit atomic exchange */ +static s64 jit_xchg64(s64 a, atomic64_t *v) +{ + return atomic64_xchg(v, a); +} + +/* Atomic read-modify-write (64-bit) */ +static void emit_atomic_r64(struct jit_context *ctx, + u8 dst, const u8 src[], s16 off, u8 code) +{ + const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */ + const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */ + u32 exclude = 0; + u32 addr = 0; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + 0, JIT_RESERVED_STACK); + /* + * Argument 1: 64-bit src, passed in registers 
a0-a1 + * Argument 2: 32-bit dst+off, passed in register a2 + */ + emit(ctx, move, MIPS_R_T9, dst); + emit(ctx, move, r1[0], src[0]); + emit(ctx, move, r1[1], src[1]); + emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off); + + /* Emit function call */ + switch (code) { + case BPF_ADD: + addr = (u32)&atomic64_add; + break; + case BPF_ADD | BPF_FETCH: + addr = (u32)&atomic64_fetch_add; + break; + case BPF_SUB: + addr = (u32)&atomic64_sub; + break; + case BPF_SUB | BPF_FETCH: + addr = (u32)&atomic64_fetch_sub; + break; + case BPF_OR: + addr = (u32)&atomic64_or; + break; + case BPF_OR | BPF_FETCH: + addr = (u32)&atomic64_fetch_or; + break; + case BPF_AND: + addr = (u32)&atomic64_and; + break; + case BPF_AND | BPF_FETCH: + addr = (u32)&atomic64_fetch_and; + break; + case BPF_XOR: + addr = (u32)&atomic64_xor; + break; + case BPF_XOR | BPF_FETCH: + addr = (u32)&atomic64_fetch_xor; + break; + case BPF_XCHG: + addr = (u32)&jit_xchg64; + break; + } + emit_mov_i(ctx, MIPS_R_T9, addr); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + + /* Update src register with old value, if specified */ + if (code & BPF_FETCH) { + emit(ctx, move, lo(src), lo(r0)); + emit(ctx, move, hi(src), hi(r0)); + exclude = BIT(src[0]) | BIT(src[1]); + clobber_reg64(ctx, src); + } + + /* Restore caller-saved registers, except any fetched value */ + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + exclude, JIT_RESERVED_STACK); + emit_load_delay(ctx); + clobber_reg(ctx, MIPS_R_RA); +} + +/* Atomic compare-and-exchange (32-bit, non-ll/sc fallback) */ +static void emit_cmpxchg_r32(struct jit_context *ctx, u8 dst, u8 src, s16 off) +{ + const u8 *r0 = bpf2mips32[BPF_REG_0]; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32)); + /* + * Argument 1: 32-bit dst+off, passed in register a0 + * Argument 2: 32-bit r0, passed in register a1 + * Argument 3: 32-bit src, passed in register a2 + */ 
+ emit(ctx, addiu, MIPS_R_T9, dst, off); + emit(ctx, move, MIPS_R_T8, src); + emit(ctx, move, MIPS_R_A1, lo(r0)); + emit(ctx, move, MIPS_R_A0, MIPS_R_T9); + emit(ctx, move, MIPS_R_A2, MIPS_R_T8); + + /* Emit function call */ + emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic_cmpxchg); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + +#ifdef __BIG_ENDIAN + emit(ctx, move, lo(r0), MIPS_R_V0); +#endif + /* Restore caller-saved registers, except the return value */ + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32)); + emit_load_delay(ctx); + clobber_reg(ctx, MIPS_R_V0); + clobber_reg(ctx, MIPS_R_V1); + clobber_reg(ctx, MIPS_R_RA); +} + +/* Atomic compare-and-exchange (64-bit) */ +static void emit_cmpxchg_r64(struct jit_context *ctx, + u8 dst, const u8 src[], s16 off) +{ + const u8 *r0 = bpf2mips32[BPF_REG_0]; + const u8 *r2 = bpf2mips32[BPF_REG_2]; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32)); + /* + * Argument 1: 32-bit dst+off, passed in register a0 (a1 unused) + * Argument 2: 64-bit r0, passed in registers a2-a3 + * Argument 3: 64-bit src, passed on stack + */ + push_regs(ctx, BIT(src[0]) | BIT(src[1]), 0, JIT_RESERVED_STACK); + emit(ctx, addiu, MIPS_R_T9, dst, off); + emit(ctx, move, r2[0], r0[0]); + emit(ctx, move, r2[1], r0[1]); + emit(ctx, move, MIPS_R_A0, MIPS_R_T9); + + /* Emit function call */ + emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic64_cmpxchg); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + + /* Restore caller-saved registers, except the return value */ + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, + JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32)); + emit_load_delay(ctx); + clobber_reg(ctx, MIPS_R_V0); + clobber_reg(ctx, MIPS_R_V1); + clobber_reg(ctx, MIPS_R_RA); +} + +/* + * Conditional movz or an emulated equivalent. 
+ * Note that the rs register may be modified. + */ +static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt) +{ + if (cpu_has_mips_2) { + emit(ctx, movz, rd, rs, rt); /* rd = rt ? rd : rs */ + } else if (cpu_has_mips32r6) { + if (rs != MIPS_R_ZERO) + emit(ctx, seleqz, rs, rs, rt); /* rs = 0 if rt != 0 */ + emit(ctx, selnez, rd, rd, rt); /* rd = 0 if rt == 0 */ + if (rs != MIPS_R_ZERO) + emit(ctx, or, rd, rd, rs); /* rd = rd | rs */ + } else { + emit(ctx, bnez, rt, 8); /* PC += 8 if rt != 0 */ + emit(ctx, nop); /* +0: delay slot */ + emit(ctx, or, rd, rs, MIPS_R_ZERO); /* +4: rd = rs */ + } + clobber_reg(ctx, rd); + clobber_reg(ctx, rs); +} + +/* + * Conditional movn or an emulated equivalent. + * Note that the rs register may be modified. + */ +static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt) +{ + if (cpu_has_mips_2) { + emit(ctx, movn, rd, rs, rt); /* rd = rt ? rs : rd */ + } else if (cpu_has_mips32r6) { + if (rs != MIPS_R_ZERO) + emit(ctx, selnez, rs, rs, rt); /* rs = 0 if rt == 0 */ + emit(ctx, seleqz, rd, rd, rt); /* rd = 0 if rt != 0 */ + if (rs != MIPS_R_ZERO) + emit(ctx, or, rd, rd, rs); /* rd = rd | rs */ + } else { + emit(ctx, beqz, rt, 8); /* PC += 8 if rt == 0 */ + emit(ctx, nop); /* +0: delay slot */ + emit(ctx, or, rd, rs, MIPS_R_ZERO); /* +4: rd = rs */ + } + clobber_reg(ctx, rd); + clobber_reg(ctx, rs); +} + +/* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */ +static void emit_sltiu_r64(struct jit_context *ctx, u8 rd, + const u8 rs[], s64 imm) +{ + u8 tmp = MIPS_R_T9; + + if (imm < 0) { + emit_mov_i(ctx, rd, imm); /* rd = imm */ + emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */ + emit(ctx, sltiu, tmp, hi(rs), -1); /* tmp = rsh < ~0U */ + emit(ctx, or, rd, rd, tmp); /* rd = rd | tmp */ + } else { /* imm >= 0 */ + if (imm > 0x7fff) { + emit_mov_i(ctx, rd, (s32)imm); /* rd = imm */ + emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */ + } else { + emit(ctx, sltiu, rd, lo(rs), imm); /* rd = 
rsl < imm */ + } + emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh */ + } +} + +/* Emulation of 64-bit sltu rd, rs, rt */ +static void emit_sltu_r64(struct jit_context *ctx, u8 rd, + const u8 rs[], const u8 rt[]) +{ + u8 tmp = MIPS_R_T9; + + emit(ctx, sltu, rd, lo(rs), lo(rt)); /* rd = rsl < rtl */ + emit(ctx, subu, tmp, hi(rs), hi(rt)); /* tmp = rsh - rth */ + emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp); /* rd = 0 if tmp != 0 */ + emit(ctx, sltu, tmp, hi(rs), hi(rt)); /* tmp = rsh < rth */ + emit(ctx, or, rd, rd, tmp); /* rd = rd | tmp */ +} + +/* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */ +static void emit_slti_r64(struct jit_context *ctx, u8 rd, + const u8 rs[], s64 imm) +{ + u8 t1 = MIPS_R_T8; + u8 t2 = MIPS_R_T9; + u8 cmp; + + /* + * if ((rs < 0) ^ (imm < 0)) t1 = imm >u rsl + * else t1 = rsl <u imm + */ + emit_mov_i(ctx, rd, (s32)imm); + emit(ctx, sltu, t1, lo(rs), rd); /* t1 = rsl <u imm */ + emit(ctx, sltu, t2, rd, lo(rs)); /* t2 = imm <u rsl */ + emit(ctx, srl, rd, hi(rs), 31); /* rd = rsh >> 31 */ + if (imm < 0) + emit_movz_r(ctx, t1, t2, rd); /* t1 = rd ? t1 : t2 */ + else + emit_movn_r(ctx, t1, t2, rd); /* t1 = rd ? t2 : t1 */ + /* + * if ((imm < 0 && rsh != 0xffffffff) || + * (imm >= 0 && rsh != 0)) + * t1 = 0 + */ + if (imm < 0) { + emit(ctx, addiu, rd, hi(rs), 1); /* rd = rsh + 1 */ + cmp = rd; + } else { /* imm >= 0 */ + cmp = hi(rs); + } + emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp); /* t1 = 0 if cmp != 0 */ + + /* + * if (imm < 0) rd = rsh < -1 + * else rd = rsh < 0 + * rd = rd | t1 + */ + emit(ctx, slti, rd, hi(rs), imm < 0 ? 
-1 : 0); /* rd = rsh < hi(imm) */ + emit(ctx, or, rd, rd, t1); /* rd = rd | t1 */ +} + +/* Emulation of 64-bit(slt rd, rs, rt) */ +static void emit_slt_r64(struct jit_context *ctx, u8 rd, + const u8 rs[], const u8 rt[]) +{ + u8 t1 = MIPS_R_T7; + u8 t2 = MIPS_R_T8; + u8 t3 = MIPS_R_T9; + + /* + * if ((rs < 0) ^ (rt < 0)) t1 = rtl <u rsl + * else t1 = rsl <u rtl + * if (rsh != rth) t1 = 0 + */ + emit(ctx, sltu, t1, lo(rs), lo(rt)); /* t1 = rsl <u rtl */ + emit(ctx, sltu, t2, lo(rt), lo(rs)); /* t2 = rtl <u rsl */ + emit(ctx, xor, t3, hi(rs), hi(rt)); /* t3 = rsh ^ rth */ + emit(ctx, srl, rd, t3, 31); /* rd = t3 >> 31 */ + emit_movn_r(ctx, t1, t2, rd); /* t1 = rd ? t2 : t1 */ + emit_movn_r(ctx, t1, MIPS_R_ZERO, t3); /* t1 = 0 if t3 != 0 */ + + /* rd = (rsh < rth) | t1 */ + emit(ctx, slt, rd, hi(rs), hi(rt)); /* rd = rsh <s rth */ + emit(ctx, or, rd, rd, t1); /* rd = rd | t1 */ +} + +/* Jump immediate (64-bit) */ +static void emit_jmp_i64(struct jit_context *ctx, + const u8 dst[], s32 imm, s32 off, u8 op) +{ + u8 tmp = MIPS_R_T6; + + switch (op) { + /* No-op, used internally for branch optimization */ + case JIT_JNOP: + break; + /* PC += off if dst == imm */ + /* PC += off if dst != imm */ + case BPF_JEQ: + case BPF_JNE: + if (imm >= -0x7fff && imm <= 0x8000) { + emit(ctx, addiu, tmp, lo(dst), -imm); + } else if ((u32)imm <= 0xffff) { + emit(ctx, xori, tmp, lo(dst), imm); + } else { /* Register fallback */ + emit_mov_i(ctx, tmp, imm); + emit(ctx, xor, tmp, lo(dst), tmp); + } + if (imm < 0) { /* Compare sign extension: hi(dst) + 1 == 0 iff hi(dst) == ~0; needs the immediate form (addiu), addu would read register $1 */ + emit(ctx, addiu, MIPS_R_T9, hi(dst), 1); + emit(ctx, or, tmp, tmp, MIPS_R_T9); + } else { /* Compare zero extension */ + emit(ctx, or, tmp, tmp, hi(dst)); + } + if (op == BPF_JEQ) + emit(ctx, beqz, tmp, off); + else /* BPF_JNE */ + emit(ctx, bnez, tmp, off); + break; + /* PC += off if dst & imm */ + /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */ + case BPF_JSET: + case JIT_JNSET: + if ((u32)imm <= 0xffff) { + emit(ctx, andi, tmp, 
lo(dst), imm); + } else { /* Register fallback */ + emit_mov_i(ctx, tmp, imm); + emit(ctx, and, tmp, lo(dst), tmp); + } + if (imm < 0) /* Sign-extension pulls in high word */ + emit(ctx, or, tmp, tmp, hi(dst)); + if (op == BPF_JSET) + emit(ctx, bnez, tmp, off); + else /* JIT_JNSET */ + emit(ctx, beqz, tmp, off); + break; + /* PC += off if dst > imm */ + case BPF_JGT: + emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1); + emit(ctx, beqz, tmp, off); + break; + /* PC += off if dst >= imm */ + case BPF_JGE: + emit_sltiu_r64(ctx, tmp, dst, imm); + emit(ctx, beqz, tmp, off); + break; + /* PC += off if dst < imm */ + case BPF_JLT: + emit_sltiu_r64(ctx, tmp, dst, imm); + emit(ctx, bnez, tmp, off); + break; + /* PC += off if dst <= imm */ + case BPF_JLE: + emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1); + emit(ctx, bnez, tmp, off); + break; + /* PC += off if dst > imm (signed) */ + case BPF_JSGT: + emit_slti_r64(ctx, tmp, dst, (s64)imm + 1); + emit(ctx, beqz, tmp, off); + break; + /* PC += off if dst >= imm (signed) */ + case BPF_JSGE: + emit_slti_r64(ctx, tmp, dst, imm); + emit(ctx, beqz, tmp, off); + break; + /* PC += off if dst < imm (signed) */ + case BPF_JSLT: + emit_slti_r64(ctx, tmp, dst, imm); + emit(ctx, bnez, tmp, off); + break; + /* PC += off if dst <= imm (signed) */ + case BPF_JSLE: + emit_slti_r64(ctx, tmp, dst, (s64)imm + 1); + emit(ctx, bnez, tmp, off); + break; + } +} + +/* Jump register (64-bit) */ +static void emit_jmp_r64(struct jit_context *ctx, + const u8 dst[], const u8 src[], s32 off, u8 op) +{ + u8 t1 = MIPS_R_T6; + u8 t2 = MIPS_R_T7; + + switch (op) { + /* No-op, used internally for branch optimization */ + case JIT_JNOP: + break; + /* PC += off if dst == src */ + /* PC += off if dst != src */ + case BPF_JEQ: + case BPF_JNE: + emit(ctx, subu, t1, lo(dst), lo(src)); + emit(ctx, subu, t2, hi(dst), hi(src)); + emit(ctx, or, t1, t1, t2); + if (op == BPF_JEQ) + emit(ctx, beqz, t1, off); + else /* BPF_JNE */ + emit(ctx, bnez, t1, off); + break; + /* PC += off if 
dst & src */ + /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */ + case BPF_JSET: + case JIT_JNSET: + emit(ctx, and, t1, lo(dst), lo(src)); + emit(ctx, and, t2, hi(dst), hi(src)); + emit(ctx, or, t1, t1, t2); + if (op == BPF_JSET) + emit(ctx, bnez, t1, off); + else /* JIT_JNSET */ + emit(ctx, beqz, t1, off); + break; + /* PC += off if dst > src */ + case BPF_JGT: + emit_sltu_r64(ctx, t1, src, dst); + emit(ctx, bnez, t1, off); + break; + /* PC += off if dst >= src */ + case BPF_JGE: + emit_sltu_r64(ctx, t1, dst, src); + emit(ctx, beqz, t1, off); + break; + /* PC += off if dst < src */ + case BPF_JLT: + emit_sltu_r64(ctx, t1, dst, src); + emit(ctx, bnez, t1, off); + break; + /* PC += off if dst <= src */ + case BPF_JLE: + emit_sltu_r64(ctx, t1, src, dst); + emit(ctx, beqz, t1, off); + break; + /* PC += off if dst > src (signed) */ + case BPF_JSGT: + emit_slt_r64(ctx, t1, src, dst); + emit(ctx, bnez, t1, off); + break; + /* PC += off if dst >= src (signed) */ + case BPF_JSGE: + emit_slt_r64(ctx, t1, dst, src); + emit(ctx, beqz, t1, off); + break; + /* PC += off if dst < src (signed) */ + case BPF_JSLT: + emit_slt_r64(ctx, t1, dst, src); + emit(ctx, bnez, t1, off); + break; + /* PC += off if dst <= src (signed) */ + case BPF_JSLE: + emit_slt_r64(ctx, t1, src, dst); + emit(ctx, beqz, t1, off); + break; + } +} + +/* Function call */ +static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn) +{ + bool fixed; + u64 addr; + + /* Decode the call address */ + if (bpf_jit_get_func_addr(ctx->program, insn, false, + &addr, &fixed) < 0) + return -1; + if (!fixed) + return -1; + + /* Push stack arguments */ + push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK); + + /* Emit function call */ + emit_mov_i(ctx, MIPS_R_T9, addr); + emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + emit(ctx, nop); /* Delay slot */ + + clobber_reg(ctx, MIPS_R_RA); + clobber_reg(ctx, MIPS_R_V0); + clobber_reg(ctx, MIPS_R_V1); + return 0; +} + +/* Function tail call */ +static 
int emit_tail_call(struct jit_context *ctx) +{ + u8 ary = lo(bpf2mips32[BPF_REG_2]); + u8 ind = lo(bpf2mips32[BPF_REG_3]); + u8 t1 = MIPS_R_T8; + u8 t2 = MIPS_R_T9; + int off; + + /* + * Tail call: + * eBPF R1 - function argument (context ptr), passed in a0-a1 + * eBPF R2 - ptr to object with array of function entry points + * eBPF R3 - array index of function to be called + * stack[sz] - remaining tail call count, initialized in prologue + */ + + /* if (ind >= ary->map.max_entries) goto out */ + off = offsetof(struct bpf_array, map.max_entries); + if (off > 0x7fff) + return -1; + emit(ctx, lw, t1, off, ary); /* t1 = ary->map.max_entries*/ + emit_load_delay(ctx); /* Load delay slot */ + emit(ctx, sltu, t1, ind, t1); /* t1 = ind < t1 */ + emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0 */ + /* (next insn delay slot) */ + /* if (TCC-- <= 0) goto out */ + emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP); /* t2 = *(SP + size) */ + emit_load_delay(ctx); /* Load delay slot */ + emit(ctx, blez, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 <= 0 */ + emit(ctx, addiu, t2, t2, -1); /* t2-- (delay slot) */ + emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP); /* *(SP + size) = t2 */ + + /* prog = ary->ptrs[ind] */ + off = offsetof(struct bpf_array, ptrs); + if (off > 0x7fff) + return -1; + emit(ctx, sll, t1, ind, 2); /* t1 = ind << 2 */ + emit(ctx, addu, t1, t1, ary); /* t1 += ary */ + emit(ctx, lw, t2, off, t1); /* t2 = *(t1 + off) */ + emit_load_delay(ctx); /* Load delay slot */ + + /* if (prog == 0) goto out */ + emit(ctx, beqz, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 == 0 */ + emit(ctx, nop); /* Delay slot */ + + /* func = prog->bpf_func + 8 (prologue skip offset) */ + off = offsetof(struct bpf_prog, bpf_func); + if (off > 0x7fff) + return -1; + emit(ctx, lw, t1, off, t2); /* t1 = *(t2 + off) */ + emit_load_delay(ctx); /* Load delay slot */ + emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP); /* t1 += skip (8 or 12) */ + + /* goto func */ + build_epilogue(ctx, 
t1); + return 0; +} + +/* + * Stack frame layout for a JITed program (stack grows down). + * + * Higher address : Caller's stack frame : + * :----------------------------: + * : 64-bit eBPF args r3-r5 : + * :----------------------------: + * : Reserved / tail call count : + * +============================+ <--- MIPS sp before call + * | Callee-saved registers, | + * | including RA and FP | + * +----------------------------+ <--- eBPF FP (MIPS zero,fp) + * | Local eBPF variables | + * | allocated by program | + * +----------------------------+ + * | Reserved for caller-saved | + * | registers | + * +----------------------------+ + * | Reserved for 64-bit eBPF | + * | args r3-r5 & args passed | + * | on stack in kernel calls | + * Lower address +============================+ <--- MIPS sp + */ + +/* Build program prologue to set up the stack and registers */ +void build_prologue(struct jit_context *ctx) +{ + const u8 *r1 = bpf2mips32[BPF_REG_1]; + const u8 *fp = bpf2mips32[BPF_REG_FP]; + int stack, saved, locals, reserved; + + /* + * The first two instructions initialize TCC in the reserved (for us) + * 16-byte area in the parent's stack frame. On a tail call, the + * calling function jumps into the prologue after these instructions. + */ + emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, + min(MAX_TAIL_CALL_CNT + 1, 0xffff)); + emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP); + + /* + * Register eBPF R1 contains the 32-bit context pointer argument. + * A 32-bit argument is always passed in MIPS register a0, regardless + * of CPU endianness. Initialize R1 accordingly and zero-extend. 
+ */ +#ifdef __BIG_ENDIAN + emit(ctx, move, lo(r1), MIPS_R_A0); +#endif + + /* === Entry-point for tail calls === */ + + /* Zero-extend the 32-bit argument */ + emit(ctx, move, hi(r1), MIPS_R_ZERO); + + /* If the eBPF frame pointer was accessed it must be saved */ + if (ctx->accessed & BIT(BPF_REG_FP)) + clobber_reg64(ctx, fp); + + /* Compute the stack space needed for callee-saved registers */ + saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32); + saved = ALIGN(saved, MIPS_STACK_ALIGNMENT); + + /* Stack space used by eBPF program local data */ + locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT); + + /* + * If we are emitting function calls, reserve extra stack space for + * caller-saved registers and function arguments passed on the stack. + * The required space is computed automatically during resource + * usage discovery (pass 1). + */ + reserved = ctx->stack_used; + + /* Allocate the stack frame */ + stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT); + emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack); + + /* Store callee-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved); + + /* Initialize the eBPF frame pointer if accessed */ + if (ctx->accessed & BIT(BPF_REG_FP)) + emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved); + + ctx->saved_size = saved; + ctx->stack_size = stack; +} + +/* Build the program epilogue to restore the stack and registers */ +void build_epilogue(struct jit_context *ctx, int dest_reg) +{ + /* Restore callee-saved registers from stack */ + pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, + ctx->stack_size - ctx->saved_size); + /* + * A 32-bit return value is always passed in MIPS register v0, + * but on big-endian targets the low part of R0 is mapped to v1. 
+ */ +#ifdef __BIG_ENDIAN + emit(ctx, move, MIPS_R_V0, MIPS_R_V1); +#endif + + /* Jump to the return address and adjust the stack pointer */ + emit(ctx, jr, dest_reg); + emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size); +} + +/* Build one eBPF instruction */ +int build_insn(const struct bpf_insn *insn, struct jit_context *ctx) +{ + const u8 *dst = bpf2mips32[insn->dst_reg]; + const u8 *src = bpf2mips32[insn->src_reg]; + const u8 *res = bpf2mips32[BPF_REG_0]; + const u8 *tmp = bpf2mips32[JIT_REG_TMP]; + u8 code = insn->code; + s16 off = insn->off; + s32 imm = insn->imm; + s32 val, rel; + u8 alu, jmp; + + switch (code) { + /* ALU operations */ + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + emit_mov_i(ctx, lo(dst), imm); + emit_zext_ver(ctx, dst); + break; + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + if (imm == 1) { + /* Special mov32 for zext */ + emit_mov_i(ctx, hi(dst), 0); + } else { + emit_mov_r(ctx, lo(dst), lo(src)); + emit_zext_ver(ctx, dst); + } + break; + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + emit_alu_i(ctx, lo(dst), 0, BPF_NEG); + emit_zext_ver(ctx, dst); + break; + /* dst = dst & imm */ + /* dst = dst | imm */ + /* dst = dst ^ imm */ + /* dst = dst << imm */ + /* dst = dst >> imm */ + /* dst = dst >> imm (arithmetic) */ + /* dst = dst + imm */ + /* dst = dst - imm */ + /* dst = dst * imm */ + /* dst = dst / imm */ + /* dst = dst % imm */ + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + if (!valid_alu_i(BPF_OP(code), imm)) { + emit_mov_i(ctx, MIPS_R_T6, imm); + emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code)); + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { + emit_alu_i(ctx, lo(dst), val, 
alu); + } + emit_zext_ver(ctx, dst); + break; + /* dst = dst & src */ + /* dst = dst | src */ + /* dst = dst ^ src */ + /* dst = dst << src */ + /* dst = dst >> src */ + /* dst = dst >> src (arithmetic) */ + /* dst = dst + src */ + /* dst = dst - src */ + /* dst = dst * src */ + /* dst = dst / src */ + /* dst = dst % src */ + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_ARSH | BPF_X: + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_X: + emit_alu_r(ctx, lo(dst), lo(src), BPF_OP(code)); + emit_zext_ver(ctx, dst); + break; + /* dst = imm (64-bit) */ + case BPF_ALU64 | BPF_MOV | BPF_K: + emit_mov_se_i64(ctx, dst, imm); + break; + /* dst = src (64-bit) */ + case BPF_ALU64 | BPF_MOV | BPF_X: + emit_mov_r(ctx, lo(dst), lo(src)); + emit_mov_r(ctx, hi(dst), hi(src)); + break; + /* dst = -dst (64-bit) */ + case BPF_ALU64 | BPF_NEG: + emit_neg_i64(ctx, dst); + break; + /* dst = dst & imm (64-bit) */ + case BPF_ALU64 | BPF_AND | BPF_K: + emit_alu_i64(ctx, dst, imm, BPF_OP(code)); + break; + /* dst = dst | imm (64-bit) */ + /* dst = dst ^ imm (64-bit) */ + /* dst = dst + imm (64-bit) */ + /* dst = dst - imm (64-bit) */ + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + if (imm) + emit_alu_i64(ctx, dst, imm, BPF_OP(code)); + break; + /* dst = dst << imm (64-bit) */ + /* dst = dst >> imm (64-bit) */ + /* dst = dst >> imm (64-bit, arithmetic) */ + case BPF_ALU64 | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + if (imm) + emit_shift_i64(ctx, dst, imm, BPF_OP(code)); + break; + /* dst = dst * imm (64-bit) */ + case BPF_ALU64 | BPF_MUL | BPF_K: + emit_mul_i64(ctx, dst, imm); + break; + /* 
dst = dst / imm (64-bit) */ + /* dst = dst % imm (64-bit) */ + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + /* + * Sign-extend the immediate value into a temporary register, + * and then do the operation on this register. + */ + emit_mov_se_i64(ctx, tmp, imm); + emit_divmod_r64(ctx, dst, tmp, BPF_OP(code)); + break; + /* dst = dst & src (64-bit) */ + /* dst = dst | src (64-bit) */ + /* dst = dst ^ src (64-bit) */ + /* dst = dst + src (64-bit) */ + /* dst = dst - src (64-bit) */ + case BPF_ALU64 | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + emit_alu_r64(ctx, dst, src, BPF_OP(code)); + break; + /* dst = dst << src (64-bit) */ + /* dst = dst >> src (64-bit) */ + /* dst = dst >> src (64-bit, arithmetic) */ + case BPF_ALU64 | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_RSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit_shift_r64(ctx, dst, lo(src), BPF_OP(code)); + break; + /* dst = dst * src (64-bit) */ + case BPF_ALU64 | BPF_MUL | BPF_X: + emit_mul_r64(ctx, dst, src); + break; + /* dst = dst / src (64-bit) */ + /* dst = dst % src (64-bit) */ + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_divmod_r64(ctx, dst, src, BPF_OP(code)); + break; + /* dst = htole(dst) */ + /* dst = htobe(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + case BPF_ALU | BPF_END | BPF_FROM_BE: + if (BPF_SRC(code) == +#ifdef __BIG_ENDIAN + BPF_FROM_LE +#else + BPF_FROM_BE +#endif + ) + emit_bswap_r64(ctx, dst, imm); + else + emit_trunc_r64(ctx, dst, imm); + break; + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + emit_mov_i(ctx, lo(dst), imm); + emit_mov_i(ctx, hi(dst), insn[1].imm); + return 1; + /* LDX: dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_DW: + emit_ldx(ctx, dst, lo(src), off, 
BPF_SIZE(code)); + break; + /* ST: *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_DW: + switch (BPF_SIZE(code)) { + case BPF_DW: + /* Sign-extend immediate value into temporary reg */ + emit_mov_se_i64(ctx, tmp, imm); + break; + case BPF_W: + case BPF_H: + case BPF_B: + emit_mov_i(ctx, lo(tmp), imm); + break; + } + emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code)); + break; + /* STX: *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_DW: + emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code)); + break; + /* Speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* Atomics */ + case BPF_STX | BPF_ATOMIC | BPF_W: + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + case BPF_AND: + case BPF_AND | BPF_FETCH: + case BPF_OR: + case BPF_OR | BPF_FETCH: + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + case BPF_XCHG: + if (cpu_has_llsc) + emit_atomic_r(ctx, lo(dst), lo(src), off, imm); + else /* Non-ll/sc fallback */ + emit_atomic_r32(ctx, lo(dst), lo(src), + off, imm); + if (imm & BPF_FETCH) + emit_zext_ver(ctx, src); + break; + case BPF_CMPXCHG: + if (cpu_has_llsc) + emit_cmpxchg_r(ctx, lo(dst), lo(src), + lo(res), off); + else /* Non-ll/sc fallback */ + emit_cmpxchg_r32(ctx, lo(dst), lo(src), off); + /* Result zero-extension inserted by verifier */ + break; + default: + goto notyet; + } + break; + /* Atomics (64-bit) */ + case BPF_STX | BPF_ATOMIC | BPF_DW: + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + case BPF_AND: + case BPF_AND | BPF_FETCH: + case BPF_OR: + case BPF_OR | BPF_FETCH: + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + case BPF_XCHG: + emit_atomic_r64(ctx, lo(dst), src, off, imm); + break; + case BPF_CMPXCHG: + emit_cmpxchg_r64(ctx, lo(dst), src, off); + break; + default: + goto notyet; + } + break; + /* PC += off 
if dst == src */ + /* PC += off if dst != src */ + /* PC += off if dst & src */ + /* PC += off if dst > src */ + /* PC += off if dst >= src */ + /* PC += off if dst < src */ + /* PC += off if dst <= src */ + /* PC += off if dst > src (signed) */ + /* PC += off if dst >= src (signed) */ + /* PC += off if dst < src (signed) */ + /* PC += off if dst <= src (signed) */ + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + if (off == 0) + break; + setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); + emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp); + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == imm */ + /* PC += off if dst != imm */ + /* PC += off if dst & imm */ + /* PC += off if dst > imm */ + /* PC += off if dst >= imm */ + /* PC += off if dst < imm */ + /* PC += off if dst <= imm */ + /* PC += off if dst > imm (signed) */ + /* PC += off if dst >= imm (signed) */ + /* PC += off if dst < imm (signed) */ + /* PC += off if dst <= imm (signed) */ + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + if (off == 0) + break; + setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel); + if (valid_jmp_i(jmp, imm)) { + emit_jmp_i(ctx, lo(dst), imm, rel, jmp); + } else { + /* Move large immediate to register */ + emit_mov_i(ctx, MIPS_R_T6, imm); + 
emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp); + } + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == src */ + /* PC += off if dst != src */ + /* PC += off if dst & src */ + /* PC += off if dst > src */ + /* PC += off if dst >= src */ + /* PC += off if dst < src */ + /* PC += off if dst <= src */ + /* PC += off if dst > src (signed) */ + /* PC += off if dst >= src (signed) */ + /* PC += off if dst < src (signed) */ + /* PC += off if dst <= src (signed) */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + if (off == 0) + break; + setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); + emit_jmp_r64(ctx, dst, src, rel, jmp); + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == imm */ + /* PC += off if dst != imm */ + /* PC += off if dst & imm */ + /* PC += off if dst > imm */ + /* PC += off if dst >= imm */ + /* PC += off if dst < imm */ + /* PC += off if dst <= imm */ + /* PC += off if dst > imm (signed) */ + /* PC += off if dst >= imm (signed) */ + /* PC += off if dst < imm (signed) */ + /* PC += off if dst <= imm (signed) */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + if (off == 0) + break; + setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel); + emit_jmp_i64(ctx, dst, imm, rel, jmp); + if (finish_jmp(ctx, jmp, off) < 0) + goto 
toofar; + break; + /* PC += off */ + case BPF_JMP | BPF_JA: + if (off == 0) + break; + if (emit_ja(ctx, off) < 0) + goto toofar; + break; + /* Tail call */ + case BPF_JMP | BPF_TAIL_CALL: + if (emit_tail_call(ctx) < 0) + goto invalid; + break; + /* Function call */ + case BPF_JMP | BPF_CALL: + if (emit_call(ctx, insn) < 0) + goto invalid; + break; + /* Function return */ + case BPF_JMP | BPF_EXIT: + /* + * Optimization: when last instruction is EXIT + * simply continue to epilogue. + */ + if (ctx->bpf_index == ctx->program->len - 1) + break; + if (emit_exit(ctx) < 0) + goto toofar; + break; + + default: +invalid: + pr_err_once("unknown opcode %02x\n", code); + return -EINVAL; +notyet: + pr_info_once("*** NOT YET: opcode %02x ***\n", code); + return -EFAULT; +toofar: + pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n", + ctx->bpf_index, code); + return -E2BIG; + } + return 0; +} diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c new file mode 100644 index 000000000000..815ade724227 --- /dev/null +++ b/arch/mips/net/bpf_jit_comp64.c @@ -0,0 +1,1060 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Just-In-Time compiler for eBPF bytecode on MIPS. + * Implementation of JIT functions for 64-bit CPUs. + * + * Copyright (c) 2021 Anyfi Networks AB. + * Author: Johan Almbladh <johan.almbladh@gmail.com> + * + * Based on code and ideas from + * Copyright (c) 2017 Cavium, Inc. 
+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> + * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> + */ + +#include <linux/errno.h> +#include <linux/filter.h> +#include <linux/bpf.h> +#include <asm/cpu-features.h> +#include <asm/isa-rev.h> +#include <asm/uasm.h> + +#include "bpf_jit_comp.h" + +/* MIPS t0-t3 are not available in the n64 ABI */ +#undef MIPS_R_T0 +#undef MIPS_R_T1 +#undef MIPS_R_T2 +#undef MIPS_R_T3 + +/* Stack is 16-byte aligned in n64 ABI */ +#define MIPS_STACK_ALIGNMENT 16 + +/* Extra 64-bit eBPF registers used by JIT */ +#define JIT_REG_TC (MAX_BPF_JIT_REG + 0) +#define JIT_REG_ZX (MAX_BPF_JIT_REG + 1) + +/* Number of prologue bytes to skip when doing a tail call */ +#define JIT_TCALL_SKIP 4 + +/* Callee-saved CPU registers that the JIT must preserve */ +#define JIT_CALLEE_REGS \ + (BIT(MIPS_R_S0) | \ + BIT(MIPS_R_S1) | \ + BIT(MIPS_R_S2) | \ + BIT(MIPS_R_S3) | \ + BIT(MIPS_R_S4) | \ + BIT(MIPS_R_S5) | \ + BIT(MIPS_R_S6) | \ + BIT(MIPS_R_S7) | \ + BIT(MIPS_R_GP) | \ + BIT(MIPS_R_FP) | \ + BIT(MIPS_R_RA)) + +/* Caller-saved CPU registers available for JIT use */ +#define JIT_CALLER_REGS \ + (BIT(MIPS_R_A5) | \ + BIT(MIPS_R_A6) | \ + BIT(MIPS_R_A7)) +/* + * Mapping of 64-bit eBPF registers to 64-bit native MIPS registers. + * MIPS registers t4 - t7 may be used by the JIT as temporary registers. + * MIPS registers t8 - t9 are reserved for single-register common functions. 
+ */ +static const u8 bpf2mips64[] = { + /* Return value from in-kernel function, and exit value from eBPF */ + [BPF_REG_0] = MIPS_R_V0, + /* Arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = MIPS_R_A0, + [BPF_REG_2] = MIPS_R_A1, + [BPF_REG_3] = MIPS_R_A2, + [BPF_REG_4] = MIPS_R_A3, + [BPF_REG_5] = MIPS_R_A4, + /* Callee-saved registers that in-kernel function will preserve */ + [BPF_REG_6] = MIPS_R_S0, + [BPF_REG_7] = MIPS_R_S1, + [BPF_REG_8] = MIPS_R_S2, + [BPF_REG_9] = MIPS_R_S3, + /* Read-only frame pointer to access the eBPF stack */ + [BPF_REG_FP] = MIPS_R_FP, + /* Temporary register for blinding constants */ + [BPF_REG_AX] = MIPS_R_AT, + /* Tail call count register, caller-saved */ + [JIT_REG_TC] = MIPS_R_A5, + /* Constant for register zero-extension */ + [JIT_REG_ZX] = MIPS_R_V1, +}; + +/* + * MIPS 32-bit operations on 64-bit registers generate a sign-extended + * result. However, the eBPF ISA mandates zero-extension, so we rely on the + * verifier to add that for us (emit_zext_ver). In addition, ALU arithmetic + * operations, right shift and byte swap require properly sign-extended + * operands or the result is unpredictable. We emit explicit sign-extensions + * in those cases. 
+ */ + +/* Sign extension */ +static void emit_sext(struct jit_context *ctx, u8 dst, u8 src) +{ + emit(ctx, sll, dst, src, 0); + clobber_reg(ctx, dst); +} + +/* Zero extension */ +static void emit_zext(struct jit_context *ctx, u8 dst) +{ + if (cpu_has_mips64r2 || cpu_has_mips64r6) { + emit(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + } else { + emit(ctx, and, dst, dst, bpf2mips64[JIT_REG_ZX]); + access_reg(ctx, JIT_REG_ZX); /* We need the ZX register */ + } + clobber_reg(ctx, dst); +} + +/* Zero extension, if verifier does not do it for us */ +static void emit_zext_ver(struct jit_context *ctx, u8 dst) +{ + if (!ctx->program->aux->verifier_zext) + emit_zext(ctx, dst); +} + +/* dst = imm (64-bit) */ +static void emit_mov_i64(struct jit_context *ctx, u8 dst, u64 imm64) +{ + if (imm64 >= 0xffffffffffff8000ULL || imm64 < 0x8000ULL) { + emit(ctx, daddiu, dst, MIPS_R_ZERO, (s16)imm64); + } else if (imm64 >= 0xffffffff80000000ULL || + (imm64 < 0x80000000 && imm64 > 0xffff)) { + emit(ctx, lui, dst, (s16)(imm64 >> 16)); + emit(ctx, ori, dst, dst, (u16)imm64 & 0xffff); + } else { + u8 acc = MIPS_R_ZERO; + int shift = 0; + int k; + + for (k = 0; k < 4; k++) { + u16 half = imm64 >> (48 - 16 * k); + + if (acc == dst) + shift += 16; + + if (half) { + if (shift) + emit(ctx, dsll_safe, dst, dst, shift); + emit(ctx, ori, dst, acc, half); + acc = dst; + shift = 0; + } + } + if (shift) + emit(ctx, dsll_safe, dst, dst, shift); + } + clobber_reg(ctx, dst); +} + +/* ALU immediate operation (64-bit) */ +static void emit_alu_i64(struct jit_context *ctx, u8 dst, s32 imm, u8 op) +{ + switch (BPF_OP(op)) { + /* dst = dst | imm */ + case BPF_OR: + emit(ctx, ori, dst, dst, (u16)imm); + break; + /* dst = dst ^ imm */ + case BPF_XOR: + emit(ctx, xori, dst, dst, (u16)imm); + break; + /* dst = -dst */ + case BPF_NEG: + emit(ctx, dsubu, dst, MIPS_R_ZERO, dst); + break; + /* dst = dst << imm */ + case BPF_LSH: + emit(ctx, dsll_safe, dst, dst, imm); + break; + /* dst = dst >> imm */ + case BPF_RSH: + 
emit(ctx, dsrl_safe, dst, dst, imm); + break; + /* dst = dst >> imm (arithmetic) */ + case BPF_ARSH: + emit(ctx, dsra_safe, dst, dst, imm); + break; + /* dst = dst + imm */ + case BPF_ADD: + emit(ctx, daddiu, dst, dst, imm); + break; + /* dst = dst - imm */ + case BPF_SUB: + emit(ctx, daddiu, dst, dst, -imm); + break; + default: + /* Width-generic operations */ + emit_alu_i(ctx, dst, imm, op); + } + clobber_reg(ctx, dst); +} + +/* ALU register operation (64-bit) */ +static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op) +{ + switch (BPF_OP(op)) { + /* dst = dst << src */ + case BPF_LSH: + emit(ctx, dsllv, dst, dst, src); + break; + /* dst = dst >> src */ + case BPF_RSH: + emit(ctx, dsrlv, dst, dst, src); + break; + /* dst = dst >> src (arithmetic) */ + case BPF_ARSH: + emit(ctx, dsrav, dst, dst, src); + break; + /* dst = dst + src */ + case BPF_ADD: + emit(ctx, daddu, dst, dst, src); + break; + /* dst = dst - src */ + case BPF_SUB: + emit(ctx, dsubu, dst, dst, src); + break; + /* dst = dst * src */ + case BPF_MUL: + if (cpu_has_mips64r6) { + emit(ctx, dmulu, dst, dst, src); + } else { + emit(ctx, dmultu, dst, src); + emit(ctx, mflo, dst); + } + break; + /* dst = dst / src */ + case BPF_DIV: + if (cpu_has_mips64r6) { + emit(ctx, ddivu_r6, dst, dst, src); + } else { + emit(ctx, ddivu, dst, src); + emit(ctx, mflo, dst); + } + break; + /* dst = dst % src */ + case BPF_MOD: + if (cpu_has_mips64r6) { + emit(ctx, dmodu, dst, dst, src); + } else { + emit(ctx, ddivu, dst, src); + emit(ctx, mfhi, dst); + } + break; + default: + /* Width-generic operations */ + emit_alu_r(ctx, dst, src, op); + } + clobber_reg(ctx, dst); +} + +/* Swap sub words in a register double word */ +static void emit_swap_r64(struct jit_context *ctx, u8 dst, u8 mask, u32 bits) +{ + u8 tmp = MIPS_R_T9; + + emit(ctx, and, tmp, dst, mask); /* tmp = dst & mask */ + emit(ctx, dsll, tmp, tmp, bits); /* tmp = tmp << bits */ + emit(ctx, dsrl, dst, dst, bits); /* dst = dst >> bits */ + 
emit(ctx, and, dst, dst, mask); /* dst = dst & mask */ + emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */ +} + +/* Swap bytes and truncate a register double word, word or half word */ +static void emit_bswap_r64(struct jit_context *ctx, u8 dst, u32 width) +{ + switch (width) { + /* Swap bytes in a double word */ + case 64: + if (cpu_has_mips64r2 || cpu_has_mips64r6) { + emit(ctx, dsbh, dst, dst); + emit(ctx, dshd, dst, dst); + } else { + u8 t1 = MIPS_R_T6; + u8 t2 = MIPS_R_T7; + + emit(ctx, dsll32, t2, dst, 0); /* t2 = dst << 32 */ + emit(ctx, dsrl32, dst, dst, 0); /* dst = dst >> 32 */ + emit(ctx, or, dst, dst, t2); /* dst = dst | t2 */ + + emit(ctx, ori, t2, MIPS_R_ZERO, 0xffff); + emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */ + emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */ + emit_swap_r64(ctx, dst, t1, 16);/* dst = swap16(dst) */ + + emit(ctx, lui, t2, 0xff); /* t2 = 0x00ff0000 */ + emit(ctx, ori, t2, t2, 0xff); /* t2 = t2 | 0x00ff */ + emit(ctx, dsll32, t1, t2, 0); /* t1 = t2 << 32 */ + emit(ctx, or, t1, t1, t2); /* t1 = t1 | t2 */ + emit_swap_r64(ctx, dst, t1, 8); /* dst = swap8(dst) */ + } + break; + /* Swap bytes in a half word */ + /* Swap bytes in a word */ + case 32: + case 16: + emit_sext(ctx, dst, dst); + emit_bswap_r(ctx, dst, width); + if (cpu_has_mips64r2 || cpu_has_mips64r6) + emit_zext(ctx, dst); + break; + } + clobber_reg(ctx, dst); +} + +/* Truncate a register double word, word or half word */ +static void emit_trunc_r64(struct jit_context *ctx, u8 dst, u32 width) +{ + switch (width) { + case 64: + break; + /* Zero-extend a word */ + case 32: + emit_zext(ctx, dst); + break; + /* Zero-extend a half word */ + case 16: + emit(ctx, andi, dst, dst, 0xffff); + break; + } + clobber_reg(ctx, dst); +} + +/* Load operation: dst = *(size*)(src + off) */ +static void emit_ldx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size) +{ + switch (size) { + /* Load a byte */ + case BPF_B: + emit(ctx, lbu, dst, off, src); + break; + /* Load a half word 
*/ + case BPF_H: + emit(ctx, lhu, dst, off, src); + break; + /* Load a word */ + case BPF_W: + emit(ctx, lwu, dst, off, src); + break; + /* Load a double word */ + case BPF_DW: + emit(ctx, ld, dst, off, src); + break; + } + clobber_reg(ctx, dst); +} + +/* Store operation: *(size *)(dst + off) = src */ +static void emit_stx(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 size) +{ + switch (size) { + /* Store a byte */ + case BPF_B: + emit(ctx, sb, src, off, dst); + break; + /* Store a half word */ + case BPF_H: + emit(ctx, sh, src, off, dst); + break; + /* Store a word */ + case BPF_W: + emit(ctx, sw, src, off, dst); + break; + /* Store a double word */ + case BPF_DW: + emit(ctx, sd, src, off, dst); + break; + } +} + +/* Atomic read-modify-write */ +static void emit_atomic_r64(struct jit_context *ctx, + u8 dst, u8 src, s16 off, u8 code) +{ + u8 t1 = MIPS_R_T6; + u8 t2 = MIPS_R_T7; + + LLSC_sync(ctx); + emit(ctx, lld, t1, off, dst); + switch (code) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + emit(ctx, daddu, t2, t1, src); + break; + case BPF_AND: + case BPF_AND | BPF_FETCH: + emit(ctx, and, t2, t1, src); + break; + case BPF_OR: + case BPF_OR | BPF_FETCH: + emit(ctx, or, t2, t1, src); + break; + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + emit(ctx, xor, t2, t1, src); + break; + case BPF_XCHG: + emit(ctx, move, t2, src); + break; + } + emit(ctx, scd, t2, off, dst); + emit(ctx, LLSC_beqz, t2, -16 - LLSC_offset); + emit(ctx, nop); /* Delay slot */ + + if (code & BPF_FETCH) { + emit(ctx, move, src, t1); + clobber_reg(ctx, src); + } +} + +/* Atomic compare-and-exchange */ +static void emit_cmpxchg_r64(struct jit_context *ctx, u8 dst, u8 src, s16 off) +{ + u8 r0 = bpf2mips64[BPF_REG_0]; + u8 t1 = MIPS_R_T6; + u8 t2 = MIPS_R_T7; + + LLSC_sync(ctx); + emit(ctx, lld, t1, off, dst); + emit(ctx, bne, t1, r0, 12); + emit(ctx, move, t2, src); /* Delay slot */ + emit(ctx, scd, t2, off, dst); + emit(ctx, LLSC_beqz, t2, -20 - LLSC_offset); + emit(ctx, move, r0, t1); /* Delay 
slot */ + + clobber_reg(ctx, r0); +} + +/* Function call; returns 0, or -1 if the call address cannot be resolved or is not fixed */ +static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn) +{ + u8 zx = bpf2mips64[JIT_REG_ZX]; + u8 tmp = MIPS_R_T6; + bool fixed; + u64 addr; + + /* Decode the call address */ + if (bpf_jit_get_func_addr(ctx->program, insn, false, + &addr, &fixed) < 0) + return -1; + if (!fixed) + return -1; + + /* Push caller-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0); + + /* Emit function call */ + emit_mov_i64(ctx, tmp, addr & JALR_MASK); + emit(ctx, jalr, MIPS_R_RA, tmp); + emit(ctx, nop); /* Delay slot */ + + /* Restore caller-saved registers */ + pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS, 0, 0); + + /* Re-initialize the JIT zero-extension register if accessed */ + if (ctx->accessed & BIT(JIT_REG_ZX)) { + emit(ctx, daddiu, zx, MIPS_R_ZERO, -1); + emit(ctx, dsrl32, zx, zx, 0); + } + + clobber_reg(ctx, MIPS_R_RA); + clobber_reg(ctx, MIPS_R_V0); + clobber_reg(ctx, MIPS_R_V1); + return 0; +} + +/* Function tail call; returns 0, or -1 if a required struct offset exceeds 0x7fff */ +static int emit_tail_call(struct jit_context *ctx) +{ + u8 ary = bpf2mips64[BPF_REG_2]; + u8 ind = bpf2mips64[BPF_REG_3]; + u8 tcc = bpf2mips64[JIT_REG_TC]; + u8 tmp = MIPS_R_T6; + int off; + + /* + * Tail call: + * eBPF R1 - function argument (context ptr), passed in a0 + * eBPF R2 - ptr to object with array of function entry points + * eBPF R3 - array index of function to be called + */ + + /* if (ind >= ary->map.max_entries) goto out */ + off = offsetof(struct bpf_array, map.max_entries); + if (off > 0x7fff) + return -1; + emit(ctx, lwu, tmp, off, ary); /* tmp = ary->map.max_entries */ + emit(ctx, sltu, tmp, ind, tmp); /* tmp = ind < tmp */ + emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0 */ + + /* if (--TCC < 0) goto out */ + emit(ctx, daddiu, tcc, tcc, -1); /* tcc-- (delay slot) */ + emit(ctx, bltz, tcc, get_offset(ctx, 1)); /* PC += off(1) if tcc < 0 */ + /* (next insn delay slot) */ + /* prog = ary->ptrs[ind] */ + 
off = offsetof(struct bpf_array, ptrs); + if (off > 0x7fff) + return -1; + emit(ctx, dsll, tmp, ind, 3); /* tmp = ind << 3 */ + emit(ctx, daddu, tmp, tmp, ary); /* tmp += ary */ + emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */ + + /* if (prog == 0) goto out */ + emit(ctx, beqz, tmp, get_offset(ctx, 1)); /* PC += off(1) if tmp == 0 */ + emit(ctx, nop); /* Delay slot */ + + /* func = prog->bpf_func + JIT_TCALL_SKIP (prologue skip offset) */ + off = offsetof(struct bpf_prog, bpf_func); + if (off > 0x7fff) + return -1; + emit(ctx, ld, tmp, off, tmp); /* tmp = *(tmp + off) */ + emit(ctx, daddiu, tmp, tmp, JIT_TCALL_SKIP); /* tmp += skip (4) */ + + /* goto func */ + build_epilogue(ctx, tmp); + access_reg(ctx, JIT_REG_TC); + return 0; +} + +/* + * Stack frame layout for a JITed program (stack grows down). + * + * Higher address : Previous stack frame : + * +===========================+ <--- MIPS sp before call + * | Callee-saved registers, | + * | including RA and FP | + * +---------------------------+ <--- eBPF FP (MIPS fp) + * | Local eBPF variables | + * | allocated by program | + * +---------------------------+ + * | Reserved for caller-saved | + * | registers | + * Lower address +===========================+ <--- MIPS sp + */ + +/* Build program prologue to set up the stack and registers */ +void build_prologue(struct jit_context *ctx) +{ + u8 fp = bpf2mips64[BPF_REG_FP]; + u8 tc = bpf2mips64[JIT_REG_TC]; + u8 zx = bpf2mips64[JIT_REG_ZX]; + int stack, saved, locals, reserved; + + /* + * The first instruction initializes the tail call count register. + * On a tail call, the calling function jumps into the prologue + * after this instruction. + */ + emit(ctx, addiu, tc, MIPS_R_ZERO, min(MAX_TAIL_CALL_CNT + 1, 0xffff)); + + /* === Entry-point for tail calls === */ + + /* + * If the eBPF frame pointer and tail call count registers were + * accessed they must be preserved. Mark them as clobbered here + * to save and restore them on the stack as needed. 
+ */ + if (ctx->accessed & BIT(BPF_REG_FP)) + clobber_reg(ctx, fp); + if (ctx->accessed & BIT(JIT_REG_TC)) + clobber_reg(ctx, tc); + if (ctx->accessed & BIT(JIT_REG_ZX)) + clobber_reg(ctx, zx); + + /* Compute the stack space needed for callee-saved registers */ + saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u64); + saved = ALIGN(saved, MIPS_STACK_ALIGNMENT); + + /* Stack space used by eBPF program local data */ + locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT); + + /* + * If we are emitting function calls, reserve extra stack space for + * caller-saved registers needed by the JIT. The required space is + * computed automatically during resource usage discovery (pass 1). + */ + reserved = ctx->stack_used; + + /* Allocate the stack frame */ + stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT); + if (stack) + emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack); + + /* Store callee-saved registers on stack */ + push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved); + + /* Initialize the eBPF frame pointer if accessed */ + if (ctx->accessed & BIT(BPF_REG_FP)) + emit(ctx, daddiu, fp, MIPS_R_SP, stack - saved); + + /* Initialize the eBPF JIT zero-extension register if accessed */ + if (ctx->accessed & BIT(JIT_REG_ZX)) { + emit(ctx, daddiu, zx, MIPS_R_ZERO, -1); + emit(ctx, dsrl32, zx, zx, 0); + } + + ctx->saved_size = saved; + ctx->stack_size = stack; +} + +/* Build the program epilogue to restore the stack and registers */ +void build_epilogue(struct jit_context *ctx, int dest_reg) +{ + /* Restore callee-saved registers from stack */ + pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, + ctx->stack_size - ctx->saved_size); + + /* Release the stack frame */ + if (ctx->stack_size) + emit(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size); + + /* Jump to return address and sign-extend the 32-bit return value */ + emit(ctx, jr, dest_reg); + emit(ctx, sll, MIPS_R_V0, MIPS_R_V0, 0); /* Delay slot */ +} + +/* Build one 
eBPF instruction */ +int build_insn(const struct bpf_insn *insn, struct jit_context *ctx) +{ + u8 dst = bpf2mips64[insn->dst_reg]; + u8 src = bpf2mips64[insn->src_reg]; + u8 res = bpf2mips64[BPF_REG_0]; + u8 code = insn->code; + s16 off = insn->off; + s32 imm = insn->imm; + s32 val, rel; + u8 alu, jmp; + + switch (code) { + /* ALU operations */ + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + emit_mov_i(ctx, dst, imm); + emit_zext_ver(ctx, dst); + break; + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + if (imm == 1) { + /* Special mov32 for zext */ + emit_zext(ctx, dst); + } else { + emit_mov_r(ctx, dst, src); + emit_zext_ver(ctx, dst); + } + break; + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + emit_sext(ctx, dst, dst); + emit_alu_i(ctx, dst, 0, BPF_NEG); + emit_zext_ver(ctx, dst); + break; + /* dst = dst & imm */ + /* dst = dst | imm */ + /* dst = dst ^ imm */ + /* dst = dst << imm */ + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_LSH | BPF_K: + if (!valid_alu_i(BPF_OP(code), imm)) { + emit_mov_i(ctx, MIPS_R_T4, imm); + emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code)); + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { + emit_alu_i(ctx, dst, val, alu); + } + emit_zext_ver(ctx, dst); + break; + /* dst = dst >> imm */ + /* dst = dst >> imm (arithmetic) */ + /* dst = dst + imm */ + /* dst = dst - imm */ + /* dst = dst * imm */ + /* dst = dst / imm */ + /* dst = dst % imm */ + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + if (!valid_alu_i(BPF_OP(code), imm)) { + emit_sext(ctx, dst, dst); + emit_mov_i(ctx, MIPS_R_T4, imm); + emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code)); + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { + emit_sext(ctx, dst, dst); + emit_alu_i(ctx, dst, val, 
alu); + } + emit_zext_ver(ctx, dst); + break; + /* dst = dst & src */ + /* dst = dst | src */ + /* dst = dst ^ src */ + /* dst = dst << src */ + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU | BPF_LSH | BPF_X: + emit_alu_r(ctx, dst, src, BPF_OP(code)); + emit_zext_ver(ctx, dst); + break; + /* dst = dst >> src */ + /* dst = dst >> src (arithmetic) */ + /* dst = dst + src */ + /* dst = dst - src */ + /* dst = dst * src */ + /* dst = dst / src */ + /* dst = dst % src */ + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_ARSH | BPF_X: + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_MOD | BPF_X: + emit_sext(ctx, dst, dst); + emit_sext(ctx, MIPS_R_T4, src); + emit_alu_r(ctx, dst, MIPS_R_T4, BPF_OP(code)); + emit_zext_ver(ctx, dst); + break; + /* dst = imm (64-bit) */ + case BPF_ALU64 | BPF_MOV | BPF_K: + emit_mov_i(ctx, dst, imm); + break; + /* dst = src (64-bit) */ + case BPF_ALU64 | BPF_MOV | BPF_X: + emit_mov_r(ctx, dst, src); + break; + /* dst = -dst (64-bit) */ + case BPF_ALU64 | BPF_NEG: + emit_alu_i64(ctx, dst, 0, BPF_NEG); + break; + /* dst = dst & imm (64-bit) */ + /* dst = dst | imm (64-bit) */ + /* dst = dst ^ imm (64-bit) */ + /* dst = dst << imm (64-bit) */ + /* dst = dst >> imm (64-bit) */ + /* dst = dst >> imm ((64-bit, arithmetic) */ + /* dst = dst + imm (64-bit) */ + /* dst = dst - imm (64-bit) */ + /* dst = dst * imm (64-bit) */ + /* dst = dst / imm (64-bit) */ + /* dst = dst % imm (64-bit) */ + case BPF_ALU64 | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | 
BPF_K: + if (!valid_alu_i(BPF_OP(code), imm)) { + emit_mov_i(ctx, MIPS_R_T4, imm); + emit_alu_r64(ctx, dst, MIPS_R_T4, BPF_OP(code)); + } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) { + emit_alu_i64(ctx, dst, val, alu); + } + break; + /* dst = dst & src (64-bit) */ + /* dst = dst | src (64-bit) */ + /* dst = dst ^ src (64-bit) */ + /* dst = dst << src (64-bit) */ + /* dst = dst >> src (64-bit) */ + /* dst = dst >> src (64-bit, arithmetic) */ + /* dst = dst + src (64-bit) */ + /* dst = dst - src (64-bit) */ + /* dst = dst * src (64-bit) */ + /* dst = dst / src (64-bit) */ + /* dst = dst % src (64-bit) */ + case BPF_ALU64 | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_RSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_alu_r64(ctx, dst, src, BPF_OP(code)); + break; + /* dst = htole(dst) */ + /* dst = htobe(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + case BPF_ALU | BPF_END | BPF_FROM_BE: + if (BPF_SRC(code) == +#ifdef __BIG_ENDIAN + BPF_FROM_LE +#else + BPF_FROM_BE +#endif + ) + emit_bswap_r64(ctx, dst, imm); + else + emit_trunc_r64(ctx, dst, imm); + break; + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + emit_mov_i64(ctx, dst, (u32)imm | ((u64)insn[1].imm << 32)); + return 1; + /* LDX: dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_DW: + emit_ldx(ctx, dst, src, off, BPF_SIZE(code)); + break; + /* ST: *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_DW: + emit_mov_i(ctx, MIPS_R_T4, imm); + emit_stx(ctx, dst, MIPS_R_T4, off, BPF_SIZE(code)); 
+ break; + /* STX: *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_DW: + emit_stx(ctx, dst, src, off, BPF_SIZE(code)); + break; + /* Speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* Atomics */ + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + case BPF_AND: + case BPF_AND | BPF_FETCH: + case BPF_OR: + case BPF_OR | BPF_FETCH: + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + case BPF_XCHG: + if (BPF_SIZE(code) == BPF_DW) { + emit_atomic_r64(ctx, dst, src, off, imm); + } else if (imm & BPF_FETCH) { + u8 tmp = dst; + + if (src == dst) { /* Don't overwrite dst */ + emit_mov_r(ctx, MIPS_R_T4, dst); + tmp = MIPS_R_T4; + } + emit_sext(ctx, src, src); + emit_atomic_r(ctx, tmp, src, off, imm); + emit_zext_ver(ctx, src); + } else { /* 32-bit, no fetch */ + emit_sext(ctx, MIPS_R_T4, src); + emit_atomic_r(ctx, dst, MIPS_R_T4, off, imm); + } + break; + case BPF_CMPXCHG: + if (BPF_SIZE(code) == BPF_DW) { + emit_cmpxchg_r64(ctx, dst, src, off); + } else { + u8 tmp = res; + + if (res == dst) /* Don't overwrite dst */ + tmp = MIPS_R_T4; + emit_sext(ctx, tmp, res); + emit_sext(ctx, MIPS_R_T5, src); + emit_cmpxchg_r(ctx, dst, MIPS_R_T5, tmp, off); + if (res == dst) /* Restore result */ + emit_mov_r(ctx, res, MIPS_R_T4); + /* Result zext inserted by verifier */ + } + break; + default: + goto notyet; + } + break; + /* PC += off if dst == src */ + /* PC += off if dst != src */ + /* PC += off if dst & src */ + /* PC += off if dst > src */ + /* PC += off if dst >= src */ + /* PC += off if dst < src */ + /* PC += off if dst <= src */ + /* PC += off if dst > src (signed) */ + /* PC += off if dst >= src (signed) */ + /* PC += off if dst < src (signed) */ + /* PC += off if dst <= src (signed) */ + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case 
BPF_JMP32 | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + if (off == 0) + break; + setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); + emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */ + emit_sext(ctx, MIPS_R_T5, src); /* Sign-extended src */ + emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp); + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == imm */ + /* PC += off if dst != imm */ + /* PC += off if dst & imm */ + /* PC += off if dst > imm */ + /* PC += off if dst >= imm */ + /* PC += off if dst < imm */ + /* PC += off if dst <= imm */ + /* PC += off if dst > imm (signed) */ + /* PC += off if dst >= imm (signed) */ + /* PC += off if dst < imm (signed) */ + /* PC += off if dst <= imm (signed) */ + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + if (off == 0) + break; + setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel); + emit_sext(ctx, MIPS_R_T4, dst); /* Sign-extended dst */ + if (valid_jmp_i(jmp, imm)) { + emit_jmp_i(ctx, MIPS_R_T4, imm, rel, jmp); + } else { + /* Move large immediate to register, sign-extended */ + emit_mov_i(ctx, MIPS_R_T5, imm); + emit_jmp_r(ctx, MIPS_R_T4, MIPS_R_T5, rel, jmp); + } + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == src */ + /* PC += off if dst != src */ + /* PC += off if dst & src */ + /* PC += off if dst > src */ + /* PC += off 
if dst >= src */ + /* PC += off if dst < src */ + /* PC += off if dst <= src */ + /* PC += off if dst > src (signed) */ + /* PC += off if dst >= src (signed) */ + /* PC += off if dst < src (signed) */ + /* PC += off if dst <= src (signed) */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + if (off == 0) + break; + setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel); + emit_jmp_r(ctx, dst, src, rel, jmp); + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off if dst == imm */ + /* PC += off if dst != imm */ + /* PC += off if dst & imm */ + /* PC += off if dst > imm */ + /* PC += off if dst >= imm */ + /* PC += off if dst < imm */ + /* PC += off if dst <= imm */ + /* PC += off if dst > imm (signed) */ + /* PC += off if dst >= imm (signed) */ + /* PC += off if dst < imm (signed) */ + /* PC += off if dst <= imm (signed) */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + if (off == 0) + break; + setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel); + if (valid_jmp_i(jmp, imm)) { + emit_jmp_i(ctx, dst, imm, rel, jmp); + } else { + /* Move large immediate to register */ + emit_mov_i(ctx, MIPS_R_T4, imm); + emit_jmp_r(ctx, dst, MIPS_R_T4, rel, jmp); + } + if (finish_jmp(ctx, jmp, off) < 0) + goto toofar; + break; + /* PC += off */ + case BPF_JMP | BPF_JA: + if (off == 0) + break; + if 
(emit_ja(ctx, off) < 0) + goto toofar; + break; + /* Tail call */ + case BPF_JMP | BPF_TAIL_CALL: + if (emit_tail_call(ctx) < 0) + goto invalid; + break; + /* Function call */ + case BPF_JMP | BPF_CALL: + if (emit_call(ctx, insn) < 0) + goto invalid; + break; + /* Function return */ + case BPF_JMP | BPF_EXIT: + /* + * Optimization: when last instruction is EXIT + * simply continue to epilogue. + */ + if (ctx->bpf_index == ctx->program->len - 1) + break; + if (emit_exit(ctx) < 0) + goto toofar; + break; + + default: +invalid: + pr_err_once("unknown opcode %02x\n", code); + return -EINVAL; +notyet: + pr_info_once("*** NOT YET: opcode %02x ***\n", code); + return -EFAULT; +toofar: + pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n", + ctx->bpf_index, code); + return -E2BIG; + } + return 0; +} diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c deleted file mode 100644 index 3a73e9375712..000000000000 --- a/arch/mips/net/ebpf_jit.c +++ /dev/null @@ -1,1938 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Just-In-Time compiler for eBPF filters on MIPS - * - * Copyright (c) 2017 Cavium, Inc. - * - * Based on code from: - * - * Copyright (c) 2014 Imagination Technologies Ltd. 
- * Author: Markos Chandras <markos.chandras@imgtec.com> - */ - -#include <linux/bitops.h> -#include <linux/errno.h> -#include <linux/filter.h> -#include <linux/bpf.h> -#include <linux/slab.h> -#include <asm/bitops.h> -#include <asm/byteorder.h> -#include <asm/cacheflush.h> -#include <asm/cpu-features.h> -#include <asm/isa-rev.h> -#include <asm/uasm.h> - -/* Registers used by JIT */ -#define MIPS_R_ZERO 0 -#define MIPS_R_AT 1 -#define MIPS_R_V0 2 /* BPF_R0 */ -#define MIPS_R_V1 3 -#define MIPS_R_A0 4 /* BPF_R1 */ -#define MIPS_R_A1 5 /* BPF_R2 */ -#define MIPS_R_A2 6 /* BPF_R3 */ -#define MIPS_R_A3 7 /* BPF_R4 */ -#define MIPS_R_A4 8 /* BPF_R5 */ -#define MIPS_R_T4 12 /* BPF_AX */ -#define MIPS_R_T5 13 -#define MIPS_R_T6 14 -#define MIPS_R_T7 15 -#define MIPS_R_S0 16 /* BPF_R6 */ -#define MIPS_R_S1 17 /* BPF_R7 */ -#define MIPS_R_S2 18 /* BPF_R8 */ -#define MIPS_R_S3 19 /* BPF_R9 */ -#define MIPS_R_S4 20 /* BPF_TCC */ -#define MIPS_R_S5 21 -#define MIPS_R_S6 22 -#define MIPS_R_S7 23 -#define MIPS_R_T8 24 -#define MIPS_R_T9 25 -#define MIPS_R_SP 29 -#define MIPS_R_RA 31 - -/* eBPF flags */ -#define EBPF_SAVE_S0 BIT(0) -#define EBPF_SAVE_S1 BIT(1) -#define EBPF_SAVE_S2 BIT(2) -#define EBPF_SAVE_S3 BIT(3) -#define EBPF_SAVE_S4 BIT(4) -#define EBPF_SAVE_RA BIT(5) -#define EBPF_SEEN_FP BIT(6) -#define EBPF_SEEN_TC BIT(7) -#define EBPF_TCC_IN_V1 BIT(8) - -/* - * For the mips64 ISA, we need to track the value range or type for - * each JIT register. The BPF machine requires zero extended 32-bit - * values, but the mips64 ISA requires sign extended 32-bit values. - * At each point in the BPF program we track the state of every - * register so that we can zero extend or sign extend as the BPF - * semantics require. - */ -enum reg_val_type { - /* uninitialized */ - REG_UNKNOWN, - /* not known to be 32-bit compatible. */ - REG_64BIT, - /* 32-bit compatible, no truncation needed for 64-bit ops. */ - REG_64BIT_32BIT, - /* 32-bit compatible, need truncation for 64-bit ops. 
*/ - REG_32BIT, - /* 32-bit no sign/zero extension needed. */ - REG_32BIT_POS -}; - -/* - * high bit of offsets indicates if long branch conversion done at - * this insn. - */ -#define OFFSETS_B_CONV BIT(31) - -/** - * struct jit_ctx - JIT context - * @skf: The sk_filter - * @stack_size: eBPF stack size - * @idx: Instruction index - * @flags: JIT flags - * @offsets: Instruction offsets - * @target: Memory location for the compiled filter - * @reg_val_types Packed enum reg_val_type for each register. - */ -struct jit_ctx { - const struct bpf_prog *skf; - int stack_size; - u32 idx; - u32 flags; - u32 *offsets; - u32 *target; - u64 *reg_val_types; - unsigned int long_b_conversion:1; - unsigned int gen_b_offsets:1; - unsigned int use_bbit_insns:1; -}; - -static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type) -{ - *rvt &= ~(7ull << (reg * 3)); - *rvt |= ((u64)type << (reg * 3)); -} - -static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx, - int index, int reg) -{ - return (ctx->reg_val_types[index] >> (reg * 3)) & 7; -} - -/* Simply emit the instruction if the JIT memory space has been allocated */ -#define emit_instr_long(ctx, func64, func32, ...) \ -do { \ - if ((ctx)->target != NULL) { \ - u32 *p = &(ctx)->target[ctx->idx]; \ - if (IS_ENABLED(CONFIG_64BIT)) \ - uasm_i_##func64(&p, ##__VA_ARGS__); \ - else \ - uasm_i_##func32(&p, ##__VA_ARGS__); \ - } \ - (ctx)->idx++; \ -} while (0) - -#define emit_instr(ctx, func, ...) \ - emit_instr_long(ctx, func, func, ##__VA_ARGS__) - -static unsigned int j_target(struct jit_ctx *ctx, int target_idx) -{ - unsigned long target_va, base_va; - unsigned int r; - - if (!ctx->target) - return 0; - - base_va = (unsigned long)ctx->target; - target_va = base_va + (ctx->offsets[target_idx] & ~OFFSETS_B_CONV); - - if ((base_va & ~0x0ffffffful) != (target_va & ~0x0ffffffful)) - return (unsigned int)-1; - r = target_va & 0x0ffffffful; - return r; -} - -/* Compute the immediate value for PC-relative branches. 
*/ -static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) -{ - if (!ctx->gen_b_offsets) - return 0; - - /* - * We want a pc-relative branch. tgt is the instruction offset - * we want to jump to. - - * Branch on MIPS: - * I: target_offset <- sign_extend(offset) - * I+1: PC += target_offset (delay slot) - * - * ctx->idx currently points to the branch instruction - * but the offset is added to the delay slot so we need - * to subtract 4. - */ - return (ctx->offsets[tgt] & ~OFFSETS_B_CONV) - - (ctx->idx * 4) - 4; -} - -enum which_ebpf_reg { - src_reg, - src_reg_no_fp, - dst_reg, - dst_reg_fp_ok -}; - -/* - * For eBPF, the register mapping naturally falls out of the - * requirements of eBPF and the MIPS n64 ABI. We don't maintain a - * separate frame pointer, so BPF_REG_10 relative accesses are - * adjusted to be $sp relative. - */ -static int ebpf_to_mips_reg(struct jit_ctx *ctx, - const struct bpf_insn *insn, - enum which_ebpf_reg w) -{ - int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ? - insn->src_reg : insn->dst_reg; - - switch (ebpf_reg) { - case BPF_REG_0: - return MIPS_R_V0; - case BPF_REG_1: - return MIPS_R_A0; - case BPF_REG_2: - return MIPS_R_A1; - case BPF_REG_3: - return MIPS_R_A2; - case BPF_REG_4: - return MIPS_R_A3; - case BPF_REG_5: - return MIPS_R_A4; - case BPF_REG_6: - ctx->flags |= EBPF_SAVE_S0; - return MIPS_R_S0; - case BPF_REG_7: - ctx->flags |= EBPF_SAVE_S1; - return MIPS_R_S1; - case BPF_REG_8: - ctx->flags |= EBPF_SAVE_S2; - return MIPS_R_S2; - case BPF_REG_9: - ctx->flags |= EBPF_SAVE_S3; - return MIPS_R_S3; - case BPF_REG_10: - if (w == dst_reg || w == src_reg_no_fp) - goto bad_reg; - ctx->flags |= EBPF_SEEN_FP; - /* - * Needs special handling, return something that - * cannot be clobbered just in case. 
- */ - return MIPS_R_ZERO; - case BPF_REG_AX: - return MIPS_R_T4; - default: -bad_reg: - WARN(1, "Illegal bpf reg: %d\n", ebpf_reg); - return -EINVAL; - } -} -/* - * eBPF stack frame will be something like: - * - * Entry $sp ------> +--------------------------------+ - * | $ra (optional) | - * +--------------------------------+ - * | $s0 (optional) | - * +--------------------------------+ - * | $s1 (optional) | - * +--------------------------------+ - * | $s2 (optional) | - * +--------------------------------+ - * | $s3 (optional) | - * +--------------------------------+ - * | $s4 (optional) | - * +--------------------------------+ - * | tmp-storage (if $ra saved) | - * $sp + tmp_offset --> +--------------------------------+ <--BPF_REG_10 - * | BPF_REG_10 relative storage | - * | MAX_BPF_STACK (optional) | - * | . | - * | . | - * | . | - * $sp --------> +--------------------------------+ - * - * If BPF_REG_10 is never referenced, then the MAX_BPF_STACK sized - * area is not allocated. - */ -static int gen_int_prologue(struct jit_ctx *ctx) -{ - int stack_adjust = 0; - int store_offset; - int locals_size; - - if (ctx->flags & EBPF_SAVE_RA) - /* - * If RA we are doing a function call and may need - * extra 8-byte tmp area. - */ - stack_adjust += 2 * sizeof(long); - if (ctx->flags & EBPF_SAVE_S0) - stack_adjust += sizeof(long); - if (ctx->flags & EBPF_SAVE_S1) - stack_adjust += sizeof(long); - if (ctx->flags & EBPF_SAVE_S2) - stack_adjust += sizeof(long); - if (ctx->flags & EBPF_SAVE_S3) - stack_adjust += sizeof(long); - if (ctx->flags & EBPF_SAVE_S4) - stack_adjust += sizeof(long); - - BUILD_BUG_ON(MAX_BPF_STACK & 7); - locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0; - - stack_adjust += locals_size; - - ctx->stack_size = stack_adjust; - - /* - * First instruction initializes the tail call count (TCC). - * On tail call we skip this instruction, and the TCC is - * passed in $v1 from the caller. 
- */ - emit_instr(ctx, addiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT); - if (stack_adjust) - emit_instr_long(ctx, daddiu, addiu, - MIPS_R_SP, MIPS_R_SP, -stack_adjust); - else - return 0; - - store_offset = stack_adjust - sizeof(long); - - if (ctx->flags & EBPF_SAVE_RA) { - emit_instr_long(ctx, sd, sw, - MIPS_R_RA, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S0) { - emit_instr_long(ctx, sd, sw, - MIPS_R_S0, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S1) { - emit_instr_long(ctx, sd, sw, - MIPS_R_S1, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S2) { - emit_instr_long(ctx, sd, sw, - MIPS_R_S2, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S3) { - emit_instr_long(ctx, sd, sw, - MIPS_R_S3, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S4) { - emit_instr_long(ctx, sd, sw, - MIPS_R_S4, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - - if ((ctx->flags & EBPF_SEEN_TC) && !(ctx->flags & EBPF_TCC_IN_V1)) - emit_instr_long(ctx, daddu, addu, - MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO); - - return 0; -} - -static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg) -{ - const struct bpf_prog *prog = ctx->skf; - int stack_adjust = ctx->stack_size; - int store_offset = stack_adjust - sizeof(long); - enum reg_val_type td; - int r0 = MIPS_R_V0; - - if (dest_reg == MIPS_R_RA) { - /* Don't let zero extended value escape. 
*/ - td = get_reg_val_type(ctx, prog->len, BPF_REG_0); - if (td == REG_64BIT) - emit_instr(ctx, sll, r0, r0, 0); - } - - if (ctx->flags & EBPF_SAVE_RA) { - emit_instr_long(ctx, ld, lw, - MIPS_R_RA, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S0) { - emit_instr_long(ctx, ld, lw, - MIPS_R_S0, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S1) { - emit_instr_long(ctx, ld, lw, - MIPS_R_S1, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S2) { - emit_instr_long(ctx, ld, lw, - MIPS_R_S2, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S3) { - emit_instr_long(ctx, ld, lw, - MIPS_R_S3, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - if (ctx->flags & EBPF_SAVE_S4) { - emit_instr_long(ctx, ld, lw, - MIPS_R_S4, store_offset, MIPS_R_SP); - store_offset -= sizeof(long); - } - emit_instr(ctx, jr, dest_reg); - - if (stack_adjust) - emit_instr_long(ctx, daddiu, addiu, - MIPS_R_SP, MIPS_R_SP, stack_adjust); - else - emit_instr(ctx, nop); - - return 0; -} - -static void gen_imm_to_reg(const struct bpf_insn *insn, int reg, - struct jit_ctx *ctx) -{ - if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) { - emit_instr(ctx, addiu, reg, MIPS_R_ZERO, insn->imm); - } else { - int lower = (s16)(insn->imm & 0xffff); - int upper = insn->imm - lower; - - emit_instr(ctx, lui, reg, upper >> 16); - emit_instr(ctx, addiu, reg, reg, lower); - } -} - -static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, - int idx) -{ - int upper_bound, lower_bound; - int dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - - if (dst < 0) - return dst; - - switch (BPF_OP(insn->code)) { - case BPF_MOV: - case BPF_ADD: - upper_bound = S16_MAX; - lower_bound = S16_MIN; - break; - case BPF_SUB: - upper_bound = -(int)S16_MIN; - lower_bound = -(int)S16_MAX; - break; - case BPF_AND: - case BPF_OR: - case BPF_XOR: - 
upper_bound = 0xffff; - lower_bound = 0; - break; - case BPF_RSH: - case BPF_LSH: - case BPF_ARSH: - /* Shift amounts are truncated, no need for bounds */ - upper_bound = S32_MAX; - lower_bound = S32_MIN; - break; - default: - return -EINVAL; - } - - /* - * Immediate move clobbers the register, so no sign/zero - * extension needed. - */ - if (BPF_CLASS(insn->code) == BPF_ALU64 && - BPF_OP(insn->code) != BPF_MOV && - get_reg_val_type(ctx, idx, insn->dst_reg) == REG_32BIT) - emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - /* BPF_ALU | BPF_LSH doesn't need separate sign extension */ - if (BPF_CLASS(insn->code) == BPF_ALU && - BPF_OP(insn->code) != BPF_LSH && - BPF_OP(insn->code) != BPF_MOV && - get_reg_val_type(ctx, idx, insn->dst_reg) != REG_32BIT) - emit_instr(ctx, sll, dst, dst, 0); - - if (insn->imm >= lower_bound && insn->imm <= upper_bound) { - /* single insn immediate case */ - switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) { - case BPF_ALU64 | BPF_MOV: - emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, insn->imm); - break; - case BPF_ALU64 | BPF_AND: - case BPF_ALU | BPF_AND: - emit_instr(ctx, andi, dst, dst, insn->imm); - break; - case BPF_ALU64 | BPF_OR: - case BPF_ALU | BPF_OR: - emit_instr(ctx, ori, dst, dst, insn->imm); - break; - case BPF_ALU64 | BPF_XOR: - case BPF_ALU | BPF_XOR: - emit_instr(ctx, xori, dst, dst, insn->imm); - break; - case BPF_ALU64 | BPF_ADD: - emit_instr(ctx, daddiu, dst, dst, insn->imm); - break; - case BPF_ALU64 | BPF_SUB: - emit_instr(ctx, daddiu, dst, dst, -insn->imm); - break; - case BPF_ALU64 | BPF_RSH: - emit_instr(ctx, dsrl_safe, dst, dst, insn->imm & 0x3f); - break; - case BPF_ALU | BPF_RSH: - emit_instr(ctx, srl, dst, dst, insn->imm & 0x1f); - break; - case BPF_ALU64 | BPF_LSH: - emit_instr(ctx, dsll_safe, dst, dst, insn->imm & 0x3f); - break; - case BPF_ALU | BPF_LSH: - emit_instr(ctx, sll, dst, dst, insn->imm & 0x1f); - break; - case BPF_ALU64 | BPF_ARSH: - emit_instr(ctx, dsra_safe, dst, dst, insn->imm & 0x3f); - 
break; - case BPF_ALU | BPF_ARSH: - emit_instr(ctx, sra, dst, dst, insn->imm & 0x1f); - break; - case BPF_ALU | BPF_MOV: - emit_instr(ctx, addiu, dst, MIPS_R_ZERO, insn->imm); - break; - case BPF_ALU | BPF_ADD: - emit_instr(ctx, addiu, dst, dst, insn->imm); - break; - case BPF_ALU | BPF_SUB: - emit_instr(ctx, addiu, dst, dst, -insn->imm); - break; - default: - return -EINVAL; - } - } else { - /* multi insn immediate case */ - if (BPF_OP(insn->code) == BPF_MOV) { - gen_imm_to_reg(insn, dst, ctx); - } else { - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) { - case BPF_ALU64 | BPF_AND: - case BPF_ALU | BPF_AND: - emit_instr(ctx, and, dst, dst, MIPS_R_AT); - break; - case BPF_ALU64 | BPF_OR: - case BPF_ALU | BPF_OR: - emit_instr(ctx, or, dst, dst, MIPS_R_AT); - break; - case BPF_ALU64 | BPF_XOR: - case BPF_ALU | BPF_XOR: - emit_instr(ctx, xor, dst, dst, MIPS_R_AT); - break; - case BPF_ALU64 | BPF_ADD: - emit_instr(ctx, daddu, dst, dst, MIPS_R_AT); - break; - case BPF_ALU64 | BPF_SUB: - emit_instr(ctx, dsubu, dst, dst, MIPS_R_AT); - break; - case BPF_ALU | BPF_ADD: - emit_instr(ctx, addu, dst, dst, MIPS_R_AT); - break; - case BPF_ALU | BPF_SUB: - emit_instr(ctx, subu, dst, dst, MIPS_R_AT); - break; - default: - return -EINVAL; - } - } - } - - return 0; -} - -static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) -{ - if (value >= 0xffffffffffff8000ull || value < 0x8000ull) { - emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, (int)value); - } else if (value >= 0xffffffff80000000ull || - (value < 0x80000000 && value > 0xffff)) { - emit_instr(ctx, lui, dst, (s32)(s16)(value >> 16)); - emit_instr(ctx, ori, dst, dst, (unsigned int)(value & 0xffff)); - } else { - int i; - bool seen_part = false; - int needed_shift = 0; - - for (i = 0; i < 4; i++) { - u64 part = (value >> (16 * (3 - i))) & 0xffff; - - if (seen_part && needed_shift > 0 && (part || i == 3)) { - emit_instr(ctx, dsll_safe, dst, dst, needed_shift); - 
needed_shift = 0; - } - if (part) { - if (i == 0 || (!seen_part && i < 3 && part < 0x8000)) { - emit_instr(ctx, lui, dst, (s32)(s16)part); - needed_shift = -16; - } else { - emit_instr(ctx, ori, dst, - seen_part ? dst : MIPS_R_ZERO, - (unsigned int)part); - } - seen_part = true; - } - if (seen_part) - needed_shift += 16; - } - } -} - -static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) -{ - int off, b_off; - int tcc_reg; - - ctx->flags |= EBPF_SEEN_TC; - /* - * if (index >= array->map.max_entries) - * goto out; - */ - off = offsetof(struct bpf_array, map.max_entries); - emit_instr(ctx, lwu, MIPS_R_T5, off, MIPS_R_A1); - emit_instr(ctx, sltu, MIPS_R_AT, MIPS_R_T5, MIPS_R_A2); - b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); - /* - * if (TCC-- < 0) - * goto out; - */ - /* Delay slot */ - tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4; - emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1); - b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bltz, tcc_reg, b_off); - /* - * prog = array->ptrs[index]; - * if (prog == NULL) - * goto out; - */ - /* Delay slot */ - emit_instr(ctx, dsll, MIPS_R_T8, MIPS_R_A2, 3); - emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, MIPS_R_A1); - off = offsetof(struct bpf_array, ptrs); - emit_instr(ctx, ld, MIPS_R_AT, off, MIPS_R_T8); - b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, beq, MIPS_R_AT, MIPS_R_ZERO, b_off); - /* Delay slot */ - emit_instr(ctx, nop); - - /* goto *(prog->bpf_func + 4); */ - off = offsetof(struct bpf_prog, bpf_func); - emit_instr(ctx, ld, MIPS_R_T9, off, MIPS_R_AT); - /* All systems are go... 
propagate TCC */ - emit_instr(ctx, daddu, MIPS_R_V1, MIPS_R_T5, MIPS_R_ZERO); - /* Skip first instruction (TCC initialization) */ - emit_instr(ctx, daddiu, MIPS_R_T9, MIPS_R_T9, 4); - return build_int_epilogue(ctx, MIPS_R_T9); -} - -static bool is_bad_offset(int b_off) -{ - return b_off > 0x1ffff || b_off < -0x20000; -} - -/* Returns the number of insn slots consumed. */ -static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, - int this_idx, int exit_idx) -{ - int src, dst, r, td, ts, mem_off, b_off; - bool need_swap, did_move, cmp_eq; - unsigned int target = 0; - u64 t64; - s64 t64s; - int bpf_op = BPF_OP(insn->code); - - if (IS_ENABLED(CONFIG_32BIT) && ((BPF_CLASS(insn->code) == BPF_ALU64) - || (bpf_op == BPF_DW))) - return -EINVAL; - - switch (insn->code) { - case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_OR | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_AND | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_LSH | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_RSH | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_XOR | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_ARSH | BPF_K: /* ALU64_IMM */ - case BPF_ALU64 | BPF_MOV | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_MOV | BPF_K: /* ALU32_IMM */ - case BPF_ALU | BPF_ADD | BPF_K: /* ALU32_IMM */ - case BPF_ALU | BPF_SUB | BPF_K: /* ALU32_IMM */ - case BPF_ALU | BPF_OR | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_AND | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_LSH | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_RSH | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_XOR | BPF_K: /* ALU64_IMM */ - case BPF_ALU | BPF_ARSH | BPF_K: /* ALU64_IMM */ - r = gen_imm_insn(insn, ctx, this_idx); - if (r < 0) - return r; - break; - case BPF_ALU64 | BPF_MUL | BPF_K: /* ALU64_IMM */ - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) - emit_instr(ctx, 
dinsu, dst, MIPS_R_ZERO, 32, 32); - if (insn->imm == 1) /* Mult by 1 is a nop */ - break; - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - if (MIPS_ISA_REV >= 6) { - emit_instr(ctx, dmulu, dst, dst, MIPS_R_AT); - } else { - emit_instr(ctx, dmultu, MIPS_R_AT, dst); - emit_instr(ctx, mflo, dst); - } - break; - case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */ - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) - emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - emit_instr(ctx, dsubu, dst, MIPS_R_ZERO, dst); - break; - case BPF_ALU | BPF_MUL | BPF_K: /* ALU_IMM */ - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT) { - /* sign extend */ - emit_instr(ctx, sll, dst, dst, 0); - } - if (insn->imm == 1) /* Mult by 1 is a nop */ - break; - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - if (MIPS_ISA_REV >= 6) { - emit_instr(ctx, mulu, dst, dst, MIPS_R_AT); - } else { - emit_instr(ctx, multu, dst, MIPS_R_AT); - emit_instr(ctx, mflo, dst); - } - break; - case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */ - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT) { - /* sign extend */ - emit_instr(ctx, sll, dst, dst, 0); - } - emit_instr(ctx, subu, dst, MIPS_R_ZERO, dst); - break; - case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */ - case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */ - if (insn->imm == 0) - return -EINVAL; - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT) - /* sign extend */ - emit_instr(ctx, sll, dst, dst, 0); - if (insn->imm == 1) { - /* div by 1 is a nop, mod by 1 is zero */ - if (bpf_op == BPF_MOD) - emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO); - break; - } - gen_imm_to_reg(insn, MIPS_R_AT, 
ctx); - if (MIPS_ISA_REV >= 6) { - if (bpf_op == BPF_DIV) - emit_instr(ctx, divu_r6, dst, dst, MIPS_R_AT); - else - emit_instr(ctx, modu, dst, dst, MIPS_R_AT); - break; - } - emit_instr(ctx, divu, dst, MIPS_R_AT); - if (bpf_op == BPF_DIV) - emit_instr(ctx, mflo, dst); - else - emit_instr(ctx, mfhi, dst); - break; - case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */ - case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */ - if (insn->imm == 0) - return -EINVAL; - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) - emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - if (insn->imm == 1) { - /* div by 1 is a nop, mod by 1 is zero */ - if (bpf_op == BPF_MOD) - emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO); - break; - } - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - if (MIPS_ISA_REV >= 6) { - if (bpf_op == BPF_DIV) - emit_instr(ctx, ddivu_r6, dst, dst, MIPS_R_AT); - else - emit_instr(ctx, modu, dst, dst, MIPS_R_AT); - break; - } - emit_instr(ctx, ddivu, dst, MIPS_R_AT); - if (bpf_op == BPF_DIV) - emit_instr(ctx, mflo, dst); - else - emit_instr(ctx, mfhi, dst); - break; - case BPF_ALU64 | BPF_MOV | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_ADD | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_SUB | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_XOR | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_OR | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_AND | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_MUL | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_DIV | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_MOD | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_LSH | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_RSH | BPF_X: /* ALU64_REG */ - case BPF_ALU64 | BPF_ARSH | BPF_X: /* ALU64_REG */ - src = ebpf_to_mips_reg(ctx, insn, src_reg); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (src < 0 || dst < 0) - return -EINVAL; - if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) - 
emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - did_move = false; - if (insn->src_reg == BPF_REG_10) { - if (bpf_op == BPF_MOV) { - emit_instr(ctx, daddiu, dst, MIPS_R_SP, MAX_BPF_STACK); - did_move = true; - } else { - emit_instr(ctx, daddiu, MIPS_R_AT, MIPS_R_SP, MAX_BPF_STACK); - src = MIPS_R_AT; - } - } else if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { - int tmp_reg = MIPS_R_AT; - - if (bpf_op == BPF_MOV) { - tmp_reg = dst; - did_move = true; - } - emit_instr(ctx, daddu, tmp_reg, src, MIPS_R_ZERO); - emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32); - src = MIPS_R_AT; - } - switch (bpf_op) { - case BPF_MOV: - if (!did_move) - emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO); - break; - case BPF_ADD: - emit_instr(ctx, daddu, dst, dst, src); - break; - case BPF_SUB: - emit_instr(ctx, dsubu, dst, dst, src); - break; - case BPF_XOR: - emit_instr(ctx, xor, dst, dst, src); - break; - case BPF_OR: - emit_instr(ctx, or, dst, dst, src); - break; - case BPF_AND: - emit_instr(ctx, and, dst, dst, src); - break; - case BPF_MUL: - if (MIPS_ISA_REV >= 6) { - emit_instr(ctx, dmulu, dst, dst, src); - } else { - emit_instr(ctx, dmultu, dst, src); - emit_instr(ctx, mflo, dst); - } - break; - case BPF_DIV: - case BPF_MOD: - if (MIPS_ISA_REV >= 6) { - if (bpf_op == BPF_DIV) - emit_instr(ctx, ddivu_r6, - dst, dst, src); - else - emit_instr(ctx, modu, dst, dst, src); - break; - } - emit_instr(ctx, ddivu, dst, src); - if (bpf_op == BPF_DIV) - emit_instr(ctx, mflo, dst); - else - emit_instr(ctx, mfhi, dst); - break; - case BPF_LSH: - emit_instr(ctx, dsllv, dst, dst, src); - break; - case BPF_RSH: - emit_instr(ctx, dsrlv, dst, dst, src); - break; - case BPF_ARSH: - emit_instr(ctx, dsrav, dst, dst, src); - break; - default: - pr_err("ALU64_REG NOT HANDLED\n"); - return -EINVAL; - } - break; - case BPF_ALU | BPF_MOV | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_ADD | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_SUB | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_XOR | 
BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_OR | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_AND | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_MUL | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_DIV | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_MOD | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_LSH | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_RSH | BPF_X: /* ALU_REG */ - case BPF_ALU | BPF_ARSH | BPF_X: /* ALU_REG */ - src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (src < 0 || dst < 0) - return -EINVAL; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if (td == REG_64BIT) { - /* sign extend */ - emit_instr(ctx, sll, dst, dst, 0); - } - did_move = false; - ts = get_reg_val_type(ctx, this_idx, insn->src_reg); - if (ts == REG_64BIT) { - int tmp_reg = MIPS_R_AT; - - if (bpf_op == BPF_MOV) { - tmp_reg = dst; - did_move = true; - } - /* sign extend */ - emit_instr(ctx, sll, tmp_reg, src, 0); - src = MIPS_R_AT; - } - switch (bpf_op) { - case BPF_MOV: - if (!did_move) - emit_instr(ctx, addu, dst, src, MIPS_R_ZERO); - break; - case BPF_ADD: - emit_instr(ctx, addu, dst, dst, src); - break; - case BPF_SUB: - emit_instr(ctx, subu, dst, dst, src); - break; - case BPF_XOR: - emit_instr(ctx, xor, dst, dst, src); - break; - case BPF_OR: - emit_instr(ctx, or, dst, dst, src); - break; - case BPF_AND: - emit_instr(ctx, and, dst, dst, src); - break; - case BPF_MUL: - emit_instr(ctx, mul, dst, dst, src); - break; - case BPF_DIV: - case BPF_MOD: - if (MIPS_ISA_REV >= 6) { - if (bpf_op == BPF_DIV) - emit_instr(ctx, divu_r6, dst, dst, src); - else - emit_instr(ctx, modu, dst, dst, src); - break; - } - emit_instr(ctx, divu, dst, src); - if (bpf_op == BPF_DIV) - emit_instr(ctx, mflo, dst); - else - emit_instr(ctx, mfhi, dst); - break; - case BPF_LSH: - emit_instr(ctx, sllv, dst, dst, src); - break; - case BPF_RSH: - emit_instr(ctx, srlv, dst, dst, src); - break; - case BPF_ARSH: - emit_instr(ctx, srav, dst, dst, src); - break; - default: - 
pr_err("ALU_REG NOT HANDLED\n"); - return -EINVAL; - } - break; - case BPF_JMP | BPF_EXIT: - if (this_idx + 1 < exit_idx) { - b_off = b_imm(exit_idx, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); - emit_instr(ctx, nop); - } - break; - case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */ - case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */ - cmp_eq = (bpf_op == BPF_JEQ); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); - if (dst < 0) - return dst; - if (insn->imm == 0) { - src = MIPS_R_ZERO; - } else { - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - src = MIPS_R_AT; - } - goto jeq_common; - case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */ - case BPF_JMP | BPF_JNE | BPF_X: - case BPF_JMP | BPF_JSLT | BPF_X: - case BPF_JMP | BPF_JSLE | BPF_X: - case BPF_JMP | BPF_JSGT | BPF_X: - case BPF_JMP | BPF_JSGE | BPF_X: - case BPF_JMP | BPF_JLT | BPF_X: - case BPF_JMP | BPF_JLE | BPF_X: - case BPF_JMP | BPF_JGT | BPF_X: - case BPF_JMP | BPF_JGE | BPF_X: - case BPF_JMP | BPF_JSET | BPF_X: - src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (src < 0 || dst < 0) - return -EINVAL; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - ts = get_reg_val_type(ctx, this_idx, insn->src_reg); - if (td == REG_32BIT && ts != REG_32BIT) { - emit_instr(ctx, sll, MIPS_R_AT, src, 0); - src = MIPS_R_AT; - } else if (ts == REG_32BIT && td != REG_32BIT) { - emit_instr(ctx, sll, MIPS_R_AT, dst, 0); - dst = MIPS_R_AT; - } - if (bpf_op == BPF_JSET) { - emit_instr(ctx, and, MIPS_R_AT, dst, src); - cmp_eq = false; - dst = MIPS_R_AT; - src = MIPS_R_ZERO; - } else if (bpf_op == BPF_JSGT || bpf_op == BPF_JSLE) { - emit_instr(ctx, dsubu, MIPS_R_AT, dst, src); - if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { - b_off = b_imm(exit_idx, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - if (bpf_op == BPF_JSGT) - emit_instr(ctx, blez, MIPS_R_AT, b_off); - else - emit_instr(ctx, bgtz, 
MIPS_R_AT, b_off); - emit_instr(ctx, nop); - return 2; /* We consumed the exit. */ - } - b_off = b_imm(this_idx + insn->off + 1, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - if (bpf_op == BPF_JSGT) - emit_instr(ctx, bgtz, MIPS_R_AT, b_off); - else - emit_instr(ctx, blez, MIPS_R_AT, b_off); - emit_instr(ctx, nop); - break; - } else if (bpf_op == BPF_JSGE || bpf_op == BPF_JSLT) { - emit_instr(ctx, slt, MIPS_R_AT, dst, src); - cmp_eq = bpf_op == BPF_JSGE; - dst = MIPS_R_AT; - src = MIPS_R_ZERO; - } else if (bpf_op == BPF_JGT || bpf_op == BPF_JLE) { - /* dst or src could be AT */ - emit_instr(ctx, dsubu, MIPS_R_T8, dst, src); - emit_instr(ctx, sltu, MIPS_R_AT, dst, src); - /* SP known to be non-zero, movz becomes boolean not */ - if (MIPS_ISA_REV >= 6) { - emit_instr(ctx, seleqz, MIPS_R_T9, - MIPS_R_SP, MIPS_R_T8); - } else { - emit_instr(ctx, movz, MIPS_R_T9, - MIPS_R_SP, MIPS_R_T8); - emit_instr(ctx, movn, MIPS_R_T9, - MIPS_R_ZERO, MIPS_R_T8); - } - emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT); - cmp_eq = bpf_op == BPF_JGT; - dst = MIPS_R_AT; - src = MIPS_R_ZERO; - } else if (bpf_op == BPF_JGE || bpf_op == BPF_JLT) { - emit_instr(ctx, sltu, MIPS_R_AT, dst, src); - cmp_eq = bpf_op == BPF_JGE; - dst = MIPS_R_AT; - src = MIPS_R_ZERO; - } else { /* JNE/JEQ case */ - cmp_eq = (bpf_op == BPF_JEQ); - } -jeq_common: - /* - * If the next insn is EXIT and we are jumping arround - * only it, invert the sense of the compare and - * conditionally jump to the exit. Poor man's branch - * chaining. 
- */ - if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { - b_off = b_imm(exit_idx, ctx); - if (is_bad_offset(b_off)) { - target = j_target(ctx, exit_idx); - if (target == (unsigned int)-1) - return -E2BIG; - cmp_eq = !cmp_eq; - b_off = 4 * 3; - if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { - ctx->offsets[this_idx] |= OFFSETS_B_CONV; - ctx->long_b_conversion = 1; - } - } - - if (cmp_eq) - emit_instr(ctx, bne, dst, src, b_off); - else - emit_instr(ctx, beq, dst, src, b_off); - emit_instr(ctx, nop); - if (ctx->offsets[this_idx] & OFFSETS_B_CONV) { - emit_instr(ctx, j, target); - emit_instr(ctx, nop); - } - return 2; /* We consumed the exit. */ - } - b_off = b_imm(this_idx + insn->off + 1, ctx); - if (is_bad_offset(b_off)) { - target = j_target(ctx, this_idx + insn->off + 1); - if (target == (unsigned int)-1) - return -E2BIG; - cmp_eq = !cmp_eq; - b_off = 4 * 3; - if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { - ctx->offsets[this_idx] |= OFFSETS_B_CONV; - ctx->long_b_conversion = 1; - } - } - - if (cmp_eq) - emit_instr(ctx, beq, dst, src, b_off); - else - emit_instr(ctx, bne, dst, src, b_off); - emit_instr(ctx, nop); - if (ctx->offsets[this_idx] & OFFSETS_B_CONV) { - emit_instr(ctx, j, target); - emit_instr(ctx, nop); - } - break; - case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */ - case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */ - case BPF_JMP | BPF_JSLT | BPF_K: /* JMP_IMM */ - case BPF_JMP | BPF_JSLE | BPF_K: /* JMP_IMM */ - cmp_eq = (bpf_op == BPF_JSGE); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); - if (dst < 0) - return dst; - - if (insn->imm == 0) { - if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { - b_off = b_imm(exit_idx, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - switch (bpf_op) { - case BPF_JSGT: - emit_instr(ctx, blez, dst, b_off); - break; - case BPF_JSGE: - emit_instr(ctx, bltz, dst, b_off); - break; - case BPF_JSLT: - emit_instr(ctx, bgez, dst, b_off); - break; - case BPF_JSLE: - emit_instr(ctx, 
bgtz, dst, b_off); - break; - } - emit_instr(ctx, nop); - return 2; /* We consumed the exit. */ - } - b_off = b_imm(this_idx + insn->off + 1, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - switch (bpf_op) { - case BPF_JSGT: - emit_instr(ctx, bgtz, dst, b_off); - break; - case BPF_JSGE: - emit_instr(ctx, bgez, dst, b_off); - break; - case BPF_JSLT: - emit_instr(ctx, bltz, dst, b_off); - break; - case BPF_JSLE: - emit_instr(ctx, blez, dst, b_off); - break; - } - emit_instr(ctx, nop); - break; - } - /* - * only "LT" compare available, so we must use imm + 1 - * to generate "GT" and imm -1 to generate LE - */ - if (bpf_op == BPF_JSGT) - t64s = insn->imm + 1; - else if (bpf_op == BPF_JSLE) - t64s = insn->imm + 1; - else - t64s = insn->imm; - - cmp_eq = bpf_op == BPF_JSGT || bpf_op == BPF_JSGE; - if (t64s >= S16_MIN && t64s <= S16_MAX) { - emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s); - src = MIPS_R_AT; - dst = MIPS_R_ZERO; - goto jeq_common; - } - emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s); - emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT); - src = MIPS_R_AT; - dst = MIPS_R_ZERO; - goto jeq_common; - - case BPF_JMP | BPF_JGT | BPF_K: - case BPF_JMP | BPF_JGE | BPF_K: - case BPF_JMP | BPF_JLT | BPF_K: - case BPF_JMP | BPF_JLE | BPF_K: - cmp_eq = (bpf_op == BPF_JGE); - dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); - if (dst < 0) - return dst; - /* - * only "LT" compare available, so we must use imm + 1 - * to generate "GT" and imm -1 to generate LE - */ - if (bpf_op == BPF_JGT) - t64s = (u64)(u32)(insn->imm) + 1; - else if (bpf_op == BPF_JLE) - t64s = (u64)(u32)(insn->imm) + 1; - else - t64s = (u64)(u32)(insn->imm); - - cmp_eq = bpf_op == BPF_JGT || bpf_op == BPF_JGE; - - emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s); - emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT); - src = MIPS_R_AT; - dst = MIPS_R_ZERO; - goto jeq_common; - - case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */ - dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); - if (dst < 0) - return dst; 
- - if (ctx->use_bbit_insns && hweight32((u32)insn->imm) == 1) { - if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { - b_off = b_imm(exit_idx, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_instr(ctx, bbit0, dst, ffs((u32)insn->imm) - 1, b_off); - emit_instr(ctx, nop); - return 2; /* We consumed the exit. */ - } - b_off = b_imm(this_idx + insn->off + 1, ctx); - if (is_bad_offset(b_off)) - return -E2BIG; - emit_instr(ctx, bbit1, dst, ffs((u32)insn->imm) - 1, b_off); - emit_instr(ctx, nop); - break; - } - t64 = (u32)insn->imm; - emit_const_to_reg(ctx, MIPS_R_AT, t64); - emit_instr(ctx, and, MIPS_R_AT, dst, MIPS_R_AT); - src = MIPS_R_AT; - dst = MIPS_R_ZERO; - cmp_eq = false; - goto jeq_common; - - case BPF_JMP | BPF_JA: - /* - * Prefer relative branch for easier debugging, but - * fall back if needed. - */ - b_off = b_imm(this_idx + insn->off + 1, ctx); - if (is_bad_offset(b_off)) { - target = j_target(ctx, this_idx + insn->off + 1); - if (target == (unsigned int)-1) - return -E2BIG; - emit_instr(ctx, j, target); - } else { - emit_instr(ctx, b, b_off); - } - emit_instr(ctx, nop); - break; - case BPF_LD | BPF_DW | BPF_IMM: - if (insn->src_reg != 0) - return -EINVAL; - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - t64 = ((u64)(u32)insn->imm) | ((u64)(insn + 1)->imm << 32); - emit_const_to_reg(ctx, dst, t64); - return 2; /* Double slot insn */ - - case BPF_JMP | BPF_CALL: - ctx->flags |= EBPF_SAVE_RA; - t64s = (s64)insn->imm + (long)__bpf_call_base; - emit_const_to_reg(ctx, MIPS_R_T9, (u64)t64s); - emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); - /* delay slot */ - emit_instr(ctx, nop); - break; - - case BPF_JMP | BPF_TAIL_CALL: - if (emit_bpf_tail_call(ctx, this_idx)) - return -EINVAL; - break; - - case BPF_ALU | BPF_END | BPF_FROM_BE: - case BPF_ALU | BPF_END | BPF_FROM_LE: - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - td = get_reg_val_type(ctx, this_idx, insn->dst_reg); - if 
(insn->imm == 64 && td == REG_32BIT) - emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); - - if (insn->imm != 64 && td == REG_64BIT) { - /* sign extend */ - emit_instr(ctx, sll, dst, dst, 0); - } - -#ifdef __BIG_ENDIAN - need_swap = (BPF_SRC(insn->code) == BPF_FROM_LE); -#else - need_swap = (BPF_SRC(insn->code) == BPF_FROM_BE); -#endif - if (insn->imm == 16) { - if (need_swap) - emit_instr(ctx, wsbh, dst, dst); - emit_instr(ctx, andi, dst, dst, 0xffff); - } else if (insn->imm == 32) { - if (need_swap) { - emit_instr(ctx, wsbh, dst, dst); - emit_instr(ctx, rotr, dst, dst, 16); - } - } else { /* 64-bit*/ - if (need_swap) { - emit_instr(ctx, dsbh, dst, dst); - emit_instr(ctx, dshd, dst, dst); - } - } - break; - - case BPF_ST | BPF_NOSPEC: /* speculation barrier */ - break; - - case BPF_ST | BPF_B | BPF_MEM: - case BPF_ST | BPF_H | BPF_MEM: - case BPF_ST | BPF_W | BPF_MEM: - case BPF_ST | BPF_DW | BPF_MEM: - if (insn->dst_reg == BPF_REG_10) { - ctx->flags |= EBPF_SEEN_FP; - dst = MIPS_R_SP; - mem_off = insn->off + MAX_BPF_STACK; - } else { - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - mem_off = insn->off; - } - gen_imm_to_reg(insn, MIPS_R_AT, ctx); - switch (BPF_SIZE(insn->code)) { - case BPF_B: - emit_instr(ctx, sb, MIPS_R_AT, mem_off, dst); - break; - case BPF_H: - emit_instr(ctx, sh, MIPS_R_AT, mem_off, dst); - break; - case BPF_W: - emit_instr(ctx, sw, MIPS_R_AT, mem_off, dst); - break; - case BPF_DW: - emit_instr(ctx, sd, MIPS_R_AT, mem_off, dst); - break; - } - break; - - case BPF_LDX | BPF_B | BPF_MEM: - case BPF_LDX | BPF_H | BPF_MEM: - case BPF_LDX | BPF_W | BPF_MEM: - case BPF_LDX | BPF_DW | BPF_MEM: - if (insn->src_reg == BPF_REG_10) { - ctx->flags |= EBPF_SEEN_FP; - src = MIPS_R_SP; - mem_off = insn->off + MAX_BPF_STACK; - } else { - src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); - if (src < 0) - return src; - mem_off = insn->off; - } - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - switch 
(BPF_SIZE(insn->code)) { - case BPF_B: - emit_instr(ctx, lbu, dst, mem_off, src); - break; - case BPF_H: - emit_instr(ctx, lhu, dst, mem_off, src); - break; - case BPF_W: - emit_instr(ctx, lw, dst, mem_off, src); - break; - case BPF_DW: - emit_instr(ctx, ld, dst, mem_off, src); - break; - } - break; - - case BPF_STX | BPF_B | BPF_MEM: - case BPF_STX | BPF_H | BPF_MEM: - case BPF_STX | BPF_W | BPF_MEM: - case BPF_STX | BPF_DW | BPF_MEM: - case BPF_STX | BPF_W | BPF_ATOMIC: - case BPF_STX | BPF_DW | BPF_ATOMIC: - if (insn->dst_reg == BPF_REG_10) { - ctx->flags |= EBPF_SEEN_FP; - dst = MIPS_R_SP; - mem_off = insn->off + MAX_BPF_STACK; - } else { - dst = ebpf_to_mips_reg(ctx, insn, dst_reg); - if (dst < 0) - return dst; - mem_off = insn->off; - } - src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); - if (src < 0) - return src; - if (BPF_MODE(insn->code) == BPF_ATOMIC) { - if (insn->imm != BPF_ADD) { - pr_err("ATOMIC OP %02x NOT HANDLED\n", insn->imm); - return -EINVAL; - } - - /* - * If mem_off does not fit within the 9 bit ll/sc - * instruction immediate field, use a temp reg. 
- */ - if (MIPS_ISA_REV >= 6 && - (mem_off >= BIT(8) || mem_off < -BIT(8))) { - emit_instr(ctx, daddiu, MIPS_R_T6, - dst, mem_off); - mem_off = 0; - dst = MIPS_R_T6; - } - switch (BPF_SIZE(insn->code)) { - case BPF_W: - if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { - emit_instr(ctx, sll, MIPS_R_AT, src, 0); - src = MIPS_R_AT; - } - emit_instr(ctx, ll, MIPS_R_T8, mem_off, dst); - emit_instr(ctx, addu, MIPS_R_T8, MIPS_R_T8, src); - emit_instr(ctx, sc, MIPS_R_T8, mem_off, dst); - /* - * On failure back up to LL (-4 - * instructions of 4 bytes each - */ - emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4); - emit_instr(ctx, nop); - break; - case BPF_DW: - if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { - emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO); - emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32); - src = MIPS_R_AT; - } - emit_instr(ctx, lld, MIPS_R_T8, mem_off, dst); - emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, src); - emit_instr(ctx, scd, MIPS_R_T8, mem_off, dst); - emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4); - emit_instr(ctx, nop); - break; - } - } else { /* BPF_MEM */ - switch (BPF_SIZE(insn->code)) { - case BPF_B: - emit_instr(ctx, sb, src, mem_off, dst); - break; - case BPF_H: - emit_instr(ctx, sh, src, mem_off, dst); - break; - case BPF_W: - emit_instr(ctx, sw, src, mem_off, dst); - break; - case BPF_DW: - if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { - emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO); - emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32); - src = MIPS_R_AT; - } - emit_instr(ctx, sd, src, mem_off, dst); - break; - } - } - break; - - default: - pr_err("NOT HANDLED %d - (%02x)\n", - this_idx, (unsigned int)insn->code); - return -EINVAL; - } - return 1; -} - -#define RVT_VISITED_MASK 0xc000000000000000ull -#define RVT_FALL_THROUGH 0x4000000000000000ull -#define RVT_BRANCH_TAKEN 0x8000000000000000ull -#define RVT_DONE (RVT_FALL_THROUGH | 
RVT_BRANCH_TAKEN) - -static int build_int_body(struct jit_ctx *ctx) -{ - const struct bpf_prog *prog = ctx->skf; - const struct bpf_insn *insn; - int i, r; - - for (i = 0; i < prog->len; ) { - insn = prog->insnsi + i; - if ((ctx->reg_val_types[i] & RVT_VISITED_MASK) == 0) { - /* dead instruction, don't emit it. */ - i++; - continue; - } - - if (ctx->target == NULL) - ctx->offsets[i] = (ctx->offsets[i] & OFFSETS_B_CONV) | (ctx->idx * 4); - - r = build_one_insn(insn, ctx, i, prog->len); - if (r < 0) - return r; - i += r; - } - /* epilogue offset */ - if (ctx->target == NULL) - ctx->offsets[i] = ctx->idx * 4; - - /* - * All exits have an offset of the epilogue, some offsets may - * not have been set due to banch-around threading, so set - * them now. - */ - if (ctx->target == NULL) - for (i = 0; i < prog->len; i++) { - insn = prog->insnsi + i; - if (insn->code == (BPF_JMP | BPF_EXIT)) - ctx->offsets[i] = ctx->idx * 4; - } - return 0; -} - -/* return the last idx processed, or negative for error */ -static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt, - int start_idx, bool follow_taken) -{ - const struct bpf_prog *prog = ctx->skf; - const struct bpf_insn *insn; - u64 exit_rvt = initial_rvt; - u64 *rvt = ctx->reg_val_types; - int idx; - int reg; - - for (idx = start_idx; idx < prog->len; idx++) { - rvt[idx] = (rvt[idx] & RVT_VISITED_MASK) | exit_rvt; - insn = prog->insnsi + idx; - switch (BPF_CLASS(insn->code)) { - case BPF_ALU: - switch (BPF_OP(insn->code)) { - case BPF_ADD: - case BPF_SUB: - case BPF_MUL: - case BPF_DIV: - case BPF_OR: - case BPF_AND: - case BPF_LSH: - case BPF_RSH: - case BPF_NEG: - case BPF_MOD: - case BPF_XOR: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - break; - case BPF_MOV: - if (BPF_SRC(insn->code)) { - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - } else { - /* IMM to REG move*/ - if (insn->imm >= 0) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - else - set_reg_val_type(&exit_rvt, 
insn->dst_reg, REG_32BIT); - } - break; - case BPF_END: - if (insn->imm == 64) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - else if (insn->imm == 32) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - else /* insn->imm == 16 */ - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - break; - } - rvt[idx] |= RVT_DONE; - break; - case BPF_ALU64: - switch (BPF_OP(insn->code)) { - case BPF_MOV: - if (BPF_SRC(insn->code)) { - /* REG to REG move*/ - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - } else { - /* IMM to REG move*/ - if (insn->imm >= 0) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - else - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT); - } - break; - default: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - } - rvt[idx] |= RVT_DONE; - break; - case BPF_LD: - switch (BPF_SIZE(insn->code)) { - case BPF_DW: - if (BPF_MODE(insn->code) == BPF_IMM) { - s64 val; - - val = (s64)((u32)insn->imm | ((u64)(insn + 1)->imm << 32)); - if (val > 0 && val <= S32_MAX) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - else if (val >= S32_MIN && val <= S32_MAX) - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT); - else - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - rvt[idx] |= RVT_DONE; - idx++; - } else { - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - } - break; - case BPF_B: - case BPF_H: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - break; - case BPF_W: - if (BPF_MODE(insn->code) == BPF_IMM) - set_reg_val_type(&exit_rvt, insn->dst_reg, - insn->imm >= 0 ? 
REG_32BIT_POS : REG_32BIT); - else - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - break; - } - rvt[idx] |= RVT_DONE; - break; - case BPF_LDX: - switch (BPF_SIZE(insn->code)) { - case BPF_DW: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); - break; - case BPF_B: - case BPF_H: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); - break; - case BPF_W: - set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); - break; - } - rvt[idx] |= RVT_DONE; - break; - case BPF_JMP: - switch (BPF_OP(insn->code)) { - case BPF_EXIT: - rvt[idx] = RVT_DONE | exit_rvt; - rvt[prog->len] = exit_rvt; - return idx; - case BPF_JA: - rvt[idx] |= RVT_DONE; - idx += insn->off; - break; - case BPF_JEQ: - case BPF_JGT: - case BPF_JGE: - case BPF_JLT: - case BPF_JLE: - case BPF_JSET: - case BPF_JNE: - case BPF_JSGT: - case BPF_JSGE: - case BPF_JSLT: - case BPF_JSLE: - if (follow_taken) { - rvt[idx] |= RVT_BRANCH_TAKEN; - idx += insn->off; - follow_taken = false; - } else { - rvt[idx] |= RVT_FALL_THROUGH; - } - break; - case BPF_CALL: - set_reg_val_type(&exit_rvt, BPF_REG_0, REG_64BIT); - /* Upon call return, argument registers are clobbered. */ - for (reg = BPF_REG_0; reg <= BPF_REG_5; reg++) - set_reg_val_type(&exit_rvt, reg, REG_64BIT); - - rvt[idx] |= RVT_DONE; - break; - default: - WARN(1, "Unhandled BPF_JMP case.\n"); - rvt[idx] |= RVT_DONE; - break; - } - break; - default: - rvt[idx] |= RVT_DONE; - break; - } - } - return idx; -} - -/* - * Track the value range (i.e. 32-bit vs. 64-bit) of each register at - * each eBPF insn. This allows unneeded sign and zero extension - * operations to be omitted. - * - * Doesn't handle yet confluence of control paths with conflicting - * ranges, but it is good enough for most sane code. - */ -static int reg_val_propagate(struct jit_ctx *ctx) -{ - const struct bpf_prog *prog = ctx->skf; - u64 exit_rvt; - int reg; - int i; - - /* - * 11 registers * 3 bits/reg leaves top bits free for other - * uses. 
Bit-62..63 used to see if we have visited an insn. - */ - exit_rvt = 0; - - /* Upon entry, argument registers are 64-bit. */ - for (reg = BPF_REG_1; reg <= BPF_REG_5; reg++) - set_reg_val_type(&exit_rvt, reg, REG_64BIT); - - /* - * First follow all conditional branches on the fall-through - * edge of control flow.. - */ - reg_val_propagate_range(ctx, exit_rvt, 0, false); -restart_search: - /* - * Then repeatedly find the first conditional branch where - * both edges of control flow have not been taken, and follow - * the branch taken edge. We will end up restarting the - * search once per conditional branch insn. - */ - for (i = 0; i < prog->len; i++) { - u64 rvt = ctx->reg_val_types[i]; - - if ((rvt & RVT_VISITED_MASK) == RVT_DONE || - (rvt & RVT_VISITED_MASK) == 0) - continue; - if ((rvt & RVT_VISITED_MASK) == RVT_FALL_THROUGH) { - reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, true); - } else { /* RVT_BRANCH_TAKEN */ - WARN(1, "Unexpected RVT_BRANCH_TAKEN case.\n"); - reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, false); - } - goto restart_search; - } - /* - * Eventually all conditional branches have been followed on - * both branches and we are done. Any insn that has not been - * visited at this point is dead. - */ - - return 0; -} - -static void jit_fill_hole(void *area, unsigned int size) -{ - u32 *p; - - /* We are guaranteed to have aligned memory. */ - for (p = area; size >= sizeof(u32); size -= sizeof(u32)) - uasm_i_break(&p, BRK_BUG); /* Increments p */ -} - -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) -{ - struct bpf_prog *orig_prog = prog; - bool tmp_blinded = false; - struct bpf_prog *tmp; - struct bpf_binary_header *header = NULL; - struct jit_ctx ctx; - unsigned int image_size; - u8 *image_ptr; - - if (!prog->jit_requested) - return prog; - - tmp = bpf_jit_blind_constants(prog); - /* If blinding was requested and we failed during blinding, - * we must fall back to the interpreter. 
- */ - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } - - memset(&ctx, 0, sizeof(ctx)); - - preempt_disable(); - switch (current_cpu_type()) { - case CPU_CAVIUM_OCTEON: - case CPU_CAVIUM_OCTEON_PLUS: - case CPU_CAVIUM_OCTEON2: - case CPU_CAVIUM_OCTEON3: - ctx.use_bbit_insns = 1; - break; - default: - ctx.use_bbit_insns = 0; - } - preempt_enable(); - - ctx.offsets = kcalloc(prog->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); - if (ctx.offsets == NULL) - goto out_err; - - ctx.reg_val_types = kcalloc(prog->len + 1, sizeof(*ctx.reg_val_types), GFP_KERNEL); - if (ctx.reg_val_types == NULL) - goto out_err; - - ctx.skf = prog; - - if (reg_val_propagate(&ctx)) - goto out_err; - - /* - * First pass discovers used resources and instruction offsets - * assuming short branches are used. - */ - if (build_int_body(&ctx)) - goto out_err; - - /* - * If no calls are made (EBPF_SAVE_RA), then tail call count - * in $v1, else we must save in n$s4. - */ - if (ctx.flags & EBPF_SEEN_TC) { - if (ctx.flags & EBPF_SAVE_RA) - ctx.flags |= EBPF_SAVE_S4; - else - ctx.flags |= EBPF_TCC_IN_V1; - } - - /* - * Second pass generates offsets, if any branches are out of - * range a jump-around long sequence is generated, and we have - * to try again from the beginning to generate the new - * offsets. This is done until no additional conversions are - * necessary. 
- */ - do { - ctx.idx = 0; - ctx.gen_b_offsets = 1; - ctx.long_b_conversion = 0; - if (gen_int_prologue(&ctx)) - goto out_err; - if (build_int_body(&ctx)) - goto out_err; - if (build_int_epilogue(&ctx, MIPS_R_RA)) - goto out_err; - } while (ctx.long_b_conversion); - - image_size = 4 * ctx.idx; - - header = bpf_jit_binary_alloc(image_size, &image_ptr, - sizeof(u32), jit_fill_hole); - if (header == NULL) - goto out_err; - - ctx.target = (u32 *)image_ptr; - - /* Third pass generates the code */ - ctx.idx = 0; - if (gen_int_prologue(&ctx)) - goto out_err; - if (build_int_body(&ctx)) - goto out_err; - if (build_int_epilogue(&ctx, MIPS_R_RA)) - goto out_err; - - /* Update the icache */ - flush_icache_range((unsigned long)ctx.target, - (unsigned long)&ctx.target[ctx.idx]); - - if (bpf_jit_enable > 1) - /* Dump JIT code */ - bpf_jit_dump(prog->len, image_size, 2, ctx.target); - - bpf_jit_binary_lock_ro(header); - prog->bpf_func = (void *)ctx.target; - prog->jited = 1; - prog->jited_len = image_size; -out_normal: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? 
- tmp : orig_prog); - kfree(ctx.offsets); - kfree(ctx.reg_val_types); - - return prog; - -out_err: - prog = orig_prog; - if (header) - bpf_jit_binary_free(header); - goto out_normal; -} diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c index 2fc729422151..18bf338303b6 100644 --- a/arch/riscv/mm/extable.c +++ b/arch/riscv/mm/extable.c @@ -11,14 +11,23 @@ #include <linux/module.h> #include <linux/uaccess.h> +#ifdef CONFIG_BPF_JIT +int rv_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); +#endif + int fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->epc); - if (fixup) { - regs->epc = fixup->fixup; - return 1; - } - return 0; + if (!fixup) + return 0; + +#ifdef CONFIG_BPF_JIT + if (regs->epc >= BPF_JIT_REGION_START && regs->epc < BPF_JIT_REGION_END) + return rv_bpf_fixup_exception(fixup, regs); +#endif + + regs->epc = fixup->fixup; + return 1; } diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index 75c1e9996867..f42d9cd3b64d 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -71,6 +71,7 @@ struct rv_jit_context { int ninsns; int epilogue_offset; int *offset; /* BPF to RV */ + int nexentries; unsigned long flags; int stack_size; }; diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 3af4131c22c7..2ca345c7b0bf 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -5,6 +5,7 @@ * */ +#include <linux/bitfield.h> #include <linux/bpf.h> #include <linux/filter.h> #include "bpf_jit.h" @@ -27,6 +28,21 @@ static const int regmap[] = { [BPF_REG_AX] = RV_REG_T0, }; +static const int pt_regmap[] = { + [RV_REG_A0] = offsetof(struct pt_regs, a0), + [RV_REG_A1] = offsetof(struct pt_regs, a1), + [RV_REG_A2] = offsetof(struct pt_regs, a2), + [RV_REG_A3] = offsetof(struct pt_regs, a3), + [RV_REG_A4] = offsetof(struct pt_regs, a4), + [RV_REG_A5] = offsetof(struct pt_regs, 
a5), + [RV_REG_S1] = offsetof(struct pt_regs, s1), + [RV_REG_S2] = offsetof(struct pt_regs, s2), + [RV_REG_S3] = offsetof(struct pt_regs, s3), + [RV_REG_S4] = offsetof(struct pt_regs, s4), + [RV_REG_S5] = offsetof(struct pt_regs, s5), + [RV_REG_T0] = offsetof(struct pt_regs, t0), +}; + enum { RV_CTX_F_SEEN_TAIL_CALL = 0, RV_CTX_F_SEEN_CALL = RV_REG_RA, @@ -440,6 +456,69 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) return 0; } +#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) +#define BPF_FIXUP_REG_MASK GENMASK(31, 27) + +int rv_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); + int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); + + *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0; + regs->epc = (unsigned long)&ex->fixup - offset; + + return 1; +} + +/* For accesses to BTF pointers, add an entry to the exception table */ +static int add_exception_handler(const struct bpf_insn *insn, + struct rv_jit_context *ctx, + int dst_reg, int insn_len) +{ + struct exception_table_entry *ex; + unsigned long pc; + off_t offset; + + if (!ctx->insns || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + return 0; + + if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries)) + return -EINVAL; + + if (WARN_ON_ONCE(insn_len > ctx->ninsns)) + return -EINVAL; + + if (WARN_ON_ONCE(!rvc_enabled() && insn_len == 1)) + return -EINVAL; + + ex = &ctx->prog->aux->extable[ctx->nexentries]; + pc = (unsigned long)&ctx->insns[ctx->ninsns - insn_len]; + + offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + ex->insn = pc; + + /* + * Since the extable follows the program, the fixup offset is always + * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value + * to keep things simple, and put the destination register in the upper + * bits. 
We don't need to worry about buildtime or runtime sort + * modifying the upper bits because the table is already sorted, and + * isn't part of the main exception table. + */ + offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16)); + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset)) + return -ERANGE; + + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | + FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + + ctx->nexentries++; + return 0; +} + int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, bool extra_pass) { @@ -893,52 +972,86 @@ out_be: /* LDX: dst = *(size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_B: - if (is_12b_int(off)) { - emit(rv_lbu(rd, off, rs), ctx); + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + { + int insn_len, insns_start; + + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_lbu(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); + insns_start = ctx->ninsns; + emit(rv_lbu(rd, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + if (insn_is_zext(&insn[1])) + return 1; break; - } + case BPF_H: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_lhu(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } - emit_imm(RV_REG_T1, off, ctx); - emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); - emit(rv_lbu(rd, 0, RV_REG_T1), ctx); - if (insn_is_zext(&insn[1])) - return 1; - break; - case BPF_LDX | BPF_MEM | BPF_H: - if (is_12b_int(off)) { - emit(rv_lhu(rd, off, rs), ctx); + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); + insns_start = ctx->ninsns; + emit(rv_lhu(rd, 0, RV_REG_T1), ctx); + insn_len = 
ctx->ninsns - insns_start; + if (insn_is_zext(&insn[1])) + return 1; break; - } + case BPF_W: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_lwu(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } - emit_imm(RV_REG_T1, off, ctx); - emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); - emit(rv_lhu(rd, 0, RV_REG_T1), ctx); - if (insn_is_zext(&insn[1])) - return 1; - break; - case BPF_LDX | BPF_MEM | BPF_W: - if (is_12b_int(off)) { - emit(rv_lwu(rd, off, rs), ctx); + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); + insns_start = ctx->ninsns; + emit(rv_lwu(rd, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + if (insn_is_zext(&insn[1])) + return 1; break; - } + case BPF_DW: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_ld(rd, off, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } - emit_imm(RV_REG_T1, off, ctx); - emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); - emit(rv_lwu(rd, 0, RV_REG_T1), ctx); - if (insn_is_zext(&insn[1])) - return 1; - break; - case BPF_LDX | BPF_MEM | BPF_DW: - if (is_12b_int(off)) { - emit_ld(rd, off, rs, ctx); + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); + insns_start = ctx->ninsns; + emit_ld(rd, 0, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; break; } - emit_imm(RV_REG_T1, off, ctx); - emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); - emit_ld(rd, 0, RV_REG_T1, ctx); + ret = add_exception_handler(insn, ctx, rd, insn_len); + if (ret) + return ret; break; - + } /* speculation barrier */ case BPF_ST | BPF_NOSPEC: break; diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 753d85bdfad0..be743d700aa7 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -11,7 +11,7 @@ #include "bpf_jit.h" /* Number of iterations to try until offsets converge. 
*/ -#define NR_JIT_ITERATIONS 16 +#define NR_JIT_ITERATIONS 32 static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset) { @@ -41,12 +41,12 @@ bool bpf_jit_needs_zext(void) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { + unsigned int prog_size = 0, extable_size = 0; bool tmp_blinded = false, extra_pass = false; struct bpf_prog *tmp, *orig_prog = prog; int pass = 0, prev_ninsns = 0, i; struct rv_jit_data *jit_data; struct rv_jit_context *ctx; - unsigned int image_size = 0; if (!prog->jit_requested) return orig_prog; @@ -73,7 +73,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (ctx->offset) { extra_pass = true; - image_size = sizeof(*ctx->insns) * ctx->ninsns; + prog_size = sizeof(*ctx->insns) * ctx->ninsns; goto skip_init_ctx; } @@ -102,10 +102,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (ctx->ninsns == prev_ninsns) { if (jit_data->header) break; + /* obtain the actual image size */ + extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + prog_size = sizeof(*ctx->insns) * ctx->ninsns; - image_size = sizeof(*ctx->insns) * ctx->ninsns; jit_data->header = - bpf_jit_binary_alloc(image_size, + bpf_jit_binary_alloc(prog_size + extable_size, &jit_data->image, sizeof(u32), bpf_fill_ill_insns); @@ -131,9 +134,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_offset; } + if (extable_size) + prog->aux->extable = (void *)ctx->insns + prog_size; + skip_init_ctx: pass++; ctx->ninsns = 0; + ctx->nexentries = 0; bpf_jit_build_prologue(ctx); if (build_body(ctx, extra_pass, NULL)) { @@ -144,11 +151,11 @@ skip_init_ctx: bpf_jit_build_epilogue(ctx); if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, pass, ctx->insns); + bpf_jit_dump(prog->len, prog_size, pass, ctx->insns); prog->bpf_func = (void *)ctx->insns; prog->jited = 1; - prog->jited_len = image_size; + prog->jited_len = prog_size; bpf_flush_icache(jit_data->header, ctx->insns + 
ctx->ninsns); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 576ef1a6954a..e474718d152b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -721,6 +721,20 @@ static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64) *pprog = prog; } +/* + * Similar version of maybe_emit_mod() for a single register + */ +static void maybe_emit_1mod(u8 **pprog, u32 reg, bool is64) +{ + u8 *prog = *pprog; + + if (is64) + EMIT1(add_1mod(0x48, reg)); + else if (is_ereg(reg)) + EMIT1(add_1mod(0x40, reg)); + *pprog = prog; +} + /* LDX: dst_reg = *(u8*)(src_reg + off) */ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { @@ -951,10 +965,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, /* neg dst */ case BPF_ALU | BPF_NEG: case BPF_ALU64 | BPF_NEG: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); EMIT2(0xF7, add_1reg(0xD8, dst_reg)); break; @@ -968,10 +980,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_AND | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); /* * b3 holds 'normal' opcode, b2 short form only valid @@ -1028,19 +1038,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_K: - case BPF_ALU64 | BPF_DIV | BPF_K: - EMIT1(0x50); /* push rax */ - EMIT1(0x52); /* push rdx */ - - if (BPF_SRC(insn->code) == BPF_X) - /* mov r11, src_reg */ - EMIT_mov(AUX_REG, src_reg); - else + case BPF_ALU64 | BPF_DIV | 
BPF_K: { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; + + if (dst_reg != BPF_REG_0) + EMIT1(0x50); /* push rax */ + if (dst_reg != BPF_REG_3) + EMIT1(0x52); /* push rdx */ + + if (BPF_SRC(insn->code) == BPF_X) { + if (src_reg == BPF_REG_0 || + src_reg == BPF_REG_3) { + /* mov r11, src_reg */ + EMIT_mov(AUX_REG, src_reg); + src_reg = AUX_REG; + } + } else { /* mov r11, imm32 */ EMIT3_off32(0x49, 0xC7, 0xC3, imm32); + src_reg = AUX_REG; + } - /* mov rax, dst_reg */ - EMIT_mov(BPF_REG_0, dst_reg); + if (dst_reg != BPF_REG_0) + /* mov rax, dst_reg */ + emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg); /* * xor edx, edx @@ -1048,33 +1069,30 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, */ EMIT2(0x31, 0xd2); - if (BPF_CLASS(insn->code) == BPF_ALU64) - /* div r11 */ - EMIT3(0x49, 0xF7, 0xF3); - else - /* div r11d */ - EMIT3(0x41, 0xF7, 0xF3); - - if (BPF_OP(insn->code) == BPF_MOD) - /* mov r11, rdx */ - EMIT3(0x49, 0x89, 0xD3); - else - /* mov r11, rax */ - EMIT3(0x49, 0x89, 0xC3); - - EMIT1(0x5A); /* pop rdx */ - EMIT1(0x58); /* pop rax */ - - /* mov dst_reg, r11 */ - EMIT_mov(dst_reg, AUX_REG); + /* div src_reg */ + maybe_emit_1mod(&prog, src_reg, is64); + EMIT2(0xF7, add_1reg(0xF0, src_reg)); + + if (BPF_OP(insn->code) == BPF_MOD && + dst_reg != BPF_REG_3) + /* mov dst_reg, rdx */ + emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3); + else if (BPF_OP(insn->code) == BPF_DIV && + dst_reg != BPF_REG_0) + /* mov dst_reg, rax */ + emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0); + + if (dst_reg != BPF_REG_3) + EMIT1(0x5A); /* pop rdx */ + if (dst_reg != BPF_REG_0) + EMIT1(0x58); /* pop rax */ break; + } case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_2mod(0x48, dst_reg, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + maybe_emit_mod(&prog, dst_reg, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); if (is_imm8(imm32)) /* imul dst_reg, dst_reg, imm8 
*/ @@ -1089,10 +1107,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_X: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_2mod(0x48, src_reg, dst_reg)); - else if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT1(add_2mod(0x40, src_reg, dst_reg)); + maybe_emit_mod(&prog, src_reg, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); /* imul dst_reg, src_reg */ EMIT3(0x0F, 0xAF, add_2reg(0xC0, src_reg, dst_reg)); @@ -1105,10 +1121,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); b3 = simple_alu_opcodes[BPF_OP(insn->code)]; if (imm32 == 1) @@ -1139,10 +1153,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */ - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_ALU64); b3 = simple_alu_opcodes[BPF_OP(insn->code)]; EMIT2(0xD3, add_1reg(b3, dst_reg)); @@ -1452,10 +1464,8 @@ st: if (is_imm8(insn->off)) case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: /* test dst_reg, imm32 */ - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_1mod(0x48, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_JMP); EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); goto emit_cond_jmp; @@ -1488,10 +1498,8 @@ st: if (is_imm8(insn->off)) } /* cmp dst_reg, imm8/32 */ - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_1mod(0x48, dst_reg)); - else if 
(is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); + maybe_emit_1mod(&prog, dst_reg, + BPF_CLASS(insn->code) == BPF_JMP); if (is_imm8(imm32)) EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e6f5579f9356..2be6dfd68df9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -168,6 +168,7 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; + u64 map_extra; /* any per-map-type extra fields */ u32 map_flags; int spin_lock_off; /* >=0 valid offset, <0 error */ int timer_off; /* >=0 valid offset, <0 error */ @@ -175,15 +176,15 @@ struct bpf_map { int numa_node; u32 btf_key_type_id; u32 btf_value_type_id; + u32 btf_vmlinux_value_type_id; struct btf *btf; #ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif char name[BPF_OBJ_NAME_LEN]; - u32 btf_vmlinux_value_type_id; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ - /* 22 bytes hole */ + /* 14 bytes hole */ /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. @@ -513,7 +514,7 @@ struct bpf_verifier_ops { const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); - bool (*check_kfunc_call)(u32 kfunc_btf_id); + bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner); }; struct bpf_prog_offload_ops { @@ -877,6 +878,7 @@ struct bpf_prog_aux { void *jit_data; /* JIT specific data. 
arch dependent */ struct bpf_jit_poke_descriptor *poke_tab; struct bpf_kfunc_desc_tab *kfunc_tab; + struct bpf_kfunc_btf_tab *kfunc_btf_tab; u32 size_poke_tab; struct bpf_ksym ksym; const struct bpf_prog_ops *ops; @@ -886,6 +888,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ + u32 verified_insns; struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]; char name[BPF_OBJ_NAME_LEN]; #ifdef CONFIG_SECURITY @@ -1000,6 +1003,10 @@ bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, + struct bpf_prog *prog, + const struct btf_func_model *model, + void *image, void *image_end); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) @@ -1014,6 +1021,22 @@ static inline void bpf_module_put(const void *data, struct module *owner) else module_put(owner); } + +#ifdef CONFIG_NET +/* Define it here to avoid the use of forward declaration */ +struct bpf_dummy_ops_state { + int val; +}; + +struct bpf_dummy_ops { + int (*test_1)(struct bpf_dummy_ops_state *cb); + int (*test_2)(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2, + char a3, unsigned long a4); +}; + +int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +#endif #else static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) { @@ -1642,10 +1665,33 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id); +bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct 
bpf_prog *prog, struct bpf_insn_access_aux *info); + +static inline bool bpf_tracing_ctx_access(int off, int size, + enum bpf_access_type type) +{ + if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + return true; +} + +static inline bool bpf_tracing_btf_ctx_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (!bpf_tracing_ctx_access(off, size, type)) + return false; + return btf_ctx_access(off, size, type, prog, info); +} + int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, @@ -1863,7 +1909,8 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, return -ENOTSUPP; } -static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id) +static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, + struct module *owner) { return false; } @@ -2094,6 +2141,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_snprintf_proto; @@ -2108,6 +2156,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; +extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const 
struct bpf_prog *prog); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index bbe1eefa4c8a..48a91c51c015 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -125,6 +125,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5424124dbe36..c8a78e830fca 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -527,5 +527,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *tgt_prog, u32 btf_id, struct bpf_attach_target_info *tgt_info); +void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab); + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h index 546e27fc6d46..46e1757d06a3 100644 --- a/include/linux/bpfptr.h +++ b/include/linux/bpfptr.h @@ -3,6 +3,7 @@ #ifndef _LINUX_BPFPTR_H #define _LINUX_BPFPTR_H +#include <linux/mm.h> #include <linux/sockptr.h> typedef sockptr_t bpfptr_t; diff --git a/include/linux/btf.h b/include/linux/btf.h index 214fde93214b..203eef993d76 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -5,6 +5,7 @@ #define _LINUX_BTF_H 1 #include <linux/types.h> +#include <linux/bpfptr.h> #include <uapi/linux/btf.h> #include <uapi/linux/bpf.h> @@ -238,4 +239,42 @@ static inline const char *btf_name_by_offset(const struct btf *btf, } #endif +struct kfunc_btf_id_set { + struct list_head list; + struct btf_id_set *set; + struct module *owner; +}; + +struct kfunc_btf_id_list; + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES +void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s); +void unregister_kfunc_btf_id_set(struct 
kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s); +bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, + struct module *owner); +#else +static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ +} +static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ +} +static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, + u32 kfunc_id, struct module *owner) +{ + return false; +} +#endif + +#define DEFINE_KFUNC_BTF_ID_SET(set, name) \ + struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ + THIS_MODULE } + +extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; +extern struct kfunc_btf_id_list prog_test_kfunc_list; + #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 8231a6a257f6..75b8e299bfc9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -553,9 +553,9 @@ struct bpf_binary_header { }; struct bpf_prog_stats { - u64 cnt; - u64 nsecs; - u64 misses; + u64_stats_t cnt; + u64_stats_t nsecs; + u64_stats_t misses; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); @@ -612,13 +612,14 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog, if (static_branch_unlikely(&bpf_stats_enabled_key)) { struct bpf_prog_stats *stats; u64 start = sched_clock(); + unsigned long flags; ret = dfunc(ctx, prog->insnsi, prog->bpf_func); stats = this_cpu_ptr(prog->stats); - u64_stats_update_begin(&stats->syncp); - stats->cnt++; - stats->nsecs += sched_clock() - start; - u64_stats_update_end(&stats->syncp); + flags = u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, sched_clock() - start); + u64_stats_update_end_irqrestore(&stats->syncp, flags); } else { ret = dfunc(ctx, prog->insnsi, prog->bpf_func); } diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 1ce9a9eb223b..b4256847c707 100644 --- 
a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -509,8 +509,22 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock) #if IS_ENABLED(CONFIG_NET_SOCK_MSG) -/* We only have one bit so far. */ -#define BPF_F_PTR_MASK ~(BPF_F_INGRESS) +#define BPF_F_STRPARSER (1UL << 1) + +/* We only have two bits so far. */ +#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER) + +static inline bool skb_bpf_strparser(const struct sk_buff *skb) +{ + unsigned long sk_redir = skb->_sk_redir; + + return sk_redir & BPF_F_STRPARSER; +} + +static inline void skb_bpf_set_strparser(struct sk_buff *skb) +{ + skb->_sk_redir |= BPF_F_STRPARSER; +} static inline bool skb_bpf_ingress(const struct sk_buff *skb) { diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index a23be89119aa..a8e97f84b652 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -93,8 +93,7 @@ __section("__bpf_raw_tp_map") = { \ #define FIRST(x, ...) x -#undef DEFINE_EVENT_WRITABLE -#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ +#define __CHECK_WRITABLE_BUF_SIZE(call, proto, args, size) \ static inline void bpf_test_buffer_##call(void) \ { \ /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \ @@ -103,8 +102,12 @@ static inline void bpf_test_buffer_##call(void) \ */ \ FIRST(proto); \ (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \ -} \ -__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) +} + +#undef DEFINE_EVENT_WRITABLE +#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \ + __CHECK_WRITABLE_BUF_SIZE(call, PARAMS(proto), PARAMS(args), size) \ + __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -119,9 +122,17 @@ __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size) __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) \ __DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), 0) +#undef 
DECLARE_TRACE_WRITABLE +#define DECLARE_TRACE_WRITABLE(call, proto, args, size) \ + __CHECK_WRITABLE_BUF_SIZE(call, PARAMS(proto), PARAMS(args), size) \ + __BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) \ + __DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), size) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#undef DECLARE_TRACE_WRITABLE #undef DEFINE_EVENT_WRITABLE +#undef __CHECK_WRITABLE_BUF_SIZE #undef __DEFINE_EVENT #undef FIRST diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6fc59d61937a..ba5af15e25f5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -906,6 +906,7 @@ enum bpf_map_type { BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, + BPF_MAP_TYPE_BLOOM_FILTER, }; /* Note that tracing related programs such as @@ -1274,6 +1275,13 @@ union bpf_attr { * struct stored as the * map value */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). + */ + __u64 map_extra; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -4909,6 +4917,27 @@ union bpf_attr { * Return * The number of bytes written to the buffer, or a negative error * in case of failure. + * + * struct unix_sock *bpf_skc_to_unix_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *unix_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res) + * Description + * Get the address of a kernel symbol, returned in *res*. *res* is + * set to 0 if the symbol is not found. + * Return + * On success, zero. On error, a negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-EINVAL** if string *name* is not the same size as *name_sz*. + * + * **-ENOENT** if symbol is not found. 
+ * + * **-EPERM** if caller does not have permission to obtain kernel address. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5089,6 +5118,8 @@ union bpf_attr { FN(task_pt_regs), \ FN(get_branch_snapshot), \ FN(trace_vprintk), \ + FN(skc_to_unix_sock), \ + FN(kallsyms_lookup_name), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5613,6 +5644,7 @@ struct bpf_prog_info { __u64 run_time_ns; __u64 run_cnt; __u64 recursion_misses; + __u32 verified_insns; } __attribute__((aligned(8))); struct bpf_map_info { @@ -5630,6 +5662,8 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 :32; /* alignment pad */ + __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h index 642b6ecb37d7..deb12f755f0f 100644 --- a/include/uapi/linux/btf.h +++ b/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR and TAG. + * FUNC, FUNC_PROTO, VAR and DECL_TAG. * "type" is a type_id referring to another type. */ union { @@ -74,7 +74,7 @@ enum { BTF_KIND_VAR = 14, /* Variable */ BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ - BTF_KIND_TAG = 17, /* Tag */ + BTF_KIND_DECL_TAG = 17, /* Decl Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -174,14 +174,14 @@ struct btf_var_secinfo { __u32 size; }; -/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe +/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe * additional information related to the tag applied location. * If component_idx == -1, the tag is applied to a struct, union, * variable or function. Otherwise, it is applied to a struct/union * member or a func argument, and component_idx indicates which member * or argument (0 ... vlen-1). 
*/ -struct btf_tag { +struct btf_decl_tag { __s32 component_idx; }; diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index a82d6de86522..d24d518ddd63 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -64,6 +64,7 @@ config BPF_JIT_DEFAULT_ON config BPF_UNPRIV_DEFAULT_OFF bool "Disable unprivileged BPF by default" + default y depends on BPF_SYSCALL help Disables unprivileged BPF by default by setting the corresponding @@ -72,6 +73,12 @@ config BPF_UNPRIV_DEFAULT_OFF disable it by setting it to 1 (from which no other transition to 0 is possible anymore). + Unprivileged BPF could be used to exploit certain potential + speculative execution side-channel vulnerabilities on unmitigated + affected hardware. + + If you are unsure how to answer this question, answer Y. + source "kernel/bpf/preload/Kconfig" config BPF_LSM diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 7f33098ca63f..cf6ca339f3cd 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -7,7 +7,7 @@ endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c new file mode 100644 index 000000000000..277a05e9c984 --- /dev/null +++ b/kernel/bpf/bloom_filter.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <linux/bitmap.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/err.h> +#include 
<linux/jhash.h> +#include <linux/random.h> + +#define BLOOM_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK) + +struct bpf_bloom_filter { + struct bpf_map map; + u32 bitset_mask; + u32 hash_seed; + /* If the size of the values in the bloom filter is u32 aligned, + * then it is more performant to use jhash2 as the underlying hash + * function, else we use jhash. This tracks the number of u32s + * in an u32-aligned value size. If the value size is not u32 aligned, + * this will be 0. + */ + u32 aligned_u32_count; + u32 nr_hash_funcs; + unsigned long bitset[]; +}; + +static u32 hash(struct bpf_bloom_filter *bloom, void *value, + u32 value_size, u32 index) +{ + u32 h; + + if (bloom->aligned_u32_count) + h = jhash2(value, bloom->aligned_u32_count, + bloom->hash_seed + index); + else + h = jhash(value, value_size, bloom->hash_seed + index); + + return h & bloom->bitset_mask; +} + +static int bloom_map_peek_elem(struct bpf_map *map, void *value) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + u32 i, h; + + for (i = 0; i < bloom->nr_hash_funcs; i++) { + h = hash(bloom, value, map->value_size, i); + if (!test_bit(h, bloom->bitset)) + return -ENOENT; + } + + return 0; +} + +static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + u32 i, h; + + if (flags != BPF_ANY) + return -EINVAL; + + for (i = 0; i < bloom->nr_hash_funcs; i++) { + h = hash(bloom, value, map->value_size, i); + set_bit(h, bloom->bitset); + } + + return 0; +} + +static int bloom_map_pop_elem(struct bpf_map *map, void *value) +{ + return -EOPNOTSUPP; +} + +static int bloom_map_delete_elem(struct bpf_map *map, void *value) +{ + return -EOPNOTSUPP; +} + +static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) +{ + u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; + int numa_node = bpf_map_attr_numa_node(attr); + struct 
bpf_bloom_filter *bloom; + + if (!bpf_capable()) + return ERR_PTR(-EPERM); + + if (attr->key_size != 0 || attr->value_size == 0 || + attr->max_entries == 0 || + attr->map_flags & ~BLOOM_CREATE_FLAG_MASK || + !bpf_map_flags_access_ok(attr->map_flags) || + /* The lower 4 bits of map_extra (0xF) specify the number + * of hash functions + */ + (attr->map_extra & ~0xF)) + return ERR_PTR(-EINVAL); + + nr_hash_funcs = attr->map_extra; + if (nr_hash_funcs == 0) + /* Default to using 5 hash functions if unspecified */ + nr_hash_funcs = 5; + + /* For the bloom filter, the optimal bit array size that minimizes the + * false positive probability is n * k / ln(2) where n is the number of + * expected entries in the bloom filter and k is the number of hash + * functions. We use 7 / 5 to approximate 1 / ln(2). + * + * We round this up to the nearest power of two to enable more efficient + * hashing using bitmasks. The bitmask will be the bit array size - 1. + * + * If this overflows a u32, the bit array size will have 2^32 (4 + * GB) bits. 
+ */ + if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) || + check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) || + nr_bits > (1UL << 31)) { + /* The bit array size is 2^32 bits but to avoid overflowing the + * u32, we use U32_MAX, which will round up to the equivalent + * number of bytes + */ + bitset_bytes = BITS_TO_BYTES(U32_MAX); + bitset_mask = U32_MAX; + } else { + if (nr_bits <= BITS_PER_LONG) + nr_bits = BITS_PER_LONG; + else + nr_bits = roundup_pow_of_two(nr_bits); + bitset_bytes = BITS_TO_BYTES(nr_bits); + bitset_mask = nr_bits - 1; + } + + bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long)); + bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node); + + if (!bloom) + return ERR_PTR(-ENOMEM); + + bpf_map_init_from_attr(&bloom->map, attr); + + bloom->nr_hash_funcs = nr_hash_funcs; + bloom->bitset_mask = bitset_mask; + + /* Check whether the value size is u32-aligned */ + if ((attr->value_size & (sizeof(u32) - 1)) == 0) + bloom->aligned_u32_count = + attr->value_size / sizeof(u32); + + if (!(attr->map_flags & BPF_F_ZERO_SEED)) + bloom->hash_seed = get_random_int(); + + return &bloom->map; +} + +static void bloom_map_free(struct bpf_map *map) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + + bpf_map_area_free(bloom); +} + +static void *bloom_map_lookup_elem(struct bpf_map *map, void *key) +{ + /* The eBPF program should use map_peek_elem instead */ + return ERR_PTR(-EINVAL); +} + +static int bloom_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) +{ + /* The eBPF program should use map_push_elem instead */ + return -EINVAL; +} + +static int bloom_map_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + /* Bloom filter maps are keyless */ + return btf_type_is_void(key_type) ? 
0 : -EINVAL; +} + +static int bpf_bloom_map_btf_id; +const struct bpf_map_ops bloom_filter_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc = bloom_map_alloc, + .map_free = bloom_map_free, + .map_push_elem = bloom_map_push_elem, + .map_peek_elem = bloom_map_peek_elem, + .map_pop_elem = bloom_map_pop_elem, + .map_lookup_elem = bloom_map_lookup_elem, + .map_update_elem = bloom_map_update_elem, + .map_delete_elem = bloom_map_delete_elem, + .map_check_btf = bloom_map_check_btf, + .map_btf_name = "bpf_bloom_filter", + .map_btf_id = &bpf_bloom_map_btf_id, +}; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 9abcc33f02cf..8ecfe4752769 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -93,6 +93,9 @@ const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { }; const struct bpf_prog_ops bpf_struct_ops_prog_ops = { +#ifdef CONFIG_NET + .test_run = bpf_struct_ops_test_run, +#endif }; static const struct btf_type *module_type; @@ -312,6 +315,20 @@ static int check_zero_holes(const struct btf_type *t, void *data) return 0; } +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, + struct bpf_prog *prog, + const struct btf_func_model *model, + void *image, void *image_end) +{ + u32 flags; + + tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; + tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + flags = model->ret_size > 0 ? 
BPF_TRAMP_F_RET_FENTRY_RET : 0; + return arch_prepare_bpf_trampoline(NULL, image, image_end, + model, flags, tprogs, NULL); +} + static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { @@ -323,7 +340,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, struct bpf_tramp_progs *tprogs = NULL; void *udata, *kdata; int prog_fd, err = 0; - void *image; + void *image, *image_end; u32 i; if (flags) @@ -363,12 +380,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, udata = &uvalue->data; kdata = &kvalue->data; image = st_map->image; + image_end = st_map->image + PAGE_SIZE; for_each_member(i, t, member) { const struct btf_type *mtype, *ptype; struct bpf_prog *prog; u32 moff; - u32 flags; moff = btf_member_bit_offset(t, member) / 8; ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); @@ -430,14 +447,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto reset_unlock; } - tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; - tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; - flags = st_ops->func_models[i].ret_size > 0 ? 
- BPF_TRAMP_F_RET_FENTRY_RET : 0; - err = arch_prepare_bpf_trampoline(NULL, image, - st_map->image + PAGE_SIZE, - &st_ops->func_models[i], - flags, tprogs, NULL); + err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + &st_ops->func_models[i], + image, image_end); if (err < 0) goto reset_unlock; diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h index 066d83ea1c99..5678a9ddf817 100644 --- a/kernel/bpf/bpf_struct_ops_types.h +++ b/kernel/bpf/bpf_struct_ops_types.h @@ -2,6 +2,9 @@ /* internal file - do not include directly */ #ifdef CONFIG_BPF_JIT +#ifdef CONFIG_NET +BPF_STRUCT_OPS_TYPE(bpf_dummy_ops) +#endif #ifdef CONFIG_INET #include <net/tcp.h> BPF_STRUCT_OPS_TYPE(tcp_congestion_ops) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c3d605b22473..dbc3ad07e21b 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -281,7 +281,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", - [BTF_KIND_TAG] = "TAG", + [BTF_KIND_DECL_TAG] = "DECL_TAG", }; const char *btf_type_str(const struct btf_type *t) @@ -460,15 +460,15 @@ static bool btf_type_is_datasec(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } -static bool btf_type_is_tag(const struct btf_type *t) +static bool btf_type_is_decl_tag(const struct btf_type *t) { - return BTF_INFO_KIND(t->info) == BTF_KIND_TAG; + return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG; } -static bool btf_type_is_tag_target(const struct btf_type *t) +static bool btf_type_is_decl_tag_target(const struct btf_type *t) { return btf_type_is_func(t) || btf_type_is_struct(t) || - btf_type_is_var(t); + btf_type_is_var(t) || btf_type_is_typedef(t); } u32 btf_nr_types(const struct btf *btf) @@ -549,7 +549,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, static bool btf_type_is_resolve_source_only(const struct btf_type *t) { return btf_type_is_var(t) || - 
btf_type_is_tag(t) || + btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -576,7 +576,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t) btf_type_is_struct(t) || btf_type_is_array(t) || btf_type_is_var(t) || - btf_type_is_tag(t) || + btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -630,9 +630,9 @@ static const struct btf_var *btf_type_var(const struct btf_type *t) return (const struct btf_var *)(t + 1); } -static const struct btf_tag *btf_type_tag(const struct btf_type *t) +static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t) { - return (const struct btf_tag *)(t + 1); + return (const struct btf_decl_tag *)(t + 1); } static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) @@ -3820,11 +3820,11 @@ static const struct btf_kind_operations float_ops = { .show = btf_df_show, }; -static s32 btf_tag_check_meta(struct btf_verifier_env *env, +static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { - const struct btf_tag *tag; + const struct btf_decl_tag *tag; u32 meta_needed = sizeof(*tag); s32 component_idx; const char *value; @@ -3852,7 +3852,7 @@ static s32 btf_tag_check_meta(struct btf_verifier_env *env, return -EINVAL; } - component_idx = btf_type_tag(t)->component_idx; + component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx < -1) { btf_verifier_log_type(env, t, "Invalid component_idx"); return -EINVAL; @@ -3863,7 +3863,7 @@ static s32 btf_tag_check_meta(struct btf_verifier_env *env, return meta_needed; } -static int btf_tag_resolve(struct btf_verifier_env *env, +static int btf_decl_tag_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_type *next_type; @@ -3874,7 +3874,7 @@ static int btf_tag_resolve(struct btf_verifier_env *env, u32 vlen; next_type = btf_type_by_id(btf, next_type_id); - if (!next_type || !btf_type_is_tag_target(next_type)) { + if (!next_type || 
!btf_type_is_decl_tag_target(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } @@ -3883,9 +3883,9 @@ static int btf_tag_resolve(struct btf_verifier_env *env, !env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id); - component_idx = btf_type_tag(t)->component_idx; + component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx != -1) { - if (btf_type_is_var(next_type)) { + if (btf_type_is_var(next_type) || btf_type_is_typedef(next_type)) { btf_verifier_log_type(env, v->t, "Invalid component_idx"); return -EINVAL; } @@ -3909,18 +3909,18 @@ static int btf_tag_resolve(struct btf_verifier_env *env, return 0; } -static void btf_tag_log(struct btf_verifier_env *env, const struct btf_type *t) +static void btf_decl_tag_log(struct btf_verifier_env *env, const struct btf_type *t) { btf_verifier_log(env, "type=%u component_idx=%d", t->type, - btf_type_tag(t)->component_idx); + btf_type_decl_tag(t)->component_idx); } -static const struct btf_kind_operations tag_ops = { - .check_meta = btf_tag_check_meta, - .resolve = btf_tag_resolve, +static const struct btf_kind_operations decl_tag_ops = { + .check_meta = btf_decl_tag_check_meta, + .resolve = btf_decl_tag_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, - .log_details = btf_tag_log, + .log_details = btf_decl_tag_log, .show = btf_df_show, }; @@ -4058,7 +4058,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_VAR] = &var_ops, [BTF_KIND_DATASEC] = &datasec_ops, [BTF_KIND_FLOAT] = &float_ops, - [BTF_KIND_TAG] = &tag_ops, + [BTF_KIND_DECL_TAG] = &decl_tag_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, @@ -4143,7 +4143,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return !btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); - if (btf_type_is_tag(t)) + if (btf_type_is_decl_tag(t)) return 
btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); @@ -6343,3 +6343,58 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = { }; BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct) + +/* BTF ID set registration API for modules */ + +struct kfunc_btf_id_list { + struct list_head list; + struct mutex mutex; +}; + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + +void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ + mutex_lock(&l->mutex); + list_add(&s->list, &l->list); + mutex_unlock(&l->mutex); +} +EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set); + +void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ + mutex_lock(&l->mutex); + list_del_init(&s->list); + mutex_unlock(&l->mutex); +} +EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set); + +bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, + struct module *owner) +{ + struct kfunc_btf_id_set *s; + + if (!owner) + return false; + mutex_lock(&klist->mutex); + list_for_each_entry(s, &klist->list, list) { + if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) { + mutex_unlock(&klist->mutex); + return true; + } + } + mutex_unlock(&klist->mutex); + return false; +} + +#endif + +#define DEFINE_KFUNC_BTF_ID_LIST(name) \ + struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ + __MUTEX_INITIALIZER(name.mutex) }; \ + EXPORT_SYMBOL_GPL(name) + +DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); +DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ded9163185d1..327e3996eadb 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -32,6 +32,7 @@ #include <linux/perf_event.h> #include <linux/extable.h> #include <linux/log2.h> +#include <linux/bpf_verifier.h> #include <asm/barrier.h> #include <asm/unaligned.h> @@ -2263,6 +2264,9 @@ static void bpf_prog_free_deferred(struct work_struct *work) int i; aux = 
container_of(work, struct bpf_prog_aux, work); +#ifdef CONFIG_BPF_SYSCALL + bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab); +#endif bpf_free_used_maps(aux); bpf_free_used_btfs(aux); if (bpf_prog_is_dev_bound(aux)) diff --git a/kernel/bpf/preload/.gitignore b/kernel/bpf/preload/.gitignore index 856a4c5ad0dd..9452322902a5 100644 --- a/kernel/bpf/preload/.gitignore +++ b/kernel/bpf/preload/.gitignore @@ -1,4 +1,2 @@ -/FEATURE-DUMP.libbpf -/bpf_helper_defs.h -/feature +/libbpf /bpf_preload_umd diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index 1951332dd15f..1400ac58178e 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -1,21 +1,35 @@ # SPDX-License-Identifier: GPL-2.0 LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ -LIBBPF_A = $(obj)/libbpf.a -LIBBPF_OUT = $(abspath $(obj)) +LIBBPF_OUT = $(abspath $(obj))/libbpf +LIBBPF_A = $(LIBBPF_OUT)/libbpf.a +LIBBPF_DESTDIR = $(LIBBPF_OUT) +LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include # Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test # in tools/scripts/Makefile.include always succeeds when building the kernel # with $(O) pointing to a relative path, as in "make O=build bindeb-pkg". 
-$(LIBBPF_A): - $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a +$(LIBBPF_A): | $(LIBBPF_OUT) + $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $(LIBBPF_OUT)/libbpf.a install_headers + +libbpf_hdrs: $(LIBBPF_A) + +.PHONY: libbpf_hdrs + +$(LIBBPF_OUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $@ userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ - -I $(srctree)/tools/lib/ -Wno-unused-result + -I $(LIBBPF_INCLUDE) -Wno-unused-result userprogs := bpf_preload_umd -clean-files := $(userprogs) bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/ +clean-files := libbpf/ + +$(obj)/iterators/iterators.o: | libbpf_hdrs bpf_preload_umd-objs := iterators/iterators.o bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile index 28fa8c1440f4..b8bd60511227 100644 --- a/kernel/bpf/preload/iterators/Makefile +++ b/kernel/bpf/preload/iterators/Makefile @@ -1,18 +1,26 @@ # SPDX-License-Identifier: GPL-2.0 OUTPUT := .output +abs_out := $(abspath $(OUTPUT)) + CLANG ?= clang LLC ?= llc LLVM_STRIP ?= llvm-strip + +TOOLS_PATH := $(abspath ../../../../tools) +BPFTOOL_SRC := $(TOOLS_PATH)/bpf/bpftool +BPFTOOL_OUTPUT := $(abs_out)/bpftool DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) -LIBBPF_SRC := $(abspath ../../../../tools/lib/bpf) -BPFOBJ := $(OUTPUT)/libbpf.a -BPF_INCLUDE := $(OUTPUT) -INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../../../tools/lib) \ - -I$(abspath ../../../../tools/include/uapi) + +LIBBPF_SRC := $(TOOLS_PATH)/lib/bpf +LIBBPF_OUTPUT := $(abs_out)/libbpf +LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include +BPFOBJ := $(LIBBPF_OUTPUT)/libbpf.a + +INCLUDES := -I$(OUTPUT) -I$(LIBBPF_INCLUDE) -I$(TOOLS_PATH)/include/uapi CFLAGS := -g -Wall -abs_out := $(abspath $(OUTPUT)) ifeq ($(V),1) Q = 
msg = @@ -44,14 +52,18 @@ $(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT) -c $(filter %.c,$^) -o $@ && \ $(LLVM_STRIP) -g $@ -$(OUTPUT): +$(OUTPUT) $(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT): $(call msg,MKDIR,$@) - $(Q)mkdir -p $(OUTPUT) + $(Q)mkdir -p $@ -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ - OUTPUT=$(abspath $(dir $@))/ $(abspath $@) + OUTPUT=$(abspath $(dir $@))/ prefix= \ + DESTDIR=$(LIBBPF_DESTDIR) $(abspath $@) install_headers -$(DEFAULT_BPFTOOL): - $(Q)$(MAKE) $(submake_extras) -C ../../../../tools/bpf/bpftool \ - prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install +$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) \ + OUTPUT=$(BPFTOOL_OUTPUT)/ \ + LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \ + LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ \ + prefix= DESTDIR=$(abs_out)/ install-bin diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1cad6979a0d0..50f96ea4452a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -199,7 +199,8 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, err = bpf_fd_reuseport_array_update_elem(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK) { + map->map_type == BPF_MAP_TYPE_STACK || + map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { err = map->ops->map_push_elem(map, value, flags); } else { rcu_read_lock(); @@ -238,7 +239,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { err = bpf_fd_reuseport_array_lookup_elem(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK) { + map->map_type == BPF_MAP_TYPE_STACK || + map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { err = 
map->ops->map_peek_elem(map, value); } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { /* struct_ops map requires directly updating "value" */ @@ -348,6 +350,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) map->max_entries = attr->max_entries; map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); map->numa_node = bpf_map_attr_numa_node(attr); + map->map_extra = attr->map_extra; } static int bpf_map_alloc_id(struct bpf_map *map) @@ -555,6 +558,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "value_size:\t%u\n" "max_entries:\t%u\n" "map_flags:\t%#x\n" + "map_extra:\t%#llx\n" "memlock:\t%lu\n" "map_id:\t%u\n" "frozen:\t%u\n", @@ -563,6 +567,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->value_size, map->max_entries, map->map_flags, + (unsigned long long)map->map_extra, bpf_map_memory_footprint(map), map->id, READ_ONCE(map->frozen)); @@ -812,7 +817,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, return ret; } -#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id +#define BPF_MAP_CREATE_LAST_FIELD map_extra /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -833,6 +838,10 @@ static int map_create(union bpf_attr *attr) return -EINVAL; } + if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && + attr->map_extra != 0) + return -EINVAL; + f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) return f_flags; @@ -1082,6 +1091,14 @@ static int map_lookup_elem(union bpf_attr *attr) if (!value) goto free_key; + if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { + if (copy_from_user(value, uvalue, value_size)) + err = -EFAULT; + else + err = bpf_map_copy_value(map, key, value, attr->flags); + goto free_value; + } + err = bpf_map_copy_value(map, key, value, attr->flags); if (err) goto free_value; @@ -1807,8 +1824,14 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) return 0; } +struct 
bpf_prog_kstats { + u64 nsecs; + u64 cnt; + u64 misses; +}; + static void bpf_prog_get_stats(const struct bpf_prog *prog, - struct bpf_prog_stats *stats) + struct bpf_prog_kstats *stats) { u64 nsecs = 0, cnt = 0, misses = 0; int cpu; @@ -1821,9 +1844,9 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, st = per_cpu_ptr(prog->stats, cpu); do { start = u64_stats_fetch_begin_irq(&st->syncp); - tnsecs = st->nsecs; - tcnt = st->cnt; - tmisses = st->misses; + tnsecs = u64_stats_read(&st->nsecs); + tcnt = u64_stats_read(&st->cnt); + tmisses = u64_stats_read(&st->misses); } while (u64_stats_fetch_retry_irq(&st->syncp, start)); nsecs += tnsecs; cnt += tcnt; @@ -1839,7 +1862,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_prog *prog = filp->private_data; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - struct bpf_prog_stats stats; + struct bpf_prog_kstats stats; bpf_prog_get_stats(prog, &stats); bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); @@ -1851,7 +1874,8 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) "prog_id:\t%u\n" "run_time_ns:\t%llu\n" "run_cnt:\t%llu\n" - "recursion_misses:\t%llu\n", + "recursion_misses:\t%llu\n" + "verified_insns:\t%u\n", prog->type, prog->jited, prog_tag, @@ -1859,7 +1883,8 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) prog->aux->id, stats.nsecs, stats.cnt, - stats.misses); + stats.misses, + prog->aux->verified_insns); } #endif @@ -3578,7 +3603,7 @@ static int bpf_prog_get_info_by_fd(struct file *file, struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); struct bpf_prog_info info; u32 info_len = attr->info.info_len; - struct bpf_prog_stats stats; + struct bpf_prog_kstats stats; char __user *uinsns; u32 ulen; int err; @@ -3628,6 +3653,8 @@ static int bpf_prog_get_info_by_fd(struct file *file, info.run_cnt = stats.cnt; info.recursion_misses = stats.misses; + info.verified_insns = prog->aux->verified_insns; + if 
(!bpf_capable()) { info.jited_prog_len = 0; info.xlated_prog_len = 0; @@ -3874,6 +3901,7 @@ static int bpf_map_get_info_by_fd(struct file *file, info.value_size = map->value_size; info.max_entries = map->max_entries; info.map_flags = map->map_flags; + info.map_extra = map->map_extra; memcpy(info.name, map->name, sizeof(map->name)); if (map->btf) { @@ -4756,6 +4784,31 @@ static const struct bpf_func_proto bpf_sys_close_proto = { .arg1_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res) +{ + if (flags) + return -EINVAL; + + if (name_sz <= 1 || name[name_sz - 1]) + return -EINVAL; + + if (!bpf_dump_raw_ok(current_cred())) + return -EPERM; + + *res = kallsyms_lookup_name(name); + return *res ? 0 : -ENOENT; +} + +const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { + .func = bpf_kallsyms_lookup_name, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +}; + static const struct bpf_func_proto * syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -4766,6 +4819,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_btf_find_by_name_kind_proto; case BPF_FUNC_sys_close: return &bpf_sys_close_proto; + case BPF_FUNC_kallsyms_lookup_name: + return &bpf_kallsyms_lookup_name_proto; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 39eaaff81953..e98de5e73ba5 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -545,7 +545,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog) stats = this_cpu_ptr(prog->stats); u64_stats_update_begin(&stats->syncp); - stats->misses++; + u64_stats_inc(&stats->misses); u64_stats_update_end(&stats->syncp); } @@ -586,11 +586,13 @@ static void notrace update_prog_stats(struct bpf_prog 
*prog, * Hence check that 'start' is valid. */ start > NO_START_TIME) { + unsigned long flags; + stats = this_cpu_ptr(prog->stats); - u64_stats_update_begin(&stats->syncp); - stats->cnt++; - stats->nsecs += sched_clock() - start; - u64_stats_update_end(&stats->syncp); + flags = u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, sched_clock() - start); + u64_stats_update_end_irqrestore(&stats->syncp, flags); } } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1433752db740..a4b48bd4e3ca 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1420,12 +1420,12 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) static bool __reg64_bound_s32(s64 a) { - return a > S32_MIN && a < S32_MAX; + return a >= S32_MIN && a <= S32_MAX; } static bool __reg64_bound_u32(u64 a) { - return a > U32_MIN && a < U32_MAX; + return a >= U32_MIN && a <= U32_MAX; } static void __reg_combine_64_into_32(struct bpf_reg_state *reg) @@ -1640,52 +1640,168 @@ static int add_subprog(struct bpf_verifier_env *env, int off) return env->subprog_cnt - 1; } +#define MAX_KFUNC_DESCS 256 +#define MAX_KFUNC_BTFS 256 + struct bpf_kfunc_desc { struct btf_func_model func_model; u32 func_id; s32 imm; + u16 offset; +}; + +struct bpf_kfunc_btf { + struct btf *btf; + struct module *module; + u16 offset; }; -#define MAX_KFUNC_DESCS 256 struct bpf_kfunc_desc_tab { struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; u32 nr_descs; }; -static int kfunc_desc_cmp_by_id(const void *a, const void *b) +struct bpf_kfunc_btf_tab { + struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS]; + u32 nr_descs; +}; + +static int kfunc_desc_cmp_by_id_off(const void *a, const void *b) { const struct bpf_kfunc_desc *d0 = a; const struct bpf_kfunc_desc *d1 = b; /* func_id is not greater than BTF_MAX_TYPE */ - return d0->func_id - d1->func_id; + return d0->func_id - d1->func_id ?: d0->offset - d1->offset; +} + +static int kfunc_btf_cmp_by_off(const void *a, 
const void *b) +{ + const struct bpf_kfunc_btf *d0 = a; + const struct bpf_kfunc_btf *d1 = b; + + return d0->offset - d1->offset; } static const struct bpf_kfunc_desc * -find_kfunc_desc(const struct bpf_prog *prog, u32 func_id) +find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) { struct bpf_kfunc_desc desc = { .func_id = func_id, + .offset = offset, }; struct bpf_kfunc_desc_tab *tab; tab = prog->aux->kfunc_tab; return bsearch(&desc, tab->descs, tab->nr_descs, - sizeof(tab->descs[0]), kfunc_desc_cmp_by_id); + sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); } -static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) +static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, + s16 offset, struct module **btf_modp) +{ + struct bpf_kfunc_btf kf_btf = { .offset = offset }; + struct bpf_kfunc_btf_tab *tab; + struct bpf_kfunc_btf *b; + struct module *mod; + struct btf *btf; + int btf_fd; + + tab = env->prog->aux->kfunc_btf_tab; + b = bsearch(&kf_btf, tab->descs, tab->nr_descs, + sizeof(tab->descs[0]), kfunc_btf_cmp_by_off); + if (!b) { + if (tab->nr_descs == MAX_KFUNC_BTFS) { + verbose(env, "too many different module BTFs\n"); + return ERR_PTR(-E2BIG); + } + + if (bpfptr_is_null(env->fd_array)) { + verbose(env, "kfunc offset > 0 without fd_array is invalid\n"); + return ERR_PTR(-EPROTO); + } + + if (copy_from_bpfptr_offset(&btf_fd, env->fd_array, + offset * sizeof(btf_fd), + sizeof(btf_fd))) + return ERR_PTR(-EFAULT); + + btf = btf_get_by_fd(btf_fd); + if (IS_ERR(btf)) { + verbose(env, "invalid module BTF fd specified\n"); + return btf; + } + + if (!btf_is_module(btf)) { + verbose(env, "BTF fd for kfunc is not a module BTF\n"); + btf_put(btf); + return ERR_PTR(-EINVAL); + } + + mod = btf_try_get_module(btf); + if (!mod) { + btf_put(btf); + return ERR_PTR(-ENXIO); + } + + b = &tab->descs[tab->nr_descs++]; + b->btf = btf; + b->module = mod; + b->offset = offset; + + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + 
kfunc_btf_cmp_by_off, NULL); + } + if (btf_modp) + *btf_modp = b->module; + return b->btf; +} + +void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) +{ + if (!tab) + return; + + while (tab->nr_descs--) { + module_put(tab->descs[tab->nr_descs].module); + btf_put(tab->descs[tab->nr_descs].btf); + } + kfree(tab); +} + +static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, + u32 func_id, s16 offset, + struct module **btf_modp) +{ + if (offset) { + if (offset < 0) { + /* In the future, this can be allowed to increase limit + * of fd index into fd_array, interpreted as u16. + */ + verbose(env, "negative offset disallowed for kernel module function call\n"); + return ERR_PTR(-EINVAL); + } + + return __find_kfunc_desc_btf(env, offset, btf_modp); + } + return btf_vmlinux ?: ERR_PTR(-ENOENT); +} + +static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) { const struct btf_type *func, *func_proto; + struct bpf_kfunc_btf_tab *btf_tab; struct bpf_kfunc_desc_tab *tab; struct bpf_prog_aux *prog_aux; struct bpf_kfunc_desc *desc; const char *func_name; + struct btf *desc_btf; unsigned long addr; int err; prog_aux = env->prog->aux; tab = prog_aux->kfunc_tab; + btf_tab = prog_aux->kfunc_btf_tab; if (!tab) { if (!btf_vmlinux) { verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); @@ -1713,7 +1829,29 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) prog_aux->kfunc_tab = tab; } - if (find_kfunc_desc(env->prog, func_id)) + /* func_id == 0 is always invalid, but instead of returning an error, be + * conservative and wait until the code elimination pass before returning + * error, so that invalid calls that get pruned out can be in BPF programs + * loaded from userspace. It is also required that offset be untouched + * for such calls. 
+ */ + if (!func_id && !offset) + return 0; + + if (!btf_tab && offset) { + btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL); + if (!btf_tab) + return -ENOMEM; + prog_aux->kfunc_btf_tab = btf_tab; + } + + desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL); + if (IS_ERR(desc_btf)) { + verbose(env, "failed to find BTF for kernel function\n"); + return PTR_ERR(desc_btf); + } + + if (find_kfunc_desc(env->prog, func_id, offset)) return 0; if (tab->nr_descs == MAX_KFUNC_DESCS) { @@ -1721,20 +1859,20 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) return -E2BIG; } - func = btf_type_by_id(btf_vmlinux, func_id); + func = btf_type_by_id(desc_btf, func_id); if (!func || !btf_type_is_func(func)) { verbose(env, "kernel btf_id %u is not a function\n", func_id); return -EINVAL; } - func_proto = btf_type_by_id(btf_vmlinux, func->type); + func_proto = btf_type_by_id(desc_btf, func->type); if (!func_proto || !btf_type_is_func_proto(func_proto)) { verbose(env, "kernel function btf_id %u does not have a valid func_proto\n", func_id); return -EINVAL; } - func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + func_name = btf_name_by_offset(desc_btf, func->name_off); addr = kallsyms_lookup_name(func_name); if (!addr) { verbose(env, "cannot find address for kernel function %s\n", @@ -1745,12 +1883,13 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; desc->imm = BPF_CALL_IMM(addr); - err = btf_distill_func_proto(&env->log, btf_vmlinux, + desc->offset = offset; + err = btf_distill_func_proto(&env->log, desc_btf, func_proto, func_name, &desc->func_model); if (!err) sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), - kfunc_desc_cmp_by_id, NULL); + kfunc_desc_cmp_by_id_off, NULL); return err; } @@ -1829,7 +1968,7 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) } else if (bpf_pseudo_call(insn)) { ret = add_subprog(env, i + insn->imm + 1); } else { - 
ret = add_kfunc_call(env, insn->imm); + ret = add_kfunc_call(env, insn->imm, insn->off); } if (ret < 0) @@ -2166,12 +2305,17 @@ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) { const struct btf_type *func; + struct btf *desc_btf; if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - func = btf_type_by_id(btf_vmlinux, insn->imm); - return btf_name_by_offset(btf_vmlinux, func->name_off); + desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL); + if (IS_ERR(desc_btf)) + return "<error>"; + + func = btf_type_by_id(desc_btf, insn->imm); + return btf_name_by_offset(desc_btf, func->name_off); } /* For given verifier state backtrack_insn() is called from the last insn to @@ -4858,7 +5002,10 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env, return -EINVAL; } break; - + case BPF_MAP_TYPE_BLOOM_FILTER: + if (meta->func_id == BPF_FUNC_map_peek_elem) + *arg_type = ARG_PTR_TO_MAP_VALUE; + break; default: break; } @@ -5433,6 +5580,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_task_storage_delete) goto error; break; + case BPF_MAP_TYPE_BLOOM_FILTER: + if (func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_map_push_elem) + goto error; + break; default: break; } @@ -5500,13 +5652,18 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_SOCKHASH) goto error; break; - case BPF_FUNC_map_peek_elem: case BPF_FUNC_map_pop_elem: - case BPF_FUNC_map_push_elem: if (map->map_type != BPF_MAP_TYPE_QUEUE && map->map_type != BPF_MAP_TYPE_STACK) goto error; break; + case BPF_FUNC_map_peek_elem: + case BPF_FUNC_map_push_elem: + if (map->map_type != BPF_MAP_TYPE_QUEUE && + map->map_type != BPF_MAP_TYPE_STACK && + map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) + goto error; + break; case BPF_FUNC_sk_storage_get: case BPF_FUNC_sk_storage_delete: if (map->map_type != 
BPF_MAP_TYPE_SK_STORAGE) @@ -6530,23 +6687,33 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; + struct module *btf_mod = NULL; const struct btf_param *args; + struct btf *desc_btf; int err; + /* skip for now, but return error when we find this in fixup_kfunc_call */ + if (!insn->imm) + return 0; + + desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); + if (IS_ERR(desc_btf)) + return PTR_ERR(desc_btf); + func_id = insn->imm; - func = btf_type_by_id(btf_vmlinux, func_id); - func_name = btf_name_by_offset(btf_vmlinux, func->name_off); - func_proto = btf_type_by_id(btf_vmlinux, func->type); + func = btf_type_by_id(desc_btf, func_id); + func_name = btf_name_by_offset(desc_btf, func->name_off); + func_proto = btf_type_by_id(desc_btf, func->type); if (!env->ops->check_kfunc_call || - !env->ops->check_kfunc_call(func_id)) { + !env->ops->check_kfunc_call(func_id, btf_mod)) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs); + err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); if (err) return err; @@ -6554,15 +6721,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ - t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL); + t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); } else if (btf_type_is_ptr(t)) { - ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type, + ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); if (!btf_type_is_struct(ptr_type)) { - ptr_type_name = btf_name_by_offset(btf_vmlinux, + 
ptr_type_name = btf_name_by_offset(desc_btf, ptr_type->name_off); verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", func_name, btf_type_str(ptr_type), @@ -6570,7 +6737,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); @@ -6581,7 +6748,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) for (i = 0; i < nargs; i++) { u32 regno = i + 1; - t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL); + t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL); if (btf_type_is_ptr(t)) mark_btf_func_reg_size(env, regno, sizeof(void *)); else @@ -11121,7 +11288,8 @@ static int do_check(struct bpf_verifier_env *env) env->jmps_processed++; if (opcode == BPF_CALL) { if (BPF_SRC(insn->code) != BPF_K || - insn->off != 0 || + (insn->src_reg != BPF_PSEUDO_KFUNC_CALL + && insn->off != 0) || (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL && insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || @@ -12477,6 +12645,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; + func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab; func[i]->aux->linfo = prog->aux->linfo; func[i]->aux->nr_linfo = prog->aux->nr_linfo; func[i]->aux->jited_linfo = prog->aux->jited_linfo; @@ -12662,10 +12831,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, { const struct bpf_kfunc_desc *desc; + if (!insn->imm) { + verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); + return -EINVAL; + } + /* insn->imm has the btf func_id. 
Replace it with * an address (relative to __bpf_base_call). */ - desc = find_kfunc_desc(env->prog, insn->imm); + desc = find_kfunc_desc(env->prog, insn->imm, insn->off); if (!desc) { verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n", insn->imm); @@ -12946,7 +13120,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_map_push_elem || insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem || - insn->imm == BPF_FUNC_redirect_map)) { + insn->imm == BPF_FUNC_redirect_map || + insn->imm == BPF_FUNC_for_each_map_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -12990,6 +13165,11 @@ static int do_misc_fixups(struct bpf_verifier_env *env) (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_redirect, (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, + (int (*)(struct bpf_map *map, + bpf_callback_t callback_fn, + void *callback_ctx, + u64 flags))NULL)); patch_map_ops_generic: switch (insn->imm) { @@ -13014,6 +13194,9 @@ patch_map_ops_generic: case BPF_FUNC_redirect_map: insn->imm = BPF_CALL_IMM(ops->map_redirect); continue; + case BPF_FUNC_for_each_map_elem: + insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); + continue; } goto patch_call_imm; @@ -13863,6 +14046,7 @@ skip_full_check: env->verification_time = ktime_get_ns() - start_time; print_verification_stats(env); + env->prog->aux->verified_insns = env->insn_processed; if (log->level && bpf_verifier_log_full(log)) ret = -ENOSPC; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6b3153841a33..7396488793ff 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1608,6 +1608,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_to_tcp_request_sock_proto; case BPF_FUNC_skc_to_udp6_sock: return 
&bpf_skc_to_udp6_sock_proto; + case BPF_FUNC_skc_to_unix_sock: + return &bpf_skc_to_unix_sock_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_tracing_proto; case BPF_FUNC_sk_storage_delete: @@ -1644,13 +1646,7 @@ static bool raw_tp_prog_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) - return false; - if (type != BPF_READ) - return false; - if (off % size != 0) - return false; - return true; + return bpf_tracing_ctx_access(off, size, type); } static bool tracing_prog_is_valid_access(int off, int size, @@ -1658,13 +1654,7 @@ static bool tracing_prog_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) - return false; - if (type != BPF_READ) - return false; - if (off % size != 0) - return false; - return btf_ctx_access(off, size, type, prog, info); + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); } int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog, diff --git a/lib/test_bpf.c b/lib/test_bpf.c index b9fc330fc83b..adae39567264 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -2134,7 +2134,7 @@ static int bpf_fill_atomic32_cmpxchg_reg_pairs(struct bpf_test *self) * of the immediate value. This is often the case if the native instruction * immediate field width is narrower than 32 bits. */ -static int bpf_fill_ld_imm64(struct bpf_test *self) +static int bpf_fill_ld_imm64_magn(struct bpf_test *self) { int block = 64; /* Increase for more tests per MSB position */ int len = 3 + 8 * 63 * block * 2; @@ -2181,6 +2181,88 @@ static int bpf_fill_ld_imm64(struct bpf_test *self) } /* + * Test the two-instruction 64-bit immediate load operation for different + * combinations of bytes. Each byte in the 64-bit word is constructed as + * (base & mask) | (rand() & ~mask), where rand() is a deterministic LCG. 
+ * All patterns (base1, mask1) and (base2, mask2) bytes are tested. + */ +static int __bpf_fill_ld_imm64_bytes(struct bpf_test *self, + u8 base1, u8 mask1, + u8 base2, u8 mask2) +{ + struct bpf_insn *insn; + int len = 3 + 8 * BIT(8); + int pattern, index; + u32 rand = 1; + int i = 0; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 0); + + for (pattern = 0; pattern < BIT(8); pattern++) { + u64 imm = 0; + + for (index = 0; index < 8; index++) { + int byte; + + if (pattern & BIT(index)) + byte = (base1 & mask1) | (rand & ~mask1); + else + byte = (base2 & mask2) | (rand & ~mask2); + imm = (imm << 8) | byte; + } + + /* Update our LCG */ + rand = rand * 1664525 + 1013904223; + + /* Perform operation */ + i += __bpf_ld_imm64(&insn[i], R1, imm); + + /* Load reference */ + insn[i++] = BPF_ALU32_IMM(BPF_MOV, R2, imm); + insn[i++] = BPF_ALU32_IMM(BPF_MOV, R3, (u32)(imm >> 32)); + insn[i++] = BPF_ALU64_IMM(BPF_LSH, R3, 32); + insn[i++] = BPF_ALU64_REG(BPF_OR, R2, R3); + + /* Check result */ + insn[i++] = BPF_JMP_REG(BPF_JEQ, R1, R2, 1); + insn[i++] = BPF_EXIT_INSN(); + } + + insn[i++] = BPF_ALU64_IMM(BPF_MOV, R0, 1); + insn[i++] = BPF_EXIT_INSN(); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + BUG_ON(i != len); + + return 0; +} + +static int bpf_fill_ld_imm64_checker(struct bpf_test *self) +{ + return __bpf_fill_ld_imm64_bytes(self, 0, 0xff, 0xff, 0xff); +} + +static int bpf_fill_ld_imm64_pos_neg(struct bpf_test *self) +{ + return __bpf_fill_ld_imm64_bytes(self, 1, 0x81, 0x80, 0x80); +} + +static int bpf_fill_ld_imm64_pos_zero(struct bpf_test *self) +{ + return __bpf_fill_ld_imm64_bytes(self, 1, 0x81, 0, 0xff); +} + +static int bpf_fill_ld_imm64_neg_zero(struct bpf_test *self) +{ + return __bpf_fill_ld_imm64_bytes(self, 0x80, 0x80, 0, 0xff); +} + +/* * Exhaustive tests of JMP operations for all combinations of power-of-two * magnitudes of the operands, both for positive and negative 
values. The * test is designed to verify e.g. the JMP and JMP32 operations for JITs that @@ -12401,14 +12483,46 @@ static struct bpf_test tests[] = { .fill_helper = bpf_fill_alu32_mod_reg, .nr_testruns = NR_PATTERN_RUNS, }, - /* LD_IMM64 immediate magnitudes */ + /* LD_IMM64 immediate magnitudes and byte patterns */ { "LD_IMM64: all immediate value magnitudes", { }, INTERNAL | FLAG_NO_DATA, { }, { { 0, 1 } }, - .fill_helper = bpf_fill_ld_imm64, + .fill_helper = bpf_fill_ld_imm64_magn, + }, + { + "LD_IMM64: checker byte patterns", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_ld_imm64_checker, + }, + { + "LD_IMM64: random positive and zero byte patterns", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_ld_imm64_pos_zero, + }, + { + "LD_IMM64: random negative and zero byte patterns", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_ld_imm64_neg_zero, + }, + { + "LD_IMM64: random positive and negative byte patterns", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_ld_imm64_pos_neg, }, /* 64-bit ATOMIC register combinations */ { @@ -14202,72 +14316,9 @@ module_param_string(test_name, test_name, sizeof(test_name), 0); static int test_id = -1; module_param(test_id, int, 0); -static int test_range[2] = { 0, ARRAY_SIZE(tests) - 1 }; +static int test_range[2] = { 0, INT_MAX }; module_param_array(test_range, int, NULL, 0); -static __init int find_test_index(const char *test_name) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(tests); i++) { - if (!strcmp(tests[i].descr, test_name)) - return i; - } - return -1; -} - -static __init int prepare_bpf_tests(void) -{ - if (test_id >= 0) { - /* - * if a test_id was specified, use test_range to - * cover only that test. 
- */ - if (test_id >= ARRAY_SIZE(tests)) { - pr_err("test_bpf: invalid test_id specified.\n"); - return -EINVAL; - } - - test_range[0] = test_id; - test_range[1] = test_id; - } else if (*test_name) { - /* - * if a test_name was specified, find it and setup - * test_range to cover only that test. - */ - int idx = find_test_index(test_name); - - if (idx < 0) { - pr_err("test_bpf: no test named '%s' found.\n", - test_name); - return -EINVAL; - } - test_range[0] = idx; - test_range[1] = idx; - } else { - /* - * check that the supplied test_range is valid. - */ - if (test_range[0] >= ARRAY_SIZE(tests) || - test_range[1] >= ARRAY_SIZE(tests) || - test_range[0] < 0 || test_range[1] < 0) { - pr_err("test_bpf: test_range is out of bound.\n"); - return -EINVAL; - } - - if (test_range[1] < test_range[0]) { - pr_err("test_bpf: test_range is ending before it starts.\n"); - return -EINVAL; - } - } - - return 0; -} - -static __init void destroy_bpf_tests(void) -{ -} - static bool exclude_test(int test_id) { return test_id < test_range[0] || test_id > test_range[1]; @@ -14439,6 +14490,10 @@ static __init int test_skb_segment(void) for (i = 0; i < ARRAY_SIZE(skb_segment_tests); i++) { const struct skb_segment_test *test = &skb_segment_tests[i]; + cond_resched(); + if (exclude_test(i)) + continue; + pr_info("#%d %s ", i, test->descr); if (test_skb_segment_single(test)) { @@ -14820,6 +14875,8 @@ static __init int test_tail_calls(struct bpf_array *progs) int ret; cond_resched(); + if (exclude_test(i)) + continue; pr_info("#%d %s ", i, test->descr); if (!fp) { @@ -14852,29 +14909,144 @@ static __init int test_tail_calls(struct bpf_array *progs) return err_cnt ? 
-EINVAL : 0; } +static char test_suite[32]; +module_param_string(test_suite, test_suite, sizeof(test_suite), 0); + +static __init int find_test_index(const char *test_name) +{ + int i; + + if (!strcmp(test_suite, "test_bpf")) { + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!strcmp(tests[i].descr, test_name)) + return i; + } + } + + if (!strcmp(test_suite, "test_tail_calls")) { + for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) { + if (!strcmp(tail_call_tests[i].descr, test_name)) + return i; + } + } + + if (!strcmp(test_suite, "test_skb_segment")) { + for (i = 0; i < ARRAY_SIZE(skb_segment_tests); i++) { + if (!strcmp(skb_segment_tests[i].descr, test_name)) + return i; + } + } + + return -1; +} + +static __init int prepare_test_range(void) +{ + int valid_range; + + if (!strcmp(test_suite, "test_bpf")) + valid_range = ARRAY_SIZE(tests); + else if (!strcmp(test_suite, "test_tail_calls")) + valid_range = ARRAY_SIZE(tail_call_tests); + else if (!strcmp(test_suite, "test_skb_segment")) + valid_range = ARRAY_SIZE(skb_segment_tests); + else + return 0; + + if (test_id >= 0) { + /* + * if a test_id was specified, use test_range to + * cover only that test. + */ + if (test_id >= valid_range) { + pr_err("test_bpf: invalid test_id specified for '%s' suite.\n", + test_suite); + return -EINVAL; + } + + test_range[0] = test_id; + test_range[1] = test_id; + } else if (*test_name) { + /* + * if a test_name was specified, find it and setup + * test_range to cover only that test. + */ + int idx = find_test_index(test_name); + + if (idx < 0) { + pr_err("test_bpf: no test named '%s' found for '%s' suite.\n", + test_name, test_suite); + return -EINVAL; + } + test_range[0] = idx; + test_range[1] = idx; + } else if (test_range[0] != 0 || test_range[1] != INT_MAX) { + /* + * check that the supplied test_range is valid. 
+ */ + if (test_range[0] < 0 || test_range[1] >= valid_range) { + pr_err("test_bpf: test_range is out of bound for '%s' suite.\n", + test_suite); + return -EINVAL; + } + + if (test_range[1] < test_range[0]) { + pr_err("test_bpf: test_range is ending before it starts.\n"); + return -EINVAL; + } + } + + return 0; +} + static int __init test_bpf_init(void) { struct bpf_array *progs = NULL; int ret; - ret = prepare_bpf_tests(); + if (strlen(test_suite) && + strcmp(test_suite, "test_bpf") && + strcmp(test_suite, "test_tail_calls") && + strcmp(test_suite, "test_skb_segment")) { + pr_err("test_bpf: invalid test_suite '%s' specified.\n", test_suite); + return -EINVAL; + } + + /* + * if test_suite is not specified, but test_id, test_name or test_range + * is specified, set 'test_bpf' as the default test suite. + */ + if (!strlen(test_suite) && + (test_id != -1 || strlen(test_name) || + (test_range[0] != 0 || test_range[1] != INT_MAX))) { + pr_info("test_bpf: set 'test_bpf' as the default test_suite.\n"); + strscpy(test_suite, "test_bpf", sizeof(test_suite)); + } + + ret = prepare_test_range(); if (ret < 0) return ret; - ret = test_bpf(); - destroy_bpf_tests(); - if (ret) - return ret; + if (!strlen(test_suite) || !strcmp(test_suite, "test_bpf")) { + ret = test_bpf(); + if (ret) + return ret; + } - ret = prepare_tail_call_tests(&progs); - if (ret) - return ret; - ret = test_tail_calls(progs); - destroy_tail_call_tests(progs); - if (ret) - return ret; + if (!strlen(test_suite) || !strcmp(test_suite, "test_tail_calls")) { + ret = prepare_tail_call_tests(&progs); + if (ret) + return ret; + ret = test_tail_calls(progs); + destroy_tail_call_tests(progs); + if (ret) + return ret; + } + + if (!strlen(test_suite) || !strcmp(test_suite, "test_skb_segment")) + return test_skb_segment(); - return test_skb_segment(); + return 0; } static void __exit test_bpf_exit(void) diff --git a/net/bpf/Makefile b/net/bpf/Makefile index 1c0a98d8c28f..1ebe270bde23 100644 --- a/net/bpf/Makefile +++ 
b/net/bpf/Makefile @@ -1,2 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_BPF_SYSCALL) := test_run.o +ifeq ($(CONFIG_BPF_JIT),y) +obj-$(CONFIG_BPF_SYSCALL) += bpf_dummy_struct_ops.o +endif diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c new file mode 100644 index 000000000000..fbc896323bec --- /dev/null +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021. Huawei Technologies Co., Ltd + */ +#include <linux/kernel.h> +#include <linux/bpf_verifier.h> +#include <linux/bpf.h> +#include <linux/btf.h> + +extern struct bpf_struct_ops bpf_bpf_dummy_ops; + +/* A common type for test_N with return value in bpf_dummy_ops */ +typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...); + +struct bpf_dummy_ops_test_args { + u64 args[MAX_BPF_FUNC_ARGS]; + struct bpf_dummy_ops_state state; +}; + +static struct bpf_dummy_ops_test_args * +dummy_ops_init_args(const union bpf_attr *kattr, unsigned int nr) +{ + __u32 size_in; + struct bpf_dummy_ops_test_args *args; + void __user *ctx_in; + void __user *u_state; + + size_in = kattr->test.ctx_size_in; + if (size_in != sizeof(u64) * nr) + return ERR_PTR(-EINVAL); + + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return ERR_PTR(-ENOMEM); + + ctx_in = u64_to_user_ptr(kattr->test.ctx_in); + if (copy_from_user(args->args, ctx_in, size_in)) + goto out; + + /* args[0] is 0 means state argument of test_N will be NULL */ + u_state = u64_to_user_ptr(args->args[0]); + if (u_state && copy_from_user(&args->state, u_state, + sizeof(args->state))) + goto out; + + return args; +out: + kfree(args); + return ERR_PTR(-EFAULT); +} + +static int dummy_ops_copy_args(struct bpf_dummy_ops_test_args *args) +{ + void __user *u_state; + + u_state = u64_to_user_ptr(args->args[0]); + if (u_state && copy_to_user(u_state, &args->state, sizeof(args->state))) + return -EFAULT; + + return 0; +} + +static int dummy_ops_call_op(void 
*image, struct bpf_dummy_ops_test_args *args) +{ + dummy_ops_test_ret_fn test = (void *)image; + struct bpf_dummy_ops_state *state = NULL; + + /* state needs to be NULL if args[0] is 0 */ + if (args->args[0]) + state = &args->state; + return test(state, args->args[1], args->args[2], + args->args[3], args->args[4]); +} + +int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops; + const struct btf_type *func_proto; + struct bpf_dummy_ops_test_args *args; + struct bpf_tramp_progs *tprogs; + void *image = NULL; + unsigned int op_idx; + int prog_ret; + int err; + + if (prog->aux->attach_btf_id != st_ops->type_id) + return -EOPNOTSUPP; + + func_proto = prog->aux->attach_func_proto; + args = dummy_ops_init_args(kattr, btf_type_vlen(func_proto)); + if (IS_ERR(args)) + return PTR_ERR(args); + + tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); + if (!tprogs) { + err = -ENOMEM; + goto out; + } + + image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!image) { + err = -ENOMEM; + goto out; + } + set_vm_flush_reset_perms(image); + + op_idx = prog->expected_attach_type; + err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + &st_ops->func_models[op_idx], + image, image + PAGE_SIZE); + if (err < 0) + goto out; + + set_memory_ro((long)image, 1); + set_memory_x((long)image, 1); + prog_ret = dummy_ops_call_op(image, args); + + err = dummy_ops_copy_args(args); + if (err) + goto out; + if (put_user(prog_ret, &uattr->test.retval)) + err = -EFAULT; +out: + kfree(args); + bpf_jit_free_exec(image); + kfree(tprogs); + return err; +} + +static int bpf_dummy_init(struct btf *btf) +{ + return 0; +} + +static bool bpf_dummy_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); +} + +static int 
bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, + const struct btf *btf, + const struct btf_type *t, int off, + int size, enum bpf_access_type atype, + u32 *next_btf_id) +{ + const struct btf_type *state; + s32 type_id; + int err; + + type_id = btf_find_by_name_kind(btf, "bpf_dummy_ops_state", + BTF_KIND_STRUCT); + if (type_id < 0) + return -EINVAL; + + state = btf_type_by_id(btf, type_id); + if (t != state) { + bpf_log(log, "only access to bpf_dummy_ops_state is supported\n"); + return -EACCES; + } + + err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id); + if (err < 0) + return err; + + return atype == BPF_READ ? err : NOT_INIT; +} + +static const struct bpf_verifier_ops bpf_dummy_verifier_ops = { + .is_valid_access = bpf_dummy_ops_is_valid_access, + .btf_struct_access = bpf_dummy_ops_btf_struct_access, +}; + +static int bpf_dummy_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return -EOPNOTSUPP; +} + +static int bpf_dummy_reg(void *kdata) +{ + return -EOPNOTSUPP; +} + +static void bpf_dummy_unreg(void *kdata) +{ +} + +struct bpf_struct_ops bpf_bpf_dummy_ops = { + .verifier_ops = &bpf_dummy_verifier_ops, + .init = bpf_dummy_init, + .init_member = bpf_dummy_init_member, + .reg = bpf_dummy_reg, + .unreg = bpf_dummy_unreg, + .name = "bpf_dummy_ops", +}; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 072f0c16c779..46dd95755967 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -2,6 +2,7 @@ /* Copyright (c) 2017 Facebook */ #include <linux/bpf.h> +#include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/slab.h> #include <linux/vmalloc.h> @@ -241,9 +242,11 @@ BTF_ID(func, bpf_kfunc_call_test2) BTF_ID(func, bpf_kfunc_call_test3) BTF_SET_END(test_sk_kfunc_ids) -bool bpf_prog_test_check_kfunc_call(u32 kfunc_id) +bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner) { - return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id); + if 
(btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id)) + return true; + return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner); } static void *bpf_test_init(const union bpf_attr *kattr, u32 size, @@ -355,13 +358,9 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, return -EINVAL; if (ctx_size_in) { - info.ctx = kzalloc(ctx_size_in, GFP_USER); - if (!info.ctx) - return -ENOMEM; - if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) { - err = -EFAULT; - goto out; - } + info.ctx = memdup_user(ctx_in, ctx_size_in); + if (IS_ERR(info.ctx)) + return PTR_ERR(info.ctx); } else { info.ctx = NULL; } @@ -389,7 +388,6 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32))) err = -EFAULT; -out: kfree(info.ctx); return err; } @@ -1049,13 +1047,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog, return -EINVAL; if (ctx_size_in) { - ctx = kzalloc(ctx_size_in, GFP_USER); - if (!ctx) - return -ENOMEM; - if (copy_from_user(ctx, ctx_in, ctx_size_in)) { - err = -EFAULT; - goto out; - } + ctx = memdup_user(ctx_in, ctx_size_in); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); } rcu_read_lock_trace(); diff --git a/net/core/filter.c b/net/core/filter.c index 4bace37a6a44..8e8d3b49c297 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10723,6 +10723,26 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = { .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6], }; +BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk) +{ + /* unix_sock type is not generated in dwarf and hence btf, + * trigger an explicit type generation here. 
+ */ + BTF_TYPE_EMIT(struct unix_sock); + if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_skc_to_unix_sock_proto = { + .func = bpf_skc_to_unix_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX], +}; + BPF_CALL_1(bpf_sock_from_file, struct file *, file) { return (unsigned long)sock_from_file(file); @@ -10762,6 +10782,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_skc_to_udp6_sock: func = &bpf_skc_to_udp6_sock_proto; break; + case BPF_FUNC_skc_to_unix_sock: + func = &bpf_skc_to_unix_sock_proto; + break; default: return bpf_base_func_proto(func_id); } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index a86ef7e844f8..1ae52ac943f6 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -508,6 +508,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, } static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, + u32 off, u32 len, struct sk_psock *psock, struct sock *sk, struct sk_msg *msg) @@ -521,11 +522,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, */ if (skb_linearize(skb)) return -EAGAIN; - num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); + num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); if (unlikely(num_sge < 0)) return num_sge; - copied = skb->len; + copied = len; msg->sg.start = 0; msg->sg.size = copied; msg->sg.end = num_sge; @@ -536,9 +537,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, return copied; } -static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb); +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, + u32 off, u32 len); -static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) +static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb, + u32 off, 
u32 len) { struct sock *sk = psock->sk; struct sk_msg *msg; @@ -549,7 +552,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * correctly. */ if (unlikely(skb->sk == sk)) - return sk_psock_skb_ingress_self(psock, skb); + return sk_psock_skb_ingress_self(psock, skb, off, len); msg = sk_psock_create_ingress_msg(sk, skb); if (!msg) return -EAGAIN; @@ -561,7 +564,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * into user buffers. */ skb_set_owner_r(skb, sk); - err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) kfree(msg); return err; @@ -571,7 +574,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * skb. In this case we do not need to check memory limits or skb_set_owner_r * because the skb is already accounted for here. */ -static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb) +static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb, + u32 off, u32 len) { struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); struct sock *sk = psock->sk; @@ -581,7 +585,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb return -EAGAIN; sk_msg_init(msg); skb_set_owner_r(skb, sk); - err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg); if (err < 0) kfree(msg); return err; @@ -595,7 +599,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, return -EAGAIN; return skb_send_sock(psock->sk, skb, off, len); } - return sk_psock_skb_ingress(psock, skb); + return sk_psock_skb_ingress(psock, skb, off, len); } static void sk_psock_skb_state(struct sk_psock *psock, @@ -638,6 +642,12 @@ static void sk_psock_backlog(struct work_struct *work) while ((skb = skb_dequeue(&psock->ingress_skb))) { len = skb->len; off = 0; + 
if (skb_bpf_strparser(skb)) { + struct strp_msg *stm = strp_msg(skb); + + off = stm->offset; + len = stm->full_len; + } start: ingress = skb_bpf_ingress(skb); skb_bpf_redirect_clear(skb); @@ -877,6 +887,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) * return code, but then didn't set a redirect interface. */ if (unlikely(!sk_other)) { + skb_bpf_redirect_clear(skb); sock_drop(from->sk, skb); return -EIO; } @@ -944,6 +955,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, { struct sock *sk_other; int err = 0; + u32 len, off; switch (verdict) { case __SK_PASS: @@ -951,6 +963,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, sk_other = psock->sk; if (sock_flag(sk_other, SOCK_DEAD) || !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + skb_bpf_redirect_clear(skb); goto out_free; } @@ -963,7 +976,15 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, * retrying later from workqueue. 
*/ if (skb_queue_empty(&psock->ingress_skb)) { - err = sk_psock_skb_ingress_self(psock, skb); + len = skb->len; + off = 0; + if (skb_bpf_strparser(skb)) { + struct strp_msg *stm = strp_msg(skb); + + off = stm->offset; + len = stm->full_len; + } + err = sk_psock_skb_ingress_self(psock, skb, off, len); } if (err < 0) { spin_lock_bh(&psock->ingress_lock); @@ -1029,6 +1050,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) skb_dst_drop(skb); skb_bpf_redirect_clear(skb); ret = bpf_prog_run_pin_on_cpu(prog, skb); + if (ret == SK_PASS) + skb_bpf_set_strparser(skb); ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb)); skb->sk = NULL; } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 0dcee9df1326..2cf02b4d77fb 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -81,14 +81,7 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) - return false; - if (type != BPF_READ) - return false; - if (off % size != 0) - return false; - - if (!btf_ctx_access(off, size, type, prog, info)) + if (!bpf_tracing_btf_ctx_access(off, size, type, prog, info)) return false; if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id) @@ -223,41 +216,13 @@ BTF_ID(func, tcp_reno_cong_avoid) BTF_ID(func, tcp_reno_undo_cwnd) BTF_ID(func, tcp_slow_start) BTF_ID(func, tcp_cong_avoid_ai) -#ifdef CONFIG_X86 -#ifdef CONFIG_DYNAMIC_FTRACE -#if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC) -BTF_ID(func, cubictcp_init) -BTF_ID(func, cubictcp_recalc_ssthresh) -BTF_ID(func, cubictcp_cong_avoid) -BTF_ID(func, cubictcp_state) -BTF_ID(func, cubictcp_cwnd_event) -BTF_ID(func, cubictcp_acked) -#endif -#if IS_BUILTIN(CONFIG_TCP_CONG_DCTCP) -BTF_ID(func, dctcp_init) -BTF_ID(func, dctcp_update_alpha) -BTF_ID(func, dctcp_cwnd_event) -BTF_ID(func, dctcp_ssthresh) -BTF_ID(func, dctcp_cwnd_undo) -BTF_ID(func, dctcp_state) -#endif -#if 
IS_BUILTIN(CONFIG_TCP_CONG_BBR) -BTF_ID(func, bbr_init) -BTF_ID(func, bbr_main) -BTF_ID(func, bbr_sndbuf_expand) -BTF_ID(func, bbr_undo_cwnd) -BTF_ID(func, bbr_cwnd_event) -BTF_ID(func, bbr_ssthresh) -BTF_ID(func, bbr_min_tso_segs) -BTF_ID(func, bbr_set_state) -#endif -#endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_X86 */ BTF_SET_END(bpf_tcp_ca_kfunc_ids) -static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id) +static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner) { - return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id); + if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id)) + return true; + return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner); } static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 6274462b86b4..ec5550089b4d 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -56,6 +56,8 @@ * otherwise TCP stack falls back to an internal pacing using one high * resolution timer per TCP socket and may use more resources. 
*/ +#include <linux/btf.h> +#include <linux/btf_ids.h> #include <linux/module.h> #include <net/tcp.h> #include <linux/inet_diag.h> @@ -1152,14 +1154,38 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; +BTF_SET_START(tcp_bbr_kfunc_ids) +#ifdef CONFIG_X86 +#ifdef CONFIG_DYNAMIC_FTRACE +BTF_ID(func, bbr_init) +BTF_ID(func, bbr_main) +BTF_ID(func, bbr_sndbuf_expand) +BTF_ID(func, bbr_undo_cwnd) +BTF_ID(func, bbr_cwnd_event) +BTF_ID(func, bbr_ssthresh) +BTF_ID(func, bbr_min_tso_segs) +BTF_ID(func, bbr_set_state) +#endif +#endif +BTF_SET_END(tcp_bbr_kfunc_ids) + +static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set); + static int __init bbr_register(void) { + int ret; + BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); - return tcp_register_congestion_control(&tcp_bbr_cong_ops); + ret = tcp_register_congestion_control(&tcp_bbr_cong_ops); + if (ret) + return ret; + register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); + return 0; } static void __exit bbr_unregister(void) { + unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); tcp_unregister_congestion_control(&tcp_bbr_cong_ops); } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 4a30deaa9a37..5e9d9c51164c 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -25,6 +25,8 @@ */ #include <linux/mm.h> +#include <linux/btf.h> +#include <linux/btf_ids.h> #include <linux/module.h> #include <linux/math64.h> #include <net/tcp.h> @@ -482,8 +484,25 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; +BTF_SET_START(tcp_cubic_kfunc_ids) +#ifdef CONFIG_X86 +#ifdef CONFIG_DYNAMIC_FTRACE +BTF_ID(func, cubictcp_init) +BTF_ID(func, cubictcp_recalc_ssthresh) +BTF_ID(func, cubictcp_cong_avoid) +BTF_ID(func, cubictcp_state) +BTF_ID(func, cubictcp_cwnd_event) +BTF_ID(func, cubictcp_acked) +#endif +#endif +BTF_SET_END(tcp_cubic_kfunc_ids) + +static 
DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set); + static int __init cubictcp_register(void) { + int ret; + BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet @@ -514,11 +533,16 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); - return tcp_register_congestion_control(&cubictcp); + ret = tcp_register_congestion_control(&cubictcp); + if (ret) + return ret; + register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); + return 0; } static void __exit cubictcp_unregister(void) { + unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); tcp_unregister_congestion_control(&cubictcp); } diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 79f705450c16..0d7ab3cc7b61 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -36,6 +36,8 @@ * Glenn Judd <glenn.judd@morganstanley.com> */ +#include <linux/btf.h> +#include <linux/btf_ids.h> #include <linux/module.h> #include <linux/mm.h> #include <net/tcp.h> @@ -236,14 +238,36 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; +BTF_SET_START(tcp_dctcp_kfunc_ids) +#ifdef CONFIG_X86 +#ifdef CONFIG_DYNAMIC_FTRACE +BTF_ID(func, dctcp_init) +BTF_ID(func, dctcp_update_alpha) +BTF_ID(func, dctcp_cwnd_event) +BTF_ID(func, dctcp_ssthresh) +BTF_ID(func, dctcp_cwnd_undo) +BTF_ID(func, dctcp_state) +#endif +#endif +BTF_SET_END(tcp_dctcp_kfunc_ids) + +static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set); + static int __init dctcp_register(void) { + int ret; + BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE); - return tcp_register_congestion_control(&dctcp); + ret = tcp_register_congestion_control(&dctcp); + if (ret) + return ret; + register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); + return 0; } static void 
__exit dctcp_unregister(void) { + unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); tcp_unregister_congestion_control(&dctcp); } diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index fcba217f0ae2..0e7bfdbff80a 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -57,3 +57,7 @@ testfile.img hbm_out.log iperf.* *.out +*.skel.h +/vmlinux.h +/bpftool/ +/libbpf/ diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5fd48a8d4f10..a886dff1ba89 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -3,6 +3,8 @@ BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src)) TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools +pound := \# + # List of programs to build tprogs-y := test_lru_dist tprogs-y += sock_example @@ -59,7 +61,11 @@ tprogs-y += xdp_redirect tprogs-y += xdp_monitor # Libbpf dependencies -LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a +LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf +LIBBPF_OUTPUT = $(abspath $(BPF_SAMPLES_PATH))/libbpf +LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include +LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o @@ -198,7 +204,7 @@ TPROGS_CFLAGS += -Wstrict-prototypes TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ -TPROGS_CFLAGS += -I$(srctree)/tools/lib/ +TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) TPROGS_CFLAGS += -I$(srctree)/tools/include TPROGS_CFLAGS += -I$(srctree)/tools/perf TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0 @@ -223,6 +229,7 @@ CLANG ?= clang OPT ?= opt LLVM_DIS ?= llvm-dis LLVM_OBJCOPY ?= llvm-objcopy +LLVM_READELF ?= llvm-readelf BTF_PAHOLE ?= pahole # Detect that we're cross compiling and use the cross compiler @@ -232,7 +239,7 @@ endif # Don't evaluate probes and warnings if we need to run make recursively ifneq ($(src),) -HDR_PROBE := $(shell printf "\#include <linux/types.h>\n struct 
list_head { int a; }; int main() { return 0; }" | \ +HDR_PROBE := $(shell printf "$(pound)include <linux/types.h>\n struct list_head { int a; }; int main() { return 0; }" | \ $(CC) $(TPROGS_CFLAGS) $(TPROGS_LDFLAGS) -x c - \ -o /dev/null 2>/dev/null && echo okay) @@ -246,7 +253,7 @@ BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ - readelf -S ./llvm_btf_verify.o | grep BTF; \ + $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \ /bin/rm -f ./llvm_btf_verify.o) BPF_EXTRA_CFLAGS += -fno-stack-protector @@ -268,16 +275,27 @@ all: clean: $(MAKE) -C ../../ M=$(CURDIR) clean @find $(CURDIR) -type f -name '*~' -delete + @$(RM) -r $(CURDIR)/libbpf $(CURDIR)/bpftool -$(LIBBPF): FORCE +$(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) # Fix up variables inherited from Kbuild that tools/ build system won't like - $(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ - LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O= + $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ + LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ \ + O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $@ install_headers BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool -BPFTOOL := $(BPFTOOLDIR)/bpftool -$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) - $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ +BPFTOOL_OUTPUT := $(abspath $(BPF_SAMPLES_PATH))/bpftool +BPFTOOL := $(BPFTOOL_OUTPUT)/bpftool +$(BPFTOOL): $(LIBBPF) $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT) + $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ \ + OUTPUT=$(BPFTOOL_OUTPUT)/ \ + LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \ + 
LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ + +$(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $@ $(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE $(call filechk,offsets,__SYSCALL_NRS_H__) @@ -309,6 +327,11 @@ verify_target_bpf: verify_cmds $(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) $(src)/*.c: verify_target_bpf $(LIBBPF) +libbpf_hdrs: $(LIBBPF) +$(obj)/$(TRACE_HELPERS): | libbpf_hdrs + +.PHONY: libbpf_hdrs + $(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h $(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h $(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h @@ -366,7 +389,7 @@ $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/x $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \ -Wno-compare-distinct-pointer-types -I$(srctree)/include \ -I$(srctree)/samples/bpf -I$(srctree)/tools/include \ - -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \ + -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ -c $(filter %.bpf.c,$^) -o $@ LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \ @@ -403,7 +426,7 @@ $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ - -I$(srctree)/tools/lib/ \ + -I$(LIBBPF_INCLUDE) \ -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 116e39f6b666..8675fa5273df 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -128,7 +128,7 @@ int main(int argc, char **argv) if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; - map = bpf_map__next(NULL, obj); + map = bpf_object__next_map(obj, NULL); if (!map) { printf("finding a map in obj file failed\n"); return 1; diff --git 
a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 6e25fba64c72..d84e6949007c 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -325,7 +325,6 @@ int main(int argc, char **argv) int add_cpu = -1; int ifindex = -1; int *cpu, i, opt; - char *ifname; __u32 qsize; int n_cpus; @@ -393,9 +392,8 @@ int main(int argc, char **argv) fprintf(stderr, "-d/--dev name too long\n"); goto end_cpu; } - ifname = (char *)&ifname_buf; - safe_strncpy(ifname, optarg, sizeof(ifname)); - ifindex = if_nametoindex(ifname); + safe_strncpy(ifname_buf, optarg, strlen(ifname_buf)); + ifindex = if_nametoindex(ifname_buf); if (!ifindex) ifindex = strtoul(optarg, NULL, 0); if (!ifindex) { diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 495e09897bd3..f4382ccdcbb1 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -154,7 +154,7 @@ int main(int argc, char **argv) return 1; } - map = bpf_map__next(NULL, obj); + map = bpf_object__next_map(obj, NULL); if (!map) { printf("finding a map in obj file failed\n"); return 1; diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h index 0cc9816fe8e8..417e48a4c4df 100644 --- a/samples/seccomp/bpf-helper.h +++ b/samples/seccomp/bpf-helper.h @@ -62,9 +62,9 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count); #define EXPAND(...) __VA_ARGS__ /* Ensure that we load the logically correct offset. 
*/ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) #else #error "Unknown endianness" @@ -85,10 +85,10 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count); #elif __BITS_PER_LONG == 64 /* Ensure that we load the logically correct offset. */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define ENDIAN(_lo, _hi) _lo, _hi #define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define ENDIAN(_lo, _hi) _hi, _lo #define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) #endif diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index ff805777431c..7f39599e9fae 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -40,7 +40,8 @@ quiet_cmd_ld_ko_o = LD [M] $@ quiet_cmd_btf_ko = BTF [M] $@ cmd_btf_ko = \ if [ -f vmlinux ]; then \ - LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \ + LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J $(PAHOLE_FLAGS) --btf_base vmlinux $@; \ + $(RESOLVE_BTFIDS) -b vmlinux $@; \ else \ printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \ fi; diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 00ac7b79cddb..a6403ddf5de7 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -537,6 +537,7 @@ class PrinterHelpers(Printer): 'struct tcp_timewait_sock', 'struct tcp_request_sock', 'struct udp6_sock', + 'struct unix_sock', 'struct task_struct', 'struct __sk_buff', @@ -589,6 +590,7 @@ class PrinterHelpers(Printer): 'struct tcp_timewait_sock', 'struct tcp_request_sock', 'struct udp6_sock', + 'struct unix_sock', 'struct task_struct', 'struct path', 'struct 
btf_ptr', diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index d74cee5c4326..3ea7cece7c97 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -205,7 +205,6 @@ vmlinux_link() gen_btf() { local pahole_ver - local extra_paholeopt= if ! [ -x "$(command -v ${PAHOLE})" ]; then echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available" @@ -220,16 +219,8 @@ gen_btf() vmlinux_link ${1} - if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then - # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars - extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars" - fi - if [ "${pahole_ver}" -ge "121" ]; then - extra_paholeopt="${extra_paholeopt} --btf_gen_floats" - fi - info "BTF" ${2} - LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${extra_paholeopt} ${1} + LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${PAHOLE_FLAGS} ${1} # Create ${2} which contains just .BTF section but no symbols. Add # SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all diff --git a/scripts/pahole-flags.sh b/scripts/pahole-flags.sh new file mode 100755 index 000000000000..2b99fc77019c --- /dev/null +++ b/scripts/pahole-flags.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +extra_paholeopt= + +if ! 
[ -x "$(command -v ${PAHOLE})" ]; then + return +fi + +pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') + +if [ "${pahole_ver}" -ge "118" ] && [ "${pahole_ver}" -le "121" ]; then + # pahole 1.18 through 1.21 can't handle zero-sized per-CPU vars + extra_paholeopt="${extra_paholeopt} --skip_encoding_btf_vars" +fi +if [ "${pahole_ver}" -ge "121" ]; then + extra_paholeopt="${extra_paholeopt} --btf_gen_floats" +fi + +echo ${extra_paholeopt} diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 1fcf5b01a193..c0c30e56988f 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -14,33 +14,43 @@ else Q = @ endif -BPF_DIR = $(srctree)/tools/lib/bpf/ +BPF_DIR = $(srctree)/tools/lib/bpf ifneq ($(OUTPUT),) - LIBBPF_OUTPUT = $(OUTPUT)/libbpf/ - LIBBPF_PATH = $(LIBBPF_OUTPUT) - BOOTSTRAP_OUTPUT = $(OUTPUT)/bootstrap/ + _OUTPUT := $(OUTPUT) else - LIBBPF_OUTPUT = - LIBBPF_PATH = $(BPF_DIR) - BOOTSTRAP_OUTPUT = $(CURDIR)/bootstrap/ + _OUTPUT := $(CURDIR) endif +BOOTSTRAP_OUTPUT := $(_OUTPUT)/bootstrap/ +LIBBPF_OUTPUT := $(_OUTPUT)/libbpf/ +LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include +LIBBPF_HDRS_DIR := $(LIBBPF_INCLUDE)/bpf -LIBBPF = $(LIBBPF_PATH)libbpf.a +LIBBPF = $(LIBBPF_OUTPUT)libbpf.a LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/ LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a +# We need to copy hashmap.h and nlattr.h which is not otherwise exported by +# libbpf, but still required by bpftool. +LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h) + ifeq ($(BPFTOOL_VERSION),) BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. 
kernelversion) endif -$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT): +$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT) $(LIBBPF_HDRS_DIR): $(QUIET_MKDIR)mkdir -p $@ -$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) - $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a +$(LIBBPF): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_OUTPUT) + $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= $(LIBBPF) install_headers + +$(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_DIR) + $(call QUIET_INSTALL, $@) + $(Q)install -m 644 -t $(LIBBPF_HDRS_DIR) $< -$(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT) +$(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ @@ -60,11 +70,10 @@ CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS)) CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(if $(OUTPUT),$(OUTPUT),.) \ + -I$(LIBBPF_INCLUDE) \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ - -I$(srctree)/tools/include/uapi \ - -I$(srctree)/tools/lib \ - -I$(srctree)/tools/perf + -I$(srctree)/tools/include/uapi CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' ifneq ($(EXTRA_CFLAGS),) CFLAGS += $(EXTRA_CFLAGS) @@ -140,7 +149,7 @@ BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o g $(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o -$(OBJS): $(LIBBPF) +$(OBJS): $(LIBBPF) $(LIBBPF_INTERNAL_HDRS) VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -167,8 +176,7 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ -I$(if $(OUTPUT),$(OUTPUT),.) 
\ -I$(srctree)/tools/include/uapi/ \ - -I$(LIBBPF_PATH) \ - -I$(srctree)/tools/lib \ + -I$(LIBBPF_INCLUDE) \ -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) @@ -189,7 +197,10 @@ $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< -$(OUTPUT)feature.o: | zdep +$(OUTPUT)feature.o: +ifneq ($(feature-zlib), 1) + $(error "No zlib found") +endif $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \ @@ -198,7 +209,7 @@ $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) -$(BOOTSTRAP_OUTPUT)%.o: %.c | $(BOOTSTRAP_OUTPUT) +$(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT) $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< $(OUTPUT)%.o: %.c @@ -217,10 +228,12 @@ clean: $(LIBBPF)-clean $(LIBBPF_BOOTSTRAP)-clean feature-detect-clean $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool $(Q)$(RM) -r -- $(OUTPUT)feature/ -install: $(OUTPUT)bpftool +install-bin: $(OUTPUT)bpftool $(call QUIET_INSTALL, bpftool) $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin $(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool + +install: install-bin $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir) $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir) @@ -243,10 +256,7 @@ doc-uninstall: FORCE: -zdep: - @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi - .SECONDARY: -.PHONY: all FORCE clean install uninstall zdep +.PHONY: all FORCE clean install-bin install uninstall .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 49743ad96851..015d2758f826 100644 --- a/tools/bpf/bpftool/btf.c +++ 
b/tools/bpf/bpftool/btf.c @@ -8,14 +8,15 @@ #include <stdio.h> #include <string.h> #include <unistd.h> -#include <bpf/bpf.h> -#include <bpf/btf.h> -#include <bpf/libbpf.h> #include <linux/btf.h> -#include <linux/hashtable.h> #include <sys/types.h> #include <sys/stat.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include <bpf/hashmap.h> +#include <bpf/libbpf.h> + #include "json_writer.h" #include "main.h" @@ -37,17 +38,12 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", - [BTF_KIND_TAG] = "TAG", -}; - -struct btf_attach_table { - DECLARE_HASHTABLE(table, 16); + [BTF_KIND_DECL_TAG] = "DECL_TAG", }; struct btf_attach_point { __u32 obj_id; __u32 btf_id; - struct hlist_node hash; }; static const char *btf_int_enc_str(__u8 encoding) @@ -329,7 +325,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, printf("\n\ttype_id=%u offset=%u size=%u", v->type, v->offset, v->size); - if (v->type <= btf__get_nr_types(btf)) { + if (v->type < btf__type_cnt(btf)) { vt = btf__type_by_id(btf, v->type); printf(" (%s '%s')", btf_kind_str[btf_kind_safe(btf_kind(vt))], @@ -348,8 +344,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id, printf(" size=%u", t->size); break; } - case BTF_KIND_TAG: { - const struct btf_tag *tag = (const void *)(t + 1); + case BTF_KIND_DECL_TAG: { + const struct btf_decl_tag *tag = (const void *)(t + 1); if (json_output) { jsonw_uint_field(w, "type_id", t->type); @@ -390,14 +386,14 @@ static int dump_btf_raw(const struct btf *btf, } } else { const struct btf *base; - int cnt = btf__get_nr_types(btf); + int cnt = btf__type_cnt(btf); int start_id = 1; base = btf__base_btf(btf); if (base) - start_id = btf__get_nr_types(base) + 1; + start_id = btf__type_cnt(base); - for (i = start_id; i <= cnt; i++) { + for (i = start_id; i < cnt; i++) { t = btf__type_by_id(btf, i); dump_btf_type(btf, i, t); } @@ -440,9 +436,9 @@ static int dump_btf_c(const struct btf 
*btf, goto done; } } else { - int cnt = btf__get_nr_types(btf); + int cnt = btf__type_cnt(btf); - for (i = 1; i <= cnt; i++) { + for (i = 1; i < cnt; i++) { err = btf_dump__dump_type(d, i); if (err) goto done; @@ -645,21 +641,8 @@ static int btf_parse_fd(int *argc, char ***argv) return fd; } -static void delete_btf_table(struct btf_attach_table *tab) -{ - struct btf_attach_point *obj; - struct hlist_node *tmp; - - unsigned int bkt; - - hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { - hash_del(&obj->hash); - free(obj); - } -} - static int -build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, +build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, void *info, __u32 *len) { static const char * const names[] = { @@ -667,7 +650,6 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, [BPF_OBJ_PROG] = "prog", [BPF_OBJ_MAP] = "map", }; - struct btf_attach_point *obj_node; __u32 btf_id, id = 0; int err; int fd; @@ -741,28 +723,25 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type, if (!btf_id) continue; - obj_node = calloc(1, sizeof(*obj_node)); - if (!obj_node) { - p_err("failed to allocate memory: %s", strerror(errno)); - err = -ENOMEM; + err = hashmap__append(tab, u32_as_hash_field(btf_id), + u32_as_hash_field(id)); + if (err) { + p_err("failed to append entry to hashmap for BTF ID %u, object ID %u: %s", + btf_id, id, strerror(errno)); goto err_free; } - - obj_node->obj_id = id; - obj_node->btf_id = btf_id; - hash_add(tab->table, &obj_node->hash, obj_node->btf_id); } return 0; err_free: - delete_btf_table(tab); + hashmap__free(tab); return err; } static int -build_btf_tables(struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) +build_btf_tables(struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { struct bpf_prog_info prog_info; __u32 prog_len = sizeof(prog_info); @@ -778,7 +757,7 @@ build_btf_tables(struct btf_attach_table *btf_prog_table, 
err = build_btf_type_table(btf_map_table, BPF_OBJ_MAP, &map_info, &map_len); if (err) { - delete_btf_table(btf_prog_table); + hashmap__free(btf_prog_table); return err; } @@ -787,10 +766,10 @@ build_btf_tables(struct btf_attach_table *btf_prog_table, static void show_btf_plain(struct bpf_btf_info *info, int fd, - struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) + struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { - struct btf_attach_point *obj; + struct hashmap_entry *entry; const char *name = u64_to_ptr(info->name); int n; @@ -804,29 +783,30 @@ show_btf_plain(struct bpf_btf_info *info, int fd, printf("size %uB", info->btf_size); n = 0; - hash_for_each_possible(btf_prog_table->table, obj, hash, info->id) { - if (obj->btf_id == info->id) - printf("%s%u", n++ == 0 ? " prog_ids " : ",", - obj->obj_id); + hashmap__for_each_key_entry(btf_prog_table, entry, + u32_as_hash_field(info->id)) { + printf("%s%u", n++ == 0 ? " prog_ids " : ",", + hash_field_as_u32(entry->value)); } n = 0; - hash_for_each_possible(btf_map_table->table, obj, hash, info->id) { - if (obj->btf_id == info->id) - printf("%s%u", n++ == 0 ? " map_ids " : ",", - obj->obj_id); + hashmap__for_each_key_entry(btf_map_table, entry, + u32_as_hash_field(info->id)) { + printf("%s%u", n++ == 0 ? 
" map_ids " : ",", + hash_field_as_u32(entry->value)); } - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); } static void show_btf_json(struct bpf_btf_info *info, int fd, - struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) + struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { - struct btf_attach_point *obj; + struct hashmap_entry *entry; const char *name = u64_to_ptr(info->name); jsonw_start_object(json_wtr); /* btf object */ @@ -835,23 +815,21 @@ show_btf_json(struct bpf_btf_info *info, int fd, jsonw_name(json_wtr, "prog_ids"); jsonw_start_array(json_wtr); /* prog_ids */ - hash_for_each_possible(btf_prog_table->table, obj, hash, - info->id) { - if (obj->btf_id == info->id) - jsonw_uint(json_wtr, obj->obj_id); + hashmap__for_each_key_entry(btf_prog_table, entry, + u32_as_hash_field(info->id)) { + jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); } jsonw_end_array(json_wtr); /* prog_ids */ jsonw_name(json_wtr, "map_ids"); jsonw_start_array(json_wtr); /* map_ids */ - hash_for_each_possible(btf_map_table->table, obj, hash, - info->id) { - if (obj->btf_id == info->id) - jsonw_uint(json_wtr, obj->obj_id); + hashmap__for_each_key_entry(btf_map_table, entry, + u32_as_hash_field(info->id)) { + jsonw_uint(json_wtr, hash_field_as_u32(entry->value)); } jsonw_end_array(json_wtr); /* map_ids */ - emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */ + emit_obj_refs_json(refs_table, info->id, json_wtr); /* pids */ jsonw_bool_field(json_wtr, "kernel", info->kernel_btf); @@ -862,8 +840,8 @@ show_btf_json(struct bpf_btf_info *info, int fd, } static int -show_btf(int fd, struct btf_attach_table *btf_prog_table, - struct btf_attach_table *btf_map_table) +show_btf(int fd, struct hashmap *btf_prog_table, + struct hashmap *btf_map_table) { struct bpf_btf_info info; __u32 len = sizeof(info); @@ -900,8 +878,8 @@ show_btf(int fd, struct 
btf_attach_table *btf_prog_table, static int do_show(int argc, char **argv) { - struct btf_attach_table btf_prog_table; - struct btf_attach_table btf_map_table; + struct hashmap *btf_prog_table; + struct hashmap *btf_map_table; int err, fd = -1; __u32 id = 0; @@ -917,9 +895,19 @@ static int do_show(int argc, char **argv) return BAD_ARG(); } - hash_init(btf_prog_table.table); - hash_init(btf_map_table.table); - err = build_btf_tables(&btf_prog_table, &btf_map_table); + btf_prog_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + btf_map_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!btf_prog_table || !btf_map_table) { + hashmap__free(btf_prog_table); + hashmap__free(btf_map_table); + if (fd >= 0) + close(fd); + p_err("failed to create hashmap for object references"); + return -1; + } + err = build_btf_tables(btf_prog_table, btf_map_table); if (err) { if (fd >= 0) close(fd); @@ -928,7 +916,7 @@ static int do_show(int argc, char **argv) build_obj_refs_table(&refs_table, BPF_OBJ_BTF); if (fd >= 0) { - err = show_btf(fd, &btf_prog_table, &btf_map_table); + err = show_btf(fd, btf_prog_table, btf_map_table); close(fd); goto exit_free; } @@ -960,7 +948,7 @@ static int do_show(int argc, char **argv) break; } - err = show_btf(fd, &btf_prog_table, &btf_map_table); + err = show_btf(fd, btf_prog_table, btf_map_table); close(fd); if (err) break; @@ -970,9 +958,9 @@ static int do_show(int argc, char **argv) jsonw_end_array(json_wtr); /* root array */ exit_free: - delete_btf_table(&btf_prog_table); - delete_btf_table(&btf_map_table); - delete_obj_refs_table(&refs_table); + hashmap__free(btf_prog_table); + hashmap__free(btf_map_table); + delete_obj_refs_table(refs_table); return err; } diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index d42d930a3ec4..511eccdbdfe6 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -22,6 +22,7 @@ #include <sys/vfs.h> #include <bpf/bpf.h> 
+#include <bpf/hashmap.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ #include "main.h" @@ -393,7 +394,7 @@ void print_hex_data_json(uint8_t *data, size_t len) } /* extra params for nftw cb */ -static struct pinned_obj_table *build_fn_table; +static struct hashmap *build_fn_table; static enum bpf_obj_type build_fn_type; static int do_build_table_cb(const char *fpath, const struct stat *sb, @@ -401,9 +402,9 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, { struct bpf_prog_info pinned_info; __u32 len = sizeof(pinned_info); - struct pinned_obj *obj_node; enum bpf_obj_type objtype; int fd, err = 0; + char *path; if (typeflag != FTW_F) goto out_ret; @@ -420,28 +421,26 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, if (bpf_obj_get_info_by_fd(fd, &pinned_info, &len)) goto out_close; - obj_node = calloc(1, sizeof(*obj_node)); - if (!obj_node) { + path = strdup(fpath); + if (!path) { err = -1; goto out_close; } - obj_node->id = pinned_info.id; - obj_node->path = strdup(fpath); - if (!obj_node->path) { - err = -1; - free(obj_node); + err = hashmap__append(build_fn_table, u32_as_hash_field(pinned_info.id), path); + if (err) { + p_err("failed to append entry to hashmap for ID %u, path '%s': %s", + pinned_info.id, path, strerror(errno)); goto out_close; } - hash_add(build_fn_table->table, &obj_node->hash, obj_node->id); out_close: close(fd); out_ret: return err; } -int build_pinned_obj_table(struct pinned_obj_table *tab, +int build_pinned_obj_table(struct hashmap *tab, enum bpf_obj_type type) { struct mntent *mntent = NULL; @@ -470,17 +469,18 @@ int build_pinned_obj_table(struct pinned_obj_table *tab, return err; } -void delete_pinned_obj_table(struct pinned_obj_table *tab) +void delete_pinned_obj_table(struct hashmap *map) { - struct pinned_obj *obj; - struct hlist_node *tmp; - unsigned int bkt; + struct hashmap_entry *entry; + size_t bkt; - hash_for_each_safe(tab->table, bkt, tmp, obj, hash) { - 
hash_del(&obj->hash); - free(obj->path); - free(obj); - } + if (!map) + return; + + hashmap__for_each_entry(map, entry, bkt) + free(entry->value); + + hashmap__free(map); } unsigned int get_page_size(void) @@ -962,3 +962,13 @@ int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) return fd; } + +size_t hash_fn_for_key_as_id(const void *key, void *ctx) +{ + return (size_t)key; +} + +bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx) +{ + return k1 == k2; +} diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index cc835859465b..5c18351290f0 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -18,7 +18,6 @@ #include <sys/stat.h> #include <sys/mman.h> #include <bpf/btf.h> -#include <bpf/bpf_gen_internal.h> #include "json_writer.h" #include "main.h" @@ -34,6 +33,11 @@ static void sanitize_identifier(char *name) name[i] = '_'; } +static bool str_has_prefix(const char *str, const char *prefix) +{ + return strncmp(str, prefix, strlen(prefix)) == 0; +} + static bool str_has_suffix(const char *str, const char *suffix) { size_t i, n1 = strlen(str), n2 = strlen(suffix); @@ -68,23 +72,47 @@ static void get_header_guard(char *guard, const char *obj_name) guard[i] = toupper(guard[i]); } -static const char *get_map_ident(const struct bpf_map *map) +static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz) { + static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" }; const char *name = bpf_map__name(map); + int i, n; + + if (!bpf_map__is_internal(map)) { + snprintf(buf, buf_sz, "%s", name); + return true; + } + + for (i = 0, n = ARRAY_SIZE(sfxs); i < n; i++) { + const char *sfx = sfxs[i], *p; - if (!bpf_map__is_internal(map)) - return name; - - if (str_has_suffix(name, ".data")) - return "data"; - else if (str_has_suffix(name, ".rodata")) - return "rodata"; - else if (str_has_suffix(name, ".bss")) - return "bss"; - else if (str_has_suffix(name, ".kconfig")) - return 
"kconfig"; - else - return NULL; + p = strstr(name, sfx); + if (p) { + snprintf(buf, buf_sz, "%s", p + 1); + sanitize_identifier(buf); + return true; + } + } + + return false; +} + +static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz) +{ + static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" }; + int i, n; + + for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) { + const char *pfx = pfxs[i]; + + if (str_has_prefix(sec_name, pfx)) { + snprintf(buf, buf_sz, "%s", sec_name + 1); + sanitize_identifier(buf); + return true; + } + } + + return false; } static void codegen_btf_dump_printf(void *ctx, const char *fmt, va_list args) @@ -101,24 +129,14 @@ static int codegen_datasec_def(struct bpf_object *obj, const char *sec_name = btf__name_by_offset(btf, sec->name_off); const struct btf_var_secinfo *sec_var = btf_var_secinfos(sec); int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec); - const char *sec_ident; - char var_ident[256]; + char var_ident[256], sec_ident[256]; bool strip_mods = false; - if (strcmp(sec_name, ".data") == 0) { - sec_ident = "data"; - strip_mods = true; - } else if (strcmp(sec_name, ".bss") == 0) { - sec_ident = "bss"; - strip_mods = true; - } else if (strcmp(sec_name, ".rodata") == 0) { - sec_ident = "rodata"; - strip_mods = true; - } else if (strcmp(sec_name, ".kconfig") == 0) { - sec_ident = "kconfig"; - } else { + if (!get_datasec_ident(sec_name, sec_ident, sizeof(sec_ident))) return 0; - } + + if (strcmp(sec_name, ".kconfig") != 0) + strip_mods = true; printf(" struct %s__%s {\n", obj_name, sec_ident); for (i = 0; i < vlen; i++, sec_var++) { @@ -193,24 +211,63 @@ static int codegen_datasec_def(struct bpf_object *obj, static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) { struct btf *btf = bpf_object__btf(obj); - int n = btf__get_nr_types(btf); + int n = btf__type_cnt(btf); struct btf_dump *d; + struct bpf_map *map; + const struct btf_type *sec; + char sec_ident[256], map_ident[256]; int i, 
err = 0; d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf); if (IS_ERR(d)) return PTR_ERR(d); - for (i = 1; i <= n; i++) { - const struct btf_type *t = btf__type_by_id(btf, i); + bpf_object__for_each_map(map, obj) { + /* only generate definitions for memory-mapped internal maps */ + if (!bpf_map__is_internal(map)) + continue; + if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + continue; - if (!btf_is_datasec(t)) + if (!get_map_ident(map, map_ident, sizeof(map_ident))) continue; - err = codegen_datasec_def(obj, btf, d, t, obj_name); - if (err) - goto out; + sec = NULL; + for (i = 1; i < n; i++) { + const struct btf_type *t = btf__type_by_id(btf, i); + const char *name; + + if (!btf_is_datasec(t)) + continue; + + name = btf__str_by_offset(btf, t->name_off); + if (!get_datasec_ident(name, sec_ident, sizeof(sec_ident))) + continue; + + if (strcmp(sec_ident, map_ident) == 0) { + sec = t; + break; + } + } + + /* In some cases (e.g., sections like .rodata.cst16 containing + * compiler allocated string constants only) there will be + * special internal maps with no corresponding DATASEC BTF + * type. In such case, generate empty structs for each such + * map. It will still be memory-mapped and its contents + * accessible from user-space through BPF skeleton. 
+ */ + if (!sec) { + printf(" struct %s__%s {\n", obj_name, map_ident); + printf(" } *%s;\n", map_ident); + } else { + err = codegen_datasec_def(obj, btf, d, sec, obj_name); + if (err) + goto out; + } } + + out: btf_dump__free(d); return err; @@ -386,6 +443,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) { struct bpf_program *prog; struct bpf_map *map; + char ident[256]; codegen("\ \n\ @@ -406,10 +464,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) } bpf_object__for_each_map(map, obj) { - const char *ident; - - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (bpf_map__is_internal(map) && (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) @@ -433,6 +488,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h struct bpf_object_load_attr load_attr = {}; DECLARE_LIBBPF_OPTS(gen_loader_opts, opts); struct bpf_map *map; + char ident[256]; int err = 0; err = bpf_object__gen_loader(obj, &opts); @@ -478,12 +534,10 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h ", obj_name, opts.data_sz); bpf_object__for_each_map(map, obj) { - const char *ident; const void *mmap_data = NULL; size_t mmap_size = 0; - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (!bpf_map__is_internal(map) || @@ -545,15 +599,15 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h return err; \n\ ", obj_name); bpf_object__for_each_map(map, obj) { - const char *ident, *mmap_flags; + const char *mmap_flags; - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (!bpf_map__is_internal(map) || !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) continue; + if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG) mmap_flags = "PROT_READ"; else @@ -603,7 +657,8 @@ static int do_skeleton(int argc, char 
**argv) DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data; struct bpf_object *obj = NULL; - const char *file, *ident; + const char *file; + char ident[256]; struct bpf_program *prog; int fd, err = -1; struct bpf_map *map; @@ -674,8 +729,7 @@ static int do_skeleton(int argc, char **argv) } bpf_object__for_each_map(map, obj) { - ident = get_map_ident(map); - if (!ident) { + if (!get_map_ident(map, ident, sizeof(ident))) { p_err("ignoring unrecognized internal map '%s'...", bpf_map__name(map)); continue; @@ -728,8 +782,7 @@ static int do_skeleton(int argc, char **argv) if (map_cnt) { printf("\tstruct {\n"); bpf_object__for_each_map(map, obj) { - ident = get_map_ident(map); - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; if (use_loader) printf("\t\tstruct bpf_map_desc %s;\n", ident); @@ -898,9 +951,7 @@ static int do_skeleton(int argc, char **argv) ); i = 0; bpf_object__for_each_map(map, obj) { - ident = get_map_ident(map); - - if (!ident) + if (!get_map_ident(map, ident, sizeof(ident))) continue; codegen("\ diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 84a9b01d956d..6c0de647b8ad 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -57,7 +57,7 @@ static int do_pin(int argc, char **argv) goto close_obj; } - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); if (!prog) { p_err("can't find bpf program in objfile %s", objfile); goto close_obj; diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 8cc3e36f8cc6..2c258db0d352 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -7,6 +7,7 @@ #include <unistd.h> #include <bpf/bpf.h> +#include <bpf/hashmap.h> #include "json_writer.h" #include "main.h" @@ -20,6 +21,8 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_NETNS] = "netns", }; +static struct hashmap *link_table; + static int link_parse_fd(int *argc, char ***argv) { int fd; 
@@ -156,19 +159,18 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) break; } - if (!hash_empty(link_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(link_table)) { + struct hashmap_entry *entry; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hash_for_each_possible(link_table.table, obj, hash, info->id) { - if (obj->id == info->id) - jsonw_string(json_wtr, obj->path); - } + hashmap__for_each_key_entry(link_table, entry, + u32_as_hash_field(info->id)) + jsonw_string(json_wtr, entry->value); jsonw_end_array(json_wtr); } - emit_obj_refs_json(&refs_table, info->id, json_wtr); + emit_obj_refs_json(refs_table, info->id, json_wtr); jsonw_end_object(json_wtr); @@ -244,15 +246,14 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) break; } - if (!hash_empty(link_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(link_table)) { + struct hashmap_entry *entry; - hash_for_each_possible(link_table.table, obj, hash, info->id) { - if (obj->id == info->id) - printf("\n\tpinned %s", obj->path); - } + hashmap__for_each_key_entry(link_table, entry, + u32_as_hash_field(info->id)) + printf("\n\tpinned %s", (char *)entry->value); } - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); @@ -302,8 +303,15 @@ static int do_show(int argc, char **argv) __u32 id = 0; int err, fd; - if (show_pinned) - build_pinned_obj_table(&link_table, BPF_OBJ_LINK); + if (show_pinned) { + link_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!link_table) { + p_err("failed to create hashmap for pinned paths"); + return -1; + } + build_pinned_obj_table(link_table, BPF_OBJ_LINK); + } build_obj_refs_table(&refs_table, BPF_OBJ_LINK); if (argc == 2) { @@ -344,7 +352,10 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(&refs_table); + 
delete_obj_refs_table(refs_table); + + if (show_pinned) + delete_pinned_obj_table(link_table); return errno == ENOENT ? 0 : -1; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 02eaaf065f65..28237d7cef67 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -10,8 +10,9 @@ #include <string.h> #include <bpf/bpf.h> -#include <bpf/libbpf.h> #include <bpf/btf.h> +#include <bpf/hashmap.h> +#include <bpf/libbpf.h> #include "main.h" @@ -31,10 +32,7 @@ bool verifier_logs; bool relaxed_maps; bool use_loader; struct btf *base_btf; -struct pinned_obj_table prog_table; -struct pinned_obj_table map_table; -struct pinned_obj_table link_table; -struct obj_refs_table refs_table; +struct hashmap *refs_table; static void __noreturn clean_and_exit(int i) { @@ -409,10 +407,6 @@ int main(int argc, char **argv) block_mount = false; bin_name = argv[0]; - hash_init(prog_table.table); - hash_init(map_table.table); - hash_init(link_table.table); - opterr = 0; while ((opt = getopt_long(argc, argv, "VhpjfLmndB:", options, NULL)) >= 0) { @@ -479,11 +473,6 @@ int main(int argc, char **argv) if (json_output) jsonw_destroy(&json_wtr); - if (show_pinned) { - delete_pinned_obj_table(&prog_table); - delete_pinned_obj_table(&map_table); - delete_pinned_obj_table(&link_table); - } btf__free(base_btf); return ret; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 90caa42aac4c..383835c2604d 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -11,9 +11,9 @@ #include <linux/bpf.h> #include <linux/compiler.h> #include <linux/kernel.h> -#include <linux/hashtable.h> #include <tools/libc_compat.h> +#include <bpf/hashmap.h> #include <bpf/libbpf.h> #include "json_writer.h" @@ -91,10 +91,7 @@ extern bool verifier_logs; extern bool relaxed_maps; extern bool use_loader; extern struct btf *base_btf; -extern struct pinned_obj_table prog_table; -extern struct pinned_obj_table map_table; -extern struct pinned_obj_table link_table; 
-extern struct obj_refs_table refs_table; +extern struct hashmap *refs_table; void __printf(1, 2) p_err(const char *fmt, ...); void __printf(1, 2) p_info(const char *fmt, ...); @@ -108,28 +105,12 @@ void set_max_rlimit(void); int mount_tracefs(const char *target); -struct pinned_obj_table { - DECLARE_HASHTABLE(table, 16); -}; - -struct pinned_obj { - __u32 id; - char *path; - struct hlist_node hash; -}; - -struct obj_refs_table { - DECLARE_HASHTABLE(table, 16); -}; - struct obj_ref { int pid; char comm[16]; }; struct obj_refs { - struct hlist_node node; - __u32 id; int ref_cnt; struct obj_ref *refs; }; @@ -137,15 +118,15 @@ struct obj_refs { struct btf; struct bpf_line_info; -int build_pinned_obj_table(struct pinned_obj_table *table, +int build_pinned_obj_table(struct hashmap *table, enum bpf_obj_type type); -void delete_pinned_obj_table(struct pinned_obj_table *tab); -__weak int build_obj_refs_table(struct obj_refs_table *table, +void delete_pinned_obj_table(struct hashmap *table); +__weak int build_obj_refs_table(struct hashmap **table, enum bpf_obj_type type); -__weak void delete_obj_refs_table(struct obj_refs_table *table); -__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, +__weak void delete_obj_refs_table(struct hashmap *table); +__weak void emit_obj_refs_json(struct hashmap *table, __u32 id, json_writer_t *json_wtr); -__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, +__weak void emit_obj_refs_plain(struct hashmap *table, __u32 id, const char *prefix); void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); @@ -259,4 +240,23 @@ int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, int print_all_levels(__maybe_unused enum libbpf_print_level level, const char *format, va_list args); + +size_t hash_fn_for_key_as_id(const void *key, void *ctx); +bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); + 
+static inline void *u32_as_hash_field(__u32 x) +{ + return (void *)(uintptr_t)x; +} + +static inline __u32 hash_field_as_u32(const void *x) +{ + return (__u32)(uintptr_t)x; +} + +static inline bool hashmap__empty(struct hashmap *map) +{ + return map ? hashmap__size(map) == 0 : true; +} + #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 407071d54ab1..cae1f1119296 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -17,6 +17,7 @@ #include <bpf/bpf.h> #include <bpf/btf.h> +#include <bpf/hashmap.h> #include "json_writer.h" #include "main.h" @@ -56,6 +57,8 @@ const char * const map_type_name[] = { const size_t map_type_name_size = ARRAY_SIZE(map_type_name); +static struct hashmap *map_table; + static bool map_is_per_cpu(__u32 type) { return type == BPF_MAP_TYPE_PERCPU_HASH || @@ -535,19 +538,18 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) if (info->btf_id) jsonw_int_field(json_wtr, "btf_id", info->btf_id); - if (!hash_empty(map_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(map_table)) { + struct hashmap_entry *entry; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hash_for_each_possible(map_table.table, obj, hash, info->id) { - if (obj->id == info->id) - jsonw_string(json_wtr, obj->path); - } + hashmap__for_each_key_entry(map_table, entry, + u32_as_hash_field(info->id)) + jsonw_string(json_wtr, entry->value); jsonw_end_array(json_wtr); } - emit_obj_refs_json(&refs_table, info->id, json_wtr); + emit_obj_refs_json(refs_table, info->id, json_wtr); jsonw_end_object(json_wtr); @@ -610,13 +612,12 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) } close(fd); - if (!hash_empty(map_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(map_table)) { + struct hashmap_entry *entry; - hash_for_each_possible(map_table.table, obj, hash, info->id) { - if (obj->id == info->id) - printf("\n\tpinned %s", obj->path); - } + 
hashmap__for_each_key_entry(map_table, entry, + u32_as_hash_field(info->id)) + printf("\n\tpinned %s", (char *)entry->value); } printf("\n"); @@ -636,7 +637,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (frozen) printf("%sfrozen", info->btf_id ? " " : ""); - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); return 0; @@ -694,8 +695,15 @@ static int do_show(int argc, char **argv) int err; int fd; - if (show_pinned) - build_pinned_obj_table(&map_table, BPF_OBJ_MAP); + if (show_pinned) { + map_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!map_table) { + p_err("failed to create hashmap for pinned paths"); + return -1; + } + build_pinned_obj_table(map_table, BPF_OBJ_MAP); + } build_obj_refs_table(&refs_table, BPF_OBJ_MAP); if (argc == 2) @@ -740,7 +748,10 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(&refs_table); + delete_obj_refs_table(refs_table); + + if (show_pinned) + delete_pinned_obj_table(map_table); return errno == ENOENT ? 
0 : -1; } diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 825f29f93a57..b98ea702d284 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -22,7 +22,6 @@ #include <sys/syscall.h> #include <bpf/bpf.h> -#include <perf-sys.h> #include "main.h" diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 477e55d59c34..56b598eee043 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -6,35 +6,37 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> + #include <bpf/bpf.h> +#include <bpf/hashmap.h> #include "main.h" #include "skeleton/pid_iter.h" #ifdef BPFTOOL_WITHOUT_SKELETONS -int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) { return -ENOTSUP; } -void delete_obj_refs_table(struct obj_refs_table *table) {} -void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {} -void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_writer) {} +void delete_obj_refs_table(struct hashmap *map) {} +void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) {} +void emit_obj_refs_json(struct hashmap *map, __u32 id, json_writer_t *json_writer) {} #else /* BPFTOOL_WITHOUT_SKELETONS */ #include "pid_iter.skel.h" -static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) +static void add_ref(struct hashmap *map, struct pid_iter_entry *e) { + struct hashmap_entry *entry; struct obj_refs *refs; struct obj_ref *ref; + int err, i; void *tmp; - int i; - hash_for_each_possible(table->table, refs, node, e->id) { - if (refs->id != e->id) - continue; + hashmap__for_each_key_entry(map, entry, u32_as_hash_field(e->id)) { + refs = entry->value; for (i = 0; i < refs->ref_cnt; i++) { if (refs->refs[i].pid == e->pid) @@ -64,7 +66,6 @@ static void add_ref(struct obj_refs_table *table, struct 
pid_iter_entry *e) return; } - refs->id = e->id; refs->refs = malloc(sizeof(*refs->refs)); if (!refs->refs) { free(refs); @@ -76,7 +77,11 @@ static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); refs->ref_cnt = 1; - hash_add(table->table, &refs->node, e->id); + + err = hashmap__append(map, u32_as_hash_field(e->id), refs); + if (err) + p_err("failed to append entry to hashmap for ID %u: %s", + e->id, strerror(errno)); } static int __printf(2, 0) @@ -87,7 +92,7 @@ libbpf_print_none(__maybe_unused enum libbpf_print_level level, return 0; } -int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type) { struct pid_iter_entry *e; char buf[4096 / sizeof(*e) * sizeof(*e)]; @@ -95,7 +100,11 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) int err, ret, fd = -1, i; libbpf_print_fn_t default_print; - hash_init(table->table); + *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); + if (!*map) { + p_err("failed to create hashmap for PID references"); + return -1; + } set_max_rlimit(); skel = pid_iter_bpf__open(); @@ -151,7 +160,7 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) e = (void *)buf; for (i = 0; i < ret; i++, e++) { - add_ref(table, e); + add_ref(*map, e); } } err = 0; @@ -162,39 +171,44 @@ out: return err; } -void delete_obj_refs_table(struct obj_refs_table *table) +void delete_obj_refs_table(struct hashmap *map) { - struct obj_refs *refs; - struct hlist_node *tmp; - unsigned int bkt; + struct hashmap_entry *entry; + size_t bkt; + + if (!map) + return; + + hashmap__for_each_entry(map, entry, bkt) { + struct obj_refs *refs = entry->value; - hash_for_each_safe(table->table, bkt, tmp, refs, node) { - hash_del(&refs->node); free(refs->refs); free(refs); } + + hashmap__free(map); } -void 
emit_obj_refs_json(struct obj_refs_table *table, __u32 id, +void emit_obj_refs_json(struct hashmap *map, __u32 id, json_writer_t *json_writer) { - struct obj_refs *refs; - struct obj_ref *ref; - int i; + struct hashmap_entry *entry; - if (hash_empty(table->table)) + if (hashmap__empty(map)) return; - hash_for_each_possible(table->table, refs, node, id) { - if (refs->id != id) - continue; + hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { + struct obj_refs *refs = entry->value; + int i; + if (refs->ref_cnt == 0) break; jsonw_name(json_writer, "pids"); jsonw_start_array(json_writer); for (i = 0; i < refs->ref_cnt; i++) { - ref = &refs->refs[i]; + struct obj_ref *ref = &refs->refs[i]; + jsonw_start_object(json_writer); jsonw_int_field(json_writer, "pid", ref->pid); jsonw_string_field(json_writer, "comm", ref->comm); @@ -205,24 +219,24 @@ void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, } } -void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) +void emit_obj_refs_plain(struct hashmap *map, __u32 id, const char *prefix) { - struct obj_refs *refs; - struct obj_ref *ref; - int i; + struct hashmap_entry *entry; - if (hash_empty(table->table)) + if (hashmap__empty(map)) return; - hash_for_each_possible(table->table, refs, node, id) { - if (refs->id != id) - continue; + hashmap__for_each_key_entry(map, entry, u32_as_hash_field(id)) { + struct obj_refs *refs = entry->value; + int i; + if (refs->ref_cnt == 0) break; printf("%s", prefix); for (i = 0; i < refs->ref_cnt; i++) { - ref = &refs->refs[i]; + struct obj_ref *ref = &refs->refs[i]; + printf("%s%s(%d)", i == 0 ? 
"" : ", ", ref->comm, ref->pid); } break; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 9c3e343b7d87..515d22952602 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -24,8 +24,8 @@ #include <bpf/bpf.h> #include <bpf/btf.h> +#include <bpf/hashmap.h> #include <bpf/libbpf.h> -#include <bpf/bpf_gen_internal.h> #include <bpf/skel_internal.h> #include "cfg.h" @@ -85,6 +85,8 @@ static const char * const attach_type_strings[] = { [__MAX_BPF_ATTACH_TYPE] = NULL, }; +static struct hashmap *prog_table; + static enum bpf_attach_type parse_attach_type(const char *str) { enum bpf_attach_type type; @@ -308,18 +310,12 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (printed_header) jsonw_end_object(json_wtr); } else { - json_writer_t *btf_wtr = jsonw_new(stdout); + json_writer_t *btf_wtr; struct btf_dumper d = { .btf = btf, - .jw = btf_wtr, .is_plain_text = true, }; - if (!btf_wtr) { - p_err("jsonw alloc failed"); - goto out_free; - } - for (i = 0; i < vlen; i++, vsi++) { t_var = btf__type_by_id(btf, vsi->type); name = btf__name_by_offset(btf, t_var->name_off); @@ -329,6 +325,14 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (!printed_header) { printf("\tmetadata:"); + + btf_wtr = jsonw_new(stdout); + if (!btf_wtr) { + p_err("jsonw alloc failed"); + goto out_free; + } + d.jw = btf_wtr, + printed_header = true; } @@ -415,19 +419,18 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) if (info->btf_id) jsonw_int_field(json_wtr, "btf_id", info->btf_id); - if (!hash_empty(prog_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(prog_table)) { + struct hashmap_entry *entry; jsonw_name(json_wtr, "pinned"); jsonw_start_array(json_wtr); - hash_for_each_possible(prog_table.table, obj, hash, info->id) { - if (obj->id == info->id) - jsonw_string(json_wtr, obj->path); - } + hashmap__for_each_key_entry(prog_table, entry, + u32_as_hash_field(info->id)) + jsonw_string(json_wtr, entry->value); 
jsonw_end_array(json_wtr); } - emit_obj_refs_json(&refs_table, info->id, json_wtr); + emit_obj_refs_json(refs_table, info->id, json_wtr); show_prog_metadata(fd, info->nr_map_ids); @@ -487,19 +490,18 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); - if (!hash_empty(prog_table.table)) { - struct pinned_obj *obj; + if (!hashmap__empty(prog_table)) { + struct hashmap_entry *entry; - hash_for_each_possible(prog_table.table, obj, hash, info->id) { - if (obj->id == info->id) - printf("\n\tpinned %s", obj->path); - } + hashmap__for_each_key_entry(prog_table, entry, + u32_as_hash_field(info->id)) + printf("\n\tpinned %s", (char *)entry->value); } if (info->btf_id) printf("\n\tbtf_id %d", info->btf_id); - emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + emit_obj_refs_plain(refs_table, info->id, "\n\tpids "); printf("\n"); @@ -566,8 +568,15 @@ static int do_show(int argc, char **argv) int err; int fd; - if (show_pinned) - build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); + if (show_pinned) { + prog_table = hashmap__new(hash_fn_for_key_as_id, + equal_fn_for_key_as_id, NULL); + if (!prog_table) { + p_err("failed to create hashmap for pinned paths"); + return -1; + } + build_pinned_obj_table(prog_table, BPF_OBJ_PROG); + } build_obj_refs_table(&refs_table, BPF_OBJ_PROG); if (argc == 2) @@ -610,7 +619,10 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); - delete_obj_refs_table(&refs_table); + delete_obj_refs_table(refs_table); + + if (show_pinned) + delete_pinned_obj_table(prog_table); return err; } @@ -1601,7 +1613,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; if (first_prog_only) { - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); if (!prog) { p_err("object file doesn't contain any bpf program"); goto err_close_obj; diff --git a/tools/bpf/resolve_btfids/Makefile 
b/tools/bpf/resolve_btfids/Makefile index 08b75e314ae7..751643f860b2 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -29,25 +29,30 @@ BPFOBJ := $(OUTPUT)/libbpf/libbpf.a LIBBPF_OUT := $(abspath $(dir $(BPFOBJ)))/ SUBCMDOBJ := $(OUTPUT)/libsubcmd/libsubcmd.a +LIBBPF_DESTDIR := $(LIBBPF_OUT) +LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)include + BINARY := $(OUTPUT)/resolve_btfids BINARY_IN := $(BINARY)-in.o all: $(BINARY) -$(OUTPUT) $(OUTPUT)/libbpf $(OUTPUT)/libsubcmd: +$(OUTPUT) $(OUTPUT)/libsubcmd $(LIBBPF_OUT): $(call msg,MKDIR,,$@) $(Q)mkdir -p $(@) $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) $(abspath $@) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT) + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $(abspath $@) install_headers CFLAGS := -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ - -I$(LIBBPF_SRC) \ + -I$(LIBBPF_INCLUDE) \ -I$(SUBCMD_SRC) LIBS = -lelf -lz @@ -65,7 +70,8 @@ $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN) clean_objects := $(wildcard $(OUTPUT)/*.o \ $(OUTPUT)/.*.o.cmd \ $(OUTPUT)/.*.o.d \ - $(OUTPUT)/libbpf \ + $(LIBBPF_OUT) \ + $(LIBBPF_DESTDIR) \ $(OUTPUT)/libsubcmd \ $(OUTPUT)/resolve_btfids) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index de6365b53c9c..a59cb0ee609c 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -60,8 +60,8 @@ #include <linux/rbtree.h> #include <linux/zalloc.h> #include <linux/err.h> -#include <btf.h> -#include <libbpf.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> #include <parse-options.h> #define BTF_IDS_SECTION ".BTF_ids" @@ -89,6 +89,7 @@ struct btf_id { struct 
object { const char *path; const char *btf; + const char *base_btf_path; struct { int fd; @@ -477,25 +478,36 @@ static int symbols_resolve(struct object *obj) int nr_structs = obj->nr_structs; int nr_unions = obj->nr_unions; int nr_funcs = obj->nr_funcs; + struct btf *base_btf = NULL; int err, type_id; struct btf *btf; __u32 nr_types; - btf = btf__parse(obj->btf ?: obj->path, NULL); + if (obj->base_btf_path) { + base_btf = btf__parse(obj->base_btf_path, NULL); + err = libbpf_get_error(base_btf); + if (err) { + pr_err("FAILED: load base BTF from %s: %s\n", + obj->base_btf_path, strerror(-err)); + return -1; + } + } + + btf = btf__parse_split(obj->btf ?: obj->path, base_btf); err = libbpf_get_error(btf); if (err) { pr_err("FAILED: load BTF from %s: %s\n", obj->btf ?: obj->path, strerror(-err)); - return -1; + goto out; } err = -1; - nr_types = btf__get_nr_types(btf); + nr_types = btf__type_cnt(btf); /* * Iterate all the BTF types and search for collected symbol IDs. */ - for (type_id = 1; type_id <= nr_types; type_id++) { + for (type_id = 1; type_id < nr_types; type_id++) { const struct btf_type *type; struct rb_root *root; struct btf_id *id; @@ -545,6 +557,7 @@ static int symbols_resolve(struct object *obj) err = 0; out: + btf__free(base_btf); btf__free(btf); return err; } @@ -678,7 +691,6 @@ static const char * const resolve_btfids_usage[] = { int main(int argc, const char **argv) { - bool no_fail = false; struct object obj = { .efile = { .idlist_shndx = -1, @@ -695,8 +707,8 @@ int main(int argc, const char **argv) "be more verbose (show errors, etc)"), OPT_STRING(0, "btf", &obj.btf, "BTF data", "BTF data"), - OPT_BOOLEAN(0, "no-fail", &no_fail, - "do not fail if " BTF_IDS_SECTION " section is not found"), + OPT_STRING('b', "btf_base", &obj.base_btf_path, "file", + "path of file providing base BTF"), OPT_END() }; int err = -1; @@ -717,10 +729,8 @@ int main(int argc, const char **argv) */ if (obj.efile.idlist_shndx == -1 || obj.efile.symbols_shndx == -1) { - if 
(no_fail) - return 0; - pr_err("FAILED to find needed sections\n"); - return -1; + pr_debug("Cannot find .BTF_ids or symbols sections, nothing to do\n"); + return 0; } if (symbols_collect(&obj)) diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index 3818ec511fd2..bbd1150578f7 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -9,9 +9,9 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) BPFOBJ_OUTPUT := $(OUTPUT)libbpf/ BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a -BPF_INCLUDE := $(BPFOBJ_OUTPUT) -INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) \ - -I$(abspath ../../include/uapi) +BPF_DESTDIR := $(BPFOBJ_OUTPUT) +BPF_INCLUDE := $(BPF_DESTDIR)/include +INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi) CFLAGS := -g -Wall # Try to detect best kernel BTF source @@ -33,7 +33,7 @@ endif .DELETE_ON_ERROR: -.PHONY: all clean runqslower +.PHONY: all clean runqslower libbpf_hdrs all: runqslower runqslower: $(OUTPUT)/runqslower @@ -46,13 +46,15 @@ clean: $(Q)$(RM) $(OUTPUT)runqslower $(Q)$(RM) -r .output +libbpf_hdrs: $(BPFOBJ) + $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) $(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ - $(OUTPUT)/runqslower.bpf.o + $(OUTPUT)/runqslower.bpf.o | libbpf_hdrs -$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h +$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h | libbpf_hdrs $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@ @@ -81,8 +83,10 @@ else endif $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@ + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \ + DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers -$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) 
+$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ - CC=$(HOSTCC) LD=$(HOSTLD) + LIBBPF_OUTPUT=$(BPFOBJ_OUTPUT) \ + LIBBPF_DESTDIR=$(BPF_DESTDIR) CC=$(HOSTCC) LD=$(HOSTLD) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6fc59d61937a..ba5af15e25f5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -906,6 +906,7 @@ enum bpf_map_type { BPF_MAP_TYPE_RINGBUF, BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, + BPF_MAP_TYPE_BLOOM_FILTER, }; /* Note that tracing related programs such as @@ -1274,6 +1275,13 @@ union bpf_attr { * struct stored as the * map value */ + /* Any per-map-type extra fields + * + * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the + * number of hash functions (if 0, the bloom filter will default + * to using 5 hash functions). + */ + __u64 map_extra; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -4909,6 +4917,27 @@ union bpf_attr { * Return * The number of bytes written to the buffer, or a negative error * in case of failure. + * + * struct unix_sock *bpf_skc_to_unix_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *unix_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res) + * Description + * Get the address of a kernel symbol, returned in *res*. *res* is + * set to 0 if the symbol is not found. + * Return + * On success, zero. On error, a negative value. + * + * **-EINVAL** if *flags* is not zero. + * + * **-EINVAL** if string *name* is not the same size as *name_sz*. + * + * **-ENOENT** if symbol is not found. + * + * **-EPERM** if caller does not have permission to obtain kernel address. 
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5089,6 +5118,8 @@ union bpf_attr { FN(task_pt_regs), \ FN(get_branch_snapshot), \ FN(trace_vprintk), \ + FN(skc_to_unix_sock), \ + FN(kallsyms_lookup_name), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5613,6 +5644,7 @@ struct bpf_prog_info { __u64 run_time_ns; __u64 run_cnt; __u64 recursion_misses; + __u32 verified_insns; } __attribute__((aligned(8))); struct bpf_map_info { @@ -5630,6 +5662,8 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 :32; /* alignment pad */ + __u64 map_extra; } __attribute__((aligned(8))); struct bpf_btf_info { diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 642b6ecb37d7..deb12f755f0f 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR and TAG. + * FUNC, FUNC_PROTO, VAR and DECL_TAG. * "type" is a type_id referring to another type. */ union { @@ -74,7 +74,7 @@ enum { BTF_KIND_VAR = 14, /* Variable */ BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ - BTF_KIND_TAG = 17, /* Tag */ + BTF_KIND_DECL_TAG = 17, /* Decl Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, @@ -174,14 +174,14 @@ struct btf_var_secinfo { __u32 size; }; -/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe +/* BTF_KIND_DECL_TAG is followed by a single "struct btf_decl_tag" to describe * additional information related to the tag applied location. * If component_idx == -1, the tag is applied to a struct, union, * variable or function. Otherwise, it is applied to a struct/union * member or a func argument, and component_idx indicates which member * or argument (0 ... vlen-1). 
*/ -struct btf_tag { +struct btf_decl_tag { __s32 component_idx; }; diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 0f766345506f..b393b5e82380 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -146,12 +146,6 @@ $(BPF_IN_SHARED): force $(BPF_GENERATED) @(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \ (diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true - @(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \ - (diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \ - echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true - @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \ - (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \ - echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true @@ -208,8 +202,8 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT) exit 1; \ fi -HDR_MAJ_VERSION := $(shell grep -oE '^\#define LIBBPF_MAJOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) -HDR_MIN_VERSION := $(shell grep -oE '^\#define LIBBPF_MINOR_VERSION ([0-9]+)$$' libbpf_version.h | cut 
-d' ' -f3) +HDR_MAJ_VERSION := $(shell grep -oE '^$(pound)define LIBBPF_MAJOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) +HDR_MIN_VERSION := $(shell grep -oE '^$(pound)define LIBBPF_MINOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3) check_version: $(VERSION_SCRIPT) libbpf_version.h @if [ "$(HDR_MAJ_VERSION)" != "$(LIBBPF_MAJOR_VERSION)" ]; then \ @@ -241,15 +235,24 @@ install_lib: all_cmd $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) -INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ - bpf_helpers.h $(BPF_GENERATED) bpf_tracing.h \ - bpf_endian.h bpf_core_read.h skel_internal.h \ - libbpf_version.h +SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ + bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \ + skel_internal.h libbpf_version.h +GEN_HDRS := $(BPF_GENERATED) -install_headers: $(BPF_GENERATED) - $(call QUIET_INSTALL, headers) \ - $(foreach hdr,$(INSTALL_HEADERS), \ - $(call do_install,$(hdr),$(prefix)/include/bpf,644);) +INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf +INSTALL_SRC_HDRS := $(addprefix $(INSTALL_PFX)/, $(SRC_HDRS)) +INSTALL_GEN_HDRS := $(addprefix $(INSTALL_PFX)/, $(notdir $(GEN_HDRS))) + +$(INSTALL_SRC_HDRS): $(INSTALL_PFX)/%.h: %.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(prefix)/include/bpf,644) + +$(INSTALL_GEN_HDRS): $(INSTALL_PFX)/%.h: $(OUTPUT)%.h + $(call QUIET_INSTALL, $@) \ + $(call do_install,$<,$(prefix)/include/bpf,644) + +install_headers: $(BPF_GENERATED) $(INSTALL_SRC_HDRS) $(INSTALL_GEN_HDRS) install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 2401fad090c5..c09cbb868c9f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -65,19 +65,28 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } +static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr, + 
unsigned int size) +{ + int fd; + + fd = sys_bpf(cmd, attr, size); + return ensure_good_fd(fd); +} + static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) { int retries = 5; int fd; do { - fd = sys_bpf(BPF_PROG_LOAD, attr, size); + fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size); } while (fd < 0 && errno == EAGAIN && retries-- > 0); return fd; } -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) +int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr) { union bpf_attr attr; int fd; @@ -102,11 +111,36 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) create_attr->btf_vmlinux_value_type_id; else attr.inner_map_fd = create_attr->inner_map_fd; + attr.map_extra = create_attr->map_extra; - fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } +int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) +{ + struct bpf_create_map_params p = {}; + + p.map_type = create_attr->map_type; + p.key_size = create_attr->key_size; + p.value_size = create_attr->value_size; + p.max_entries = create_attr->max_entries; + p.map_flags = create_attr->map_flags; + p.name = create_attr->name; + p.numa_node = create_attr->numa_node; + p.btf_fd = create_attr->btf_fd; + p.btf_key_type_id = create_attr->btf_key_type_id; + p.btf_value_type_id = create_attr->btf_value_type_id; + p.map_ifindex = create_attr->map_ifindex; + if (p.map_type == BPF_MAP_TYPE_STRUCT_OPS) + p.btf_vmlinux_value_type_id = + create_attr->btf_vmlinux_value_type_id; + else + p.inner_map_fd = create_attr->inner_map_fd; + + return libbpf__bpf_create_map_xattr(&p); +} + int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node) @@ -181,7 +215,7 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, attr.numa_node = node; } - fd = 
sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -264,6 +298,7 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) attr.line_info_rec_size = load_attr->line_info_rec_size; attr.line_info_cnt = load_attr->line_info_cnt; attr.line_info = ptr_to_u64(load_attr->line_info); + attr.fd_array = ptr_to_u64(load_attr->fd_array); if (load_attr->name) memcpy(attr.prog_name, load_attr->name, @@ -608,7 +643,7 @@ int bpf_obj_get(const char *pathname) memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); - fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -719,7 +754,7 @@ int bpf_link_create(int prog_fd, int target_fd, break; } proceed: - fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -762,7 +797,7 @@ int bpf_iter_create(int link_fd) memset(&attr, 0, sizeof(attr)); attr.iter_create.link_fd = link_fd; - fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -920,7 +955,7 @@ int bpf_prog_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.prog_id = id; - fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -932,7 +967,7 @@ int bpf_map_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.map_id = id; - fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -944,7 +979,7 @@ int bpf_btf_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.btf_id = id; - fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); 
return libbpf_err_errno(fd); } @@ -956,7 +991,7 @@ int bpf_link_get_fd_by_id(__u32 id) memset(&attr, 0, sizeof(attr)); attr.link_id = id; - fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -987,7 +1022,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; - fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); return libbpf_err_errno(fd); } @@ -1007,7 +1042,7 @@ retry: attr.btf_log_buf = ptr_to_u64(log_buf); } - fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, sizeof(attr)); if (fd < 0 && !do_log && log_buf && log_buf_size) { do_log = true; @@ -1049,7 +1084,7 @@ int bpf_enable_stats(enum bpf_stats_type type) memset(&attr, 0, sizeof(attr)); attr.enable_stats.type = type; - fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr)); return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 09ebe3db5f2f..e4aa9996a550 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -40,7 +40,7 @@ enum bpf_enum_value_kind { #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ bpf_probe_read_kernel( \ (void *)dst, \ diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 615400391e57..d26e5472fe50 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -7,6 +7,21 @@ struct ksym_relo_desc { const char *name; int kind; int insn_idx; + bool is_weak; + bool is_typeless; +}; + +struct ksym_desc { + const char 
*name; + int ref; + int kind; + union { + /* used for kfunc */ + int off; + /* used for typeless ksym */ + bool typeless; + }; + int insn; }; struct bpf_gen { @@ -24,18 +39,23 @@ struct bpf_gen { int relo_cnt; char attach_target[128]; int attach_kind; + struct ksym_desc *ksyms; + __u32 nr_ksyms; + int fd_array; + int nr_fd_array; }; void bpf_gen__init(struct bpf_gen *gen, int log_level); int bpf_gen__finish(struct bpf_gen *gen); void bpf_gen__free(struct bpf_gen *gen); void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); -void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx); +void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); struct bpf_prog_load_params; void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx); +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, + bool is_typeless, int kind, int insn_idx); #endif diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index d6bfbe009296..db05a5937105 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -24,6 +24,9 @@ #elif defined(__TARGET_ARCH_sparc) #define bpf_target_sparc #define bpf_target_defined +#elif defined(__TARGET_ARCH_riscv) + #define bpf_target_riscv + #define bpf_target_defined #else /* Fall back to what the compiler says */ @@ -48,6 +51,9 @@ #elif defined(__sparc__) #define bpf_target_sparc #define bpf_target_defined +#elif defined(__riscv) && __riscv_xlen == 64 + #define bpf_target_riscv + #define bpf_target_defined #endif 
/* no compiler target */ #endif @@ -288,6 +294,32 @@ struct pt_regs; #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) #endif +#elif defined(bpf_target_riscv) + +struct pt_regs; +#define PT_REGS_RV const volatile struct user_regs_struct +#define PT_REGS_PARM1(x) (((PT_REGS_RV *)(x))->a0) +#define PT_REGS_PARM2(x) (((PT_REGS_RV *)(x))->a1) +#define PT_REGS_PARM3(x) (((PT_REGS_RV *)(x))->a2) +#define PT_REGS_PARM4(x) (((PT_REGS_RV *)(x))->a3) +#define PT_REGS_PARM5(x) (((PT_REGS_RV *)(x))->a4) +#define PT_REGS_RET(x) (((PT_REGS_RV *)(x))->ra) +#define PT_REGS_FP(x) (((PT_REGS_RV *)(x))->s5) +#define PT_REGS_RC(x) (((PT_REGS_RV *)(x))->a5) +#define PT_REGS_SP(x) (((PT_REGS_RV *)(x))->sp) +#define PT_REGS_IP(x) (((PT_REGS_RV *)(x))->epc) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a0) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a1) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a2) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a3) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a4) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), ra) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), fp) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a5) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), sp) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), epc) + #endif #if defined(bpf_target_powerpc) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 6ad63e4d418a..7e4c5586bd87 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -57,7 +57,7 @@ struct btf { * representation is broken up into three independently allocated * memory regions to be able to modify them independently. 
* raw_data is nulled out at that point, but can be later allocated - * and cached again if user calls btf__get_raw_data(), at which point + * and cached again if user calls btf__raw_data(), at which point * raw_data will contain a contiguous copy of header, types, and * strings: * @@ -189,12 +189,17 @@ int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_ return 0; } +static void *btf_add_type_offs_mem(struct btf *btf, size_t add_cnt) +{ + return libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), + btf->nr_types, BTF_MAX_NR_TYPES, add_cnt); +} + static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) { __u32 *p; - p = libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), - btf->nr_types, BTF_MAX_NR_TYPES, 1); + p = btf_add_type_offs_mem(btf, 1); if (!p) return -ENOMEM; @@ -231,17 +236,23 @@ static int btf_parse_hdr(struct btf *btf) } btf_bswap_hdr(hdr); } else if (hdr->magic != BTF_MAGIC) { - pr_debug("Invalid BTF magic:%x\n", hdr->magic); + pr_debug("Invalid BTF magic: %x\n", hdr->magic); + return -EINVAL; + } + + if (btf->raw_size < hdr->hdr_len) { + pr_debug("BTF header len %u larger than data size %u\n", + hdr->hdr_len, btf->raw_size); return -EINVAL; } - meta_left = btf->raw_size - sizeof(*hdr); - if (meta_left < hdr->str_off + hdr->str_len) { - pr_debug("Invalid BTF total size:%u\n", btf->raw_size); + meta_left = btf->raw_size - hdr->hdr_len; + if (meta_left < (long long)hdr->str_off + hdr->str_len) { + pr_debug("Invalid BTF total size: %u\n", btf->raw_size); return -EINVAL; } - if (hdr->type_off + hdr->type_len > hdr->str_off) { + if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) { pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); return -EINVAL; @@ -304,8 +315,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(struct btf_var); 
case BTF_KIND_DATASEC: return base_size + vlen * sizeof(struct btf_var_secinfo); - case BTF_KIND_TAG: - return base_size + sizeof(struct btf_tag); + case BTF_KIND_DECL_TAG: + return base_size + sizeof(struct btf_decl_tag); default: pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); return -EINVAL; @@ -378,8 +389,8 @@ static int btf_bswap_type_rest(struct btf_type *t) v->size = bswap_32(v->size); } return 0; - case BTF_KIND_TAG: - btf_tag(t)->component_idx = bswap_32(btf_tag(t)->component_idx); + case BTF_KIND_DECL_TAG: + btf_decl_tag(t)->component_idx = bswap_32(btf_decl_tag(t)->component_idx); return 0; default: pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); @@ -430,6 +441,11 @@ __u32 btf__get_nr_types(const struct btf *btf) return btf->start_id + btf->nr_types - 1; } +__u32 btf__type_cnt(const struct btf *btf) +{ + return btf->start_id + btf->nr_types; +} + const struct btf *btf__base_btf(const struct btf *btf) { return btf->base_btf; @@ -461,8 +477,8 @@ static int determine_ptr_size(const struct btf *btf) if (btf->base_btf && btf->base_btf->ptr_sz > 0) return btf->base_btf->ptr_sz; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_int(t)) continue; @@ -522,9 +538,9 @@ int btf__set_pointer_size(struct btf *btf, size_t ptr_sz) static bool is_host_big_endian(void) { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return false; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return true; #else # error "Unrecognized __BYTE_ORDER__" @@ -591,7 +607,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_VAR: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: type_id = t->type; break; case BTF_KIND_ARRAY: @@ -679,12 +695,12 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) __s32 btf__find_by_name(const struct btf 
*btf, const char *type_name) { - __u32 i, nr_types = btf__get_nr_types(btf); + __u32 i, nr_types = btf__type_cnt(btf); if (!strcmp(type_name, "void")) return 0; - for (i = 1; i <= nr_types; i++) { + for (i = 1; i < nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name = btf__name_by_offset(btf, t->name_off); @@ -695,15 +711,15 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) return libbpf_err(-ENOENT); } -__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, - __u32 kind) +static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id, + const char *type_name, __u32 kind) { - __u32 i, nr_types = btf__get_nr_types(btf); + __u32 i, nr_types = btf__type_cnt(btf); if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) return 0; - for (i = 1; i <= nr_types; i++) { + for (i = start_id; i < nr_types; i++) { const struct btf_type *t = btf__type_by_id(btf, i); const char *name; @@ -717,6 +733,18 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, return libbpf_err(-ENOENT); } +__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, + __u32 kind) +{ + return btf_find_by_name_kind(btf, btf->start_id, type_name, kind); +} + +__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, + __u32 kind) +{ + return btf_find_by_name_kind(btf, 1, type_name, kind); +} + static bool btf_is_modifiable(const struct btf *btf) { return (void *)btf->hdr != btf->raw_data; @@ -764,7 +792,7 @@ static struct btf *btf_new_empty(struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; - btf->start_id = btf__get_nr_types(base_btf) + 1; + btf->start_id = btf__type_cnt(base_btf); btf->start_str_off = base_btf->hdr->str_len; } @@ -814,7 +842,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; - btf->start_id = btf__get_nr_types(base_btf) + 1; + btf->start_id = 
btf__type_cnt(base_btf); btf->start_str_off = base_btf->hdr->str_len; } @@ -869,7 +897,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, return ERR_PTR(-LIBBPF_ERRNO__LIBELF); } - fd = open(path, O_RDONLY); + fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; pr_warn("failed to open %s: %s\n", path, strerror(errno)); @@ -1090,99 +1118,6 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) return libbpf_ptr(btf_parse(path, base_btf, NULL)); } -static int compare_vsi_off(const void *_a, const void *_b) -{ - const struct btf_var_secinfo *a = _a; - const struct btf_var_secinfo *b = _b; - - return a->offset - b->offset; -} - -static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, - struct btf_type *t) -{ - __u32 size = 0, off = 0, i, vars = btf_vlen(t); - const char *name = btf__name_by_offset(btf, t->name_off); - const struct btf_type *t_var; - struct btf_var_secinfo *vsi; - const struct btf_var *var; - int ret; - - if (!name) { - pr_debug("No name found in string section for DATASEC kind.\n"); - return -ENOENT; - } - - /* .extern datasec size and var offsets were set correctly during - * extern collection step, so just skip straight to sorting variables - */ - if (t->size) - goto sort_vars; - - ret = bpf_object__section_size(obj, name, &size); - if (ret || !size || (t->size && t->size != size)) { - pr_debug("Invalid size for section %s: %u bytes\n", name, size); - return -ENOENT; - } - - t->size = size; - - for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { - t_var = btf__type_by_id(btf, vsi->type); - var = btf_var(t_var); - - if (!btf_is_var(t_var)) { - pr_debug("Non-VAR type seen in section %s\n", name); - return -EINVAL; - } - - if (var->linkage == BTF_VAR_STATIC) - continue; - - name = btf__name_by_offset(btf, t_var->name_off); - if (!name) { - pr_debug("No name found in string section for VAR kind\n"); - return -ENOENT; - } - - ret = bpf_object__variable_offset(obj, name, 
&off); - if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", - name); - return -ENOENT; - } - - vsi->offset = off; - } - -sort_vars: - qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); - return 0; -} - -int btf__finalize_data(struct bpf_object *obj, struct btf *btf) -{ - int err = 0; - __u32 i; - - for (i = 1; i <= btf->nr_types; i++) { - struct btf_type *t = btf_type_by_id(btf, i); - - /* Loader needs to fix up some of the things compiler - * couldn't get its hands on while emitting BTF. This - * is section size and global variable offset. We use - * the info from the ELF itself for this purpose. - */ - if (btf_is_datasec(t)) { - err = btf_fixup_datasec(obj, btf, t); - if (err) - break; - } - } - - return libbpf_err(err); -} - static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); int btf__load_into_kernel(struct btf *btf) @@ -1300,7 +1235,7 @@ err_out: return NULL; } -const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) +const void *btf__raw_data(const struct btf *btf_ro, __u32 *size) { struct btf *btf = (struct btf *)btf_ro; __u32 data_sz; @@ -1308,7 +1243,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian); if (!data) - return errno = -ENOMEM, NULL; + return errno = ENOMEM, NULL; btf->raw_size = data_sz; if (btf->swapped_endian) @@ -1319,6 +1254,9 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) return data; } +__attribute__((alias("btf__raw_data"))) +const void *btf__get_raw_data(const struct btf *btf, __u32 *size); + const char *btf__str_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->start_str_off) @@ -1691,6 +1629,111 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t return btf_commit_type(btf, sz); } +static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) +{ + struct btf *btf = ctx; + + if (!*type_id) /* nothing to do 
for VOID references */ + return 0; + + /* we haven't updated btf's type count yet, so + * btf->start_id + btf->nr_types - 1 is the type ID offset we should + * add to all newly added BTF types + */ + *type_id += btf->start_id + btf->nr_types - 1; + return 0; +} + +int btf__add_btf(struct btf *btf, const struct btf *src_btf) +{ + struct btf_pipe p = { .src = src_btf, .dst = btf }; + int data_sz, sz, cnt, i, err, old_strs_len; + __u32 *off; + void *t; + + /* appending split BTF isn't supported yet */ + if (src_btf->base_btf) + return libbpf_err(-ENOTSUP); + + /* deconstruct BTF, if necessary, and invalidate raw_data */ + if (btf_ensure_modifiable(btf)) + return libbpf_err(-ENOMEM); + + /* remember original strings section size if we have to roll back + * partial strings section changes + */ + old_strs_len = btf->hdr->str_len; + + data_sz = src_btf->hdr->type_len; + cnt = btf__type_cnt(src_btf) - 1; + + /* pre-allocate enough memory for new types */ + t = btf_add_type_mem(btf, data_sz); + if (!t) + return libbpf_err(-ENOMEM); + + /* pre-allocate enough memory for type offset index for new types */ + off = btf_add_type_offs_mem(btf, cnt); + if (!off) + return libbpf_err(-ENOMEM); + + /* bulk copy types data for all types from src_btf */ + memcpy(t, src_btf->types_data, data_sz); + + for (i = 0; i < cnt; i++) { + sz = btf_type_size(t); + if (sz < 0) { + /* unlikely, has to be corrupted src_btf */ + err = sz; + goto err_out; + } + + /* fill out type ID to type offset mapping for lookups by type ID */ + *off = t - btf->types_data; + + /* add, dedup, and remap strings referenced by this BTF type */ + err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + if (err) + goto err_out; + + /* remap all type IDs referenced from this BTF type */ + err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); + if (err) + goto err_out; + + /* go to next type data and type offset index entry */ + t += sz; + off++; + } + + /* Up until now any of the copied type data was effectively 
invisible, + * so if we exited early before this point due to error, BTF would be + * effectively unmodified. There would be extra internal memory + * pre-allocated, but it would not be available for querying. But now + * that we've copied and rewritten all the data successfully, we can + * update type count and various internal offsets and sizes to + * "commit" the changes and made them visible to the outside world. + */ + btf->hdr->type_len += data_sz; + btf->hdr->str_off += data_sz; + btf->nr_types += cnt; + + /* return type ID of the first added BTF type */ + return btf->start_id + btf->nr_types - cnt; +err_out: + /* zero out preallocated memory as if it was just allocated with + * libbpf_add_mem() + */ + memset(btf->types_data + btf->hdr->type_len, 0, data_sz); + memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); + + /* and now restore original strings section size; types data size + * wasn't modified, so doesn't need restoring, see big comment above */ + btf->hdr->str_len = old_strs_len; + + return libbpf_err(err); +} + /* * Append new BTF_KIND_INT type with: * - *name* - non-empty, non-NULL type name; @@ -1939,7 +1982,7 @@ int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz) static struct btf_type *btf_last_type(struct btf *btf) { - return btf_type_by_id(btf, btf__get_nr_types(btf)); + return btf_type_by_id(btf, btf__type_cnt(btf) - 1); } /* @@ -2447,7 +2490,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ } /* - * Append new BTF_KIND_TAG type with: + * Append new BTF_KIND_DECL_TAG type with: * - *value* - non-empty/non-NULL string; * - *ref_type_id* - referenced type ID, it might not exist yet; * - *component_idx* - -1 for tagging reference type, otherwise struct/union @@ -2456,7 +2499,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ * - >0, type ID of newly added BTF type; * - <0, on error. 
*/ -int btf__add_tag(struct btf *btf, const char *value, int ref_type_id, +int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, int component_idx) { struct btf_type *t; @@ -2471,7 +2514,7 @@ int btf__add_tag(struct btf *btf, const char *value, int ref_type_id, if (btf_ensure_modifiable(btf)) return libbpf_err(-ENOMEM); - sz = sizeof(struct btf_type) + sizeof(struct btf_tag); + sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag); t = btf_add_type_mem(btf, sz); if (!t) return libbpf_err(-ENOMEM); @@ -2481,9 +2524,9 @@ int btf__add_tag(struct btf *btf, const char *value, int ref_type_id, return value_off; t->name_off = value_off; - t->info = btf_type_info(BTF_KIND_TAG, 0, false); + t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false); t->type = ref_type_id; - btf_tag(t)->component_idx = component_idx; + btf_decl_tag(t)->component_idx = component_idx; return btf_commit_type(btf, sz); } @@ -2962,8 +3005,10 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, return libbpf_err(-EINVAL); } - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + if (btf_ensure_modifiable(btf)) { + err = -ENOMEM; + goto done; + } err = btf_dedup_prep(d); if (err) { @@ -3143,7 +3188,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, goto done; } - type_cnt = btf__get_nr_types(btf) + 1; + type_cnt = btf__type_cnt(btf); d->map = malloc(sizeof(__u32) * type_cnt); if (!d->map) { err = -ENOMEM; @@ -3305,7 +3350,7 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) } /* Calculate type signature hash of INT or TAG. 
*/ -static long btf_hash_int_tag(struct btf_type *t) +static long btf_hash_int_decl_tag(struct btf_type *t) { __u32 info = *(__u32 *)(t + 1); long h; @@ -3583,8 +3628,8 @@ static int btf_dedup_prep(struct btf_dedup *d) h = btf_hash_common(t); break; case BTF_KIND_INT: - case BTF_KIND_TAG: - h = btf_hash_int_tag(t); + case BTF_KIND_DECL_TAG: + h = btf_hash_int_decl_tag(t); break; case BTF_KIND_ENUM: h = btf_hash_enum(t); @@ -3639,11 +3684,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_FUNC_PROTO: case BTF_KIND_VAR: case BTF_KIND_DATASEC: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: return 0; case BTF_KIND_INT: - h = btf_hash_int_tag(t); + h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); @@ -4260,13 +4305,13 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) } break; - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: ref_type_id = btf_dedup_ref_type(d, t->type); if (ref_type_id < 0) return ref_type_id; t->type = ref_type_id; - h = btf_hash_int_tag(t); + h = btf_hash_int_decl_tag(t); for_each_dedup_cand(d, hash_entry, h) { cand_id = (__u32)(long)hash_entry->value; cand = btf_type_by_id(d->btf, cand_id); @@ -4549,7 +4594,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_VAR: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: return visit(&t->type, ctx); case BTF_KIND_ARRAY: { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 2cfe31327920..bc005ba3ceec 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -123,6 +123,7 @@ LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *b LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead") LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); +LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only") 
LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead") LIBBPF_API int btf__load(struct btf *btf); @@ -131,7 +132,9 @@ LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, __u32 kind); +LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1") LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); +LIBBPF_API __u32 btf__type_cnt(const struct btf *btf); LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); @@ -144,7 +147,9 @@ LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API void btf__set_fd(struct btf *btf, int fd); +LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__raw_data() instead") LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); +LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, @@ -173,6 +178,28 @@ LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type); +/** + * @brief **btf__add_btf()** appends all the BTF types from *src_btf* into *btf* + * @param btf BTF object which all the BTF types and strings are added to + * @param src_btf BTF object which all BTF types and referenced strings are copied from + * @return BTF 
type ID of the first appended BTF type, or negative error code + * + * **btf__add_btf()** can be used to simply and efficiently append the entire + * contents of one BTF object to another one. All the BTF type data is copied + * over, all referenced type IDs are adjusted by adding a necessary ID offset. + * Only strings referenced from BTF types are copied over and deduplicated, so + * if there were some unused strings in *src_btf*, those won't be copied over, + * which is consistent with the general string deduplication semantics of BTF + * writing APIs. + * + * If any error is encountered during this process, the contents of *btf* is + * left intact, which means that **btf__add_btf()** follows the transactional + * semantics and the operation as a whole is all-or-nothing. + * + * *src_btf* has to be non-split BTF, as of now copying types from split BTF + * is not supported and will result in -ENOTSUP error code returned. + */ +LIBBPF_API int btf__add_btf(struct btf *btf, const struct btf *src_btf); LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding); LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz); @@ -214,7 +241,7 @@ LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz); /* tag construction API */ -LIBBPF_API int btf__add_tag(struct btf *btf, const char *value, int ref_type_id, +LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, int component_idx); struct btf_dedup_opts { @@ -404,9 +431,9 @@ static inline bool btf_is_float(const struct btf_type *t) return btf_kind(t) == BTF_KIND_FLOAT; } -static inline bool btf_is_tag(const struct btf_type *t) +static inline bool btf_is_decl_tag(const struct btf_type *t) { - return btf_kind(t) == BTF_KIND_TAG; + return btf_kind(t) == BTF_KIND_DECL_TAG; } static inline __u8 btf_int_encoding(const struct btf_type *t) @@ -477,10 +504,10 @@ btf_var_secinfos(const struct 
btf_type *t) return (struct btf_var_secinfo *)(t + 1); } -struct btf_tag; -static inline struct btf_tag *btf_tag(const struct btf_type *t) +struct btf_decl_tag; +static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t) { - return (struct btf_tag *)(t + 1); + return (struct btf_decl_tag *)(t + 1); } #ifdef __cplusplus diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index ad6df97295ae..17db62b5002e 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -188,7 +188,7 @@ err: static int btf_dump_resize(struct btf_dump *d) { - int err, last_id = btf__get_nr_types(d->btf); + int err, last_id = btf__type_cnt(d->btf) - 1; if (last_id <= d->last_id) return 0; @@ -262,7 +262,7 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) { int err, i; - if (id > btf__get_nr_types(d->btf)) + if (id >= btf__type_cnt(d->btf)) return libbpf_err(-EINVAL); err = btf_dump_resize(d); @@ -294,11 +294,11 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) */ static int btf_dump_mark_referenced(struct btf_dump *d) { - int i, j, n = btf__get_nr_types(d->btf); + int i, j, n = btf__type_cnt(d->btf); const struct btf_type *t; __u16 vlen; - for (i = d->last_id + 1; i <= n; i++) { + for (i = d->last_id + 1; i < n; i++) { t = btf__type_by_id(d->btf, i); vlen = btf_vlen(t); @@ -316,7 +316,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_VAR: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: d->type_states[t->type].referenced = 1; break; @@ -584,7 +584,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) case BTF_KIND_FUNC: case BTF_KIND_VAR: case BTF_KIND_DATASEC: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: d->type_states[id].order_state = ORDERED; return 0; @@ -1562,29 +1562,28 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d, __u64 *value) { __u16 left_shift_bits, right_shift_bits; - __u8 nr_copy_bits, nr_copy_bytes; const __u8 
*bytes = data; - int sz = t->size; + __u8 nr_copy_bits; __u64 num = 0; int i; /* Maximum supported bitfield size is 64 bits */ - if (sz > 8) { - pr_warn("unexpected bitfield size %d\n", sz); + if (t->size > 8) { + pr_warn("unexpected bitfield size %d\n", t->size); return -EINVAL; } /* Bitfield value retrieval is done in two steps; first relevant bytes are * stored in num, then we left/right shift num to eliminate irrelevant bits. */ - nr_copy_bits = bit_sz + bits_offset; - nr_copy_bytes = t->size; -#if __BYTE_ORDER == __LITTLE_ENDIAN - for (i = nr_copy_bytes - 1; i >= 0; i--) +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + for (i = t->size - 1; i >= 0; i--) num = num * 256 + bytes[i]; -#elif __BYTE_ORDER == __BIG_ENDIAN - for (i = 0; i < nr_copy_bytes; i++) + nr_copy_bits = bit_sz + bits_offset; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + for (i = 0; i < t->size; i++) num = num * 256 + bytes[i]; + nr_copy_bits = t->size * 8 - bits_offset; #else # error "Unrecognized __BYTE_ORDER__" #endif @@ -1658,9 +1657,15 @@ static int btf_dump_base_type_check_zero(struct btf_dump *d, return 0; } -static bool ptr_is_aligned(const void *data, int data_sz) +static bool ptr_is_aligned(const struct btf *btf, __u32 type_id, + const void *data) { - return ((uintptr_t)data) % data_sz == 0; + int alignment = btf__align_of(btf, type_id); + + if (alignment == 0) + return false; + + return ((uintptr_t)data) % alignment == 0; } static int btf_dump_int_data(struct btf_dump *d, @@ -1671,9 +1676,10 @@ static int btf_dump_int_data(struct btf_dump *d, { __u8 encoding = btf_int_encoding(t); bool sign = encoding & BTF_INT_SIGNED; + char buf[16] __attribute__((aligned(16))); int sz = t->size; - if (sz == 0) { + if (sz == 0 || sz > sizeof(buf)) { pr_warn("unexpected size %d for id [%u]\n", sz, type_id); return -EINVAL; } @@ -1681,8 +1687,10 @@ static int btf_dump_int_data(struct btf_dump *d, /* handle packed int data - accesses of integers not aligned on * int boundaries can cause problems on 
some platforms. */ - if (!ptr_is_aligned(data, sz)) - return btf_dump_bitfield_data(d, t, data, 0, 0); + if (!ptr_is_aligned(d->btf, type_id, data)) { + memcpy(buf, data, sz); + data = buf; + } switch (sz) { case 16: { @@ -1692,10 +1700,10 @@ static int btf_dump_int_data(struct btf_dump *d, /* avoid use of __int128 as some 32-bit platforms do not * support it. */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ lsi = ints[0]; msi = ints[1]; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ lsi = ints[1]; msi = ints[0]; #else @@ -1768,7 +1776,7 @@ static int btf_dump_float_data(struct btf_dump *d, int sz = t->size; /* handle unaligned data; copy to local union */ - if (!ptr_is_aligned(data, sz)) { + if (!ptr_is_aligned(d->btf, type_id, data)) { memcpy(&fl, data, sz); flp = &fl; } @@ -1931,7 +1939,7 @@ static int btf_dump_ptr_data(struct btf_dump *d, __u32 id, const void *data) { - if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) { + if (ptr_is_aligned(d->btf, id, data) && d->ptr_sz == sizeof(void *)) { btf_dump_type_values(d, "%p", *(void **)data); } else { union ptr_data pt; @@ -1951,10 +1959,8 @@ static int btf_dump_get_enum_value(struct btf_dump *d, __u32 id, __s64 *value) { - int sz = t->size; - /* handle unaligned enum value */ - if (!ptr_is_aligned(data, sz)) { + if (!ptr_is_aligned(d->btf, id, data)) { __u64 val; int err; @@ -2217,7 +2223,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, case BTF_KIND_FWD: case BTF_KIND_FUNC: case BTF_KIND_FUNC_PROTO: - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: err = btf_dump_unsupported_data(d, t, id); break; case BTF_KIND_INT: diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 80087b13877f..502dea53a742 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -13,9 +13,12 @@ #include "hashmap.h" #include "bpf_gen_internal.h" #include "skel_internal.h" +#include <asm/byteorder.h> 
-#define MAX_USED_MAPS 64 -#define MAX_USED_PROGS 32 +#define MAX_USED_MAPS 64 +#define MAX_USED_PROGS 32 +#define MAX_KFUNC_DESCS 256 +#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS) /* The following structure describes the stack layout of the loader program. * In addition R6 contains the pointer to context. @@ -30,7 +33,6 @@ */ struct loader_stack { __u32 btf_fd; - __u32 map_fd[MAX_USED_MAPS]; __u32 prog_fd[MAX_USED_PROGS]; __u32 inner_map_fd; }; @@ -143,13 +145,49 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size) if (realloc_data_buf(gen, size8)) return 0; prev = gen->data_cur; - memcpy(gen->data_cur, data, size); - gen->data_cur += size; - memcpy(gen->data_cur, &zero, size8 - size); - gen->data_cur += size8 - size; + if (data) { + memcpy(gen->data_cur, data, size); + memcpy(gen->data_cur + size, &zero, size8 - size); + } else { + memset(gen->data_cur, 0, size8); + } + gen->data_cur += size8; return prev - gen->data_start; } +/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative + * to start of fd_array. Caller can decide if it is usable or not. 
+ */ +static int add_map_fd(struct bpf_gen *gen) +{ + if (!gen->fd_array) + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); + if (gen->nr_maps == MAX_USED_MAPS) { + pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS); + gen->error = -E2BIG; + return 0; + } + return gen->nr_maps++; +} + +static int add_kfunc_btf_fd(struct bpf_gen *gen) +{ + int cur; + + if (!gen->fd_array) + gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int)); + if (gen->nr_fd_array == MAX_KFUNC_DESCS) { + cur = add_data(gen, NULL, sizeof(int)); + return (cur - gen->fd_array) / sizeof(int); + } + return MAX_USED_MAPS + gen->nr_fd_array++; +} + +static int blob_fd_array_off(struct bpf_gen *gen, int index) +{ + return gen->fd_array + index * sizeof(int); +} + static int insn_bytes_to_bpf_size(__u32 sz) { switch (sz) { @@ -171,14 +209,22 @@ static void emit_rel_store(struct bpf_gen *gen, int off, int data) emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); } -/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */ -static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off) +static void move_blob2blob(struct bpf_gen *gen, int off, int size, int blob_off) { - emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10)); - emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_off)); + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_2, 0)); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, off)); - emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); +} + +static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off) +{ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_off)); + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0)); + emit(gen, 
BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off)); } static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off, @@ -326,11 +372,11 @@ int bpf_gen__finish(struct bpf_gen *gen) offsetof(struct bpf_prog_desc, prog_fd), 4, stack_off(prog_fd[i])); for (i = 0; i < gen->nr_maps; i++) - move_stack2ctx(gen, - sizeof(struct bpf_loader_ctx) + - sizeof(struct bpf_map_desc) * i + - offsetof(struct bpf_map_desc, map_fd), 4, - stack_off(map_fd[i])); + move_blob2ctx(gen, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * i + + offsetof(struct bpf_map_desc, map_fd), 4, + blob_fd_array_off(gen, i)); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); pr_debug("gen: finish %d\n", gen->error); @@ -386,11 +432,11 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, } void bpf_gen__map_create(struct bpf_gen *gen, - struct bpf_create_map_attr *map_attr, int map_idx) + struct bpf_create_map_params *map_attr, int map_idx) { int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); bool close_inner_map_fd = false; - int map_create_attr; + int map_create_attr, idx; union bpf_attr attr; memset(&attr, 0, attr_size); @@ -398,6 +444,7 @@ void bpf_gen__map_create(struct bpf_gen *gen, attr.key_size = map_attr->key_size; attr.value_size = map_attr->value_size; attr.map_flags = map_attr->map_flags; + attr.map_extra = map_attr->map_extra; memcpy(attr.map_name, map_attr->name, min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); attr.numa_node = map_attr->numa_node; @@ -467,9 +514,11 @@ void bpf_gen__map_create(struct bpf_gen *gen, gen->error = -EDOM; /* internal bug */ return; } else { - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, - stack_off(map_fd[map_idx]))); - gen->nr_maps++; + /* add_map_fd does gen->nr_maps++ */ + idx = add_map_fd(gen); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_fd_array_off(gen, idx))); + emit(gen, 
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, 0)); } if (close_inner_map_fd) emit_sys_close_stack(gen, stack_off(inner_map_fd)); @@ -511,8 +560,8 @@ static void emit_find_attach_target(struct bpf_gen *gen) */ } -void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, - int insn_idx) +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, + bool is_typeless, int kind, int insn_idx) { struct ksym_relo_desc *relo; @@ -524,38 +573,292 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, gen->relos = relo; relo += gen->relo_cnt; relo->name = name; + relo->is_weak = is_weak; + relo->is_typeless = is_typeless; relo->kind = kind; relo->insn_idx = insn_idx; gen->relo_cnt++; } -static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) +/* returns existing ksym_desc with ref incremented, or inserts a new one */ +static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo) { - int name, insn, len = strlen(relo->name) + 1; + struct ksym_desc *kdesc; - pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx); - name = add_data(gen, relo->name, len); + for (int i = 0; i < gen->nr_ksyms; i++) { + if (!strcmp(gen->ksyms[i].name, relo->name)) { + gen->ksyms[i].ref++; + return &gen->ksyms[i]; + } + } + kdesc = libbpf_reallocarray(gen->ksyms, gen->nr_ksyms + 1, sizeof(*kdesc)); + if (!kdesc) { + gen->error = -ENOMEM; + return NULL; + } + gen->ksyms = kdesc; + kdesc = &gen->ksyms[gen->nr_ksyms++]; + kdesc->name = relo->name; + kdesc->kind = relo->kind; + kdesc->ref = 1; + kdesc->off = 0; + kdesc->insn = 0; + return kdesc; +} + +/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} + * Returns result in BPF_REG_7 + */ +static void emit_bpf_find_by_name_kind(struct bpf_gen *gen, struct ksym_relo_desc *relo) +{ + int name_off, len = strlen(relo->name) + 1; + name_off = add_data(gen, relo->name, len); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, - 
0, 0, 0, name)); + 0, 0, 0, name_off)); emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind)); emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind)); emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind); - emit_check_err(gen); +} + +/* Overwrites BPF_REG_{0, 1, 2, 3, 4, 7} + * Returns result in BPF_REG_7 + * Returns u64 symbol addr in BPF_REG_9 + */ +static void emit_bpf_kallsyms_lookup_name(struct bpf_gen *gen, struct ksym_relo_desc *relo) +{ + int name_off, len = strlen(relo->name) + 1, res_off; + + name_off = add_data(gen, relo->name, len); + res_off = add_data(gen, NULL, 8); /* res is u64 */ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, name_off)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); + emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_4, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, res_off)); + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_4)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_kallsyms_lookup_name)); + emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0)); + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + debug_ret(gen, "kallsyms_lookup_name(%s,%d)", relo->name, relo->kind); +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + * + * We need to reuse BTF fd for same symbol otherwise each relocation takes a new + * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols, + * this would mean a new BTF fd index for each entry. By pairing symbol name + * with index, we get the insn->imm, insn->off pairing that kernel uses for + * kfunc_tab, which becomes the effective limit even though all of them may + * share same index in fd_array (such that kfunc_btf_tab has 1 element). 
+ */ +static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + int btf_fd_idx; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing bpf_insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2, + kdesc->insn + offsetof(struct bpf_insn, off)); + goto log; + } + /* remember insn offset, so we can copy BTF ID and FD later */ + kdesc->insn = insn; + emit_bpf_find_by_name_kind(gen, relo); + if (!relo->is_weak) + emit_check_err(gen); + /* get index in fd_array to store BTF FD at */ + btf_fd_idx = add_kfunc_btf_fd(gen); + if (btf_fd_idx > INT16_MAX) { + pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n", + btf_fd_idx, relo->name); + gen->error = -E2BIG; + return; + } + kdesc->off = btf_fd_idx; + /* set a default value for imm */ + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); + /* skip success case store if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 1)); /* store btf_id into insn[insn_idx].imm */ - insn = insns + sizeof(struct bpf_insn) * relo->insn_idx + - offsetof(struct bpf_insn, imm); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* load fd_array slot pointer */ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, - 0, 0, 0, insn)); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, 0)); - if (relo->kind == BTF_KIND_VAR) { - /* store btf_obj_fd into insn[insn_idx + 1].imm */ - emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, - sizeof(struct bpf_insn))); + 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); + /* skip store of BTF fd if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3)); + /* store BTF fd in slot */ + 
emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); + /* set a default value for off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip insn->off store if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2)); + /* skip if vmlinux BTF */ + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1)); + /* store index into insn[insn_idx].off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); +log: + if (!gen->log_level) + return; + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, + offsetof(struct bpf_insn, off))); + debug_regs(gen, BPF_REG_7, BPF_REG_9, " func (%s:count=%d): imm: %%d, off: %%d", + relo->name, kdesc->ref); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_fd_array_off(gen, kdesc->off))); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_0, 0)); + debug_regs(gen, BPF_REG_9, -1, " func (%s:count=%d): btf_fd", + relo->name, kdesc->ref); +} + +static void emit_ksym_relo_log(struct bpf_gen *gen, struct ksym_relo_desc *relo, + int ref) +{ + if (!gen->log_level) + return; + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8, + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8, sizeof(struct bpf_insn) + + offsetof(struct bpf_insn, imm))); + debug_regs(gen, BPF_REG_7, BPF_REG_9, " var t=%d w=%d (%s:count=%d): imm[0]: %%d, imm[1]: %%d", + relo->is_typeless, relo->is_weak, relo->name, ref); + emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); + debug_regs(gen, BPF_REG_9, -1, " var t=%d w=%d (%s:count=%d): insn.reg", + relo->is_typeless, relo->is_weak, relo->name, ref); +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + */ +static void emit_relo_ksym_typeless(struct bpf_gen 
*gen, + struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing ldimm64 insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); + goto log; + } + /* remember insn offset, so we can copy ksym addr later */ + kdesc->insn = insn; + /* skip typeless ksym_desc in fd closing loop in cleanup_relos */ + kdesc->typeless = true; + emit_bpf_kallsyms_lookup_name(gen, relo); + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_7, -ENOENT, 1)); + emit_check_err(gen); + /* store lower half of addr into insn[insn_idx].imm */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, offsetof(struct bpf_insn, imm))); + /* store upper half of addr into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_9, + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); +log: + emit_ksym_relo_log(gen, relo, kdesc->ref); +} + +static __u32 src_reg_mask(void) +{ +#if defined(__LITTLE_ENDIAN_BITFIELD) + return 0x0f; /* src_reg,dst_reg,... */ +#elif defined(__BIG_ENDIAN_BITFIELD) + return 0xf0; /* dst_reg,src_reg,... 
*/ +#else +#error "Unsupported bit endianness, cannot proceed" +#endif +} + +/* Expects: + * BPF_REG_8 - pointer to instruction + */ +static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn) +{ + struct ksym_desc *kdesc; + __u32 reg_mask; + + kdesc = get_ksym_desc(gen, relo); + if (!kdesc) + return; + /* try to copy from existing ldimm64 insn */ + if (kdesc->ref > 1) { + move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + offsetof(struct bpf_insn, imm)); + move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, + kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_8, offsetof(struct bpf_insn, imm))); + /* jump over src_reg adjustment if imm is not 0 */ + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 3)); + goto clear_src_reg; + } + /* remember insn offset, so we can copy BTF ID and FD later */ + kdesc->insn = insn; + emit_bpf_find_by_name_kind(gen, relo); + if (!relo->is_weak) + emit_check_err(gen); + /* set default values as 0 */ + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); + emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0)); + /* skip success case stores if ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 4)); + /* store btf_id into insn[insn_idx].imm */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* store btf_obj_fd into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); + emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); +clear_src_reg: + /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ + reg_mask = src_reg_mask(); + emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, 
offsetofend(struct bpf_insn, code))); + emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); + emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); + + emit_ksym_relo_log(gen, relo, kdesc->ref); +} + +static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) +{ + int insn; + + pr_debug("gen: emit_relo (%d): %s at %d\n", relo->kind, relo->name, relo->insn_idx); + insn = insns + sizeof(struct bpf_insn) * relo->insn_idx; + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_8, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, insn)); + switch (relo->kind) { + case BTF_KIND_VAR: + if (relo->is_typeless) + emit_relo_ksym_typeless(gen, relo, insn); + else + emit_relo_ksym_btf(gen, relo, insn); + break; + case BTF_KIND_FUNC: + emit_relo_kfunc_btf(gen, relo, insn); + break; + default: + pr_warn("Unknown relocation kind '%d'\n", relo->kind); + gen->error = -EDOM; + return; } } @@ -571,14 +874,23 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) { int i, insn; - for (i = 0; i < gen->relo_cnt; i++) { - if (gen->relos[i].kind != BTF_KIND_VAR) - continue; - /* close fd recorded in insn[insn_idx + 1].imm */ - insn = insns + - sizeof(struct bpf_insn) * (gen->relos[i].insn_idx + 1) + - offsetof(struct bpf_insn, imm); - emit_sys_close_blob(gen, insn); + for (i = 0; i < gen->nr_ksyms; i++) { + /* only close fds for typed ksyms and kfuncs */ + if (gen->ksyms[i].kind == BTF_KIND_VAR && !gen->ksyms[i].typeless) { + /* close fd recorded in insn[insn_idx + 1].imm */ + insn = gen->ksyms[i].insn; + insn += sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm); + emit_sys_close_blob(gen, insn); + } else if (gen->ksyms[i].kind == BTF_KIND_FUNC) { + emit_sys_close_blob(gen, blob_fd_array_off(gen, gen->ksyms[i].off)); + if (gen->ksyms[i].off < MAX_FD_ARRAY_SZ) + gen->nr_fd_array--; + } + } + if (gen->nr_ksyms) { + free(gen->ksyms); + gen->nr_ksyms = 0; + gen->ksyms = NULL; } if (gen->relo_cnt) { free(gen->relos); @@ -637,9 +949,8 @@ void 
bpf_gen__prog_load(struct bpf_gen *gen, /* populate union bpf_attr with a pointer to line_info */ emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); - /* populate union bpf_attr fd_array with a pointer to stack where map_fds are saved */ - emit_rel_store_sp(gen, attr_field(prog_load_attr, fd_array), - stack_off(map_fd[0])); + /* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */ + emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4, @@ -706,8 +1017,8 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); map_update_attr = add_data(gen, &attr, attr_size); - move_stack2blob(gen, attr_field(map_update_attr, map_fd), 4, - stack_off(map_fd[map_idx])); + move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, + blob_fd_array_off(gen, map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); emit_rel_store(gen, attr_field(map_update_attr, value), value); /* emit MAP_UPDATE_ELEM command */ @@ -725,8 +1036,8 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) memset(&attr, 0, attr_size); pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); - move_stack2blob(gen, attr_field(map_freeze_attr, map_fd), 4, - stack_off(map_fd[map_idx])); + move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, + blob_fd_array_off(gen, map_idx)); /* emit MAP_FREEZE command */ emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size); debug_ret(gen, "map_freeze"); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8892f2f1bbcc..a1bea1953df6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -195,8 +195,8 @@ enum kern_feature_id { FEAT_BTF_FLOAT, /* BPF perf link support */ FEAT_PERF_LINK, - /* BTF_KIND_TAG support */ - 
FEAT_BTF_TAG, + /* BTF_KIND_DECL_TAG support */ + FEAT_BTF_DECL_TAG, __FEAT_CNT, }; @@ -285,7 +285,7 @@ struct bpf_program { size_t sub_insn_off; char *name; - /* sec_name with / replaced by _; makes recursive pinning + /* name with / replaced by _; makes recursive pinning * in bpf_object__pin_programs easier */ char *pin_name; @@ -370,15 +370,14 @@ enum libbpf_map_type { LIBBPF_MAP_KCONFIG, }; -static const char * const libbpf_type_to_btf_name[] = { - [LIBBPF_MAP_DATA] = DATA_SEC, - [LIBBPF_MAP_BSS] = BSS_SEC, - [LIBBPF_MAP_RODATA] = RODATA_SEC, - [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC, -}; - struct bpf_map { char *name; + /* real_name is defined for special internal maps (.rodata*, + * .data*, .bss, .kconfig) and preserves their original ELF section + * name. This is important to be be able to find corresponding BTF + * DATASEC information. + */ + char *real_name; int fd; int sec_idx; size_t sec_offset; @@ -401,6 +400,7 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; + __u64 map_extra; }; enum extern_type { @@ -443,6 +443,11 @@ struct extern_desc { /* local btf_id of the ksym extern's type. 
*/ __u32 type_id; + /* BTF fd index to be patched in for insn->off, this is + * 0 for vmlinux BTF, index in obj->fd_array for module + * BTF + */ + __s16 btf_fd_idx; } ksym; }; }; @@ -454,6 +459,41 @@ struct module_btf { char *name; __u32 id; int fd; + int fd_array_idx; +}; + +enum sec_type { + SEC_UNUSED = 0, + SEC_RELO, + SEC_BSS, + SEC_DATA, + SEC_RODATA, +}; + +struct elf_sec_desc { + enum sec_type sec_type; + Elf64_Shdr *shdr; + Elf_Data *data; +}; + +struct elf_state { + int fd; + const void *obj_buf; + size_t obj_buf_sz; + Elf *elf; + Elf64_Ehdr *ehdr; + Elf_Data *symbols; + Elf_Data *st_ops_data; + size_t shstrndx; /* section index for section name strings */ + size_t strtabidx; + struct elf_sec_desc *secs; + int sec_cnt; + int maps_shndx; + int btf_maps_shndx; + __u32 btf_maps_sec_btf_id; + int text_shndx; + int symbols_shndx; + int st_ops_shndx; }; struct bpf_object { @@ -471,47 +511,17 @@ struct bpf_object { struct extern_desc *externs; int nr_extern; int kconfig_map_idx; - int rodata_map_idx; bool loaded; bool has_subcalls; + bool has_rodata; struct bpf_gen *gen_loader; + /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ + struct elf_state efile; /* - * Information when doing elf related work. Only valid if fd - * is valid. - */ - struct { - int fd; - const void *obj_buf; - size_t obj_buf_sz; - Elf *elf; - GElf_Ehdr ehdr; - Elf_Data *symbols; - Elf_Data *data; - Elf_Data *rodata; - Elf_Data *bss; - Elf_Data *st_ops_data; - size_t shstrndx; /* section index for section name strings */ - size_t strtabidx; - struct { - GElf_Shdr shdr; - Elf_Data *data; - } *reloc_sects; - int nr_reloc_sects; - int maps_shndx; - int btf_maps_shndx; - __u32 btf_maps_sec_btf_id; - int text_shndx; - int symbols_shndx; - int data_shndx; - int rodata_shndx; - int bss_shndx; - int st_ops_shndx; - } efile; - /* - * All loaded bpf_object is linked in a list, which is + * All loaded bpf_object are linked in a list, which is * hidden to caller. 
bpf_objects__<func> handlers deal with * all objects. */ @@ -539,17 +549,22 @@ struct bpf_object { void *priv; bpf_object_clear_priv_t clear_priv; + int *fd_array; + size_t fd_array_cap; + size_t fd_array_cnt; + char path[]; }; -#define obj_elf_valid(o) ((o)->efile.elf) static const char *elf_sym_str(const struct bpf_object *obj, size_t off); static const char *elf_sec_str(const struct bpf_object *obj, size_t off); static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx); static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name); -static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr); +static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn); static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn); static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn); +static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx); +static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); void bpf_program__unload(struct bpf_program *prog) { @@ -604,7 +619,16 @@ static char *__bpf_program__pin_name(struct bpf_program *prog) { char *name, *p; - name = p = strdup(prog->sec_name); + if (libbpf_mode & LIBBPF_STRICT_SEC_NAME) + name = strdup(prog->name); + else + name = strdup(prog->sec_name); + + if (!name) + return NULL; + + p = name; + while ((p = strchr(p, '/'))) *p = '_'; @@ -691,25 +715,25 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms; int nr_progs, err, i; const char *name; - GElf_Sym sym; + Elf64_Sym *sym; progs = obj->programs; nr_progs = obj->nr_programs; - nr_syms = symbols->d_size / sizeof(GElf_Sym); + nr_syms = symbols->d_size / sizeof(Elf64_Sym); sec_off = 0; for (i = 0; i < nr_syms; i++) { - if (!gelf_getsym(symbols, i, &sym)) - continue; - if (sym.st_shndx != sec_idx) + sym = elf_sym_by_idx(obj, i); + + if (sym->st_shndx != sec_idx) continue; - if 
(GELF_ST_TYPE(sym.st_info) != STT_FUNC) + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) continue; - prog_sz = sym.st_size; - sec_off = sym.st_value; + prog_sz = sym->st_size; + sec_off = sym->st_value; - name = elf_sym_str(obj, sym.st_name); + name = elf_sym_str(obj, sym->st_name); if (!name) { pr_warn("sec '%s': failed to get symbol name for offset %zu\n", sec_name, sec_off); @@ -722,7 +746,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name); return -ENOTSUP; } @@ -755,9 +779,9 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, * as static to enable more permissive BPF verification mode * with more outside context available to BPF verifier */ - if (GELF_ST_BIND(sym.st_info) != STB_LOCAL - && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN - || GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL)) + if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL + && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -1125,6 +1149,7 @@ static struct bpf_object *bpf_object__new(const char *path, size_t obj_buf_sz, const char *obj_name) { + bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); struct bpf_object *obj; char *end; @@ -1158,24 +1183,21 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.maps_shndx = -1; obj->efile.btf_maps_shndx = -1; - obj->efile.data_shndx = -1; - obj->efile.rodata_shndx = -1; - obj->efile.bss_shndx = -1; obj->efile.st_ops_shndx = -1; obj->kconfig_map_idx = -1; - obj->rodata_map_idx = -1; obj->kern_version = get_kernel_version(); obj->loaded = false; 
INIT_LIST_HEAD(&obj->list); - list_add(&obj->list, &bpf_objects_list); + if (!strict) + list_add(&obj->list, &bpf_objects_list); return obj; } static void bpf_object__elf_finish(struct bpf_object *obj) { - if (!obj_elf_valid(obj)) + if (!obj->efile.elf) return; if (obj->efile.elf) { @@ -1183,13 +1205,10 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.elf = NULL; } obj->efile.symbols = NULL; - obj->efile.data = NULL; - obj->efile.rodata = NULL; - obj->efile.bss = NULL; obj->efile.st_ops_data = NULL; - zfree(&obj->efile.reloc_sects); - obj->efile.nr_reloc_sects = 0; + zfree(&obj->efile.secs); + obj->efile.sec_cnt = 0; zclose(obj->efile.fd); obj->efile.obj_buf = NULL; obj->efile.obj_buf_sz = 0; @@ -1197,10 +1216,11 @@ static void bpf_object__elf_finish(struct bpf_object *obj) static int bpf_object__elf_init(struct bpf_object *obj) { + Elf64_Ehdr *ehdr; int err = 0; - GElf_Ehdr *ep; + Elf *elf; - if (obj_elf_valid(obj)) { + if (obj->efile.elf) { pr_warn("elf: init internal error\n"); return -LIBBPF_ERRNO__LIBELF; } @@ -1210,10 +1230,9 @@ static int bpf_object__elf_init(struct bpf_object *obj) * obj_buf should have been validated by * bpf_object__open_buffer(). 
*/ - obj->efile.elf = elf_memory((char *)obj->efile.obj_buf, - obj->efile.obj_buf_sz); + elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); } else { - obj->efile.fd = open(obj->path, O_RDONLY); + obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); if (obj->efile.fd < 0) { char errmsg[STRERR_BUFSIZE], *cp; @@ -1223,23 +1242,37 @@ static int bpf_object__elf_init(struct bpf_object *obj) return err; } - obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); + elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL); } - if (!obj->efile.elf) { + if (!elf) { pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__LIBELF; goto errout; } - if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { + obj->efile.elf = elf; + + if (elf_kind(elf) != ELF_K_ELF) { + err = -LIBBPF_ERRNO__FORMAT; + pr_warn("elf: '%s' is not a proper ELF object\n", obj->path); + goto errout; + } + + if (gelf_getclass(elf) != ELFCLASS64) { + err = -LIBBPF_ERRNO__FORMAT; + pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path); + goto errout; + } + + obj->efile.ehdr = ehdr = elf64_getehdr(elf); + if (!obj->efile.ehdr) { pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; goto errout; } - ep = &obj->efile.ehdr; - if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) { + if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; @@ -1247,7 +1280,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) } /* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ - if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { + if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); err = -LIBBPF_ERRNO__FORMAT; @@ -1255,8 +1288,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) } /* Old LLVM set e_machine to EM_NONE */ - if (ep->e_type != ET_REL || - (ep->e_machine && ep->e_machine != EM_BPF)) { + if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) { pr_warn("elf: %s is not a valid eBPF object file\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; @@ -1270,11 +1302,11 @@ errout: static int bpf_object__check_endianness(struct bpf_object *obj) { -#if __BYTE_ORDER == __LITTLE_ENDIAN - if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB) +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) return 0; -#elif __BYTE_ORDER == __BIG_ENDIAN - if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) return 0; #else # error "Unrecognized __BYTE_ORDER__" @@ -1314,41 +1346,27 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) return false; } -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size) +static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) { int ret = -ENOENT; + Elf_Data *data; + Elf_Scn *scn; *size = 0; - if (!name) { + if (!name) return -EINVAL; - } else if (!strcmp(name, DATA_SEC)) { - if (obj->efile.data) - *size = obj->efile.data->d_size; - } else if (!strcmp(name, BSS_SEC)) { - if (obj->efile.bss) - *size = obj->efile.bss->d_size; - } else if (!strcmp(name, RODATA_SEC)) { - if (obj->efile.rodata) - *size = obj->efile.rodata->d_size; - } else if (!strcmp(name, STRUCT_OPS_SEC)) { - if (obj->efile.st_ops_data) - *size = 
obj->efile.st_ops_data->d_size; - } else { - Elf_Scn *scn = elf_sec_by_name(obj, name); - Elf_Data *data = elf_sec_data(obj, scn); - if (data) { - ret = 0; /* found it */ - *size = data->d_size; - } + scn = elf_sec_by_name(obj, name); + data = elf_sec_data(obj, scn); + if (data) { + ret = 0; /* found it */ + *size = data->d_size; } return *size ? 0 : ret; } -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off) +static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off) { Elf_Data *symbols = obj->efile.symbols; const char *sname; @@ -1357,23 +1375,20 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, if (!name || !off) return -EINVAL; - for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) { - GElf_Sym sym; + for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { + Elf64_Sym *sym = elf_sym_by_idx(obj, si); - if (!gelf_getsym(symbols, si, &sym)) - continue; - if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || - GELF_ST_TYPE(sym.st_info) != STT_OBJECT) + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL || + ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) continue; - sname = elf_sym_str(obj, sym.st_name); + sname = elf_sym_str(obj, sym->st_name); if (!sname) { - pr_warn("failed to get sym name string for var %s\n", - name); + pr_warn("failed to get sym name string for var %s\n", name); return -EIO; } if (strcmp(name, sname) == 0) { - *off = sym.st_value; + *off = sym->st_value; return 0; } } @@ -1425,17 +1440,55 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map) return map_sz; } -static char *internal_map_name(struct bpf_object *obj, - enum libbpf_map_type type) +static char *internal_map_name(struct bpf_object *obj, const char *real_name) { char map_name[BPF_OBJ_NAME_LEN], *p; - const char *sfx = libbpf_type_to_btf_name[type]; - int sfx_len = max((size_t)7, strlen(sfx)); - int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, - strlen(obj->name)); + 
int pfx_len, sfx_len = max((size_t)7, strlen(real_name)); + + /* This is one of the more confusing parts of libbpf for various + * reasons, some of which are historical. The original idea for naming + * internal names was to include as much of BPF object name prefix as + * possible, so that it can be distinguished from similar internal + * maps of a different BPF object. + * As an example, let's say we have bpf_object named 'my_object_name' + * and internal map corresponding to '.rodata' ELF section. The final + * map name advertised to user and to the kernel will be + * 'my_objec.rodata', taking first 8 characters of object name and + * entire 7 characters of '.rodata'. + * Somewhat confusingly, if internal map ELF section name is shorter + * than 7 characters, e.g., '.bss', we still reserve 7 characters + * for the suffix, even though we only have 4 actual characters, and + * resulting map will be called 'my_objec.bss', not even using all 15 + * characters allowed by the kernel. Oh well, at least the truncated + * object name is somewhat consistent in this case. But if the map + * name is '.kconfig', we'll still have entirety of '.kconfig' added + * (8 chars) and thus will be left with only first 7 characters of the + * object name ('my_obje'). Happy guessing, user, that the final map + * name will be "my_obje.kconfig". + * Now, with libbpf starting to support arbitrarily named .rodata.* + * and .data.* data sections, it's possible that ELF section name is + * longer than allowed 15 chars, so we now need to be careful to take + * only up to 15 first characters of ELF name, taking no BPF object + * name characters at all. So '.rodata.abracadabra' will result in + * '.rodata.abracad' kernel and user-visible name. + * We need to keep this convoluted logic intact for .data, .bss and + * .rodata maps, but for new custom .data.custom and .rodata.custom + * maps we use their ELF names as is, not prepending bpf_object name + * in front. 
We still need to truncate them to 15 characters for the + * kernel. Full name can be recovered for such maps by using DATASEC + * BTF type associated with such map's value type, though. + */ + if (sfx_len >= BPF_OBJ_NAME_LEN) + sfx_len = BPF_OBJ_NAME_LEN - 1; + + /* if there are two or more dots in map name, it's a custom dot map */ + if (strchr(real_name + 1, '.') != NULL) + pfx_len = 0; + else + pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name)); snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, - sfx_len, libbpf_type_to_btf_name[type]); + sfx_len, real_name); /* sanitise map name to characters allowed by kernel */ for (p = map_name; *p && p < map_name + sizeof(map_name); p++) @@ -1447,7 +1500,7 @@ static char *internal_map_name(struct bpf_object *obj, static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, - int sec_idx, void *data, size_t data_sz) + const char *real_name, int sec_idx, void *data, size_t data_sz) { struct bpf_map_def *def; struct bpf_map *map; @@ -1460,9 +1513,11 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->libbpf_type = type; map->sec_idx = sec_idx; map->sec_offset = 0; - map->name = internal_map_name(obj, type); - if (!map->name) { - pr_warn("failed to alloc map name\n"); + map->real_name = strdup(real_name); + map->name = internal_map_name(obj, real_name); + if (!map->real_name || !map->name) { + zfree(&map->real_name); + zfree(&map->name); return -ENOMEM; } @@ -1485,6 +1540,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->mmaped = NULL; pr_warn("failed to alloc map '%s' content buffer: %d\n", map->name, err); + zfree(&map->real_name); zfree(&map->name); return err; } @@ -1498,34 +1554,43 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, static int bpf_object__init_global_data_maps(struct bpf_object *obj) { - int err; + struct elf_sec_desc 
*sec_desc; + const char *sec_name; + int err = 0, sec_idx; /* * Populate obj->maps with libbpf internal maps. */ - if (obj->efile.data_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, - obj->efile.data_shndx, - obj->efile.data->d_buf, - obj->efile.data->d_size); - if (err) - return err; - } - if (obj->efile.rodata_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, - obj->efile.rodata_shndx, - obj->efile.rodata->d_buf, - obj->efile.rodata->d_size); - if (err) - return err; - - obj->rodata_map_idx = obj->nr_maps - 1; - } - if (obj->efile.bss_shndx >= 0) { - err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, - obj->efile.bss_shndx, - NULL, - obj->efile.bss->d_size); + for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { + sec_desc = &obj->efile.secs[sec_idx]; + + switch (sec_desc->sec_type) { + case SEC_DATA: + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, + sec_name, sec_idx, + sec_desc->data->d_buf, + sec_desc->data->d_size); + break; + case SEC_RODATA: + obj->has_rodata = true; + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, + sec_name, sec_idx, + sec_desc->data->d_buf, + sec_desc->data->d_size); + break; + case SEC_BSS: + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, + sec_name, sec_idx, + NULL, + sec_desc->data->d_size); + break; + default: + /* skip */ + break; + } if (err) return err; } @@ -1822,7 +1887,7 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj) map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, - obj->efile.symbols_shndx, + ".kconfig", obj->efile.symbols_shndx, NULL, map_sz); if (err) return err; @@ -1860,15 +1925,13 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool 
strict) * * TODO: Detect array of map and report error. */ - nr_syms = symbols->d_size / sizeof(GElf_Sym); + nr_syms = symbols->d_size / sizeof(Elf64_Sym); for (i = 0; i < nr_syms; i++) { - GElf_Sym sym; + Elf64_Sym *sym = elf_sym_by_idx(obj, i); - if (!gelf_getsym(symbols, i, &sym)) + if (sym->st_shndx != obj->efile.maps_shndx) continue; - if (sym.st_shndx != obj->efile.maps_shndx) - continue; - if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) + if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) continue; nr_maps++; } @@ -1885,40 +1948,38 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) /* Fill obj->maps using data in "maps" section. */ for (i = 0; i < nr_syms; i++) { - GElf_Sym sym; + Elf64_Sym *sym = elf_sym_by_idx(obj, i); const char *map_name; struct bpf_map_def *def; struct bpf_map *map; - if (!gelf_getsym(symbols, i, &sym)) - continue; - if (sym.st_shndx != obj->efile.maps_shndx) + if (sym->st_shndx != obj->efile.maps_shndx) continue; - if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) + if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) continue; map = bpf_object__add_map(obj); if (IS_ERR(map)) return PTR_ERR(map); - map_name = elf_sym_str(obj, sym.st_name); + map_name = elf_sym_str(obj, sym->st_name); if (!map_name) { pr_warn("failed to get map #%d name sym string for obj %s\n", i, obj->path); return -LIBBPF_ERRNO__FORMAT; } - if (GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); return -ENOTSUP; } map->libbpf_type = LIBBPF_MAP_UNSPEC; - map->sec_idx = sym.st_shndx; - map->sec_offset = sym.st_value; + map->sec_idx = sym->st_shndx; + map->sec_offset = sym->st_value; pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset); - if (sym.st_value + map_def_sz > data->d_size) { + if (sym->st_value + map_def_sz > data->d_size) { pr_warn("corrupted maps section in %s: last map \"%s\" too 
small\n", obj->path, map_name); return -EINVAL; @@ -1926,11 +1987,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) map->name = strdup(map_name); if (!map->name) { - pr_warn("failed to alloc map name\n"); + pr_warn("map '%s': failed to alloc map name\n", map_name); return -ENOMEM; } pr_debug("map %d is \"%s\"\n", i, map->name); - def = (struct bpf_map_def *)(data->d_buf + sym.st_value); + def = (struct bpf_map_def *)(data->d_buf + sym->st_value); /* * If the definition of the map in the object file fits in * bpf_map_def, copy it. Any extra fields in our version @@ -2014,7 +2075,7 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_VAR: return "var"; case BTF_KIND_DATASEC: return "datasec"; case BTF_KIND_FLOAT: return "float"; - case BTF_KIND_TAG: return "tag"; + case BTF_KIND_DECL_TAG: return "decl_tag"; default: return "unknown"; } } @@ -2264,6 +2325,13 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, } map_def->pinning = val; map_def->parts |= MAP_DEF_PINNING; + } else if (strcmp(name, "map_extra") == 0) { + __u32 map_extra; + + if (!get_map_field_int(map_name, btf, m, &map_extra)) + return -EINVAL; + map_def->map_extra = map_extra; + map_def->parts |= MAP_DEF_MAP_EXTRA; } else { if (strict) { pr_warn("map '%s': unknown field '%s'.\n", map_name, name); @@ -2288,6 +2356,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->def.value_size = def->value_size; map->def.max_entries = def->max_entries; map->def.map_flags = def->map_flags; + map->map_extra = def->map_extra; map->numa_node = def->numa_node; map->btf_key_type_id = def->key_type_id; @@ -2311,7 +2380,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def if (def->parts & MAP_DEF_MAX_ENTRIES) pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); if (def->parts & MAP_DEF_MAP_FLAGS) - pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags); + 
pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); + if (def->parts & MAP_DEF_MAP_EXTRA) + pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, + (unsigned long long)def->map_extra); if (def->parts & MAP_DEF_PINNING) pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); if (def->parts & MAP_DEF_NUMA_NODE) @@ -2448,8 +2520,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return -EINVAL; } - nr_types = btf__get_nr_types(obj->btf); - for (i = 1; i <= nr_types; i++) { + nr_types = btf__type_cnt(obj->btf); + for (i = 1; i < nr_types; i++) { t = btf__type_by_id(obj->btf, i); if (!btf_is_datasec(t)) continue; @@ -2500,12 +2572,13 @@ static int bpf_object__init_maps(struct bpf_object *obj, static bool section_have_execinstr(struct bpf_object *obj, int idx) { - GElf_Shdr sh; + Elf64_Shdr *sh; - if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh)) + sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); + if (!sh) return false; - return sh.sh_flags & SHF_EXECINSTR; + return sh->sh_flags & SHF_EXECINSTR; } static bool btf_needs_sanitization(struct bpf_object *obj) @@ -2514,9 +2587,9 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); - bool has_tag = kernel_supports(obj, FEAT_BTF_TAG); + bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); - return !has_func || !has_datasec || !has_func_global || !has_float || !has_tag; + return !has_func || !has_datasec || !has_func_global || !has_float || !has_decl_tag; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) @@ -2525,15 +2598,15 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = 
kernel_supports(obj, FEAT_BTF_FUNC); - bool has_tag = kernel_supports(obj, FEAT_BTF_TAG); + bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); struct btf_type *t; int i, j, vlen; - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); - if ((!has_datasec && btf_is_var(t)) || (!has_tag && btf_is_tag(t))) { - /* replace VAR/TAG with INT */ + if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { + /* replace VAR/DECL_TAG with INT */ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); /* * using size = 1 is the safest choice, 4 will be too @@ -2640,6 +2713,104 @@ out: return 0; } +static int compare_vsi_off(const void *_a, const void *_b) +{ + const struct btf_var_secinfo *a = _a; + const struct btf_var_secinfo *b = _b; + + return a->offset - b->offset; +} + +static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, + struct btf_type *t) +{ + __u32 size = 0, off = 0, i, vars = btf_vlen(t); + const char *name = btf__name_by_offset(btf, t->name_off); + const struct btf_type *t_var; + struct btf_var_secinfo *vsi; + const struct btf_var *var; + int ret; + + if (!name) { + pr_debug("No name found in string section for DATASEC kind.\n"); + return -ENOENT; + } + + /* .extern datasec size and var offsets were set correctly during + * extern collection step, so just skip straight to sorting variables + */ + if (t->size) + goto sort_vars; + + ret = find_elf_sec_sz(obj, name, &size); + if (ret || !size || (t->size && t->size != size)) { + pr_debug("Invalid size for section %s: %u bytes\n", name, size); + return -ENOENT; + } + + t->size = size; + + for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); + var = btf_var(t_var); + + if (!btf_is_var(t_var)) { + pr_debug("Non-VAR type seen in section %s\n", name); + return -EINVAL; + } + + if (var->linkage == BTF_VAR_STATIC) + continue; + + name = 
btf__name_by_offset(btf, t_var->name_off); + if (!name) { + pr_debug("No name found in string section for VAR kind\n"); + return -ENOENT; + } + + ret = find_elf_var_offset(obj, name, &off); + if (ret) { + pr_debug("No offset found in symbol table for VAR %s\n", + name); + return -ENOENT; + } + + vsi->offset = off; + } + +sort_vars: + qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); + return 0; +} + +static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) +{ + int err = 0; + __u32 i, n = btf__type_cnt(btf); + + for (i = 1; i < n; i++) { + struct btf_type *t = btf_type_by_id(btf, i); + + /* Loader needs to fix up some of the things compiler + * couldn't get its hands on while emitting BTF. This + * is section size and global variable offset. We use + * the info from the ELF itself for this purpose. + */ + if (btf_is_datasec(t)) { + err = btf_fixup_datasec(obj, btf, t); + if (err) + break; + } + } + + return libbpf_err(err); +} + +int btf__finalize_data(struct bpf_object *obj, struct btf *btf) +{ + return btf_finalize_data(obj, btf); +} + static int bpf_object__finalize_btf(struct bpf_object *obj) { int err; @@ -2647,7 +2818,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) if (!obj->btf) return 0; - err = btf__finalize_data(obj, obj->btf); + err = btf_finalize_data(obj, obj->btf); if (err) { pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); return err; @@ -2757,8 +2928,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) continue; - n = btf__get_nr_types(obj->btf); - for (j = 1; j <= n; j++) { + n = btf__type_cnt(obj->btf); + for (j = 1; j < n; j++) { t = btf_type_by_id(obj->btf, j); if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) continue; @@ -2778,7 +2949,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) __u32 sz; /* clone BTF to sanitize a copy and leave the original intact */ - raw_data = 
btf__get_raw_data(obj->btf, &sz); + raw_data = btf__raw_data(obj->btf, &sz); kern_btf = btf__new(raw_data, sz); err = libbpf_get_error(kern_btf); if (err) @@ -2791,7 +2962,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (obj->gen_loader) { __u32 raw_size = 0; - const void *raw_data = btf__get_raw_data(kern_btf, &raw_size); + const void *raw_data = btf__raw_data(kern_btf, &raw_size); if (!raw_data) return -ENOMEM; @@ -2883,32 +3054,36 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) return NULL; } -static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr) +static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) { + Elf64_Shdr *shdr; + if (!scn) - return -EINVAL; + return NULL; - if (gelf_getshdr(scn, hdr) != hdr) { + shdr = elf64_getshdr(scn); + if (!shdr) { pr_warn("elf: failed to get section(%zu) header from %s: %s\n", elf_ndxscn(scn), obj->path, elf_errmsg(-1)); - return -EINVAL; + return NULL; } - return 0; + return shdr; } static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) { const char *name; - GElf_Shdr sh; + Elf64_Shdr *sh; if (!scn) return NULL; - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return NULL; - name = elf_sec_str(obj, sh.sh_name); + name = elf_sec_str(obj, sh->sh_name); if (!name) { pr_warn("elf: failed to get section(%zu) name from %s: %s\n", elf_ndxscn(scn), obj->path, elf_errmsg(-1)); @@ -2936,13 +3111,29 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) return data; } +static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) +{ + if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) + return NULL; + + return (Elf64_Sym *)obj->efile.symbols->d_buf + idx; +} + +static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) +{ + if (idx >= data->d_size / sizeof(Elf64_Rel)) + return NULL; + + return (Elf64_Rel *)data->d_buf + idx; +} + static 
bool is_sec_name_dwarf(const char *name) { /* approximation, but the actual list is too long */ return str_has_pfx(name, ".debug_"); } -static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) +static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name) { /* no special handling of .strtab */ if (hdr->sh_type == SHT_STRTAB) @@ -2990,6 +3181,7 @@ static int cmp_progs(const void *_a, const void *_b) static int bpf_object__elf_collect(struct bpf_object *obj) { + struct elf_sec_desc *sec_desc; Elf *elf = obj->efile.elf; Elf_Data *btf_ext_data = NULL; Elf_Data *btf_data = NULL; @@ -2997,17 +3189,27 @@ static int bpf_object__elf_collect(struct bpf_object *obj) const char *name; Elf_Data *data; Elf_Scn *scn; - GElf_Shdr sh; + Elf64_Shdr *sh; + + /* ELF section indices are 1-based, so allocate +1 element to keep + * indexing simple. Also include 0th invalid section into sec_cnt for + * simpler and more traditional iteration logic. + */ + obj->efile.sec_cnt = 1 + obj->efile.ehdr->e_shnum; + obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); + if (!obj->efile.secs) + return -ENOMEM; /* a bunch of ELF parsing functionality depends on processing symbols, * so do the first pass and find the symbol table */ scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return -LIBBPF_ERRNO__FORMAT; - if (sh.sh_type == SHT_SYMTAB) { + if (sh->sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warn("elf: multiple symbol tables in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; @@ -3017,9 +3219,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (!data) return -LIBBPF_ERRNO__FORMAT; + idx = elf_ndxscn(scn); + obj->efile.symbols = data; - obj->efile.symbols_shndx = elf_ndxscn(scn); - obj->efile.strtabidx = sh.sh_link; + obj->efile.symbols_shndx = idx; + obj->efile.strtabidx = sh->sh_link; } } @@ -3031,16 +3235,18 @@ static int bpf_object__elf_collect(struct 
bpf_object *obj) scn = NULL; while ((scn = elf_nextscn(elf, scn)) != NULL) { - idx++; + idx = elf_ndxscn(scn); + sec_desc = &obj->efile.secs[idx]; - if (elf_sec_hdr(obj, scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return -LIBBPF_ERRNO__FORMAT; - name = elf_sec_str(obj, sh.sh_name); + name = elf_sec_str(obj, sh->sh_name); if (!name) return -LIBBPF_ERRNO__FORMAT; - if (ignore_elf_section(&sh, name)) + if (ignore_elf_section(sh, name)) continue; data = elf_sec_data(obj, scn); @@ -3049,8 +3255,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", idx, name, (unsigned long)data->d_size, - (int)sh.sh_link, (unsigned long)sh.sh_flags, - (int)sh.sh_type); + (int)sh->sh_link, (unsigned long)sh->sh_flags, + (int)sh->sh_type); if (strcmp(name, "license") == 0) { err = bpf_object__init_license(obj, data->d_buf, data->d_size); @@ -3068,21 +3274,25 @@ static int bpf_object__elf_collect(struct bpf_object *obj) btf_data = data; } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = data; - } else if (sh.sh_type == SHT_SYMTAB) { + } else if (sh->sh_type == SHT_SYMTAB) { /* already processed during the first pass above */ - } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { - if (sh.sh_flags & SHF_EXECINSTR) { + } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { + if (sh->sh_flags & SHF_EXECINSTR) { if (strcmp(name, ".text") == 0) obj->efile.text_shndx = idx; err = bpf_object__add_programs(obj, data, name, idx); if (err) return err; - } else if (strcmp(name, DATA_SEC) == 0) { - obj->efile.data = data; - obj->efile.data_shndx = idx; - } else if (strcmp(name, RODATA_SEC) == 0) { - obj->efile.rodata = data; - obj->efile.rodata_shndx = idx; + } else if (strcmp(name, DATA_SEC) == 0 || + str_has_pfx(name, DATA_SEC ".")) { + sec_desc->sec_type = SEC_DATA; + sec_desc->shdr = sh; + sec_desc->data = data; + } else if (strcmp(name, RODATA_SEC) == 0 || + str_has_pfx(name, 
RODATA_SEC ".")) { + sec_desc->sec_type = SEC_RODATA; + sec_desc->shdr = sh; + sec_desc->data = data; } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { obj->efile.st_ops_data = data; obj->efile.st_ops_shndx = idx; @@ -3090,37 +3300,29 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); } - } else if (sh.sh_type == SHT_REL) { - int nr_sects = obj->efile.nr_reloc_sects; - void *sects = obj->efile.reloc_sects; - int sec = sh.sh_info; /* points to other section */ + } else if (sh->sh_type == SHT_REL) { + int targ_sec_idx = sh->sh_info; /* points to other section */ /* Only do relo for section with exec instructions */ - if (!section_have_execinstr(obj, sec) && + if (!section_have_execinstr(obj, targ_sec_idx) && strcmp(name, ".rel" STRUCT_OPS_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", - idx, name, sec, - elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>"); + idx, name, targ_sec_idx, + elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>"); continue; } - sects = libbpf_reallocarray(sects, nr_sects + 1, - sizeof(*obj->efile.reloc_sects)); - if (!sects) - return -ENOMEM; - - obj->efile.reloc_sects = sects; - obj->efile.nr_reloc_sects++; - - obj->efile.reloc_sects[nr_sects].shdr = sh; - obj->efile.reloc_sects[nr_sects].data = data; - } else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { - obj->efile.bss = data; - obj->efile.bss_shndx = idx; + sec_desc->sec_type = SEC_RELO; + sec_desc->shdr = sh; + sec_desc->data = data; + } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { + sec_desc->sec_type = SEC_BSS; + sec_desc->shdr = sh; + sec_desc->data = data; } else { pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, - (size_t)sh.sh_size); + (size_t)sh->sh_size); } } @@ -3136,19 +3338,19 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return bpf_object__init_btf(obj, 
btf_data, btf_ext_data); } -static bool sym_is_extern(const GElf_Sym *sym) +static bool sym_is_extern(const Elf64_Sym *sym) { - int bind = GELF_ST_BIND(sym->st_info); + int bind = ELF64_ST_BIND(sym->st_info); /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ return sym->st_shndx == SHN_UNDEF && (bind == STB_GLOBAL || bind == STB_WEAK) && - GELF_ST_TYPE(sym->st_info) == STT_NOTYPE; + ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; } -static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx) +static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) { - int bind = GELF_ST_BIND(sym->st_info); - int type = GELF_ST_TYPE(sym->st_info); + int bind = ELF64_ST_BIND(sym->st_info); + int type = ELF64_ST_TYPE(sym->st_info); /* in .text section */ if (sym->st_shndx != text_shndx) @@ -3171,8 +3373,8 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name) if (!btf) return -ESRCH; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_var(t) && !btf_is_func(t)) @@ -3203,8 +3405,8 @@ static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { if (!btf) return -ESRCH; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (!btf_is_datasec(t)) @@ -3288,8 +3490,8 @@ static int find_int_btf_id(const struct btf *btf) const struct btf_type *t; int i, n; - n = btf__get_nr_types(btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(btf, i); if (btf_is_int(t) && btf_int_bits(t) == 32) @@ -3346,30 +3548,31 @@ static int bpf_object__collect_externs(struct bpf_object *obj) int i, n, off, dummy_var_btf_id; const char *ext_name, *sec_name; Elf_Scn *scn; - GElf_Shdr sh; + Elf64_Shdr *sh; if (!obj->efile.symbols) return 0; scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); - if (elf_sec_hdr(obj, 
scn, &sh)) + sh = elf_sec_hdr(obj, scn); + if (!sh) return -LIBBPF_ERRNO__FORMAT; dummy_var_btf_id = add_dummy_ksym_var(obj->btf); if (dummy_var_btf_id < 0) return dummy_var_btf_id; - n = sh.sh_size / sh.sh_entsize; + n = sh->sh_size / sh->sh_entsize; pr_debug("looking for externs among %d symbols...\n", n); for (i = 0; i < n; i++) { - GElf_Sym sym; + Elf64_Sym *sym = elf_sym_by_idx(obj, i); - if (!gelf_getsym(obj->efile.symbols, i, &sym)) + if (!sym) return -LIBBPF_ERRNO__FORMAT; - if (!sym_is_extern(&sym)) + if (!sym_is_extern(sym)) continue; - ext_name = elf_sym_str(obj, sym.st_name); + ext_name = elf_sym_str(obj, sym->st_name); if (!ext_name || !ext_name[0]) continue; @@ -3391,7 +3594,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) t = btf__type_by_id(obj->btf, ext->btf_id); ext->name = btf__name_by_offset(obj->btf, t->name_off); ext->sym_idx = i; - ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; + ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); if (ext->sec_btf_id <= 0) { @@ -3429,11 +3632,6 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { - if (btf_is_func(t) && ext->is_weak) { - pr_warn("extern weak function %s is unsupported\n", - ext->name); - return -ENOTSUP; - } ksym_sec = sec; ext->type = EXT_KSYM; skip_mods_and_typedefs(obj->btf, t->type, @@ -3601,9 +3799,14 @@ bpf_object__find_program_by_name(const struct bpf_object *obj, static bool bpf_object__shndx_is_data(const struct bpf_object *obj, int shndx) { - return shndx == obj->efile.data_shndx || - shndx == obj->efile.bss_shndx || - shndx == obj->efile.rodata_shndx; + switch (obj->efile.secs[shndx].sec_type) { + case SEC_BSS: + case SEC_DATA: + case SEC_RODATA: + return true; + default: + return false; + } } static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, @@ -3616,22 +3819,25 @@ static bool 
bpf_object__shndx_is_maps(const struct bpf_object *obj, static enum libbpf_map_type bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) { - if (shndx == obj->efile.data_shndx) - return LIBBPF_MAP_DATA; - else if (shndx == obj->efile.bss_shndx) + if (shndx == obj->efile.symbols_shndx) + return LIBBPF_MAP_KCONFIG; + + switch (obj->efile.secs[shndx].sec_type) { + case SEC_BSS: return LIBBPF_MAP_BSS; - else if (shndx == obj->efile.rodata_shndx) + case SEC_DATA: + return LIBBPF_MAP_DATA; + case SEC_RODATA: return LIBBPF_MAP_RODATA; - else if (shndx == obj->efile.symbols_shndx) - return LIBBPF_MAP_KCONFIG; - else + default: return LIBBPF_MAP_UNSPEC; + } } static int bpf_program__record_reloc(struct bpf_program *prog, struct reloc_desc *reloc_desc, __u32 insn_idx, const char *sym_name, - const GElf_Sym *sym, const GElf_Rel *rel) + const Elf64_Sym *sym, const Elf64_Rel *rel) { struct bpf_insn *insn = &prog->insns[insn_idx]; size_t map_idx, nr_maps = prog->obj->nr_maps; @@ -3648,7 +3854,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } if (sym_is_extern(sym)) { - int sym_idx = GELF_R_SYM(rel->r_info); + int sym_idx = ELF64_R_SYM(rel->r_info); int i, n = obj->nr_extern; struct extern_desc *ext; @@ -3761,7 +3967,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog, } for (map_idx = 0; map_idx < nr_maps; map_idx++) { map = &obj->maps[map_idx]; - if (map->libbpf_type != type) + if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) continue; pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", prog->name, map_idx, map->name, map->sec_idx, @@ -3813,9 +4019,8 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, } static int -bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data) +bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { - Elf_Data *symbols = obj->efile.symbols; const char 
*relo_sec_name, *sec_name; size_t sec_idx = shdr->sh_info; struct bpf_program *prog; @@ -3825,8 +4030,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data __u32 insn_idx; Elf_Scn *scn; Elf_Data *scn_data; - GElf_Sym sym; - GElf_Rel rel; + Elf64_Sym *sym; + Elf64_Rel *rel; scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); @@ -3841,33 +4046,36 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; i++) { - if (!gelf_getrel(data, i, &rel)) { + rel = elf_rel_by_idx(data, i); + if (!rel) { pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); return -LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + + sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + if (!sym) { pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n", - relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); + relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); return -LIBBPF_ERRNO__FORMAT; } - if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) { + if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", - relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i); + relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); return -LIBBPF_ERRNO__FORMAT; } - insn_idx = rel.r_offset / BPF_INSN_SZ; + insn_idx = rel->r_offset / BPF_INSN_SZ; /* relocations against static functions are recorded as * relocations against the section that contains a function; * in such case, symbol will be STT_SECTION and sym.st_name * will point to empty string (0), so fetch section name * instead */ - if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0) - sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx)); + if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) + sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, 
sym->st_shndx)); else - sym_name = elf_sym_str(obj, sym.st_name); + sym_name = elf_sym_str(obj, sym->st_name); sym_name = sym_name ?: "<?"; pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", @@ -3889,7 +4097,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data /* adjust insn_idx to local BPF program frame of reference */ insn_idx -= prog->sec_insn_off; err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], - insn_idx, sym_name, &sym, &rel); + insn_idx, sym_name, sym, rel); if (err) return err; @@ -3921,8 +4129,7 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) * LLVM annotates global data differently in BTF, that is, * only as '.data', '.bss' or '.rodata'. */ - ret = btf__find_by_name(obj->btf, - libbpf_type_to_btf_name[map->libbpf_type]); + ret = btf__find_by_name(obj->btf, map->real_name); } if (ret < 0) return ret; @@ -4015,6 +4222,7 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) map->btf_key_type_id = info.btf_key_type_id; map->btf_value_type_id = info.btf_value_type_id; map->reused = true; + map->map_extra = info.map_extra; return 0; @@ -4243,7 +4451,7 @@ static int probe_kern_btf_float(void) strs, sizeof(strs))); } -static int probe_kern_btf_tag(void) +static int probe_kern_btf_decl_tag(void) { static const char strs[] = "\0tag"; __u32 types[] = { @@ -4253,7 +4461,7 @@ static int probe_kern_btf_tag(void) BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), BTF_VAR_STATIC, /* attr */ - BTF_TYPE_TAG_ENC(1, 2, -1), + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), @@ -4476,8 +4684,8 @@ static struct kern_feature_desc { [FEAT_PERF_LINK] = { "BPF perf link support", probe_perf_link, }, - [FEAT_BTF_TAG] = { - "BTF_KIND_TAG support", probe_kern_btf_tag, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, }, }; @@ -4529,7 +4737,8 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int 
map_fd) map_info.key_size == map->def.key_size && map_info.value_size == map->def.value_size && map_info.max_entries == map->def.max_entries && - map_info.map_flags == map->def.map_flags); + map_info.map_flags == map->def.map_flags && + map_info.map_extra == map->map_extra); } static int @@ -4612,7 +4821,7 @@ static void bpf_map__destroy(struct bpf_map *map); static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { - struct bpf_create_map_attr create_attr; + struct bpf_create_map_params create_attr; struct bpf_map_def *def = &map->def; int err = 0; @@ -4626,6 +4835,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.key_size = def->key_size; create_attr.value_size = def->value_size; create_attr.numa_node = map->numa_node; + create_attr.map_extra = map->map_extra; if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { int nr_cpus; @@ -4700,7 +4910,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b */ map->fd = 0; } else { - map->fd = bpf_create_map_xattr(&create_attr); + map->fd = libbpf__bpf_create_map_xattr(&create_attr); } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { @@ -4715,7 +4925,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = bpf_create_map_xattr(&create_attr); + map->fd = libbpf__bpf_create_map_xattr(&create_attr); } err = map->fd < 0 ? 
-errno : 0; @@ -4892,8 +5102,8 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, size_t targ_essent_len; int n, i; - n = btf__get_nr_types(targ_btf); - for (i = targ_start_id; i <= n; i++) { + n = btf__type_cnt(targ_btf); + for (i = targ_start_id; i < n; i++) { t = btf__type_by_id(targ_btf, i); if (btf_kind(t) != btf_kind(local_cand->t)) continue; @@ -5068,7 +5278,7 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l err = bpf_core_add_cands(&local_cand, local_essent_len, obj->btf_modules[i].btf, obj->btf_modules[i].name, - btf__get_nr_types(obj->btf_vmlinux) + 1, + btf__type_cnt(obj->btf_vmlinux), cands); if (err) goto err_out; @@ -5212,7 +5422,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, * relocated, so it's enough to just subtract in-section offset */ insn_idx = insn_idx - prog->sec_insn_off; - if (insn_idx > prog->insns_cnt) + if (insn_idx >= prog->insns_cnt) return -EINVAL; insn = &prog->insns[insn_idx]; @@ -5406,7 +5616,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_EXTERN_FUNC: ext = &obj->externs[relo->sym_off]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; - insn[0].imm = ext->ksym.kernel_btf_id; + if (ext->is_set) { + insn[0].imm = ext->ksym.kernel_btf_id; + insn[0].off = ext->ksym.btf_fd_idx; + } else { /* unresolved weak kfunc */ + insn[0].imm = 0; + insn[0].off = 0; + } break; case RELO_SUBPROG_ADDR: if (insn[0].src_reg != BPF_PSEUDO_FUNC) { @@ -5931,10 +6147,10 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) } static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, - GElf_Shdr *shdr, Elf_Data *data); + Elf64_Shdr *shdr, Elf_Data *data); static int bpf_object__collect_map_relos(struct bpf_object *obj, - GElf_Shdr *shdr, Elf_Data *data) + Elf64_Shdr *shdr, Elf_Data *data) { const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); int i, j, nrels, new_sz; @@ -5943,10 +6159,9 @@ static int 
bpf_object__collect_map_relos(struct bpf_object *obj, struct bpf_map *map = NULL, *targ_map; const struct btf_member *member; const char *name, *mname; - Elf_Data *symbols; unsigned int moff; - GElf_Sym sym; - GElf_Rel rel; + Elf64_Sym *sym; + Elf64_Rel *rel; void *tmp; if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) @@ -5955,28 +6170,30 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, if (!sec) return -EINVAL; - symbols = obj->efile.symbols; nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; i++) { - if (!gelf_getrel(data, i, &rel)) { + rel = elf_rel_by_idx(data, i); + if (!rel) { pr_warn(".maps relo #%d: failed to get ELF relo\n", i); return -LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + + sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + if (!sym) { pr_warn(".maps relo #%d: symbol %zx not found\n", - i, (size_t)GELF_R_SYM(rel.r_info)); + i, (size_t)ELF64_R_SYM(rel->r_info)); return -LIBBPF_ERRNO__FORMAT; } - name = elf_sym_str(obj, sym.st_name) ?: "<?>"; - if (sym.st_shndx != obj->efile.btf_maps_shndx) { + name = elf_sym_str(obj, sym->st_name) ?: "<?>"; + if (sym->st_shndx != obj->efile.btf_maps_shndx) { pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", i, name); return -LIBBPF_ERRNO__RELOC; } - pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n", - i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value, - (size_t)rel.r_offset, sym.st_name, name); + pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", + i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, + (size_t)rel->r_offset, sym->st_name, name); for (j = 0; j < obj->nr_maps; j++) { map = &obj->maps[j]; @@ -5984,13 +6201,13 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, continue; vi = btf_var_secinfos(sec) + map->btf_var_idx; - if (vi->offset <= rel.r_offset && - rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size) + if (vi->offset <= 
rel->r_offset && + rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) break; } if (j == obj->nr_maps) { - pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n", - i, name, (size_t)rel.r_offset); + pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", + i, name, (size_t)rel->r_offset); return -EINVAL; } @@ -6017,10 +6234,10 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, return -EINVAL; moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; - if (rel.r_offset - vi->offset < moff) + if (rel->r_offset - vi->offset < moff) return -EINVAL; - moff = rel.r_offset - vi->offset - moff; + moff = rel->r_offset - vi->offset - moff; /* here we use BPF pointer size, which is always 64 bit, as we * are parsing ELF that was built for BPF target */ @@ -6065,10 +6282,18 @@ static int bpf_object__collect_relos(struct bpf_object *obj) { int i, err; - for (i = 0; i < obj->efile.nr_reloc_sects; i++) { - GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr; - Elf_Data *data = obj->efile.reloc_sects[i].data; - int idx = shdr->sh_info; + for (i = 0; i < obj->efile.sec_cnt; i++) { + struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; + Elf64_Shdr *shdr; + Elf_Data *data; + int idx; + + if (sec_desc->sec_type != SEC_RELO) + continue; + + shdr = sec_desc->shdr; + data = sec_desc->data; + idx = shdr->sh_info; if (shdr->sh_type != SHT_REL) { pr_warn("internal error at %d\n", __LINE__); @@ -6191,6 +6416,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *license, __u32 kern_version, int *pfd) { struct bpf_prog_load_params load_attr = {}; + struct bpf_object *obj = prog->obj; char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; char *log_buf = NULL; @@ -6211,7 +6437,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.prog_type = prog->type; load_attr.expected_attach_type = prog->expected_attach_type; - if (kernel_supports(prog->obj, FEAT_PROG_NAME)) + if 
(kernel_supports(obj, FEAT_PROG_NAME)) load_attr.name = prog->name; load_attr.insns = insns; load_attr.insn_cnt = insns_cnt; @@ -6224,8 +6450,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.prog_ifindex = prog->prog_ifindex; /* specify func_info/line_info only if kernel supports them */ - btf_fd = bpf_object__btf_fd(prog->obj); - if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) { + btf_fd = bpf_object__btf_fd(obj); + if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { load_attr.prog_btf_fd = btf_fd; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; @@ -6236,6 +6462,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } load_attr.log_level = prog->log_level; load_attr.prog_flags = prog->prog_flags; + load_attr.fd_array = obj->fd_array; /* adjust load_attr if sec_def provides custom preload callback */ if (prog->sec_def && prog->sec_def->preload_fn) { @@ -6247,9 +6474,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } } - if (prog->obj->gen_loader) { - bpf_gen__prog_load(prog->obj->gen_loader, &load_attr, - prog - prog->obj->programs); + if (obj->gen_loader) { + bpf_gen__prog_load(obj->gen_loader, &load_attr, + prog - obj->programs); *pfd = -1; return 0; } @@ -6270,16 +6497,21 @@ retry_load: if (log_buf && load_attr.log_level) pr_debug("verifier log:\n%s", log_buf); - if (prog->obj->rodata_map_idx >= 0 && - kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) { - struct bpf_map *rodata_map = - &prog->obj->maps[prog->obj->rodata_map_idx]; + if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { + struct bpf_map *map; + int i; + + for (i = 0; i < obj->nr_maps; i++) { + map = &prog->obj->maps[i]; + if (map->libbpf_type != LIBBPF_MAP_RODATA) + continue; - if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': 
failed to bind .rodata map: %s\n", - prog->name, cp); - /* Don't fail hard if can't bind rodata. */ + if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warn("prog '%s': failed to bind .rodata map: %s\n", + prog->name, cp); + /* Don't fail hard if can't bind rodata. */ + } } } @@ -6343,16 +6575,13 @@ static int bpf_program__record_externs(struct bpf_program *prog) case RELO_EXTERN_VAR: if (ext->type != EXT_KSYM) continue; - if (!ext->ksym.type_id) { - pr_warn("typeless ksym %s is not supported yet\n", - ext->name); - return -ENOTSUP; - } - bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR, - relo->insn_idx); + bpf_gen__record_extern(obj->gen_loader, ext->name, + ext->is_weak, !ext->ksym.type_id, + BTF_KIND_VAR, relo->insn_idx); break; case RELO_EXTERN_FUNC: - bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC, + bpf_gen__record_extern(obj->gen_loader, ext->name, + ext->is_weak, false, BTF_KIND_FUNC, relo->insn_idx); break; default: @@ -6438,8 +6667,6 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) out: if (err) pr_warn("failed to load program '%s'\n", prog->name); - zfree(&prog->insns); - prog->insns_cnt = 0; return libbpf_err(err); } @@ -6660,7 +6887,7 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts)); } -int bpf_object__unload(struct bpf_object *obj) +static int bpf_object_unload(struct bpf_object *obj) { size_t i; @@ -6679,6 +6906,8 @@ int bpf_object__unload(struct bpf_object *obj) return 0; } +int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload"))); + static int bpf_object__sanitize_maps(struct bpf_object *obj) { struct bpf_map *m; @@ -6752,13 +6981,14 @@ out: static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, __u16 kind, struct btf **res_btf, - int *res_btf_fd) + struct module_btf 
**res_mod_btf) { - int i, id, btf_fd, err; + struct module_btf *mod_btf; struct btf *btf; + int i, id, err; btf = obj->btf_vmlinux; - btf_fd = 0; + mod_btf = NULL; id = btf__find_by_name_kind(btf, ksym_name, kind); if (id == -ENOENT) { @@ -6767,10 +6997,10 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, return err; for (i = 0; i < obj->btf_module_cnt; i++) { - btf = obj->btf_modules[i].btf; - /* we assume module BTF FD is always >0 */ - btf_fd = obj->btf_modules[i].fd; - id = btf__find_by_name_kind(btf, ksym_name, kind); + /* we assume module_btf's BTF FD is always >0 */ + mod_btf = &obj->btf_modules[i]; + btf = mod_btf->btf; + id = btf__find_by_name_kind_own(btf, ksym_name, kind); if (id != -ENOENT) break; } @@ -6779,7 +7009,7 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, return -ESRCH; *res_btf = btf; - *res_btf_fd = btf_fd; + *res_mod_btf = mod_btf; return id; } @@ -6788,14 +7018,15 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, { const struct btf_type *targ_var, *targ_type; __u32 targ_type_id, local_type_id; + struct module_btf *mod_btf = NULL; const char *targ_var_name; - int id, btf_fd = 0, err; struct btf *btf = NULL; + int id, err; - id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd); - if (id == -ESRCH && ext->is_weak) { - return 0; - } else if (id < 0) { + id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); + if (id < 0) { + if (id == -ESRCH && ext->is_weak) + return 0; pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", ext->name); return id; @@ -6827,7 +7058,7 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, } ext->is_set = true; - ext->ksym.kernel_btf_obj_fd = btf_fd; + ext->ksym.kernel_btf_obj_fd = mod_btf ? 
mod_btf->fd : 0; ext->ksym.kernel_btf_id = id; pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", ext->name, id, btf_kind_str(targ_var), targ_var_name); @@ -6839,26 +7070,22 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, struct extern_desc *ext) { int local_func_proto_id, kfunc_proto_id, kfunc_id; + struct module_btf *mod_btf = NULL; const struct btf_type *kern_func; struct btf *kern_btf = NULL; - int ret, kern_btf_fd = 0; + int ret; local_func_proto_id = ext->ksym.type_id; - kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, - &kern_btf, &kern_btf_fd); + kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); if (kfunc_id < 0) { - pr_warn("extern (func ksym) '%s': not found in kernel BTF\n", + if (kfunc_id == -ESRCH && ext->is_weak) + return 0; + pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", ext->name); return kfunc_id; } - if (kern_btf != obj->btf_vmlinux) { - pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n", - ext->name); - return -ENOTSUP; - } - kern_func = btf__type_by_id(kern_btf, kfunc_id); kfunc_proto_id = kern_func->type; @@ -6870,9 +7097,30 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, return -EINVAL; } + /* set index for module BTF fd in fd_array, if unset */ + if (mod_btf && !mod_btf->fd_array_idx) { + /* insn->off is s16 */ + if (obj->fd_array_cnt == INT16_MAX) { + pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", + ext->name, mod_btf->fd_array_idx); + return -E2BIG; + } + /* Cannot use index 0 for module BTF fd */ + if (!obj->fd_array_cnt) + obj->fd_array_cnt = 1; + + ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), + obj->fd_array_cnt + 1); + if (ret) + return ret; + mod_btf->fd_array_idx = obj->fd_array_cnt; + /* we assume module BTF FD is always >0 */ + obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; + } + 
ext->is_set = true; - ext->ksym.kernel_btf_obj_fd = kern_btf_fd; ext->ksym.kernel_btf_id = kfunc_id; + ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", ext->name, kfunc_id); @@ -7032,6 +7280,9 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = bpf_gen__finish(obj->gen_loader); } + /* clean up fd_array */ + zfree(&obj->fd_array); + /* clean up module BTFs */ for (i = 0; i < obj->btf_module_cnt; i++) { close(obj->btf_modules[i].fd); @@ -7056,7 +7307,7 @@ out: if (obj->maps[i].pinned && !obj->maps[i].reused) bpf_map__unpin(&obj->maps[i], NULL); - bpf_object__unload(obj); + bpf_object_unload(obj); pr_warn("failed to load object '%s'\n", obj->path); return libbpf_err(err); } @@ -7122,8 +7373,7 @@ static int check_path(const char *path) return err; } -int bpf_program__pin_instance(struct bpf_program *prog, const char *path, - int instance) +static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance) { char *cp, errmsg[STRERR_BUFSIZE]; int err; @@ -7158,8 +7408,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, return 0; } -int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, - int instance) +static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance) { int err; @@ -7187,6 +7436,12 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, return 0; } +__attribute__((alias("bpf_program_pin_instance"))) +int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance); + +__attribute__((alias("bpf_program_unpin_instance"))) +int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); + int bpf_program__pin(struct bpf_program *prog, const char *path) { int i, err; @@ -7211,7 +7466,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) if (prog->instances.nr == 1) { /* don't 
create subdirs when pinning single instance */ - return bpf_program__pin_instance(prog, path, 0); + return bpf_program_pin_instance(prog, path, 0); } for (i = 0; i < prog->instances.nr; i++) { @@ -7227,7 +7482,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) goto err_unpin; } - err = bpf_program__pin_instance(prog, buf, i); + err = bpf_program_pin_instance(prog, buf, i); if (err) goto err_unpin; } @@ -7245,7 +7500,7 @@ err_unpin: else if (len >= PATH_MAX) continue; - bpf_program__unpin_instance(prog, buf, i); + bpf_program_unpin_instance(prog, buf, i); } rmdir(path); @@ -7273,7 +7528,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) if (prog->instances.nr == 1) { /* don't create subdirs when pinning single instance */ - return bpf_program__unpin_instance(prog, path, 0); + return bpf_program_unpin_instance(prog, path, 0); } for (i = 0; i < prog->instances.nr; i++) { @@ -7286,7 +7541,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) else if (len >= PATH_MAX) return libbpf_err(-ENAMETOOLONG); - err = bpf_program__unpin_instance(prog, buf, i); + err = bpf_program_unpin_instance(prog, buf, i); if (err) return err; } @@ -7647,6 +7902,7 @@ static void bpf_map__destroy(struct bpf_map *map) } zfree(&map->name); + zfree(&map->real_name); zfree(&map->pin_path); if (map->fd >= 0) @@ -7665,7 +7921,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); - bpf_object__unload(obj); + bpf_object_unload(obj); btf__free(obj->btf); btf_ext__free(obj->btf_ext); @@ -7694,6 +7950,10 @@ struct bpf_object * bpf_object__next(struct bpf_object *prev) { struct bpf_object *next; + bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); + + if (strict) + return NULL; if (!prev) next = list_first_entry(&bpf_objects_list, @@ -7800,6 +8060,12 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, struct bpf_program * bpf_program__next(struct 
bpf_program *prev, const struct bpf_object *obj) { + return bpf_object__next_program(obj, prev); +} + +struct bpf_program * +bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) +{ struct bpf_program *prog = prev; do { @@ -7812,6 +8078,12 @@ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) struct bpf_program * bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) { + return bpf_object__prev_program(obj, next); +} + +struct bpf_program * +bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) +{ struct bpf_program *prog = next; do { @@ -7892,6 +8164,16 @@ size_t bpf_program__size(const struct bpf_program *prog) return prog->insns_cnt * BPF_INSN_SZ; } +const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) +{ + return prog->insns; +} + +size_t bpf_program__insn_cnt(const struct bpf_program *prog) +{ + return prog->insns_cnt; +} + int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, bpf_program_prep_t prep) { @@ -8030,6 +8312,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), @@ -8219,7 +8503,7 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, /* Collect the reloc from ELF and populate the st_ops->progs[] */ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, - GElf_Shdr *shdr, Elf_Data *data) + Elf64_Shdr *shdr, 
Elf_Data *data) { const struct btf_member *member; struct bpf_struct_ops *st_ops; @@ -8227,58 +8511,58 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, unsigned int shdr_idx; const struct btf *btf; struct bpf_map *map; - Elf_Data *symbols; unsigned int moff, insn_idx; const char *name; __u32 member_idx; - GElf_Sym sym; - GElf_Rel rel; + Elf64_Sym *sym; + Elf64_Rel *rel; int i, nrels; - symbols = obj->efile.symbols; btf = obj->btf; nrels = shdr->sh_size / shdr->sh_entsize; for (i = 0; i < nrels; i++) { - if (!gelf_getrel(data, i, &rel)) { + rel = elf_rel_by_idx(data, i); + if (!rel) { pr_warn("struct_ops reloc: failed to get %d reloc\n", i); return -LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + if (!sym) { pr_warn("struct_ops reloc: symbol %zx not found\n", - (size_t)GELF_R_SYM(rel.r_info)); + (size_t)ELF64_R_SYM(rel->r_info)); return -LIBBPF_ERRNO__FORMAT; } - name = elf_sym_str(obj, sym.st_name) ?: "<?>"; - map = find_struct_ops_map_by_offset(obj, rel.r_offset); + name = elf_sym_str(obj, sym->st_name) ?: "<?>"; + map = find_struct_ops_map_by_offset(obj, rel->r_offset); if (!map) { - pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", - (size_t)rel.r_offset); + pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", + (size_t)rel->r_offset); return -EINVAL; } - moff = rel.r_offset - map->sec_offset; - shdr_idx = sym.st_shndx; + moff = rel->r_offset - map->sec_offset; + shdr_idx = sym->st_shndx; st_ops = map->st_ops; - pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", + pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", map->name, - (long long)(rel.r_info >> 32), - (long long)sym.st_value, - shdr_idx, (size_t)rel.r_offset, - map->sec_offset, sym.st_name, name); + (long 
long)(rel->r_info >> 32), + (long long)sym->st_value, + shdr_idx, (size_t)rel->r_offset, + map->sec_offset, sym->st_name, name); if (shdr_idx >= SHN_LORESERVE) { - pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n", - map->name, (size_t)rel.r_offset, shdr_idx); + pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", + map->name, (size_t)rel->r_offset, shdr_idx); return -LIBBPF_ERRNO__RELOC; } - if (sym.st_value % BPF_INSN_SZ) { + if (sym->st_value % BPF_INSN_SZ) { pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", - map->name, (unsigned long long)sym.st_value); + map->name, (unsigned long long)sym->st_value); return -LIBBPF_ERRNO__FORMAT; } - insn_idx = sym.st_value / BPF_INSN_SZ; + insn_idx = sym->st_value / BPF_INSN_SZ; member = find_member_by_offset(st_ops->type, moff * 8); if (!member) { @@ -8412,28 +8696,27 @@ int libbpf_find_vmlinux_btf_id(const char *name, static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) { - struct bpf_prog_info_linear *info_linear; - struct bpf_prog_info *info; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); struct btf *btf; int err; - info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); - err = libbpf_get_error(info_linear); + err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len); if (err) { - pr_warn("failed get_prog_info_linear for FD %d\n", - attach_prog_fd); + pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n", + attach_prog_fd, err); return err; } err = -EINVAL; - info = &info_linear->info; - if (!info->btf_id) { + if (!info.btf_id) { pr_warn("The target program doesn't have BTF\n"); goto out; } - btf = btf__load_from_kernel_by_id(info->btf_id); - if (libbpf_get_error(btf)) { - pr_warn("Failed to get BTF of the program\n"); + btf = btf__load_from_kernel_by_id(info.btf_id); + err = libbpf_get_error(btf); + if (err) { + pr_warn("Failed to get BTF %d of the 
program: %d\n", info.btf_id, err); goto out; } err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); @@ -8443,7 +8726,6 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) goto out; } out: - free(info_linear); return err; } @@ -8559,9 +8841,30 @@ const struct bpf_map_def *bpf_map__def(const struct bpf_map *map) return map ? &map->def : libbpf_err_ptr(-EINVAL); } +static bool map_uses_real_name(const struct bpf_map *map) +{ + /* Since libbpf started to support custom .data.* and .rodata.* maps, + * their user-visible name differs from kernel-visible name. Users see + * such map's corresponding ELF section name as a map name. + * This check distinguishes .data/.rodata from .data.* and .rodata.* + * maps to know which name has to be returned to the user. + */ + if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) + return true; + if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) + return true; + return false; +} + const char *bpf_map__name(const struct bpf_map *map) { - return map ? 
map->name : NULL; + if (!map) + return NULL; + + if (map_uses_real_name(map)) + return map->real_name; + + return map->name; } enum bpf_map_type bpf_map__type(const struct bpf_map *map) @@ -8590,6 +8893,19 @@ int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) return 0; } +__u64 bpf_map__map_extra(const struct bpf_map *map) +{ + return map->map_extra; +} + +int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) +{ + if (map->fd >= 0) + return libbpf_err(-EBUSY); + map->map_extra = map_extra; + return 0; +} + __u32 bpf_map__numa_node(const struct bpf_map *map) { return map->numa_node; @@ -8744,6 +9060,12 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) struct bpf_map * bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) { + return bpf_object__next_map(obj, prev); +} + +struct bpf_map * +bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) +{ if (prev == NULL) return obj->maps; @@ -8753,6 +9075,12 @@ bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) struct bpf_map * bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj) { + return bpf_object__prev_map(obj, next); +} + +struct bpf_map * +bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) +{ if (next == NULL) { if (!obj->nr_maps) return NULL; @@ -8768,7 +9096,22 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) struct bpf_map *pos; bpf_object__for_each_map(pos, obj) { - if (pos->name && !strcmp(pos->name, name)) + /* if it's a special internal map name (which always starts + * with dot) then check if that special name matches the + * real map name (ELF section name) + */ + if (name[0] == '.') { + if (pos->real_name && strcmp(pos->real_name, name) == 0) + return pos; + continue; + } + /* otherwise map name has to be an exact match */ + if (map_uses_real_name(pos)) { + if (strcmp(pos->real_name, name) == 0) + return pos; + 
continue; + } + if (strcmp(pos->name, name) == 0) return pos; } return errno = ENOENT, NULL; @@ -9273,7 +9616,7 @@ static int append_to_file(const char *file, const char *fmt, ...) int fd, n, err = 0; va_list ap; - fd = open(file, O_WRONLY | O_APPEND, 0); + fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); if (fd < 0) return -errno; @@ -9787,12 +10130,26 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie) { - const char *tp_name; + static const char *const prefixes[] = { + "raw_tp/", + "raw_tracepoint/", + "raw_tp.w/", + "raw_tracepoint.w/", + }; + size_t i; + const char *tp_name = NULL; - if (str_has_pfx(prog->sec_name, "raw_tp/")) - tp_name = prog->sec_name + sizeof("raw_tp/") - 1; - else - tp_name = prog->sec_name + sizeof("raw_tracepoint/") - 1; + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { + if (str_has_pfx(prog->sec_name, prefixes[i])) { + tp_name = prog->sec_name + strlen(prefixes[i]); + break; + } + } + if (!tp_name) { + pr_warn("prog '%s': invalid section name '%s'\n", + prog->name, prog->sec_name); + return libbpf_err_ptr(-EINVAL); + } return bpf_program__attach_raw_tracepoint(prog, tp_name); } @@ -10904,7 +11261,7 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) int fd, err = 0, len; char buf[128]; - fd = open(fcpu, O_RDONLY); + fd = open(fcpu, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e35490c54eb3..9de0f299706b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -150,6 +150,7 @@ struct bpf_object_load_attr { /* Load/unload object into/from kernel */ LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); +LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use 
bpf_object__close() instead") LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); @@ -167,7 +168,8 @@ LIBBPF_API struct bpf_program * bpf_object__find_program_by_name(const struct bpf_object *obj, const char *name); -LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "track bpf_objects in application code instead") +struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ for ((pos) = bpf_object__next(NULL), \ (tmp) = bpf_object__next(pos); \ @@ -189,16 +191,22 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, /* Accessors of bpf_program */ struct bpf_program; -LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, - const struct bpf_object *obj); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_program() instead") +struct bpf_program *bpf_program__next(struct bpf_program *prog, + const struct bpf_object *obj); +LIBBPF_API struct bpf_program * +bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog); -#define bpf_object__for_each_program(pos, obj) \ - for ((pos) = bpf_program__next(NULL, (obj)); \ - (pos) != NULL; \ - (pos) = bpf_program__next((pos), (obj))) +#define bpf_object__for_each_program(pos, obj) \ + for ((pos) = bpf_object__next_program((obj), NULL); \ + (pos) != NULL; \ + (pos) = bpf_object__next_program((obj), (pos))) -LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, - const struct bpf_object *obj); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_program() instead") +struct bpf_program *bpf_program__prev(struct bpf_program *prog, + const struct bpf_object *obj); +LIBBPF_API struct bpf_program * +bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog); typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); @@ 
-217,14 +225,51 @@ LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); /* returns program size in bytes */ +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insn_cnt() instead") LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); +struct bpf_insn; + +/** + * @brief **bpf_program__insns()** gives read-only access to BPF program's + * underlying BPF instructions. + * @param prog BPF program for which to return instructions + * @return a pointer to an array of BPF instructions that belong to the + * specified BPF program + * + * Returned pointer is always valid and not NULL. Number of `struct bpf_insn` + * pointed to can be fetched using **bpf_program__insn_cnt()** API. + * + * Keep in mind, libbpf can modify and append/delete BPF program's + * instructions as it processes BPF object file and prepares everything for + * uploading into the kernel. So depending on the point in BPF object + * lifetime, **bpf_program__insns()** can return different sets of + * instructions. As an example, during BPF object load phase BPF program + * instructions will be CO-RE-relocated, BPF subprograms instructions will be + * appended, ldimm64 instructions will have FDs embedded, etc. So instructions + * returned before **bpf_object__load()** and after it might be quite + * different. + */ +LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog); +/** + * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s + * that form specified BPF program. + * @param prog BPF program for which to return number of BPF instructions + * + * See **bpf_program__insns()** documentation for notes on how libbpf can + * change instructions and their count during different phases of + * **bpf_object** lifetime. 
+ */ +LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); + LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); +LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance); +LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); @@ -358,8 +403,6 @@ LIBBPF_API struct bpf_link * bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts); -struct bpf_insn; - /* * Libbpf allows callers to adjust BPF programs before being loaded * into kernel. One program in an object file can be transformed into @@ -388,7 +431,7 @@ struct bpf_insn; * one instance. In this case bpf_program__fd(prog) is equal to * bpf_program__nth_fd(prog, 0). */ - +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") struct bpf_prog_prep_result { /* * If not NULL, load new instruction array. 
@@ -417,9 +460,11 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, struct bpf_insn *insns, int insns_cnt, struct bpf_prog_prep_result *res); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, bpf_program_prep_t prep); +LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); /* @@ -502,16 +547,21 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); LIBBPF_API struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") +struct bpf_map *bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * -bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); +bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map); + #define bpf_object__for_each_map(pos, obj) \ - for ((pos) = bpf_map__next(NULL, (obj)); \ + for ((pos) = bpf_object__next_map((obj), NULL); \ (pos) != NULL; \ - (pos) = bpf_map__next((pos), (obj))) + (pos) = bpf_object__next_map((obj), (pos))) #define bpf_map__for_each bpf_object__for_each_map +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_map() instead") +struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * -bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); +bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); /** * @brief **bpf_map__fd()** gets the file descriptor of the passed @@ -550,6 +600,9 @@ LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); /* get/set map if_index */ LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map); LIBBPF_API int 
bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +/* get/set map map_extra flags */ +LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 9e649cf9e771..43580eb47740 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -389,5 +389,16 @@ LIBBPF_0.5.0 { LIBBPF_0.6.0 { global: - btf__add_tag; + bpf_map__map_extra; + bpf_map__set_map_extra; + bpf_object__next_map; + bpf_object__next_program; + bpf_object__prev_map; + bpf_object__prev_program; + bpf_program__insn_cnt; + bpf_program__insns; + btf__add_btf; + btf__add_decl_tag; + btf__raw_data; + btf__type_cnt; } LIBBPF_0.5.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index ec79400517d4..aeb79e3a8ff9 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -13,6 +13,8 @@ #include <limits.h> #include <errno.h> #include <linux/err.h> +#include <fcntl.h> +#include <unistd.h> #include "libbpf_legacy.h" #include "relo_core.h" @@ -52,8 +54,8 @@ #endif /* Older libelf all end up in this expression, for both 32 and 64 bit */ -#ifndef GELF_ST_VISIBILITY -#define GELF_ST_VISIBILITY(o) ((o) & 0x03) +#ifndef ELF64_ST_VISIBILITY +#define ELF64_ST_VISIBILITY(o) ((o) & 0x03) #endif #define BTF_INFO_ENC(kind, kind_flag, vlen) \ @@ -69,8 +71,8 @@ #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) #define BTF_TYPE_FLOAT_ENC(name, sz) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) -#define BTF_TYPE_TAG_ENC(value, type, component_idx) \ - BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TAG, 0, 0), type), (component_idx) +#define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 
0), type), (component_idx) #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) @@ -193,8 +195,9 @@ enum map_def_parts { MAP_DEF_NUMA_NODE = 0x080, MAP_DEF_PINNING = 0x100, MAP_DEF_INNER_MAP = 0x200, + MAP_DEF_MAP_EXTRA = 0x400, - MAP_DEF_ALL = 0x3ff, /* combination of all above */ + MAP_DEF_ALL = 0x7ff, /* combination of all above */ }; struct btf_map_def { @@ -208,6 +211,7 @@ struct btf_map_def { __u32 map_flags; __u32 numa_node; __u32 pinning; + __u64 map_extra; }; int parse_btf_map_def(const char *map_name, struct btf *btf, @@ -298,14 +302,32 @@ struct bpf_prog_load_params { __u32 log_level; char *log_buf; size_t log_buf_sz; + int *fd_array; }; int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size); -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off); +struct bpf_create_map_params { + const char *name; + enum bpf_map_type map_type; + __u32 map_flags; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 numa_node; + __u32 btf_fd; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + __u32 map_ifindex; + union { + __u32 inner_map_fd; + __u32 btf_vmlinux_value_type_id; + }; + __u64 map_extra; +}; + +int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr); + struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, const char **prefix, int *kind); @@ -408,6 +430,8 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); +__s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, + __u32 kind); 
extern enum libbpf_strict_mode libbpf_mode; @@ -469,4 +493,26 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 + * Takes ownership of the fd passed in, and closes it if calling + * fcntl(fd, F_DUPFD_CLOEXEC, 3). + */ +static inline int ensure_good_fd(int fd) +{ + int old_fd = fd, saved_errno; + + if (fd < 0) + return fd; + if (fd < 3) { + fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + saved_errno = errno; + close(old_fd); + if (fd < 0) { + pr_warn("failed to dup FD %d to FD > 2: %d\n", old_fd, -saved_errno); + errno = saved_errno; + } + } + return fd; +} + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 74e6f860f703..5ba5c9beccfa 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -52,8 +52,17 @@ enum libbpf_strict_mode { * allowed, with LIBBPF_STRICT_SEC_PREFIX this will become * unrecognized by libbpf and would have to be just SEC("xdp") and * SEC("xdp") and SEC("perf_event"). + * + * Note, in this mode the program pin path will be based on the + * function name instead of section name. */ LIBBPF_STRICT_SEC_NAME = 0x04, + /* + * Disable the global 'bpf_objects_list'. Maintaining this list adds + * a race condition to bpf_object__open() and bpf_object__close(). + * Clients can maintain it on their own if it is valuable for them. 
+ */ + LIBBPF_STRICT_NO_OBJECT_LIST = 0x08, __LIBBPF_STRICT_LAST, }; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index cd8c703dde71..68f2dbf364aa 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -33,7 +33,7 @@ static int get_vendor_id(int ifindex) snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname); - fd = open(path, O_RDONLY); + fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) return -1; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 2df880cefdae..f677dccdeae4 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -15,7 +15,6 @@ #include <linux/btf.h> #include <elf.h> #include <libelf.h> -#include <gelf.h> #include <fcntl.h> #include "libbpf.h" #include "btf.h" @@ -302,7 +301,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) if (!linker->filename) return -ENOMEM; - linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644); + linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); if (linker->fd < 0) { err = -errno; pr_warn("failed to create '%s': %d\n", file, err); @@ -324,12 +323,12 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->elf_hdr->e_machine = EM_BPF; linker->elf_hdr->e_type = ET_REL; -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB; #else -#error "Unknown __BYTE_ORDER" +#error "Unknown __BYTE_ORDER__" #endif /* STRTAB */ @@ -539,12 +538,12 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts, struct src_obj *obj) { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ const int host_endianness = ELFDATA2LSB; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif 
__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ const int host_endianness = ELFDATA2MSB; #else -#error "Unknown __BYTE_ORDER" +#error "Unknown __BYTE_ORDER__" #endif int err = 0; Elf_Scn *scn; @@ -557,7 +556,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->filename = filename; - obj->fd = open(filename, O_RDONLY); + obj->fd = open(filename, O_RDONLY | O_CLOEXEC); if (obj->fd < 0) { err = -errno; pr_warn("failed to open file '%s': %d\n", filename, err); @@ -921,7 +920,7 @@ static int check_btf_type_id(__u32 *type_id, void *ctx) { struct btf *btf = ctx; - if (*type_id > btf__get_nr_types(btf)) + if (*type_id >= btf__type_cnt(btf)) return -EINVAL; return 0; @@ -948,8 +947,8 @@ static int linker_sanity_check_btf(struct src_obj *obj) if (!obj->btf) return 0; - n = btf__get_nr_types(obj->btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { t = btf_type_by_id(obj->btf, i); err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf); @@ -1659,8 +1658,8 @@ static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sy return -EINVAL; } - n = btf__get_nr_types(obj->btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { t = btf__type_by_id(obj->btf, i); /* some global and extern FUNCs and VARs might not be associated with any @@ -2131,8 +2130,8 @@ static int linker_fixup_btf(struct src_obj *obj) if (!obj->btf) return 0; - n = btf__get_nr_types(obj->btf); - for (i = 1; i <= n; i++) { + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { struct btf_var_secinfo *vi; struct btf_type *t; @@ -2235,14 +2234,14 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) if (!obj->btf) return 0; - start_id = btf__get_nr_types(linker->btf) + 1; - n = btf__get_nr_types(obj->btf); + start_id = btf__type_cnt(linker->btf); + n = btf__type_cnt(obj->btf); obj->btf_type_map = calloc(n + 1, sizeof(int)); if (!obj->btf_type_map) 
return -ENOMEM; - for (i = 1; i <= n; i++) { + for (i = 1; i < n; i++) { struct glob_sym *glob_sym = NULL; t = btf__type_by_id(obj->btf, i); @@ -2297,8 +2296,8 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) } /* remap all the types except DATASECs */ - n = btf__get_nr_types(linker->btf); - for (i = start_id; i <= n; i++) { + n = btf__type_cnt(linker->btf); + for (i = start_id; i < n; i++) { struct btf_type *dst_t = btf_type_by_id(linker->btf, i); if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map)) @@ -2657,7 +2656,7 @@ static int finalize_btf(struct bpf_linker *linker) __u32 raw_sz; /* bail out if no BTF data was produced */ - if (btf__get_nr_types(linker->btf) == 0) + if (btf__type_cnt(linker->btf) == 1) return 0; for (i = 1; i < linker->sec_cnt; i++) { @@ -2694,7 +2693,7 @@ static int finalize_btf(struct bpf_linker *linker) } /* Emit .BTF section */ - raw_data = btf__get_raw_data(linker->btf, &raw_sz); + raw_data = btf__raw_data(linker->btf, &raw_sz); if (!raw_data) return -ENOMEM; diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 4016ed492d0c..b5b8956a1be8 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -662,7 +662,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, *validate = true; /* signedness is never ambiguous */ break; case BPF_FIELD_LSHIFT_U64: -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ *val = 64 - (bit_off + bit_sz - byte_off * 8); #else *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index a2111696ba91..81f8fbc85e70 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -300,7 +300,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, if (!umem) return -ENOMEM; - umem->fd = socket(AF_XDP, SOCK_RAW, 0); + umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); if (umem->fd < 0) { err = -errno; goto out_umem_alloc; 
@@ -549,7 +549,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) struct ifreq ifr = {}; int fd, err, ret; - fd = socket(AF_LOCAL, SOCK_DGRAM, 0); + fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0); if (fd < 0) return -errno; @@ -1046,7 +1046,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, } if (umem->refcount++ > 0) { - xsk->fd = socket(AF_XDP, SOCK_RAW, 0); + xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); if (xsk->fd < 0) { err = -errno; goto out_xsk_alloc; diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 01c12dca9c10..64e9c57fd792 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -23,6 +23,12 @@ extern "C" { #endif +/* This whole API has been deprecated and moved to libxdp that can be found at + * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so + * it should just be linking with libxdp instead of libbpf for this set of + * functionality. If not, please submit a bug report on the aforementioned page. + */ + /* Load-Acquire Store-Release barriers used by the XDP socket * library. The following macros should *NOT* be considered part of * the xsk.h API, and is subject to change anytime. 
@@ -245,8 +251,10 @@ static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); } -LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); -LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__fd(const struct xsk_umem *umem); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__fd(const struct xsk_socket *xsk); #define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 #define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 @@ -263,10 +271,10 @@ struct xsk_umem_config { __u32 flags; }; -LIBBPF_API int xsk_setup_xdp_prog(int ifindex, - int *xsks_map_fd); -LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk, - int xsks_map_fd); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); /* Flags for the libbpf_flags field. */ #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) @@ -280,40 +288,46 @@ struct xsk_socket_config { }; /* Set config to NULL to get the default configuration. 
*/ -LIBBPF_API int xsk_umem__create(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, - const char *ifname, __u32 queue_id, - struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - const struct xsk_socket_config *config); -LIBBPF_API int -xsk_socket__create_shared(struct xsk_socket **xsk_ptr, - const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_socket_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__create(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__create_v0_0_2(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__create_v0_0_4(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved 
to libxdp") +int xsk_socket__create(struct xsk_socket **xsk, + const char *ifname, __u32 queue_id, + struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + const struct xsk_socket_config *config); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *config); /* Returns 0 for success and -EBUSY if the umem is still in use. */ -LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); -LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +int xsk_umem__delete(struct xsk_umem *umem); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") +void xsk_socket__delete(struct xsk_socket *xsk); #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 1a7112a87736..388847bab6d9 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -110,7 +110,7 @@ static int perf_env__fetch_btf(struct perf_env *env, u32 data_size; const void *data; - data = btf__get_raw_data(btf, &data_size); + data = btf__raw_data(btf, &data_size); node = malloc(data_size + sizeof(struct btf_node)); if (!node) diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch index b10b7a27c33f..0c6c7f456887 100644 --- a/tools/scripts/Makefile.arch +++ b/tools/scripts/Makefile.arch @@ -4,7 +4,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ ) + -e s/sh[234].*/sh/ -e 
s/aarch64.*/arm64/ \ + -e s/riscv.*/riscv/) ifndef ARCH ARCH := $(HOSTARCH) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index aa94739a1835..54b0a41a3775 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -122,12 +122,15 @@ BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a ifneq ($(CROSS_COMPILE),) HOST_BUILD_DIR := $(BUILD_DIR)/host HOST_SCRATCH_DIR := $(OUTPUT)/host-tools +HOST_INCLUDE_DIR := $(HOST_SCRATCH_DIR)/include else HOST_BUILD_DIR := $(BUILD_DIR) HOST_SCRATCH_DIR := $(SCRATCH_DIR) +HOST_INCLUDE_DIR := $(INCLUDE_DIR) endif HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids +RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -152,7 +155,7 @@ $(notdir $(TEST_GEN_PROGS) \ # sort removes libbpf duplicates when not cross-building MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \ - $(INCLUDE_DIR)) + $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR)) $(MAKE_DIRS): $(call msg,MKDIR,,$@) $(Q)mkdir -p $@ @@ -181,11 +184,13 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool -$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) - $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ - OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ - BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ - cp $(SCRATCH_DIR)/runqslower $@ +$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ + OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \ + BPFTOOL_OUTPUT=$(BUILD_DIR)/bpftool/ \ + BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ + BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \ + cp $(RUNQSLOWER_OUTPUT)runqslower $@ TEST_GEN_PROGS_EXTENDED += 
$(DEFAULT_BPFTOOL) @@ -209,7 +214,9 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ CC=$(HOSTCC) LD=$(HOSTLD) \ EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ - prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install + LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ + LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ + prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin all: docs @@ -225,7 +232,7 @@ docs-clean: $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ - | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf + | $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ EXTRA_CFLAGS='-g -O0' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers @@ -233,7 +240,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ - | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf + | $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ @@ -258,6 +265,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ $(TOOLSDIR)/lib/str_error_r.c $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \ + LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by @@ -269,7 +277,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ define get_sys_includes $(shell $(1) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/') +$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk 
'{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') endef # Determine target endianness. @@ -315,8 +323,9 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ - test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c \ - trace_vprintk.c + test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c +# Generate both light skeleton and libbpf skeleton for these +LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c SKEL_BLACKLIST += $$(LSKELS) test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o @@ -346,7 +355,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS) TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\ $$(TRUNNER_BPF_SRCS))) -TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS)) +TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA)) TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS)) TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS) @@ -395,7 +404,7 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o) $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o) $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) - $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@ + $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o)) @@ -412,10 +421,9 @@ ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) $(TRUNNER_TESTS_DIR)-tests-hdr := y $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c $$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@) - $$(shell ( cd 
$(TRUNNER_TESTS_DIR); \ - echo '/* Generated header, do not edit */'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \ + $$(shell (echo '/* Generated header, do not edit */'; \ + sed -n -E 's/^void (serial_)?test_([a-zA-Z0-9_]+)\((void)?\).*/DEFINE_TEST(\2)/p' \ + $(TRUNNER_TESTS_DIR)/*.c | sort ; \ ) > $$@) endif @@ -454,7 +462,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ - $(Q)$(RESOLVE_BTFIDS) --no-fail --btf $(TRUNNER_OUTPUT)/btf_data.o $$@ + $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.o $$@ endef @@ -516,18 +524,20 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) $(call msg,CC,,$@) - $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@ $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/perfbuf_bench.skel.h +$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_count.o \ $(OUTPUT)/bench_rename.o \ $(OUTPUT)/bench_trigger.o \ - $(OUTPUT)/bench_ringbufs.o + $(OUTPUT)/bench_ringbufs.o \ + $(OUTPUT)/bench_bloom_filter_map.o $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 554553acc6d9..5e287e445f75 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -204,7 +204,7 @@ __ https://reviews.llvm.org/D93563 btf_tag test and Clang version 
============================== -The btf_tag selftest require LLVM support to recognize the btf_tag attribute. +The btf_tag selftest require LLVM support to recognize the btf_decl_tag attribute. It was introduced in `Clang 14`__. Without it, the btf_tag selftest will be skipped and you will observe: @@ -213,7 +213,7 @@ Without it, the btf_tag selftest will be skipped and you will observe: #<test_num> btf_tag:SKIP -__ https://reviews.llvm.org/D106614 +__ https://reviews.llvm.org/D111588 Clang dependencies for static linking tests =========================================== diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 6ea15b93a2f8..cc4722f693e9 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -51,6 +51,35 @@ void setup_libbpf() fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); } +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + long total = res->false_hits + res->hits + res->drops; + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + + printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n", + res->false_hits, total, ((float)res->false_hits / total) * 100); +} + +void false_hits_report_final(struct bench_res res[], int res_cnt) +{ + long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + total_hits += res[i].hits; + total_false_hits += res[i].false_hits; + total_drops += res[i].drops; + } + total_ops = total_hits + total_false_hits + total_drops; + + printf("Summary: %ld false hits of %ld total operations. 
", + total_false_hits, total_ops); + printf("Percentage = %2.2f %%\n", + ((float)total_false_hits / total_ops) * 100); +} + void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) { double hits_per_sec, drops_per_sec; @@ -63,20 +92,22 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); - printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n", - hits_per_sec, hits_per_prod, drops_per_sec); + printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n", + hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec); } void hits_drops_report_final(struct bench_res res[], int res_cnt) { int i; - double hits_mean = 0.0, drops_mean = 0.0; - double hits_stddev = 0.0, drops_stddev = 0.0; + double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0; + double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0; + double total_ops; for (i = 0; i < res_cnt; i++) { hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt); } + total_ops_mean = hits_mean + drops_mean; if (res_cnt > 1) { for (i = 0; i < res_cnt; i++) { @@ -86,14 +117,21 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt) drops_stddev += (drops_mean - res[i].drops / 1000000.0) * (drops_mean - res[i].drops / 1000000.0) / (res_cnt - 1.0); + total_ops = res[i].hits + res[i].drops; + total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) * + (total_ops_mean - total_ops / 1000000.0) / + (res_cnt - 1.0); } hits_stddev = sqrt(hits_stddev); drops_stddev = sqrt(drops_stddev); + total_ops_stddev = sqrt(total_ops_stddev); } printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ", hits_mean, hits_stddev, hits_mean / env.producer_cnt); - printf("drops %8.3lf \u00B1 %5.3lfM/s\n", + printf("drops %8.3lf \u00B1 %5.3lfM/s, ", drops_mean, drops_stddev); + 
printf("total operations %8.3lf \u00B1 %5.3lfM/s\n", + total_ops_mean, total_ops_stddev); } const char *argp_program_version = "benchmark"; @@ -132,9 +170,11 @@ static const struct argp_option opts[] = { }; extern struct argp bench_ringbufs_argp; +extern struct argp bench_bloom_map_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, + { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, {}, }; @@ -323,6 +363,11 @@ extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; extern const struct bench bench_pb_libbpf; extern const struct bench bench_pb_custom; +extern const struct bench bench_bloom_lookup; +extern const struct bench bench_bloom_update; +extern const struct bench bench_bloom_false_positive; +extern const struct bench bench_hashmap_without_bloom; +extern const struct bench bench_hashmap_with_bloom; static const struct bench *benchs[] = { &bench_count_global, @@ -344,6 +389,11 @@ static const struct bench *benchs[] = { &bench_rb_custom, &bench_pb_libbpf, &bench_pb_custom, + &bench_bloom_lookup, + &bench_bloom_update, + &bench_bloom_false_positive, + &bench_hashmap_without_bloom, + &bench_hashmap_with_bloom, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index c1f48a473b02..624c6b11501f 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -33,6 +33,7 @@ struct env { struct bench_res { long hits; long drops; + long false_hits; }; struct bench { @@ -56,6 +57,8 @@ extern const struct bench *bench; void setup_libbpf(); void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); void hits_drops_report_final(struct bench_res res[], int res_cnt); +void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); +void false_hits_report_final(struct bench_res res[], int res_cnt); static inline __u64 get_time_ns() { 
struct timespec t; diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c new file mode 100644 index 000000000000..6eeeed2913e6 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -0,0 +1,477 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <argp.h> +#include <linux/log2.h> +#include <pthread.h> +#include "bench.h" +#include "bloom_filter_bench.skel.h" +#include "bpf_util.h" + +static struct ctx { + bool use_array_map; + bool use_hashmap; + bool hashmap_use_bloom; + bool count_false_hits; + + struct bloom_filter_bench *skel; + + int bloom_fd; + int hashmap_fd; + int array_map_fd; + + pthread_mutex_t map_done_mtx; + pthread_cond_t map_done_cv; + bool map_done; + bool map_prepare_err; + + __u32 next_map_idx; +} ctx = { + .map_done_mtx = PTHREAD_MUTEX_INITIALIZER, + .map_done_cv = PTHREAD_COND_INITIALIZER, +}; + +struct stat { + __u32 stats[3]; +}; + +static struct { + __u32 nr_entries; + __u8 nr_hash_funcs; + __u8 value_size; +} args = { + .nr_entries = 1000, + .nr_hash_funcs = 3, + .value_size = 8, +}; + +enum { + ARG_NR_ENTRIES = 3000, + ARG_NR_HASH_FUNCS = 3001, + ARG_VALUE_SIZE = 3002, +}; + +static const struct argp_option opts[] = { + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "Set number of expected unique entries in the bloom filter"}, + { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0, + "Set number of hash functions in the bloom filter"}, + { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, + "Set value size (in bytes) of bloom filter entries"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_NR_ENTRIES: + args.nr_entries = strtol(arg, NULL, 10); + if (args.nr_entries == 0) { + fprintf(stderr, "Invalid nr_entries count."); + argp_usage(state); + } + break; + case ARG_NR_HASH_FUNCS: + args.nr_hash_funcs = strtol(arg, NULL, 10); + if 
(args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) { + fprintf(stderr, + "The bloom filter must use 1 to 15 hash functions."); + argp_usage(state); + } + break; + case ARG_VALUE_SIZE: + args.value_size = strtol(arg, NULL, 10); + if (args.value_size < 2 || args.value_size > 256) { + fprintf(stderr, + "Invalid value size. Must be between 2 and 256 bytes"); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_bloom_map_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, + "The bloom filter benchmarks do not support multi-consumer use\n"); + exit(1); + } +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +static void *map_prepare_thread(void *arg) +{ + __u32 val_size, i; + void *val = NULL; + int err; + + val_size = args.value_size; + val = malloc(val_size); + if (!val) { + ctx.map_prepare_err = true; + goto done; + } + + while (true) { + i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED); + if (i > args.nr_entries) + break; + +again: + /* Populate hashmap, bloom filter map, and array map with the same + * random values + */ + err = syscall(__NR_getrandom, val, val_size, 0); + if (err != val_size) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to get random value: %d\n", -errno); + break; + } + + if (ctx.use_hashmap) { + err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST); + if (err) { + if (err != -EEXIST) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to hashmap: %d\n", + -errno); + break; + } + goto again; + } + } + + i--; + + if (ctx.use_array_map) { + err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem 
to array map: %d\n", -errno); + break; + } + } + + if (ctx.use_hashmap && !ctx.hashmap_use_bloom) + continue; + + err = bpf_map_update_elem(ctx.bloom_fd, NULL, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, + "failed to add elem to bloom filter map: %d\n", -errno); + break; + } + } +done: + pthread_mutex_lock(&ctx.map_done_mtx); + ctx.map_done = true; + pthread_cond_signal(&ctx.map_done_cv); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (val) + free(val); + + return NULL; +} + +static void populate_maps(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + pthread_t map_thread; + int i, err, nr_rand_bytes; + + ctx.bloom_fd = bpf_map__fd(ctx.skel->maps.bloom_map); + ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap); + ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map); + + for (i = 0; i < nr_cpus; i++) { + err = pthread_create(&map_thread, NULL, map_prepare_thread, + NULL); + if (err) { + fprintf(stderr, "failed to create pthread: %d\n", -errno); + exit(1); + } + } + + pthread_mutex_lock(&ctx.map_done_mtx); + while (!ctx.map_done) + pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (ctx.map_prepare_err) + exit(1); + + nr_rand_bytes = syscall(__NR_getrandom, ctx.skel->bss->rand_vals, + ctx.skel->rodata->nr_rand_bytes, 0); + if (nr_rand_bytes != ctx.skel->rodata->nr_rand_bytes) { + fprintf(stderr, "failed to get random bytes\n"); + exit(1); + } +} + +static void check_args(void) +{ + if (args.value_size < 8) { + __u64 nr_unique_entries = 1ULL << (args.value_size * 8); + + if (args.nr_entries > nr_unique_entries) { + fprintf(stderr, + "Not enough unique values for the nr_entries requested\n"); + exit(1); + } + } +} + +static struct bloom_filter_bench *setup_skeleton(void) +{ + struct bloom_filter_bench *skel; + + check_args(); + + setup_libbpf(); + + skel = bloom_filter_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + 
skel->rodata->hashmap_use_bloom = ctx.hashmap_use_bloom; + skel->rodata->count_false_hits = ctx.count_false_hits; + + /* Resize number of entries */ + bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.bloom_map, args.nr_entries); + + /* Set value size */ + bpf_map__set_value_size(skel->maps.array_map, args.value_size); + + bpf_map__set_value_size(skel->maps.bloom_map, args.value_size); + + bpf_map__set_value_size(skel->maps.hashmap, args.value_size); + + /* For the hashmap, we use the value as the key as well */ + bpf_map__set_key_size(skel->maps.hashmap, args.value_size); + + skel->bss->value_size = args.value_size; + + /* Set number of hash functions */ + bpf_map__set_map_extra(skel->maps.bloom_map, args.nr_hash_funcs); + + if (bloom_filter_bench__load(skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + return skel; +} + +static void bloom_lookup_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void bloom_update_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_update); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void false_positive_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + ctx.count_false_hits = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_with_bloom_setup(void) +{ + struct 
bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_no_bloom_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void measure(struct bench_res *res) +{ + unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0; + static unsigned long last_hits, last_drops, last_false_hits; + unsigned int nr_cpus = bpf_num_possible_cpus(); + int hit_key, drop_key, false_hit_key; + int i; + + hit_key = ctx.skel->rodata->hit_key; + drop_key = ctx.skel->rodata->drop_key; + false_hit_key = ctx.skel->rodata->false_hit_key; + + if (ctx.skel->bss->error != 0) { + fprintf(stderr, "error (%d) when searching the bloom filter\n", + ctx.skel->bss->error); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) { + struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i]; + + total_hits += s->stats[hit_key]; + total_drops += s->stats[drop_key]; + total_false_hits += s->stats[false_hit_key]; + } + + res->hits = total_hits - last_hits; + res->drops = total_drops - last_drops; + res->false_hits = total_false_hits - last_false_hits; + + last_hits = total_hits; + last_drops = total_drops; + last_false_hits = total_false_hits; +} + +static void *consumer(void *input) +{ + return NULL; +} + +const struct bench bench_bloom_lookup = { + .name = "bloom-lookup", + .validate = validate, + .setup = bloom_lookup_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench 
bench_bloom_update = { + .name = "bloom-update", + .validate = validate, + .setup = bloom_update_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_false_positive = { + .name = "bloom-false-positive", + .validate = validate, + .setup = false_positive_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = false_hits_report_progress, + .report_final = false_hits_report_final, +}; + +const struct bench bench_hashmap_without_bloom = { + .name = "hashmap-without-bloom", + .validate = validate, + .setup = hashmap_no_bloom_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_hashmap_with_bloom = { + .name = "hashmap-with-bloom", + .validate = validate, + .setup = hashmap_with_bloom_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh new file mode 100755 index 000000000000..8ffd385ab2f4 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Bloom filter map" +for v in 2 4 8 16 40; do +for t in 1 4 8 12 16; do +for h in {1..10}; do +subtitle "value_size: $v bytes, # threads: $t, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize "Lookups, total operations: 
" \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-lookup)" + printf "\t" + summarize "Updates, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-update)" + printf "\t" + summarize_percentage "False positive rate: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-false-positive)" + done + printf "\n" +done +done +done + +header "Hashmap without bloom filter vs. hashmap with bloom filter (throughput, 8 threads)" +for v in 2 4 8 16 40; do +for h in {1..10}; do +subtitle "value_size: $v, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize_total "Hashmap without bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-without-bloom)" + printf "\t" + summarize_total "Hashmap with bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-with-bloom)" + done + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh index af4aa04caba6..ada028aa9007 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -1,34 +1,8 @@ #!/bin/bash -set -eufo pipefail - -RUN_BENCH="sudo ./bench -w3 -d10 -a" - -function hits() -{ - echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" -} - -function drops() -{ - echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" -} +source ./benchs/run_common.sh -function header() -{ - local len=${#1} - - printf "\n%s\n" "$1" - for i in $(seq 1 $len); do printf '='; done - printf '\n' -} - -function summarize() -{ - bench="$1" - summary=$(echo $2 | tail -n1) - printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" -} +set 
-eufo pipefail header "Single-producer, parallel producer" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh new file mode 100644 index 000000000000..9a16be78b180 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +RUN_BENCH="sudo ./bench -w3 -d10 -a" + +function header() +{ + local len=${#1} + + printf "\n%s\n" "$1" + for i in $(seq 1 $len); do printf '='; done + printf '\n' +} + +function subtitle() +{ + local len=${#1} + printf "\t%s\n" "$1" +} + +function hits() +{ + echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function drops() +{ + echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function percentage() +{ + echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" +} + +function total() +{ + echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function summarize() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" +} + +function summarize_percentage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" +} + +function summarize_total() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(total $summary)" +} diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index 89c6d58e5dd6..11ee801e75e7 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -34,6 +34,21 @@ DECLARE_TRACE(bpf_testmod_test_write_bare, TP_ARGS(task, ctx) ); +#undef BPF_TESTMOD_DECLARE_TRACE +#ifdef DECLARE_TRACE_WRITABLE +#define 
BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ + DECLARE_TRACE_WRITABLE(call, PARAMS(proto), PARAMS(args), size) +#else +#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ + DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) +#endif + +BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare, + TP_PROTO(struct bpf_testmod_test_writable_ctx *ctx), + TP_ARGS(ctx), + sizeof(struct bpf_testmod_test_writable_ctx) +); + #endif /* _BPF_TESTMOD_EVENTS_H */ #undef TRACE_INCLUDE_PATH diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 50fc5561110a..5d52ea2768df 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ +#include <linux/btf.h> +#include <linux/btf_ids.h> #include <linux/error-injection.h> #include <linux/init.h> #include <linux/module.h> @@ -13,6 +15,12 @@ DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; +noinline void +bpf_testmod_test_mod_kfunc(int i) +{ + *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i; +} + noinline int bpf_testmod_loop_test(int n) { int i, sum = 0; @@ -42,6 +50,16 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, if (bpf_testmod_loop_test(101) > 100) trace_bpf_testmod_test_read(current, &ctx); + /* Magic number to enable writable tp */ + if (len == 64) { + struct bpf_testmod_test_writable_ctx writable = { + .val = 1024, + }; + trace_bpf_testmod_test_writable_bare(&writable); + if (writable.early_ret) + return snprintf(buf, len, "%d\n", writable.val); + } + return -EIO; /* always fail */ } EXPORT_SYMBOL(bpf_testmod_test_read); @@ -71,13 +89,26 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; +BTF_SET_START(bpf_testmod_kfunc_ids) +BTF_ID(func, bpf_testmod_test_mod_kfunc) +BTF_SET_END(bpf_testmod_kfunc_ids) + 
+static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set); + static int bpf_testmod_init(void) { - return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + int ret; + + ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + if (ret) + return ret; + register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); + return 0; } static void bpf_testmod_exit(void) { + unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index b3892dc40111..0d71e2607832 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -17,4 +17,9 @@ struct bpf_testmod_test_write_ctx { size_t len; }; +struct bpf_testmod_test_writable_ctx { + bool early_ret; + int val; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index ce103fb0ad1b..b5b6b013a245 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -24,12 +24,12 @@ static const char * const btf_kind_str_mapping[] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", - [BTF_KIND_TAG] = "TAG", + [BTF_KIND_DECL_TAG] = "DECL_TAG", }; static const char *btf_kind_str(__u16 kind) { - if (kind > BTF_KIND_TAG) + if (kind > BTF_KIND_DECL_TAG) return "UNKNOWN"; return btf_kind_str_mapping[kind]; } @@ -178,9 +178,9 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) case BTF_KIND_FLOAT: fprintf(out, " size=%u", t->size); break; - case BTF_KIND_TAG: + case BTF_KIND_DECL_TAG: fprintf(out, " type_id=%u component_idx=%d", - t->type, btf_tag(t)->component_idx); + t->type, btf_decl_tag(t)->component_idx); break; 
default: break; @@ -215,7 +215,7 @@ int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]) int i; bool ok = true; - ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types"); + ASSERT_EQ(btf__type_cnt(btf) - 1, nr_types, "btf_nr_types"); for (i = 1; i <= nr_types; i++) { if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump")) @@ -254,7 +254,7 @@ const char *btf_type_c_dump(const struct btf *btf) return NULL; } - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { err = btf_dump__dump_type(d, i); if (err) { fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err); diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index f3daa44a8266..9d59c3990ca8 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -33,10 +33,9 @@ #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" #define CGROUP_WORK_DIR "/cgroup-test-work-dir" - #define format_cgroup_path(buf, path) \ - snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ - CGROUP_WORK_DIR, path) + snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ + CGROUP_WORK_DIR, getpid(), path) #define format_classid_path(buf) \ snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 629da3854b3e..fcc9cb91b211 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -26,4 +26,4 @@ int join_classid(void); int setup_classid_environment(void); void cleanup_classid_environment(void); -#endif /* __CGROUP_HELPERS_H */ +#endif /* __CGROUP_HELPERS_H */
\ No newline at end of file diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c index 3fd83b9dc1bf..87fd1aa323a9 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.c +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -17,7 +17,7 @@ const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; const char *cfg_map_name = "jmp_table"; bool cfg_attach = true; -char *cfg_section_name; +char *cfg_prog_name; char *cfg_path_name; static void load_and_attach_program(void) @@ -25,7 +25,11 @@ static void load_and_attach_program(void) int prog_fd, ret; struct bpf_object *obj; - ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name, + ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (ret) + error(1, 0, "failed to enable libbpf strict mode: %d", ret); + + ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name, cfg_map_name, NULL, &prog_fd, NULL); if (ret) error(1, 0, "bpf_flow_load %s", cfg_path_name); @@ -75,15 +79,15 @@ static void parse_opts(int argc, char **argv) break; case 'p': if (cfg_path_name) - error(1, 0, "only one prog name can be given"); + error(1, 0, "only one path can be given"); cfg_path_name = optarg; break; case 's': - if (cfg_section_name) - error(1, 0, "only one section can be given"); + if (cfg_prog_name) + error(1, 0, "only one prog can be given"); - cfg_section_name = optarg; + cfg_prog_name = optarg; break; } } @@ -94,7 +98,7 @@ static void parse_opts(int argc, char **argv) if (cfg_attach && !cfg_path_name) error(1, 0, "must provide a path to the BPF program"); - if (cfg_attach && !cfg_section_name) + if (cfg_attach && !cfg_prog_name) error(1, 0, "must provide a section name"); } diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index 7290401ec172..9d0acc2fc6cc 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -7,7 +7,7 @@ static inline 
int bpf_flow_load(struct bpf_object **obj, const char *path, - const char *section_name, + const char *prog_name, const char *map_name, const char *keys_map_name, int *prog_fd, @@ -23,13 +23,7 @@ static inline int bpf_flow_load(struct bpf_object **obj, if (ret) return ret; - main_prog = NULL; - bpf_object__for_each_program(prog, *obj) { - if (strcmp(section_name, bpf_program__section_name(prog)) == 0) { - main_prog = prog; - break; - } - } + main_prog = bpf_object__find_program_by_name(*obj, prog_name); if (!main_prog) return -1; diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index ba0e1efe5a45..0f9525293881 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -4,13 +4,13 @@ #include "atomics.lskel.h" -static void test_add(struct atomics *skel) +static void test_add(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__add__attach(skel); + link_fd = atomics_lskel__add__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(add)")) return; @@ -36,13 +36,13 @@ cleanup: close(link_fd); } -static void test_sub(struct atomics *skel) +static void test_sub(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__sub__attach(skel); + link_fd = atomics_lskel__sub__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(sub)")) return; @@ -69,13 +69,13 @@ cleanup: close(link_fd); } -static void test_and(struct atomics *skel) +static void test_and(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__and__attach(skel); + link_fd = atomics_lskel__and__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(and)")) return; @@ -97,13 +97,13 @@ cleanup: close(link_fd); } -static void test_or(struct atomics *skel) +static void test_or(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, 
retval; int link_fd; - link_fd = atomics__or__attach(skel); + link_fd = atomics_lskel__or__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(or)")) return; @@ -126,13 +126,13 @@ cleanup: close(link_fd); } -static void test_xor(struct atomics *skel) +static void test_xor(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__xor__attach(skel); + link_fd = atomics_lskel__xor__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xor)")) return; @@ -154,13 +154,13 @@ cleanup: close(link_fd); } -static void test_cmpxchg(struct atomics *skel) +static void test_cmpxchg(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__cmpxchg__attach(skel); + link_fd = atomics_lskel__cmpxchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)")) return; @@ -183,13 +183,13 @@ cleanup: close(link_fd); } -static void test_xchg(struct atomics *skel) +static void test_xchg(struct atomics_lskel *skel) { int err, prog_fd; __u32 duration = 0, retval; int link_fd; - link_fd = atomics__xchg__attach(skel); + link_fd = atomics_lskel__xchg__attach(skel); if (!ASSERT_GT(link_fd, 0, "attach(xchg)")) return; @@ -212,10 +212,10 @@ cleanup: void test_atomics(void) { - struct atomics *skel; + struct atomics_lskel *skel; __u32 duration = 0; - skel = atomics__open_and_load(); + skel = atomics_lskel__open_and_load(); if (CHECK(!skel, "skel_load", "atomics skeleton failed\n")) return; @@ -225,6 +225,7 @@ void test_atomics(void) test__skip(); goto cleanup; } + skel->bss->pid = getpid(); if (test__start_subtest("add")) test_add(skel); @@ -242,5 +243,5 @@ void test_atomics(void) test_xchg(skel); cleanup: - atomics__destroy(skel); + atomics_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 6c511dcd1465..d0bd51eb23c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ 
b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -5,6 +5,11 @@ /* this is how USDT semaphore is actually defined, except volatile modifier */ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); +/* attach point */ +static void method(void) { + return ; +} + void test_attach_probe(void) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); @@ -33,7 +38,7 @@ void test_attach_probe(void) if (CHECK(base_addr < 0, "get_base_addr", "failed to find base addr: %zd", base_addr)) return; - uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr); + uprobe_offset = get_uprobe_offset(&method, base_addr); ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) @@ -98,7 +103,7 @@ void test_attach_probe(void) goto cleanup; /* trigger & validate uprobe & uretprobe */ - get_base_addr(); + method(); if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res", "wrong uprobe res: %d\n", skel->bss->uprobe_res)) diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c new file mode 100644 index 000000000000..be73e3de6668 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <sys/syscall.h> +#include <test_progs.h> +#include "bloom_filter_map.skel.h" + +static void test_fail_cases(void) +{ + __u32 value; + int fd, err; + + /* Invalid key size */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 4, sizeof(value), 100, 0); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid key size")) + close(fd); + + /* Invalid value size */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, 0, 100, 0); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid value size 0")) + close(fd); + + /* Invalid max entries size */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, 
sizeof(value), 0, 0); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid max entries size")) + close(fd); + + /* Bloom filter maps do not support BPF_F_NO_PREALLOC */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, + BPF_F_NO_PREALLOC); + if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid flags")) + close(fd); + + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, 0); + if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter")) + return; + + /* Test invalid flags */ + err = bpf_map_update_elem(fd, NULL, &value, -1); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_EXIST); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_F_LOCK); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, BPF_NOEXIST); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + err = bpf_map_update_elem(fd, NULL, &value, 10000); + ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags"); + + close(fd); +} + +static void test_success_cases(void) +{ + char value[11]; + int fd, err; + + /* Create a map */ + fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, + BPF_F_ZERO_SEED | BPF_F_NUMA_NODE); + if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter success case")) + return; + + /* Add a value to the bloom filter */ + err = bpf_map_update_elem(fd, NULL, &value, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem bloom filter success case")) + goto done; + + /* Lookup a value in the bloom filter */ + err = bpf_map_lookup_elem(fd, NULL, &value); + ASSERT_OK(err, "bpf_map_update_elem bloom filter success case"); + +done: + close(fd); +} + +static void check_bloom(struct bloom_filter_map *skel) +{ + struct bpf_link *link; + + link = 
bpf_program__attach(skel->progs.check_bloom); + if (!ASSERT_OK_PTR(link, "link")) + return; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->error, 0, "error"); + + bpf_link__destroy(link); +} + +static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals, + __u32 nr_rand_vals) +{ + int outer_map_fd, inner_map_fd, err, i, key = 0; + struct bpf_link *link; + + /* Create a bloom filter map that will be used as the inner map */ + inner_map_fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(*rand_vals), + nr_rand_vals, 0); + if (!ASSERT_GE(inner_map_fd, 0, "bpf_create_map bloom filter inner map")) + return; + + for (i = 0; i < nr_rand_vals; i++) { + err = bpf_map_update_elem(inner_map_fd, NULL, rand_vals + i, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to inner_map_fd")) + goto done; + } + + /* Add the bloom filter map to the outer map */ + outer_map_fd = bpf_map__fd(skel->maps.outer_map); + err = bpf_map_update_elem(outer_map_fd, &key, &inner_map_fd, BPF_ANY); + if (!ASSERT_OK(err, "Add bloom filter map to outer map")) + goto done; + + /* Attach the bloom_filter_inner_map prog */ + link = bpf_program__attach(skel->progs.inner_map); + if (!ASSERT_OK_PTR(link, "link")) + goto delete_inner_map; + + syscall(SYS_getpgid); + + ASSERT_EQ(skel->bss->error, 0, "error"); + + bpf_link__destroy(link); + +delete_inner_map: + /* Ensure the inner bloom filter map can be deleted */ + err = bpf_map_delete_elem(outer_map_fd, &key); + ASSERT_OK(err, "Delete inner bloom filter map"); + +done: + close(inner_map_fd); +} + +static int setup_progs(struct bloom_filter_map **out_skel, __u32 **out_rand_vals, + __u32 *out_nr_rand_vals) +{ + struct bloom_filter_map *skel; + int random_data_fd, bloom_fd; + __u32 *rand_vals = NULL; + __u32 map_size, val; + int err, i; + + /* Set up a bloom filter map skeleton */ + skel = bloom_filter_map__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bloom_filter_map__open_and_load")) + return -EINVAL; + + /* Set up rand_vals */ + 
map_size = bpf_map__max_entries(skel->maps.map_random_data); + rand_vals = malloc(sizeof(*rand_vals) * map_size); + if (!rand_vals) { + err = -ENOMEM; + goto error; + } + + /* Generate random values and populate both skeletons */ + random_data_fd = bpf_map__fd(skel->maps.map_random_data); + bloom_fd = bpf_map__fd(skel->maps.map_bloom); + for (i = 0; i < map_size; i++) { + val = rand(); + + err = bpf_map_update_elem(random_data_fd, &i, &val, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to map_random_data")) + goto error; + + err = bpf_map_update_elem(bloom_fd, NULL, &val, BPF_ANY); + if (!ASSERT_OK(err, "Add random value to map_bloom")) + goto error; + + rand_vals[i] = val; + } + + *out_skel = skel; + *out_rand_vals = rand_vals; + *out_nr_rand_vals = map_size; + + return 0; + +error: + bloom_filter_map__destroy(skel); + if (rand_vals) + free(rand_vals); + return err; +} + +void test_bloom_filter_map(void) +{ + __u32 *rand_vals, nr_rand_vals; + struct bloom_filter_map *skel; + int err; + + test_fail_cases(); + test_success_cases(); + + err = setup_progs(&skel, &rand_vals, &nr_rand_vals); + if (err) + return; + + test_inner_map(skel, rand_vals, nr_rand_vals); + free(rand_vals); + + check_bloom(skel); + + bloom_filter_map__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c index 85babb0487b3..b52ff8ce34db 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c @@ -179,7 +179,7 @@ done: free_fds(est_fds, nr_est); } -void test_bpf_iter_setsockopt(void) +void serial_test_bpf_iter_setsockopt(void) { struct bpf_iter_setsockopt *iter_skel = NULL; struct bpf_cubic *cubic_skel = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index 284d5921c345..eb8eeebe6935 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -3,7 +3,7 @@ #define nr_iters 2 -void test_bpf_obj_id(void) +void serial_test_bpf_obj_id(void) { const __u64 array_magic_value = 0xfaceb00c; const __u32 array_key = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 3d002c245d2b..27f5d8ea7964 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -39,82 +39,171 @@ struct scale_test_def { bool fails; }; -void test_bpf_verif_scale(void) -{ - struct scale_test_def tests[] = { - { "loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */ }, - - { "test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS }, - { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS }, - { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, - - { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* full unroll by llvm */ - { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* partial unroll. llvm will unroll loop ~150 times. - * C loop count -> 600. - * Asm loop count -> 4. - * 16k insns in loop body. - * Total of 5 such loops. Total program size ~82k insns. - */ - { "pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* no unroll at all. - * C loop count -> 600. - * ASM loop count -> 600. - * ~110 insns in loop body. - * Total of 5 such loops. Total program size ~1500 insns. - */ - { "pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - { "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "loop4.o", BPF_PROG_TYPE_SCHED_CLS }, - { "loop5.o", BPF_PROG_TYPE_SCHED_CLS }, - { "loop6.o", BPF_PROG_TYPE_KPROBE }, - - /* partial unroll. 19k insn in a loop. 
- * Total program size 20.8k insn. - * ~350k processed_insns - */ - { "strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* no unroll, tiny loops */ - { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - /* non-inlined subprogs */ - { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - - { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, - { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, - - { "test_xdp_loop.o", BPF_PROG_TYPE_XDP }, - { "test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL }, - }; +static void scale_test(const char *file, + enum bpf_prog_type attach_type, + bool should_fail) +{ libbpf_print_fn_t old_print_fn = NULL; - int err, i; + int err; if (env.verifier_stats) { test__force_log(); old_print_fn = libbpf_set_print(libbpf_debug_print); } - for (i = 0; i < ARRAY_SIZE(tests); i++) { - const struct scale_test_def *test = &tests[i]; - - if (!test__start_subtest(test->file)) - continue; - - err = check_load(test->file, test->attach_type); - CHECK_FAIL(err && !test->fails); - } + err = check_load(file, attach_type); + if (should_fail) + ASSERT_ERR(err, "expect_error"); + else + ASSERT_OK(err, "expect_success"); if (env.verifier_stats) libbpf_set_print(old_print_fn); } + +void test_verif_scale1() +{ + scale_test("test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale2() +{ + scale_test("test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale3() +{ + scale_test("test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale_pyperf_global() +{ + scale_test("pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf_subprogs() +{ + scale_test("pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf50() +{ + /* full unroll by llvm */ + scale_test("pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void 
test_verif_scale_pyperf100() +{ + /* full unroll by llvm */ + scale_test("pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf180() +{ + /* full unroll by llvm */ + scale_test("pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf600() +{ + /* partial unroll. llvm will unroll loop ~150 times. + * C loop count -> 600. + * Asm loop count -> 4. + * 16k insns in loop body. + * Total of 5 such loops. Total program size ~82k insns. + */ + scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_pyperf600_nounroll() +{ + /* no unroll at all. + * C loop count -> 600. + * ASM loop count -> 600. + * ~110 insns in loop body. + * Total of 5 such loops. Total program size ~1500 insns. + */ + scale_test("pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_loop1() +{ + scale_test("loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_loop2() +{ + scale_test("loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_loop3_fail() +{ + scale_test("loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */); +} + +void test_verif_scale_loop4() +{ + scale_test("loop4.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale_loop5() +{ + scale_test("loop5.o", BPF_PROG_TYPE_SCHED_CLS, false); +} + +void test_verif_scale_loop6() +{ + scale_test("loop6.o", BPF_PROG_TYPE_KPROBE, false); +} + +void test_verif_scale_strobemeta() +{ + /* partial unroll. 19k insn in a loop. + * Total program size 20.8k insn. 
+ * ~350k processed_insns + */ + scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_strobemeta_nounroll1() +{ + /* no unroll, tiny loops */ + scale_test("strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_strobemeta_nounroll2() +{ + /* no unroll, tiny loops */ + scale_test("strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_strobemeta_subprogs() +{ + /* non-inlined subprogs */ + scale_test("strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + +void test_verif_scale_sysctl_loop1() +{ + scale_test("test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false); +} + +void test_verif_scale_sysctl_loop2() +{ + scale_test("test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false); +} + +void test_verif_scale_xdp_loop() +{ + scale_test("test_xdp_loop.o", BPF_PROG_TYPE_XDP, false); +} + +void test_verif_scale_seg6_loop() +{ + scale_test("test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL, false); +} + +void test_verif_twfw() +{ + scale_test("twfw.o", BPF_PROG_TYPE_CGROUP_SKB, false); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 9c85d7d27409..ac596cb06e40 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3662,15 +3662,15 @@ static struct btf_raw_test raw_tests[] = { }, { - .descr = "tag test #1, struct/member, well-formed", + .descr = "decl_tag test #1, struct/member, well-formed", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_STRUCT_ENC(0, 2, 8), /* [2] */ BTF_MEMBER_ENC(NAME_TBD, 1, 0), BTF_MEMBER_ENC(NAME_TBD, 1, 32), - BTF_TAG_ENC(NAME_TBD, 2, -1), - BTF_TAG_ENC(NAME_TBD, 2, 0), - BTF_TAG_ENC(NAME_TBD, 2, 1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 1), BTF_END_RAW, }, BTF_STR_SEC("\0m1\0m2\0tag1\0tag2\0tag3"), @@ -3683,15 
+3683,15 @@ static struct btf_raw_test raw_tests[] = { .max_entries = 1, }, { - .descr = "tag test #2, union/member, well-formed", + .descr = "decl_tag test #2, union/member, well-formed", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_UNION_ENC(NAME_TBD, 2, 4), /* [2] */ BTF_MEMBER_ENC(NAME_TBD, 1, 0), BTF_MEMBER_ENC(NAME_TBD, 1, 0), - BTF_TAG_ENC(NAME_TBD, 2, -1), - BTF_TAG_ENC(NAME_TBD, 2, 0), - BTF_TAG_ENC(NAME_TBD, 2, 1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 1), BTF_END_RAW, }, BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), @@ -3704,13 +3704,13 @@ static struct btf_raw_test raw_tests[] = { .max_entries = 1, }, { - .descr = "tag test #3, variable, well-formed", + .descr = "decl_tag test #3, variable, well-formed", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ BTF_VAR_ENC(NAME_TBD, 1, 1), /* [3] */ - BTF_TAG_ENC(NAME_TBD, 2, -1), - BTF_TAG_ENC(NAME_TBD, 3, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 3, -1), BTF_END_RAW, }, BTF_STR_SEC("\0local\0global\0tag1\0tag2"), @@ -3723,16 +3723,16 @@ static struct btf_raw_test raw_tests[] = { .max_entries = 1, }, { - .descr = "tag test #4, func/parameter, well-formed", + .descr = "decl_tag test #4, func/parameter, well-formed", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_TAG_ENC(NAME_TBD, 3, -1), - BTF_TAG_ENC(NAME_TBD, 3, 0), - BTF_TAG_ENC(NAME_TBD, 3, 1), + BTF_DECL_TAG_ENC(NAME_TBD, 3, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 3, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 3, 1), BTF_END_RAW, }, BTF_STR_SEC("\0arg1\0arg2\0f\0tag1\0tag2\0tag3"), @@ -3745,11 +3745,11 @@ static struct btf_raw_test raw_tests[] = { .max_entries = 1, }, { - .descr = "tag test #5, invalid value", + 
.descr = "decl_tag test #5, invalid value", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TAG_ENC(0, 2, -1), + BTF_DECL_TAG_ENC(0, 2, -1), BTF_END_RAW, }, BTF_STR_SEC("\0local\0tag"), @@ -3764,10 +3764,10 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid value", }, { - .descr = "tag test #6, invalid target type", + .descr = "decl_tag test #6, invalid target type", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TAG_ENC(NAME_TBD, 1, -1), + BTF_DECL_TAG_ENC(NAME_TBD, 1, -1), BTF_END_RAW, }, BTF_STR_SEC("\0tag1"), @@ -3782,11 +3782,11 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid type", }, { - .descr = "tag test #7, invalid vlen", + .descr = "decl_tag test #7, invalid vlen", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TAG, 0, 1), 2), (0), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 1), 2), (0), BTF_END_RAW, }, BTF_STR_SEC("\0local\0tag1"), @@ -3801,11 +3801,11 @@ static struct btf_raw_test raw_tests[] = { .err_str = "vlen != 0", }, { - .descr = "tag test #8, invalid kflag", + .descr = "decl_tag test #8, invalid kflag", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TAG, 1, 0), 2), (-1), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1), BTF_END_RAW, }, BTF_STR_SEC("\0local\0tag1"), @@ -3820,11 +3820,11 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid btf_info kind_flag", }, { - .descr = "tag test #9, var, invalid component_idx", + .descr = "decl_tag test #9, var, invalid component_idx", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */ - BTF_TAG_ENC(NAME_TBD, 2, 0), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 
0), BTF_END_RAW, }, BTF_STR_SEC("\0local\0tag"), @@ -3839,13 +3839,13 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid component_idx", }, { - .descr = "tag test #10, struct member, invalid component_idx", + .descr = "decl_tag test #10, struct member, invalid component_idx", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_STRUCT_ENC(0, 2, 8), /* [2] */ BTF_MEMBER_ENC(NAME_TBD, 1, 0), BTF_MEMBER_ENC(NAME_TBD, 1, 32), - BTF_TAG_ENC(NAME_TBD, 2, 2), + BTF_DECL_TAG_ENC(NAME_TBD, 2, 2), BTF_END_RAW, }, BTF_STR_SEC("\0m1\0m2\0tag"), @@ -3860,14 +3860,14 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid component_idx", }, { - .descr = "tag test #11, func parameter, invalid component_idx", + .descr = "decl_tag test #11, func parameter, invalid component_idx", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_TAG_ENC(NAME_TBD, 3, 2), + BTF_DECL_TAG_ENC(NAME_TBD, 3, 2), BTF_END_RAW, }, BTF_STR_SEC("\0arg1\0arg2\0f\0tag"), @@ -3882,14 +3882,14 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid component_idx", }, { - .descr = "tag test #12, < -1 component_idx", + .descr = "decl_tag test #12, < -1 component_idx", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ - BTF_TAG_ENC(NAME_TBD, 3, -2), + BTF_DECL_TAG_ENC(NAME_TBD, 3, -2), BTF_END_RAW, }, BTF_STR_SEC("\0arg1\0arg2\0f\0tag"), @@ -3903,6 +3903,42 @@ static struct btf_raw_test raw_tests[] = { .btf_load_err = true, .err_str = "Invalid component_idx", }, +{ + .descr = "decl_tag test #13, typedef, well-formed", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 
1), /* [2] */ + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, +{ + .descr = "decl_tag test #14, typedef, invalid component_idx", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ + BTF_DECL_TAG_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0local\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, + .btf_load_err = true, + .err_str = "Invalid component_idx", +}, }; /* struct btf_raw_test raw_tests[] */ @@ -4511,7 +4547,7 @@ static void do_test_file(unsigned int test_num) if (CHECK(err, "obj: %d", err)) return; - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); if (CHECK(!prog, "Cannot find bpf_prog")) { err = -1; goto done; @@ -6672,9 +6708,9 @@ const struct btf_dedup_test dedup_tests[] = { /* const -> [1] int */ BTF_CONST_ENC(1), /* [6] */ /* tag -> [3] struct s */ - BTF_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ /* tag -> [3] struct s, member 1 */ - BTF_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ /* full copy of the above */ BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [9] */ @@ -6689,8 +6725,8 @@ const struct btf_dedup_test dedup_tests[] = { BTF_PTR_ENC(14), /* [13] */ BTF_CONST_ENC(9), /* [14] */ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [15] */ - BTF_TAG_ENC(NAME_NTH(2), 11, -1), /* [16] */ - BTF_TAG_ENC(NAME_NTH(2), 11, 1), /* [17] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 11, -1), /* [16] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 11, 1), /* [17] */ BTF_END_RAW, }, BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"), @@ 
-6714,8 +6750,8 @@ const struct btf_dedup_test dedup_tests[] = { BTF_PTR_ENC(6), /* [5] */ /* const -> [1] int */ BTF_CONST_ENC(1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ - BTF_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [9] */ BTF_END_RAW, }, @@ -6841,11 +6877,12 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ - BTF_TAG_ENC(NAME_TBD, 13, -1), /* [15] tag */ - BTF_TAG_ENC(NAME_TBD, 13, 1), /* [16] tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), }, .expect = { .raw_types = { @@ -6869,11 +6906,12 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ - BTF_TAG_ENC(NAME_TBD, 13, -1), /* [15] tag */ - BTF_TAG_ENC(NAME_TBD, 13, 1), /* [16] tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ + BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), }, .opts = { .dont_resolve_fwds = false, @@ -7036,14 +7074,14 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1), BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */ /* tag -> t */ - BTF_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ - BTF_TAG_ENC(NAME_NTH(5), 2, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), 
/* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [6] */ /* tag -> func */ - BTF_TAG_ENC(NAME_NTH(5), 4, -1), /* [7] */ - BTF_TAG_ENC(NAME_NTH(5), 4, -1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [8] */ /* tag -> func arg a1 */ - BTF_TAG_ENC(NAME_NTH(5), 4, 1), /* [9] */ - BTF_TAG_ENC(NAME_NTH(5), 4, 1), /* [10] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [9] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [10] */ BTF_END_RAW, }, BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"), @@ -7056,9 +7094,9 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1), BTF_FUNC_ENC(NAME_NTH(4), 2), /* [4] */ - BTF_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ - BTF_TAG_ENC(NAME_NTH(5), 4, -1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(5), 4, 1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [7] */ BTF_END_RAW, }, BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"), @@ -7084,17 +7122,17 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), BTF_FUNC_ENC(NAME_NTH(3), 4), /* [5] */ /* tag -> f: tag1, tag2 */ - BTF_TAG_ENC(NAME_NTH(4), 3, -1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(5), 3, -1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [7] */ /* tag -> f/a2: tag1, tag2 */ - BTF_TAG_ENC(NAME_NTH(4), 3, 1), /* [8] */ - BTF_TAG_ENC(NAME_NTH(5), 3, 1), /* [9] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [9] */ /* tag -> f: tag1, tag3 */ - BTF_TAG_ENC(NAME_NTH(4), 5, -1), /* [10] */ - BTF_TAG_ENC(NAME_NTH(6), 5, -1), /* [11] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 5, -1), /* [10] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 5, -1), /* [11] */ /* tag -> f/a2: tag1, tag3 */ - BTF_TAG_ENC(NAME_NTH(4), 5, 1), /* [12] */ - BTF_TAG_ENC(NAME_NTH(6), 5, 1), /* [13] */ + 
BTF_DECL_TAG_ENC(NAME_NTH(4), 5, 1), /* [12] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 5, 1), /* [13] */ BTF_END_RAW, }, BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"), @@ -7106,12 +7144,12 @@ const struct btf_dedup_test dedup_tests[] = { BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1), BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1), BTF_FUNC_ENC(NAME_NTH(3), 2), /* [3] */ - BTF_TAG_ENC(NAME_NTH(4), 3, -1), /* [4] */ - BTF_TAG_ENC(NAME_NTH(5), 3, -1), /* [5] */ - BTF_TAG_ENC(NAME_NTH(6), 3, -1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(4), 3, 1), /* [7] */ - BTF_TAG_ENC(NAME_NTH(5), 3, 1), /* [8] */ - BTF_TAG_ENC(NAME_NTH(6), 3, 1), /* [9] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [9] */ BTF_END_RAW, }, BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"), @@ -7133,17 +7171,17 @@ const struct btf_dedup_test dedup_tests[] = { BTF_MEMBER_ENC(NAME_NTH(2), 1, 0), BTF_MEMBER_ENC(NAME_NTH(3), 1, 32), /* tag -> t: tag1, tag2 */ - BTF_TAG_ENC(NAME_NTH(4), 2, -1), /* [4] */ - BTF_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */ /* tag -> t/m2: tag1, tag2 */ - BTF_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ /* tag -> t: tag1, tag3 */ - BTF_TAG_ENC(NAME_NTH(4), 3, -1), /* [8] */ - BTF_TAG_ENC(NAME_NTH(6), 3, -1), /* [9] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [9] */ /* tag -> t/m2: tag1, tag3 */ - BTF_TAG_ENC(NAME_NTH(4), 3, 1), /* [10] */ - BTF_TAG_ENC(NAME_NTH(6), 3, 1), /* [11] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [10] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [11] */ BTF_END_RAW, 
}, BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), @@ -7154,12 +7192,12 @@ const struct btf_dedup_test dedup_tests[] = { BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [2] */ BTF_MEMBER_ENC(NAME_NTH(2), 1, 0), BTF_MEMBER_ENC(NAME_NTH(3), 1, 32), - BTF_TAG_ENC(NAME_NTH(4), 2, -1), /* [3] */ - BTF_TAG_ENC(NAME_NTH(5), 2, -1), /* [4] */ - BTF_TAG_ENC(NAME_NTH(6), 2, -1), /* [5] */ - BTF_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ - BTF_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ - BTF_TAG_ENC(NAME_NTH(6), 2, 1), /* [8] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [3] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 2, -1), /* [5] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */ + BTF_DECL_TAG_ENC(NAME_NTH(6), 2, 1), /* [8] */ BTF_END_RAW, }, BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), @@ -7168,6 +7206,39 @@ const struct btf_dedup_test dedup_tests[] = { .dont_resolve_fwds = false, }, }, +{ + .descr = "dedup: typedef tags", + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [3] */ + /* tag -> t: tag1, tag2 */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [5] */ + /* tag -> t: tag1, tag3 */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [6] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [7] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [3] */ + BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [4] */ + BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, }; @@ -7202,8 +7273,8 @@ static int btf_type_size(const struct btf_type *t) return 
base_size + sizeof(struct btf_var); case BTF_KIND_DATASEC: return base_size + vlen * sizeof(struct btf_var_secinfo); - case BTF_KIND_TAG: - return base_size + sizeof(struct btf_tag); + case BTF_KIND_DECL_TAG: + return base_size + sizeof(struct btf_decl_tag); default: fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind); return -EINVAL; @@ -7274,8 +7345,8 @@ static void do_test_dedup(unsigned int test_num) goto done; } - test_btf_data = btf__get_raw_data(test_btf, &test_btf_size); - expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size); + test_btf_data = btf__raw_data(test_btf, &test_btf_size); + expect_btf_data = btf__raw_data(expect_btf, &expect_btf_size); if (CHECK(test_btf_size != expect_btf_size, "test_btf_size:%u != expect_btf_size:%u", test_btf_size, expect_btf_size)) { @@ -7329,8 +7400,8 @@ static void do_test_dedup(unsigned int test_num) expect_str_cur += expect_len + 1; } - test_nr_types = btf__get_nr_types(test_btf); - expect_nr_types = btf__get_nr_types(expect_btf); + test_nr_types = btf__type_cnt(test_btf); + expect_nr_types = btf__type_cnt(expect_btf); if (CHECK(test_nr_types != expect_nr_types, "test_nr_types:%u != expect_nr_types:%u", test_nr_types, expect_nr_types)) { @@ -7338,7 +7409,7 @@ static void do_test_dedup(unsigned int test_num) goto done; } - for (i = 1; i <= test_nr_types; i++) { + for (i = 1; i < test_nr_types; i++) { const struct btf_type *test_type, *expect_type; int test_size, expect_size; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 87f9df653e4e..aa76360d8f49 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -27,7 +27,7 @@ static struct btf_dump_test_case { static int btf_dump_all_types(const struct btf *btf, const struct btf_dump_opts *opts) { - size_t type_cnt = btf__get_nr_types(btf); + size_t type_cnt = btf__type_cnt(btf); struct btf_dump *d; int err = 0, id; @@ -36,7 +36,7 
@@ static int btf_dump_all_types(const struct btf *btf, if (err) return err; - for (id = 1; id <= type_cnt; id++) { + for (id = 1; id < type_cnt; id++) { err = btf_dump__dump_type(d, id); if (err) goto done; @@ -133,7 +133,7 @@ static char *dump_buf; static size_t dump_buf_sz; static FILE *dump_buf_file; -void test_btf_dump_incremental(void) +static void test_btf_dump_incremental(void) { struct btf *btf = NULL; struct btf_dump *d = NULL; @@ -171,7 +171,7 @@ void test_btf_dump_incremental(void) err = btf__add_field(btf, "x", 2, 0, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } @@ -210,7 +210,7 @@ void test_btf_dump_incremental(void) err = btf__add_field(btf, "s", 3, 32, 0); ASSERT_OK(err, "field_ok"); - for (i = 1; i <= btf__get_nr_types(btf); i++) { + for (i = 1; i < btf__type_cnt(btf); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } @@ -778,8 +778,10 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, char *str) { +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, "int cpu_number = (int)100", 100); +#endif TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT, "static int cpu_profile_flip = (int)2", 2); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c index 8ab5d3e358dd..8afbf3d0b89a 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c @@ -7,12 +7,12 @@ #include <bpf/btf.h> void test_btf_endian() { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ enum btf_endianness endian = BTF_LITTLE_ENDIAN; -#elif __BYTE_ORDER 
== __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ enum btf_endianness endian = BTF_BIG_ENDIAN; #else -#error "Unrecognized __BYTE_ORDER" +#error "Unrecognized __BYTE_ORDER__" #endif enum btf_endianness swap_endian = 1 - endian; struct btf *btf = NULL, *swap_btf = NULL; @@ -32,7 +32,7 @@ void test_btf_endian() { ASSERT_EQ(btf__endianness(btf), swap_endian, "endian"); /* Get raw BTF data in non-native endianness... */ - raw_data = btf__get_raw_data(btf, &raw_sz); + raw_data = btf__raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) goto err_out; @@ -42,9 +42,9 @@ void test_btf_endian() { goto err_out; ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); - ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types"); - swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz); if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) goto err_out; @@ -58,7 +58,7 @@ void test_btf_endian() { /* swap it back to native endianness */ btf__set_endianness(swap_btf, endian); - swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz); if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) goto err_out; @@ -75,7 +75,7 @@ void test_btf_endian() { swap_btf = NULL; btf__set_endianness(btf, swap_endian); - raw_data = btf__get_raw_data(btf, &raw_sz); + raw_data = btf__raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) goto err_out; @@ -85,7 +85,7 @@ void test_btf_endian() { goto err_out; ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); - ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types"); /* the type should appear as if it was stored in native endianness */ t = btf__type_by_id(swap_btf, var_id); diff --git 
a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c index ca7c2a91610a..b1ffe61f2aa9 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -72,7 +72,7 @@ void test_btf_split() { d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf); if (!ASSERT_OK_PTR(d, "btf_dump__new")) goto cleanup; - for (i = 1; i <= btf__get_nr_types(btf2); i++) { + for (i = 1; i < btf__type_cnt(btf2); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index 76548eecce2c..b912eeb0b6b4 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -4,19 +4,15 @@ #include <bpf/btf.h> #include "btf_helpers.h" -void test_btf_write() { +static void gen_btf(struct btf *btf) +{ const struct btf_var_secinfo *vi; const struct btf_type *t; const struct btf_member *m; const struct btf_enum *v; const struct btf_param *p; - struct btf *btf; int id, err, str_off; - btf = btf__new_empty(); - if (!ASSERT_OK_PTR(btf, "new_empty")) - return; - str_off = btf__find_str(btf, "int"); ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off"); @@ -281,26 +277,159 @@ void test_btf_write() { "[17] DATASEC 'datasec1' size=12 vlen=1\n" "\ttype_id=1 offset=4 size=8", "raw_dump"); - /* TAG */ - id = btf__add_tag(btf, "tag1", 16, -1); + /* DECL_TAG */ + id = btf__add_decl_tag(btf, "tag1", 16, -1); ASSERT_EQ(id, 18, "tag_id"); t = btf__type_by_id(btf, 18); ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value"); - ASSERT_EQ(btf_kind(t), BTF_KIND_TAG, "tag_kind"); + ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind"); ASSERT_EQ(t->type, 16, "tag_type"); - ASSERT_EQ(btf_tag(t)->component_idx, -1, "tag_component_idx"); + ASSERT_EQ(btf_decl_tag(t)->component_idx, -1, "tag_component_idx"); 
ASSERT_STREQ(btf_type_raw_dump(btf, 18), - "[18] TAG 'tag1' type_id=16 component_idx=-1", "raw_dump"); + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "raw_dump"); - id = btf__add_tag(btf, "tag2", 14, 1); + id = btf__add_decl_tag(btf, "tag2", 14, 1); ASSERT_EQ(id, 19, "tag_id"); t = btf__type_by_id(btf, 19); ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag2", "tag_value"); - ASSERT_EQ(btf_kind(t), BTF_KIND_TAG, "tag_kind"); + ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind"); ASSERT_EQ(t->type, 14, "tag_type"); - ASSERT_EQ(btf_tag(t)->component_idx, 1, "tag_component_idx"); + ASSERT_EQ(btf_decl_tag(t)->component_idx, 1, "tag_component_idx"); ASSERT_STREQ(btf_type_raw_dump(btf, 19), - "[19] TAG 'tag2' type_id=14 component_idx=1", "raw_dump"); + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "raw_dump"); +} + +static void test_btf_add() +{ + struct btf *btf; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "new_empty")) + return; + + gen_btf(btf); + + VALIDATE_RAW_BTF( + btf, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] CONST '(anon)' type_id=5", + "[4] VOLATILE '(anon)' type_id=3", + "[5] RESTRICT '(anon)' type_id=4", + "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", + "[7] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", + "[8] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", + "[9] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[10] FWD 'struct_fwd' fwd_kind=struct", + "[11] FWD 'union_fwd' fwd_kind=union", + "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[13] TYPEDEF 'typedef1' type_id=1", + "[14] FUNC 'func1' type_id=15 linkage=global", + "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=2", + "[16] VAR 'var1' type_id=1, linkage=global-alloc", + "[17] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=1 
offset=4 size=8", + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1"); btf__free(btf); } + +static void test_btf_add_btf() +{ + struct btf *btf1 = NULL, *btf2 = NULL; + int id; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "btf1")) + return; + + btf2 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf2, "btf2")) + goto cleanup; + + gen_btf(btf1); + gen_btf(btf2); + + id = btf__add_btf(btf1, btf2); + if (!ASSERT_EQ(id, 20, "id")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] CONST '(anon)' type_id=5", + "[4] VOLATILE '(anon)' type_id=3", + "[5] RESTRICT '(anon)' type_id=4", + "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", + "[7] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", + "[8] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", + "[9] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[10] FWD 'struct_fwd' fwd_kind=struct", + "[11] FWD 'union_fwd' fwd_kind=union", + "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[13] TYPEDEF 'typedef1' type_id=1", + "[14] FUNC 'func1' type_id=15 linkage=global", + "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=2", + "[16] VAR 'var1' type_id=1, linkage=global-alloc", + "[17] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=1 offset=4 size=8", + "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", + + /* types appended from the second BTF */ + "[20] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[21] PTR '(anon)' type_id=20", + "[22] CONST '(anon)' type_id=24", + "[23] VOLATILE '(anon)' type_id=22", + "[24] RESTRICT '(anon)' type_id=23", + "[25] ARRAY '(anon)' type_id=21 index_type_id=20 nr_elems=10", + "[26] STRUCT 's1' size=8 
vlen=2\n" + "\t'f1' type_id=20 bits_offset=0\n" + "\t'f2' type_id=20 bits_offset=32 bitfield_size=16", + "[27] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=20 bits_offset=0 bitfield_size=16", + "[28] ENUM 'e1' size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[29] FWD 'struct_fwd' fwd_kind=struct", + "[30] FWD 'union_fwd' fwd_kind=union", + "[31] ENUM 'enum_fwd' size=4 vlen=0", + "[32] TYPEDEF 'typedef1' type_id=20", + "[33] FUNC 'func1' type_id=34 linkage=global", + "[34] FUNC_PROTO '(anon)' ret_type_id=20 vlen=2\n" + "\t'p1' type_id=20\n" + "\t'p2' type_id=21", + "[35] VAR 'var1' type_id=20, linkage=global-alloc", + "[36] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=20 offset=4 size=8", + "[37] DECL_TAG 'tag1' type_id=35 component_idx=-1", + "[38] DECL_TAG 'tag2' type_id=33 component_idx=1"); + +cleanup: + btf__free(btf1); + btf__free(btf2); +} + +void test_btf_write() +{ + if (test__start_subtest("btf_add")) + test_btf_add(); + if (test__start_subtest("btf_add_btf")) + test_btf_add_btf(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c index 876be0ecb654..621c57222191 100644 --- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c @@ -363,7 +363,7 @@ close_bpf_object: cg_storage_multi_shared__destroy(obj); } -void test_cg_storage_multi(void) +void serial_test_cg_storage_multi(void) { int parent_cgroup_fd = -1, child_cgroup_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c index 70e94e783070..5de485c7370f 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c @@ -21,7 +21,7 @@ static int prog_load(void) bpf_log_buf, BPF_LOG_BUF_SIZE); } -void test_cgroup_attach_autodetach(void) +void 
serial_test_cgroup_attach_autodetach(void) { __u32 duration = 0, prog_cnt = 4, attach_flags; int allow_prog[2] = {-1}; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index 20bb8831dda6..731bea84d8ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -74,7 +74,7 @@ static int prog_load_cnt(int verdict, int val) return ret; } -void test_cgroup_attach_multi(void) +void serial_test_cgroup_attach_multi(void) { __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id; int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c index 9e96f8d87fea..10d3c33821a7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c @@ -23,7 +23,7 @@ static int prog_load(int verdict) bpf_log_buf, BPF_LOG_BUF_SIZE); } -void test_cgroup_attach_override(void) +void serial_test_cgroup_attach_override(void) { int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1; __u32 duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c index 9091524131d6..9e6e6aad347c 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -24,7 +24,7 @@ int ping_and_check(int exp_calls, int exp_alt_calls) return 0; } -void test_cgroup_link(void) +void serial_test_cgroup_link(void) { struct { const char *path; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index ab3b9bc5e6d1..9026b42914d3 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ 
b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -46,7 +46,7 @@ void test_cgroup_v1v2(void) { struct network_helper_opts opts = {}; int server_fd, client_fd, cgroup_fd; - static const int port = 60123; + static const int port = 60120; /* Step 1: Check base connectivity works without any BPF. */ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index 012068f33a0a..f73e6e36b74d 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -195,7 +195,7 @@ cleanup: test_check_mtu__destroy(skel); } -void test_check_mtu(void) +void serial_test_check_mtu(void) { __u32 mtu_lo; diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c index 3d4b2a358d47..1dfe14ff6aa4 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -112,7 +112,7 @@ void test_core_autosize(void) if (!ASSERT_OK_PTR(f, "btf_fdopen")) goto cleanup; - raw_data = btf__get_raw_data(btf, &raw_sz); + raw_data = btf__raw_data(btf, &raw_sz); if (!ASSERT_OK_PTR(raw_data, "raw_data")) goto cleanup; written = fwrite(raw_data, 1, raw_sz, f); @@ -163,7 +163,7 @@ void test_core_autosize(void) usleep(1); - bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss"); + bss_map = bpf_object__find_map_by_name(skel->obj, ".bss"); if (!ASSERT_OK_PTR(bss_map, "bss_map_find")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 763302e63a29..55ec85ba7375 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -381,7 +381,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test) exp->local_anon_void_ptr = -1; 
exp->local_anon_arr = -1; - for (i = 1; i <= btf__get_nr_types(local_btf); i++) + for (i = 1; i < btf__type_cnt(local_btf); i++) { t = btf__type_by_id(local_btf, i); /* we are interested only in anonymous types */ @@ -867,7 +867,7 @@ void test_core_reloc(void) goto cleanup; } - data_map = bpf_object__find_map_by_name(obj, "test_cor.bss"); + data_map = bpf_object__find_map_by_name(obj, ".bss"); if (CHECK(!data_map, "find_data_map", "data map not found\n")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c new file mode 100644 index 000000000000..cbaa44ffb8c6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <test_progs.h> +#include "dummy_st_ops.skel.h" + +/* Need to keep consistent with definition in include/linux/bpf.h */ +struct bpf_dummy_ops_state { + int val; +}; + +static void test_dummy_st_ops_attach(void) +{ + struct dummy_st_ops *skel; + struct bpf_link *link; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.dummy_1); + ASSERT_EQ(libbpf_get_error(link), -EOPNOTSUPP, "dummy_st_ops_attach"); + + dummy_st_ops__destroy(skel); +} + +static void test_dummy_init_ret_value(void) +{ + __u64 args[1] = {0}; + struct bpf_prog_test_run_attr attr = { + .ctx_size_in = sizeof(args), + .ctx_in = args, + }; + struct dummy_st_ops *skel; + int fd, err; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + fd = bpf_program__fd(skel->progs.test_1); + attr.prog_fd = fd; + err = bpf_prog_test_run_xattr(&attr); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret"); + + dummy_st_ops__destroy(skel); +} + +static void test_dummy_init_ptr_arg(void) +{ + int exp_retval = 0xbeef; + 
struct bpf_dummy_ops_state in_state = { + .val = exp_retval, + }; + __u64 args[1] = {(unsigned long)&in_state}; + struct bpf_prog_test_run_attr attr = { + .ctx_size_in = sizeof(args), + .ctx_in = args, + }; + struct dummy_st_ops *skel; + int fd, err; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + fd = bpf_program__fd(skel->progs.test_1); + attr.prog_fd = fd; + err = bpf_prog_test_run_xattr(&attr); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret"); + ASSERT_EQ(attr.retval, exp_retval, "test_ret"); + + dummy_st_ops__destroy(skel); +} + +static void test_dummy_multiple_args(void) +{ + __u64 args[5] = {0, -100, 0x8a5f, 'c', 0x1234567887654321ULL}; + struct bpf_prog_test_run_attr attr = { + .ctx_size_in = sizeof(args), + .ctx_in = args, + }; + struct dummy_st_ops *skel; + int fd, err; + size_t i; + char name[8]; + + skel = dummy_st_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load")) + return; + + fd = bpf_program__fd(skel->progs.test_2); + attr.prog_fd = fd; + err = bpf_prog_test_run_xattr(&attr); + ASSERT_OK(err, "test_run"); + for (i = 0; i < ARRAY_SIZE(args); i++) { + snprintf(name, sizeof(name), "arg %zu", i); + ASSERT_EQ(skel->bss->test_2_args[i], args[i], name); + } + + dummy_st_ops__destroy(skel); +} + +void test_dummy_st_ops(void) +{ + if (test__start_subtest("dummy_st_ops_attach")) + test_dummy_st_ops_attach(); + if (test__start_subtest("dummy_init_ret_value")) + test_dummy_init_ret_value(); + if (test__start_subtest("dummy_init_ptr_arg")) + test_dummy_init_ptr_arg(); + if (test__start_subtest("dummy_multiple_args")) + test_dummy_multiple_args(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 91154c2ba256..4374ac8a8a91 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -6,23 +6,23 @@ void 
test_fentry_fexit(void) { - struct fentry_test *fentry_skel = NULL; - struct fexit_test *fexit_skel = NULL; + struct fentry_test_lskel *fentry_skel = NULL; + struct fexit_test_lskel *fexit_skel = NULL; __u64 *fentry_res, *fexit_res; __u32 duration = 0, retval; int err, prog_fd, i; - fentry_skel = fentry_test__open_and_load(); + fentry_skel = fentry_test_lskel__open_and_load(); if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto close_prog; - fexit_skel = fexit_test__open_and_load(); + fexit_skel = fexit_test_lskel__open_and_load(); if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) goto close_prog; - err = fentry_test__attach(fentry_skel); + err = fentry_test_lskel__attach(fentry_skel); if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) goto close_prog; - err = fexit_test__attach(fexit_skel); + err = fexit_test_lskel__attach(fexit_skel); if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; @@ -44,6 +44,6 @@ void test_fentry_fexit(void) } close_prog: - fentry_test__destroy(fentry_skel); - fexit_test__destroy(fexit_skel); + fentry_test_lskel__destroy(fentry_skel); + fexit_test_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 174c89e7456e..12921b3850d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -3,19 +3,19 @@ #include <test_progs.h> #include "fentry_test.lskel.h" -static int fentry_test(struct fentry_test *fentry_skel) +static int fentry_test(struct fentry_test_lskel *fentry_skel) { int err, prog_fd, i; __u32 duration = 0, retval; int link_fd; __u64 *result; - err = fentry_test__attach(fentry_skel); + err = fentry_test_lskel__attach(fentry_skel); if (!ASSERT_OK(err, "fentry_attach")) return err; /* Check that already linked program can't be attached again. 
*/ - link_fd = fentry_test__test1__attach(fentry_skel); + link_fd = fentry_test_lskel__test1__attach(fentry_skel); if (!ASSERT_LT(link_fd, 0, "fentry_attach_link")) return -1; @@ -31,7 +31,7 @@ static int fentry_test(struct fentry_test *fentry_skel) return -1; } - fentry_test__detach(fentry_skel); + fentry_test_lskel__detach(fentry_skel); /* zero results for re-attach test */ memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss)); @@ -40,10 +40,10 @@ static int fentry_test(struct fentry_test *fentry_skel) void test_fentry_test(void) { - struct fentry_test *fentry_skel = NULL; + struct fentry_test_lskel *fentry_skel = NULL; int err; - fentry_skel = fentry_test__open_and_load(); + fentry_skel = fentry_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto cleanup; @@ -55,5 +55,5 @@ void test_fentry_test(void) ASSERT_OK(err, "fentry_second_attach"); cleanup: - fentry_test__destroy(fentry_skel); + fentry_test_lskel__destroy(fentry_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index c7c1816899bf..9cff14a23bb7 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -285,7 +285,7 @@ static void test_fmod_ret_freplace(void) if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open")) goto out; - prog = bpf_program__next(NULL, freplace_obj); + prog = bpf_object__next_program(freplace_obj, NULL); err = bpf_program__set_attach_target(prog, pkt_fd, NULL); ASSERT_OK(err, "freplace__set_attach_target"); @@ -302,7 +302,7 @@ static void test_fmod_ret_freplace(void) goto out; attach_prog_fd = bpf_program__fd(prog); - prog = bpf_program__next(NULL, fmod_obj); + prog = bpf_object__next_program(fmod_obj, NULL); err = bpf_program__set_attach_target(prog, attach_prog_fd, NULL); ASSERT_OK(err, "fmod_ret_set_attach_target"); @@ -352,7 +352,7 @@ static void test_obj_load_failure_common(const char 
*obj_file, if (!ASSERT_OK_PTR(obj, "obj_open")) goto close_prog; - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); err = bpf_program__set_attach_target(prog, pkt_fd, NULL); ASSERT_OK(err, "set_attach_target"); @@ -380,7 +380,8 @@ static void test_func_map_prog_compatibility(void) "./test_attach_probe.o"); } -void test_fexit_bpf2bpf(void) +/* NOTE: affect other tests, must run in serial mode */ +void serial_test_fexit_bpf2bpf(void) { if (test__start_subtest("target_no_callees")) test_target_no_callees(); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c index 4e7f4b42ea29..f949647dbbc2 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -10,7 +10,7 @@ static int do_sleep(void *skel) { - struct fexit_sleep *fexit_skel = skel; + struct fexit_sleep_lskel *fexit_skel = skel; struct timespec ts1 = { .tv_nsec = 1 }; struct timespec ts2 = { .tv_sec = 10 }; @@ -25,16 +25,16 @@ static char child_stack[STACK_SIZE]; void test_fexit_sleep(void) { - struct fexit_sleep *fexit_skel = NULL; + struct fexit_sleep_lskel *fexit_skel = NULL; int wstatus, duration = 0; pid_t cpid; int err, fexit_cnt; - fexit_skel = fexit_sleep__open_and_load(); + fexit_skel = fexit_sleep_lskel__open_and_load(); if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) goto cleanup; - err = fexit_sleep__attach(fexit_skel); + err = fexit_sleep_lskel__attach(fexit_skel); if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto cleanup; @@ -60,7 +60,7 @@ void test_fexit_sleep(void) */ close(fexit_skel->progs.nanosleep_fentry.prog_fd); close(fexit_skel->progs.nanosleep_fexit.prog_fd); - fexit_sleep__detach(fexit_skel); + fexit_sleep_lskel__detach(fexit_skel); /* kill the thread to unwind sys_nanosleep stack through the trampoline */ kill(cpid, 9); @@ -78,5 +78,5 @@ void test_fexit_sleep(void) goto 
cleanup; cleanup: - fexit_sleep__destroy(fexit_skel); + fexit_sleep_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index af3dba726701..d4887d8bb396 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -3,19 +3,19 @@ #include <test_progs.h> #include "fexit_test.lskel.h" -static int fexit_test(struct fexit_test *fexit_skel) +static int fexit_test(struct fexit_test_lskel *fexit_skel) { int err, prog_fd, i; __u32 duration = 0, retval; int link_fd; __u64 *result; - err = fexit_test__attach(fexit_skel); + err = fexit_test_lskel__attach(fexit_skel); if (!ASSERT_OK(err, "fexit_attach")) return err; /* Check that already linked program can't be attached again. */ - link_fd = fexit_test__test1__attach(fexit_skel); + link_fd = fexit_test_lskel__test1__attach(fexit_skel); if (!ASSERT_LT(link_fd, 0, "fexit_attach_link")) return -1; @@ -31,7 +31,7 @@ static int fexit_test(struct fexit_test *fexit_skel) return -1; } - fexit_test__detach(fexit_skel); + fexit_test_lskel__detach(fexit_skel); /* zero results for re-attach test */ memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss)); @@ -40,10 +40,10 @@ static int fexit_test(struct fexit_test *fexit_skel) void test_fexit_test(void) { - struct fexit_test *fexit_skel = NULL; + struct fexit_test_lskel *fexit_skel = NULL; int err; - fexit_skel = fexit_test__open_and_load(); + fexit_skel = fexit_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto cleanup; @@ -55,5 +55,5 @@ void test_fexit_test(void) ASSERT_OK(err, "fexit_second_attach"); cleanup: - fexit_test__destroy(fexit_skel); + fexit_test_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c index 0e8a4d2f023d..6093728497c7 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -2,7 +2,7 @@ #include <test_progs.h> #include <network_helpers.h> -void test_flow_dissector_load_bytes(void) +void serial_test_flow_dissector_load_bytes(void) { struct bpf_flow_keys flow_keys; __u32 duration = 0, retval, size; diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index 3931ede5c534..f0c6c226aba8 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -628,7 +628,7 @@ out_close: } } -void test_flow_dissector_reattach(void) +void serial_test_flow_dissector_reattach(void) { int err, new_net, saved_net; diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c index 67e86f8d8677..3948da12a528 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c +++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c @@ -6,6 +6,30 @@ static int *pfd_array; static int cpu_cnt; +static bool is_hypervisor(void) +{ + char *line = NULL; + bool ret = false; + size_t len; + FILE *fp; + + fp = fopen("/proc/cpuinfo", "r"); + if (!fp) + return false; + + while (getline(&line, &len, fp) != -1) { + if (!strncmp(line, "flags", 5)) { + if (strstr(line, "hypervisor") != NULL) + ret = true; + break; + } + } + + free(line); + fclose(fp); + return ret; +} + static int create_perf_events(void) { struct perf_event_attr attr = {0}; @@ -49,11 +73,17 @@ static void close_perf_events(void) free(pfd_array); } -void test_get_branch_snapshot(void) +void serial_test_get_branch_snapshot(void) { struct get_branch_snapshot *skel = NULL; int err; + /* Skip the test before we fix LBR snapshot for hypervisor. 
*/ + if (is_hypervisor()) { + test__skip(); + return; + } + if (create_perf_events()) { test__skip(); /* system doesn't support LBR */ goto cleanup; @@ -67,9 +97,10 @@ void test_get_branch_snapshot(void) if (!ASSERT_OK(err, "kallsyms_find")) goto cleanup; - err = kallsyms_find_next("bpf_testmod_loop_test", &skel->bss->address_high); - if (!ASSERT_OK(err, "kallsyms_find_next")) - goto cleanup; + /* Just a guess for the end of this function, as module functions + * in /proc/kallsyms could come in any order. + */ + skel->bss->address_high = skel->bss->address_low + 128; err = get_branch_snapshot__attach(skel); if (!ASSERT_OK(err, "get_branch_snapshot__attach")) diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index 9efa7e50eab2..afd8639f9a94 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -103,11 +103,18 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration) static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) { int err = -ENOMEM, map_fd, zero = 0; - struct bpf_map *map; + struct bpf_map *map, *map2; __u8 *buff; map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); - if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) + if (!ASSERT_OK_PTR(map, "map")) + return; + if (!ASSERT_TRUE(bpf_map__is_internal(map), "is_internal")) + return; + + /* ensure we can lookup internal maps by their ELF names */ + map2 = bpf_object__find_map_by_name(obj, ".rodata"); + if (!ASSERT_EQ(map, map2, "same_maps")) return; map_fd = bpf_map__fd(map); diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c index ee46b11f1f9a..1db86eab101b 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c @@ -16,7 +16,7 @@ void test_global_data_init(void) 
if (CHECK_FAIL(err)) return; - map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); + map = bpf_object__find_map_by_name(obj, ".rodata"); if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index ddfb6bf97152..01e51d16c8b8 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -48,7 +48,8 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) *(bool *)ctx = true; } -void test_kfree_skb(void) +/* TODO: fix kernel panic caused by this test in parallel mode */ +void serial_test_kfree_skb(void) { struct __sk_buff skb = {}; struct bpf_prog_test_run_attr tattr = { @@ -92,7 +93,7 @@ void test_kfree_skb(void) if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n")) goto close_prog; - global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss"); + global_data = bpf_object__find_map_by_name(obj2, ".bss"); if (CHECK(!global_data, "find global data", "not found\n")) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 9611f2bc50df..5c9c0176991b 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -7,10 +7,10 @@ static void test_main(void) { - struct kfunc_call_test *skel; + struct kfunc_call_test_lskel *skel; int prog_fd, retval, err; - skel = kfunc_call_test__open_and_load(); + skel = kfunc_call_test_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel")) return; @@ -26,7 +26,7 @@ static void test_main(void) ASSERT_OK(err, "bpf_prog_test_run(test2)"); ASSERT_EQ(retval, 3, "test2-retval"); - kfunc_call_test__destroy(skel); + kfunc_call_test_lskel__destroy(skel); } static void test_subprog(void) diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c 
b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index cf3acfa5a91d..79f6bd1e50d6 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -7,6 +7,7 @@ #include "test_ksyms_btf.skel.h" #include "test_ksyms_btf_null_check.skel.h" #include "test_ksyms_weak.skel.h" +#include "test_ksyms_weak.lskel.h" static int duration; @@ -89,11 +90,11 @@ static void test_weak_syms(void) int err; skel = test_ksyms_weak__open_and_load(); - if (CHECK(!skel, "test_ksyms_weak__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(skel, "test_ksyms_weak__open_and_load")) return; err = test_ksyms_weak__attach(skel); - if (CHECK(err, "test_ksyms_weak__attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_ksyms_weak__attach")) goto cleanup; /* trigger tracepoint */ @@ -109,6 +110,33 @@ cleanup: test_ksyms_weak__destroy(skel); } +static void test_weak_syms_lskel(void) +{ + struct test_ksyms_weak_lskel *skel; + struct test_ksyms_weak_lskel__data *data; + int err; + + skel = test_ksyms_weak_lskel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_weak_lskel__open_and_load")) + return; + + err = test_ksyms_weak_lskel__attach(skel); + if (!ASSERT_OK(err, "test_ksyms_weak_lskel__attach")) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = skel->data; + ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym"); + ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym"); + ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym"); + ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym"); + +cleanup: + test_ksyms_weak_lskel__destroy(skel); +} + void test_ksyms_btf(void) { int percpu_datasec; @@ -136,4 +164,7 @@ void test_ksyms_btf(void) if (test__start_subtest("weak_ksyms")) test_weak_syms(); + + if (test__start_subtest("weak_ksyms_lskel")) + test_weak_syms_lskel(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c 
b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c index 2cd5cded543f..d490ad80eccb 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -2,30 +2,61 @@ /* Copyright (c) 2021 Facebook */ #include <test_progs.h> -#include <bpf/libbpf.h> -#include <bpf/btf.h> +#include <network_helpers.h> #include "test_ksyms_module.lskel.h" +#include "test_ksyms_module.skel.h" -static int duration; - -void test_ksyms_module(void) +void test_ksyms_module_lskel(void) { - struct test_ksyms_module* skel; + struct test_ksyms_module_lskel *skel; + int retval; int err; - skel = test_ksyms_module__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!env.has_testmod) { + test__skip(); return; + } - err = test_ksyms_module__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + skel = test_ksyms_module_lskel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load")) + return; + err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, (__u32 *)&retval, NULL); + if (!ASSERT_OK(err, "bpf_prog_test_run")) goto cleanup; + ASSERT_EQ(retval, 0, "retval"); + ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); +cleanup: + test_ksyms_module_lskel__destroy(skel); +} - usleep(1); +void test_ksyms_module_libbpf(void) +{ + struct test_ksyms_module *skel; + int retval, err; - ASSERT_EQ(skel->bss->triggered, true, "triggered"); - ASSERT_EQ(skel->bss->out_mod_ksym_global, 123, "global_ksym_val"); + if (!env.has_testmod) { + test__skip(); + return; + } + skel = test_ksyms_module__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open")) + return; + err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4, + sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); + if (!ASSERT_OK(err, "bpf_prog_test_run")) + goto cleanup; + ASSERT_EQ(retval, 0, "retval"); + 
ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym"); cleanup: test_ksyms_module__destroy(skel); } + +void test_ksyms_module(void) +{ + if (test__start_subtest("lskel")) + test_ksyms_module_lskel(); + if (test__start_subtest("libbpf")) + test_ksyms_module_libbpf(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c index 59adb4715394..7589c03fd26b 100644 --- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c @@ -541,7 +541,7 @@ close_servers: } } -void test_migrate_reuseport(void) +void serial_test_migrate_reuseport(void) { struct test_migrate_reuseport *skel; int i; diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c index 97fec70c600b..b772fe30ce9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/modify_return.c +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -53,7 +53,8 @@ cleanup: modify_return__destroy(skel); } -void test_modify_return(void) +/* TODO: conflict with get_func_ip_test */ +void serial_test_modify_return(void) { run_test(0 /* input_retval */, 1 /* want_side_effect */, diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 1797a6e4d6d8..6d0e50dcf47c 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -2,10 +2,36 @@ /* Copyright (c) 2020 Facebook */ #include <test_progs.h> +#include <stdbool.h> #include "test_module_attach.skel.h" static int duration; +static int trigger_module_test_writable(int *val) +{ + int fd, err; + char buf[65]; + ssize_t rd; + + fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY); + err = -errno; + if (!ASSERT_GE(fd, 0, "testmode_file_open")) + return err; + + rd = read(fd, buf, sizeof(buf) - 1); + err = -errno; + if 
(!ASSERT_GT(rd, 0, "testmod_file_rd_val")) { + close(fd); + return err; + } + + buf[rd] = '\0'; + *val = strtol(buf, NULL, 0); + close(fd); + + return 0; +} + static int delete_module(const char *name, int flags) { return syscall(__NR_delete_module, name, flags); @@ -19,6 +45,7 @@ void test_module_attach(void) struct test_module_attach__bss *bss; struct bpf_link *link; int err; + int writable_val = 0; skel = test_module_attach__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -51,6 +78,14 @@ void test_module_attach(void) ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet"); ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret"); + bss->raw_tp_writable_bare_early_ret = true; + bss->raw_tp_writable_bare_out_val = 0xf1f2f3f4; + ASSERT_OK(trigger_module_test_writable(&writable_val), + "trigger_writable"); + ASSERT_EQ(bss->raw_tp_writable_bare_in_val, 1024, "writable_test_in"); + ASSERT_EQ(bss->raw_tp_writable_bare_out_val, writable_val, + "writable_test_out"); + test_module_attach__detach(skel); /* attach fentry/fexit and make sure it get's module reference */ diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 2535788e135f..24d493482ffc 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -78,7 +78,8 @@ static void test_ns_current_pid_tgid_new_ns(void) return; } -void test_ns_current_pid_tgid(void) +/* TODO: use a different tracepoint */ +void serial_test_ns_current_pid_tgid(void) { if (test__start_subtest("ns_current_pid_tgid_root_ns")) test_current_pid_tgid(NULL); diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 6490e9673002..4e32f3586a75 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -43,9 +43,10 @@ int 
trigger_on_cpu(int cpu) return 0; } -void test_perf_buffer(void) +void serial_test_perf_buffer(void) { - int err, on_len, nr_on_cpus = 0, nr_cpus, i; + int err, on_len, nr_on_cpus = 0, nr_cpus, i, j; + int zero = 0, my_pid = getpid(); struct perf_buffer_opts pb_opts = {}; struct test_perf_buffer *skel; cpu_set_t cpu_seen; @@ -71,6 +72,10 @@ void test_perf_buffer(void) if (CHECK(!skel, "skel_load", "skeleton open/load failed\n")) goto out_close; + err = bpf_map_update_elem(bpf_map__fd(skel->maps.my_pid_map), &zero, &my_pid, 0); + if (!ASSERT_OK(err, "my_pid_update")) + goto out_close; + /* attach probe */ err = test_perf_buffer__attach(skel); if (CHECK(err, "attach_kprobe", "err %d\n", err)) @@ -107,19 +112,19 @@ void test_perf_buffer(void) "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; - if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt", - "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus)) + if (CHECK(perf_buffer__buffer_cnt(pb) != nr_on_cpus, "buf_cnt", + "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_on_cpus)) goto out_close; - for (i = 0; i < nr_cpus; i++) { + for (i = 0, j = 0; i < nr_cpus; i++) { if (i >= on_len || !online[i]) continue; - fd = perf_buffer__buffer_fd(pb, i); + fd = perf_buffer__buffer_fd(pb, j); CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd); last_fd = fd; - err = perf_buffer__consume_buffer(pb, i); + err = perf_buffer__consume_buffer(pb, j); if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err)) goto out_close; @@ -127,12 +132,13 @@ void test_perf_buffer(void) if (trigger_on_cpu(i)) goto out_close; - err = perf_buffer__consume_buffer(pb, i); - if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err)) + err = perf_buffer__consume_buffer(pb, j); + if (CHECK(err, "consume_buf", "cpu %d, err %d\n", j, err)) goto out_close; if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i)) goto out_close; + j++; } out_free_pb: diff --git 
a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c index b1abd0c46607..ede07344f264 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_link.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c @@ -23,7 +23,8 @@ static void burn_cpu(void) ++j; } -void test_perf_link(void) +/* TODO: often fails in concurrent mode */ +void serial_test_perf_link(void) { struct test_perf_link *skel = NULL; struct perf_event_attr attr; diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index 52fe157e2a90..abf890d066eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> -void test_probe_user(void) +/* TODO: corrupts other tests uses connect() */ +void serial_test_probe_user(void) { const char *prog_name = "handle_sys_connect"; const char *obj_file = "./test_probe_user.o"; diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c index 5c45424cac5f..ddefa1192e5d 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c @@ -3,7 +3,8 @@ #include <test_progs.h> #include <linux/nbd.h> -void test_raw_tp_writable_test_run(void) +/* NOTE: conflict with other tests. 
*/ +void serial_test_raw_tp_writable_test_run(void) { __u32 duration = 0; char error[4096]; diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c index 5f9eaa3ab584..fd5d2ddfb062 100644 --- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c +++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c @@ -37,7 +37,7 @@ void test_rdonly_maps(void) if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) goto cleanup; - bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss"); + bss_map = bpf_object__find_map_by_name(obj, ".bss"); if (CHECK(!bss_map, "find_bss_map", "failed\n")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c index 0e378d63fe18..f3af2627b599 100644 --- a/tools/testing/selftests/bpf/prog_tests/recursion.c +++ b/tools/testing/selftests/bpf/prog_tests/recursion.c @@ -20,18 +20,18 @@ void test_recursion(void) goto out; ASSERT_EQ(skel->bss->pass1, 0, "pass1 == 0"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key); ASSERT_EQ(skel->bss->pass1, 1, "pass1 == 1"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key); ASSERT_EQ(skel->bss->pass1, 2, "pass1 == 2"); ASSERT_EQ(skel->bss->pass2, 0, "pass2 == 0"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key); ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1"); - bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0); + bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key); ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2"); - err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_lookup), + err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_delete), &prog_info, &prog_info_len); if (!ASSERT_OK(err, "get_prog_info")) goto out; diff --git 
a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index f62361306f6d..f4a13d9dd5c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -106,9 +106,9 @@ static int resolve_symbols(void) "Failed to load BTF from btf_data.o\n")) return -1; - nr = btf__get_nr_types(btf); + nr = btf__type_cnt(btf); - for (type_id = 1; type_id <= nr; type_id++) { + for (type_id = 1; type_id < nr; type_id++) { if (__resolve_symbol(btf, type_id)) break; } @@ -117,14 +117,14 @@ static int resolve_symbols(void) return 0; } -int test_resolve_btfids(void) +void test_resolve_btfids(void) { __u32 *test_list, *test_lists[] = { test_list_local, test_list_global }; unsigned int i, j; int ret = 0; if (resolve_symbols()) - return -1; + return; /* Check BTF_ID_LIST(test_list_local) and * BTF_ID_LIST_GLOBAL(test_list_global) IDs @@ -138,7 +138,7 @@ int test_resolve_btfids(void) test_symbols[i].name, test_list[i], test_symbols[i].id); if (ret) - return ret; + return; } } @@ -161,9 +161,7 @@ int test_resolve_btfids(void) if (i > 0) { if (!ASSERT_LE(test_set.ids[i - 1], test_set.ids[i], "sort_check")) - return -1; + return; } } - - return ret; } diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 4706cee84360..9a80fe8a6427 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -58,7 +58,7 @@ static int process_sample(void *ctx, void *data, size_t len) } } -static struct test_ringbuf *skel; +static struct test_ringbuf_lskel *skel; static struct ring_buffer *ringbuf; static void trigger_samples() @@ -90,13 +90,13 @@ void test_ringbuf(void) int page_size = getpagesize(); void *mmap_ptr, *tmp_ptr; - skel = test_ringbuf__open(); + skel = test_ringbuf_lskel__open(); if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; 
skel->maps.ringbuf.max_entries = page_size; - err = test_ringbuf__load(skel); + err = test_ringbuf_lskel__load(skel); if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) goto cleanup; @@ -154,7 +154,7 @@ void test_ringbuf(void) if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n")) goto cleanup; - err = test_ringbuf__attach(skel); + err = test_ringbuf_lskel__attach(skel); if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err)) goto cleanup; @@ -292,8 +292,8 @@ void test_ringbuf(void) CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n", 1L, skel->bss->discarded); - test_ringbuf__detach(skel); + test_ringbuf_lskel__detach(skel); cleanup: ring_buffer__free(ringbuf); - test_ringbuf__destroy(skel); + test_ringbuf_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 4efd337d6a3c..3cfc910ab3c1 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -114,7 +114,7 @@ static int prepare_bpf_obj(void) err = bpf_object__load(obj); RET_ERR(err, "load bpf_object", "err:%d\n", err); - prog = bpf_program__next(NULL, obj); + prog = bpf_object__next_program(obj, NULL); RET_ERR(!prog, "get first bpf_program", "!prog\n"); select_by_skb_data_prog = bpf_program__fd(prog); RET_ERR(select_by_skb_data_prog < 0, "get prog fd", @@ -858,7 +858,7 @@ out: cleanup(); } -void test_select_reuseport(void) +void serial_test_select_reuseport(void) { saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL); if (saved_tcp_fo < 0) diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c index 189a34a7addb..15dacfcfaa6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c @@ -25,7 +25,8 
@@ static void *worker(void *p) return NULL; } -void test_send_signal_sched_switch(void) +/* NOTE: cause events loss */ +void serial_test_send_signal_sched_switch(void) { struct test_send_signal_kern *skel; pthread_t threads[THREAD_COUNT]; diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c index dfcbddcbe4d3..fdfdcff6cbef 100644 --- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c +++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c @@ -42,7 +42,7 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type) signal(SIGALRM, SIG_DFL); } -void test_signal_pending(enum bpf_prog_type prog_type) +void test_signal_pending(void) { test_signal_pending_by_type(BPF_PROG_TYPE_SOCKET_FILTER); test_signal_pending_by_type(BPF_PROG_TYPE_FLOW_DISSECTOR); diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index aee41547e7f4..6db07401bc49 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -598,7 +598,7 @@ close: static void run_lookup_prog(const struct test *t) { - int server_fds[MAX_SERVERS] = { -1 }; + int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 }; int client_fd, reuse_conn_fd = -1; struct bpf_link *lookup_link; int i, err; @@ -1053,7 +1053,7 @@ static void run_sk_assign(struct test_sk_lookup *skel, struct bpf_program *lookup_prog, const char *remote_ip, const char *local_ip) { - int server_fds[MAX_SERVERS] = { -1 }; + int server_fds[] = { [0 ... 
MAX_SERVERS - 1] = -1 }; struct bpf_sk_lookup ctx; __u64 server_cookie; int i, err; diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c index 2b392590e8ca..547ae53cde74 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c @@ -105,7 +105,7 @@ out: close(listen_fd); } -void test_sk_storage_tracing(void) +void serial_test_sk_storage_tracing(void) { struct test_sk_storage_trace_itself *skel_itself; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c new file mode 100644 index 000000000000..3eefdfed1db9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021 Hengqi Chen */ + +#include <test_progs.h> +#include <sys/un.h> +#include "test_skc_to_unix_sock.skel.h" + +static const char *sock_path = "@skc_to_unix_sock"; + +void test_skc_to_unix_sock(void) +{ + struct test_skc_to_unix_sock *skel; + struct sockaddr_un sockaddr; + int err, sockfd = 0; + + skel = test_skc_to_unix_sock__open(); + if (!ASSERT_OK_PTR(skel, "could not open BPF object")) + return; + + skel->rodata->my_pid = getpid(); + + err = test_skc_to_unix_sock__load(skel); + if (!ASSERT_OK(err, "could not load BPF object")) + goto cleanup; + + err = test_skc_to_unix_sock__attach(skel); + if (!ASSERT_OK(err, "could not attach BPF object")) + goto cleanup; + + /* trigger unix_listen */ + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (!ASSERT_GT(sockfd, 0, "socket failed")) + goto cleanup; + + memset(&sockaddr, 0, sizeof(sockaddr)); + sockaddr.sun_family = AF_UNIX; + strncpy(sockaddr.sun_path, sock_path, strlen(sock_path)); + sockaddr.sun_path[0] = '\0'; + + err = bind(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)); + if (!ASSERT_OK(err, "bind failed")) 
+ goto cleanup; + + err = listen(sockfd, 1); + if (!ASSERT_OK(err, "listen failed")) + goto cleanup; + + ASSERT_EQ(strcmp(skel->bss->path, sock_path), 0, "bpf_skc_to_unix_sock failed"); + +cleanup: + if (sockfd) + close(sockfd); + test_skc_to_unix_sock__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index fe1e204a65c6..180afd632f4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -16,10 +16,13 @@ void test_skeleton(void) struct test_skeleton* skel; struct test_skeleton__bss *bss; struct test_skeleton__data *data; + struct test_skeleton__data_dyn *data_dyn; struct test_skeleton__rodata *rodata; + struct test_skeleton__rodata_dyn *rodata_dyn; struct test_skeleton__kconfig *kcfg; const void *elf_bytes; size_t elf_bytes_sz = 0; + int i; skel = test_skeleton__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -30,7 +33,12 @@ void test_skeleton(void) bss = skel->bss; data = skel->data; + data_dyn = skel->data_dyn; rodata = skel->rodata; + rodata_dyn = skel->rodata_dyn; + + ASSERT_STREQ(bpf_map__name(skel->maps.rodata_dyn), ".rodata.dyn", "rodata_dyn_name"); + ASSERT_STREQ(bpf_map__name(skel->maps.data_dyn), ".data.dyn", "data_dyn_name"); /* validate values are pre-initialized correctly */ CHECK(data->in1 != -1, "in1", "got %d != exp %d\n", data->in1, -1); @@ -46,6 +54,12 @@ void test_skeleton(void) CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0); CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0); + ASSERT_EQ(rodata_dyn->in_dynarr_sz, 0, "in_dynarr_sz"); + for (i = 0; i < 4; i++) + ASSERT_EQ(rodata_dyn->in_dynarr[i], -(i + 1), "in_dynarr"); + for (i = 0; i < 4; i++) + ASSERT_EQ(data_dyn->out_dynarr[i], i + 1, "out_dynarr"); + /* validate we can pre-setup global variables, even in .bss */ data->in1 = 10; data->in2 = 11; @@ -53,6 +67,10 @@ void 
test_skeleton(void) bss->in4 = 13; rodata->in.in6 = 14; + rodata_dyn->in_dynarr_sz = 4; + for (i = 0; i < 4; i++) + rodata_dyn->in_dynarr[i] = i + 10; + err = test_skeleton__load(skel); if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) goto cleanup; @@ -64,6 +82,10 @@ void test_skeleton(void) CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL); CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14); + ASSERT_EQ(rodata_dyn->in_dynarr_sz, 4, "in_dynarr_sz"); + for (i = 0; i < 4; i++) + ASSERT_EQ(rodata_dyn->in_dynarr[i], i + 10, "in_dynarr"); + /* now set new values and attach to get them into outX variables */ data->in1 = 1; data->in2 = 2; @@ -73,6 +95,8 @@ void test_skeleton(void) bss->in5.b = 6; kcfg = skel->kconfig; + skel->data_read_mostly->read_mostly_var = 123; + err = test_skeleton__attach(skel); if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) goto cleanup; @@ -93,6 +117,11 @@ void test_skeleton(void) CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2", "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION); + for (i = 0; i < 4; i++) + ASSERT_EQ(data_dyn->out_dynarr[i], i + 10, "out_dynarr"); + + ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 8fd1b4b29a0e..394ebfc3bbf3 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -33,7 +33,7 @@ #define EXP_NO_BUF_RET 29 -void test_snprintf_positive(void) +static void test_snprintf_positive(void) { char exp_addr_out[] = EXP_ADDR_OUT; char exp_sym_out[] = EXP_SYM_OUT; @@ -103,7 +103,7 @@ static int load_single_snprintf(char *fmt) return ret; } -void test_snprintf_negative(void) +static void 
test_snprintf_negative(void) { ASSERT_OK(load_single_snprintf("valid %d"), "valid usage"); diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c index 76e1f5fe18fa..dd41b826be30 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c @@ -6,7 +6,7 @@ /* Demonstrate that bpf_snprintf_btf succeeds and that various data types * are formatted correctly. */ -void test_snprintf_btf(void) +void serial_test_snprintf_btf(void) { struct netif_receive_skb *skel; struct netif_receive_skb__bss *bss; diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 577d619fb07e..fae40db4d81f 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -329,7 +329,7 @@ done: close(listen_fd); } -void test_sock_fields(void) +void serial_test_sock_fields(void) { struct bpf_link *egress_link = NULL, *ingress_link = NULL; int parent_cg_fd = -1, child_cg_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index d88bb65b74cc..2a9cb951bfd6 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -2002,7 +2002,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, test_udp_unix_redir(skel, map, family); } -void test_sockmap_listen(void) +void serial_test_sockmap_listen(void) { struct test_sockmap_listen *skel; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index e87bc4466d9a..4b18b73df10b 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -176,6 +176,18 @@ static int 
netns_setup_namespaces(const char *verb) return 0; } +static void netns_setup_namespaces_nofail(const char *verb) +{ + const char * const *ns = namespaces; + char cmd[128]; + + while (*ns) { + snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns); + system(cmd); + ns++; + } +} + struct netns_setup_result { int ifindex_veth_src_fwd; int ifindex_veth_dst_fwd; @@ -762,6 +774,8 @@ fail: static void *test_tc_redirect_run_tests(void *arg) { + netns_setup_namespaces_nofail("delete"); + RUN_TEST(tc_redirect_peer); RUN_TEST(tc_redirect_peer_l3); RUN_TEST(tc_redirect_neigh); @@ -769,7 +783,7 @@ static void *test_tc_redirect_run_tests(void *arg) return NULL; } -void test_tc_redirect(void) +void serial_test_tc_redirect(void) { pthread_t test_thread; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index d207e968e6b1..265b4fe33ec3 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -109,7 +109,7 @@ static int run_test(int cgroup_fd, int server_fd) return -1; } - map = bpf_map__next(NULL, obj); + map = bpf_object__next_map(obj, NULL); map_fd = bpf_map__fd(map); err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c index 0252f61d611a..97d8a6f84f4a 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_ima.c +++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c @@ -43,7 +43,7 @@ static int process_sample(void *ctx, void *data, size_t len) void test_test_ima(void) { char measured_dir_template[] = "/tmp/ima_measuredXXXXXX"; - struct ring_buffer *ringbuf; + struct ring_buffer *ringbuf = NULL; const char *measured_dir; char cmd[256]; @@ -85,5 +85,6 @@ close_clean: err = system(cmd); CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno); close_prog: + ring_buffer__free(ringbuf); 
ima__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 25f40e1b9967..0f4e49e622cd 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -39,7 +39,8 @@ static int timer(struct timer *timer_skel) return 0; } -void test_timer(void) +/* TODO: use pid filtering */ +void serial_test_timer(void) { struct timer *timer_skel = NULL; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index ced8f6cf347c..949a0617869d 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -52,7 +52,7 @@ static int timer_mim(struct timer_mim *timer_skel) return 0; } -void test_timer_mim(void) +void serial_test_timer_mim(void) { struct timer_mim_reject *timer_reject_skel = NULL; libbpf_print_fn_t old_print_fn = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c index fb095e5cd9af..8652d0a46c87 100644 --- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c +++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> -void test_tp_attach_query(void) +void serial_test_tp_attach_query(void) { const int num_progs = 3; int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs]; diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index e47835f0a674..cade7f12315f 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -8,29 +8,29 @@ #define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "testing,testing" -void test_trace_printk(void) +void serial_test_trace_printk(void) { + struct 
trace_printk_lskel__bss *bss; int err = 0, iter = 0, found = 0; - struct trace_printk__bss *bss; - struct trace_printk *skel; + struct trace_printk_lskel *skel; char *buf = NULL; FILE *fp = NULL; size_t buflen; - skel = trace_printk__open(); + skel = trace_printk_lskel__open(); if (!ASSERT_OK_PTR(skel, "trace_printk__open")) return; ASSERT_EQ(skel->rodata->fmt[0], 'T', "skel->rodata->fmt[0]"); skel->rodata->fmt[0] = 't'; - err = trace_printk__load(skel); + err = trace_printk_lskel__load(skel); if (!ASSERT_OK(err, "trace_printk__load")) goto cleanup; bss = skel->bss; - err = trace_printk__attach(skel); + err = trace_printk_lskel__attach(skel); if (!ASSERT_OK(err, "trace_printk__attach")) goto cleanup; @@ -43,7 +43,7 @@ void test_trace_printk(void) /* wait for tracepoint to trigger */ usleep(1); - trace_printk__detach(skel); + trace_printk_lskel__detach(skel); if (!ASSERT_GT(bss->trace_printk_ran, 0, "bss->trace_printk_ran")) goto cleanup; @@ -65,7 +65,7 @@ void test_trace_printk(void) goto cleanup; cleanup: - trace_printk__destroy(skel); + trace_printk_lskel__destroy(skel); free(buf); if (fp) fclose(fp); diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c index 61a24e62e1a0..7a4e313e8558 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c @@ -8,22 +8,22 @@ #define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "1,2,3,4,5,6,7,8,9,10" -void test_trace_vprintk(void) +void serial_test_trace_vprintk(void) { + struct trace_vprintk_lskel__bss *bss; int err = 0, iter = 0, found = 0; - struct trace_vprintk__bss *bss; - struct trace_vprintk *skel; + struct trace_vprintk_lskel *skel; char *buf = NULL; FILE *fp = NULL; size_t buflen; - skel = trace_vprintk__open_and_load(); + skel = trace_vprintk_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load")) goto cleanup; bss = skel->bss; - 
err = trace_vprintk__attach(skel); + err = trace_vprintk_lskel__attach(skel); if (!ASSERT_OK(err, "trace_vprintk__attach")) goto cleanup; @@ -36,7 +36,7 @@ void test_trace_vprintk(void) /* wait for tracepoint to trigger */ usleep(1); - trace_vprintk__detach(skel); + trace_vprintk_lskel__detach(skel); if (!ASSERT_GT(bss->trace_vprintk_ran, 0, "bss->trace_vprintk_ran")) goto cleanup; @@ -61,7 +61,7 @@ void test_trace_vprintk(void) goto cleanup; cleanup: - trace_vprintk__destroy(skel); + trace_vprintk_lskel__destroy(skel); free(buf); if (fp) fclose(fp); diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index d7f5a931d7f3..fc146671b20a 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -41,7 +41,8 @@ static struct bpf_link *load(struct bpf_object *obj, const char *name) return bpf_program__attach_trace(prog); } -void test_trampoline_count(void) +/* TODO: use different target function to run in concurrent mode */ +void serial_test_trampoline_count(void) { const char *fentry_name = "fentry/__set_task_comm"; const char *fexit_name = "fexit/__set_task_comm"; diff --git a/tools/testing/selftests/bpf/prog_tests/verif_stats.c b/tools/testing/selftests/bpf/prog_tests/verif_stats.c new file mode 100644 index 000000000000..a47e7c0e1ffd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/verif_stats.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> + +#include "trace_vprintk.lskel.h" + +void test_verif_stats(void) +{ + __u32 len = sizeof(struct bpf_prog_info); + struct trace_vprintk_lskel *skel; + struct bpf_prog_info info = {}; + int err; + + skel = trace_vprintk_lskel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load")) + goto cleanup; + + err = bpf_obj_get_info_by_fd(skel->progs.sys_enter.prog_fd, &info, &len); + 
if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) + goto cleanup; + + if (!ASSERT_GT(info.verified_insns, 0, "verified_insns")) + goto cleanup; + +cleanup: + trace_vprintk_lskel__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index d5c98f2cb12f..f529e3c923ae 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -2,7 +2,7 @@ #include <test_progs.h> #include <network_helpers.h> -void test_xdp_adjust_tail_shrink(void) +static void test_xdp_adjust_tail_shrink(void) { const char *file = "./test_xdp_adjust_tail_shrink.o"; __u32 duration, retval, size, expect_sz; @@ -30,7 +30,7 @@ void test_xdp_adjust_tail_shrink(void) bpf_object__close(obj); } -void test_xdp_adjust_tail_grow(void) +static void test_xdp_adjust_tail_grow(void) { const char *file = "./test_xdp_adjust_tail_grow.o"; struct bpf_object *obj; @@ -58,7 +58,7 @@ void test_xdp_adjust_tail_grow(void) bpf_object__close(obj); } -void test_xdp_adjust_tail_grow2(void) +static void test_xdp_adjust_tail_grow2(void) { const char *file = "./test_xdp_adjust_tail_grow.o"; char buf[4096]; /* avoid segfault: large buf to hold grow results */ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index 15ef3531483e..4c4057262cd8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -4,7 +4,7 @@ #define IFINDEX_LO 1 #define XDP_FLAGS_REPLACE (1U << 4) -void test_xdp_attach(void) +void serial_test_xdp_attach(void) { __u32 duration = 0, id1, id2, id0 = 0, len; struct bpf_object *obj1, *obj2, *obj3; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index ad3ba81b4048..faa22b84f2ee 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ 
b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -519,7 +519,7 @@ static struct bond_test_case bond_test_cases[] = { { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, }, }; -void test_xdp_bonding(void) +void serial_test_xdp_bonding(void) { libbpf_print_fn_t old_print_fn; struct skeletons skeletons = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index 8755effd80b0..fd812bd43600 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -7,7 +7,7 @@ #define IFINDEX_LO 1 -void test_xdp_cpumap_attach(void) +void serial_test_xdp_cpumap_attach(void) { struct test_xdp_with_cpumap_helpers *skel; struct bpf_prog_info info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index c72af030ff10..3079d5568f8f 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -8,7 +8,7 @@ #define IFINDEX_LO 1 -void test_xdp_with_devmap_helpers(void) +static void test_xdp_with_devmap_helpers(void) { struct test_xdp_with_devmap_helpers *skel; struct bpf_prog_info info = {}; @@ -60,7 +60,7 @@ out_close: test_xdp_with_devmap_helpers__destroy(skel); } -void test_neg_xdp_devmap_helpers(void) +static void test_neg_xdp_devmap_helpers(void) { struct test_xdp_devmap_helpers *skel; @@ -72,7 +72,7 @@ void test_neg_xdp_devmap_helpers(void) } -void test_xdp_devmap_attach(void) +void serial_test_xdp_devmap_attach(void) { if (test__start_subtest("DEVMAP with programs in entries")) test_xdp_with_devmap_helpers(); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c index d2d7a283d72f..4e2a4fd56f67 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c +++ 
b/tools/testing/selftests/bpf/prog_tests/xdp_info.c @@ -4,7 +4,7 @@ #define IFINDEX_LO 1 -void test_xdp_info(void) +void serial_test_xdp_info(void) { __u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id; const char *file = "./xdp_dummy.o"; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index 46eed0a33c23..983ab0b47d30 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -6,7 +6,7 @@ #define IFINDEX_LO 1 -void test_xdp_link(void) +void serial_test_xdp_link(void) { __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err; DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); diff --git a/tools/testing/selftests/bpf/progs/atomics.c b/tools/testing/selftests/bpf/progs/atomics.c index c245345e41ca..16e57313204a 100644 --- a/tools/testing/selftests/bpf/progs/atomics.c +++ b/tools/testing/selftests/bpf/progs/atomics.c @@ -10,6 +10,8 @@ bool skip_tests __attribute((__section__(".data"))) = false; bool skip_tests = true; #endif +__u32 pid = 0; + __u64 add64_value = 1; __u64 add64_result = 0; __u32 add32_value = 1; @@ -21,6 +23,8 @@ __u64 add_noreturn_value = 1; SEC("fentry/bpf_fentry_test1") int BPF_PROG(add, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 add_stack_value = 1; @@ -45,6 +49,8 @@ __s64 sub_noreturn_value = 1; SEC("fentry/bpf_fentry_test1") int BPF_PROG(sub, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 sub_stack_value = 1; @@ -67,6 +73,8 @@ __u64 and_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(and, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS and64_result = __sync_fetch_and_and(&and64_value, 0x011ull << 32); @@ -86,6 +94,8 @@ __u64 or_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") 
int BPF_PROG(or, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS or64_result = __sync_fetch_and_or(&or64_value, 0x011ull << 32); or32_result = __sync_fetch_and_or(&or32_value, 0x011); @@ -104,6 +114,8 @@ __u64 xor_noreturn_value = (0x110ull << 32); SEC("fentry/bpf_fentry_test1") int BPF_PROG(xor, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32); xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011); @@ -123,6 +135,8 @@ __u32 cmpxchg32_result_succeed = 0; SEC("fentry/bpf_fentry_test1") int BPF_PROG(cmpxchg, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3); cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2); @@ -142,6 +156,8 @@ __u32 xchg32_result = 0; SEC("fentry/bpf_fentry_test1") int BPF_PROG(xchg, int a) { + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; #ifdef ENABLE_ATOMICS_TESTS __u64 val64 = 2; __u32 val32 = 2; diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c new file mode 100644 index 000000000000..d9a88dd1ea65 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <errno.h> +#include <linux/bpf.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_map; + +__u8 rand_vals[2500000]; +const __u32 nr_rand_bytes = 2500000; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(__u32)); + /* max entries and value_size will be set programmatically. + * They are configurable from the userspace bench program. 
+ */ +} array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_BLOOM_FILTER); + /* max entries, value_size, and # of hash functions will be set + * programmatically. They are configurable from the userspace + * bench program. + */ + __uint(map_extra, 3); +} bloom_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + /* max entries, key_size, and value_size, will be set + * programmatically. They are configurable from the userspace + * bench program. + */ +} hashmap SEC(".maps"); + +struct callback_ctx { + struct bpf_map *map; + bool update; +}; + +/* Tracks the number of hits, drops, and false hits */ +struct { + __u32 stats[3]; +} __attribute__((__aligned__(256))) percpu_stats[256]; + +const __u32 hit_key = 0; +const __u32 drop_key = 1; +const __u32 false_hit_key = 2; + +__u8 value_size; + +const volatile bool hashmap_use_bloom; +const volatile bool count_false_hits; + +int error = 0; + +static __always_inline void log_result(__u32 key) +{ + __u32 cpu = bpf_get_smp_processor_id(); + + percpu_stats[cpu & 255].stats[key]++; +} + +static __u64 +bloom_callback(struct bpf_map *map, __u32 *key, void *val, + struct callback_ctx *data) +{ + int err; + + if (data->update) + err = bpf_map_push_elem(data->map, val, 0); + else + err = bpf_map_peek_elem(data->map, val); + + if (err) { + error |= 1; + return 1; /* stop the iteration */ + } + + log_result(hit_key); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_lookup(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&bloom_map; + data.update = false; + + bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_update(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&bloom_map; + data.update = true; + + bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int bloom_hashmap_lookup(void *ctx) +{ + __u64 *result; + int 
i, err; + + __u32 index = bpf_get_prandom_u32(); + __u32 bitmask = (1ULL << 21) - 1; + + for (i = 0; i < 1024; i++, index += value_size) { + index = index & bitmask; + + if (hashmap_use_bloom) { + err = bpf_map_peek_elem(&bloom_map, + rand_vals + index); + if (err) { + if (err != -ENOENT) { + error |= 2; + return 0; + } + log_result(hit_key); + continue; + } + } + + result = bpf_map_lookup_elem(&hashmap, + rand_vals + index); + if (result) { + log_result(hit_key); + } else { + if (hashmap_use_bloom && count_false_hits) + log_result(false_hit_key); + log_result(drop_key); + } + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c new file mode 100644 index 000000000000..1316f3db79d9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct bpf_map; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1000); +} map_random_data SEC(".maps"); + +struct map_bloom_type { + __uint(type, BPF_MAP_TYPE_BLOOM_FILTER); + __type(value, __u32); + __uint(max_entries, 10000); + __uint(map_extra, 5); +} map_bloom SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct map_bloom_type); +} outer_map SEC(".maps"); + +struct callback_ctx { + struct bpf_map *map; +}; + +int error = 0; + +static __u64 +check_elem(struct bpf_map *map, __u32 *key, __u32 *val, + struct callback_ctx *data) +{ + int err; + + err = bpf_map_peek_elem(data->map, val); + if (err) { + error |= 1; + return 1; /* stop the iteration */ + } + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int inner_map(void *ctx) +{ + struct bpf_map *inner_map; + struct 
callback_ctx data; + int key = 0; + + inner_map = bpf_map_lookup_elem(&outer_map, &key); + if (!inner_map) { + error |= 2; + return 0; + } + + data.map = inner_map; + bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0); + + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int check_bloom(void *ctx) +{ + struct callback_ctx data; + + data.map = (struct bpf_map *)&map_bloom; + bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c index 8f44767a75fa..e5560a656030 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c @@ -11,7 +11,7 @@ /* *struct bitfields_only_mixed_types { * int a: 3; - * long int b: 2; + * long b: 2; * _Bool c: 1; * enum { * A = 0, @@ -27,7 +27,7 @@ struct bitfields_only_mixed_types { int a: 3; - long int b: 2; + long b: 2; bool c: 1; /* it's really a _Bool type */ enum { A, /* A = 0, dumper is very explicit */ @@ -44,8 +44,8 @@ struct bitfields_only_mixed_types { * char: 4; * int a: 4; * short b; - * long int c; - * long int d: 8; + * long c; + * long d: 8; * int e; * int f; *}; @@ -71,7 +71,7 @@ struct bitfield_mixed_with_others { *struct bitfield_flushed { * int a: 4; * long: 60; - * long int b: 16; + * long b: 16; *}; * */ diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c index 1cef3bec1dc7..e304b6204bd9 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c @@ -29,7 +29,7 @@ struct non_packed_fields { struct nested_packed { char: 4; int a: 4; - long int b; + long b; struct { char c; int d; @@ -44,7 +44,7 @@ union union_is_never_packed { union union_does_not_need_packing { struct { - long int a; + 
long a; int b; } __attribute__((packed)); int c; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c index 35c512818a56..f2661c8d2d90 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -9,7 +9,7 @@ /* ----- START-EXPECTED-OUTPUT ----- */ struct padded_implicitly { int a; - long int b; + long b; char c; }; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 8aaa24a00322..1c7105fcae3c 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -189,7 +189,7 @@ struct struct_with_embedded_stuff { const char *d; } e; union { - volatile long int f; + volatile long f; void * restrict g; }; }; diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c index 3f757e30d7a0..88638315c582 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c @@ -14,7 +14,6 @@ #include <sys/types.h> #include <sys/socket.h> -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; __u16 g_serv_port = 0; diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c index b565d997810a..d3f4c5e4fb69 100644 --- a/tools/testing/selftests/bpf/progs/connect4_dropper.c +++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c @@ -18,7 +18,7 @@ int connect_v4_dropper(struct bpf_sock_addr *ctx) { if (ctx->type != SOCK_STREAM) return VERDICT_PROCEED; - if (ctx->user_port == bpf_htons(60123)) + if (ctx->user_port == bpf_htons(60120)) return VERDICT_REJECT; return VERDICT_PROCEED; } diff --git 
a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index a943d394fd3a..b241932911db 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -31,8 +31,6 @@ #define IFNAMSIZ 16 #endif -int _version SEC("version") = 1; - __attribute__ ((noinline)) int do_bind(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c index 506d0f81a375..40266d2c737c 100644 --- a/tools/testing/selftests/bpf/progs/connect6_prog.c +++ b/tools/testing/selftests/bpf/progs/connect6_prog.c @@ -24,8 +24,6 @@ #define DST_REWRITE_PORT6 6666 -int _version SEC("version") = 1; - SEC("cgroup/connect6") int connect_v6_prog(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c index a979aaef2a76..27a632dd382e 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port4.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c @@ -13,7 +13,6 @@ #include <bpf_sockopt_helpers.h> char _license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; struct svc_addr { __be32 addr; diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c index afc8f1c5a9d6..19cad93e612f 100644 --- a/tools/testing/selftests/bpf/progs/connect_force_port6.c +++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c @@ -12,7 +12,6 @@ #include <bpf_sockopt_helpers.h> char _license[] SEC("license") = "GPL"; -int _version SEC("version") = 1; struct svc_addr { __be32 addr[4]; diff --git a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c index 8924e06bdef0..79b54a4fa244 100644 --- a/tools/testing/selftests/bpf/progs/dev_cgroup.c +++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c @@ -57,4 +57,3 @@ 
int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops.c b/tools/testing/selftests/bpf/progs/dummy_st_ops.c new file mode 100644 index 000000000000..ead87edb75e2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dummy_st_ops.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct bpf_dummy_ops_state { + int val; +} __attribute__((preserve_access_index)); + +struct bpf_dummy_ops { + int (*test_1)(struct bpf_dummy_ops_state *state); + int (*test_2)(struct bpf_dummy_ops_state *state, int a1, unsigned short a2, + char a3, unsigned long a4); +}; + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops/test_1") +int BPF_PROG(test_1, struct bpf_dummy_ops_state *state) +{ + int ret; + + if (!state) + return 0xf2f3f4f5; + + ret = state->val; + state->val = 0x5a; + return ret; +} + +__u64 test_2_args[5]; + +SEC("struct_ops/test_2") +int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a2, + char a3, unsigned long a4) +{ + test_2_args[0] = (unsigned long)state; + test_2_args[1] = a1; + test_2_args[2] = a2; + test_2_args[3] = a3; + test_2_args[4] = a4; + return 0; +} + +SEC(".struct_ops") +struct bpf_dummy_ops dummy_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c index 03a672d76353..bca92c9bd29a 100644 --- a/tools/testing/selftests/bpf/progs/fexit_sleep.c +++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c @@ -13,7 +13,7 @@ int fexit_cnt = 0; SEC("fentry/__x64_sys_nanosleep") int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) { - if ((int)bpf_get_current_pid_tgid() != pid) + if (bpf_get_current_pid_tgid() >> 32 != 
pid) return 0; fentry_cnt++; @@ -23,7 +23,7 @@ int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) SEC("fexit/__x64_sys_nanosleep") int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret) { - if ((int)bpf_get_current_pid_tgid() != pid) + if (bpf_get_current_pid_tgid() >> 32 != pid) return 0; fexit_cnt++; diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c index 6b42db2fe391..68587b1de34e 100644 --- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c @@ -37,4 +37,3 @@ int trace(void *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index d1d304c980f0..b1b711d9b214 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -683,5 +683,4 @@ int cg_skb(void *ctx) return 1; } -__u32 _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index 43649bce4c54..f718b2c212dc 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -68,4 +68,3 @@ int bpf_nextcnt(struct __sk_buff *skb) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/progs/recursion.c b/tools/testing/selftests/bpf/progs/recursion.c index 49f679375b9d..3c2423bb19e2 100644 --- a/tools/testing/selftests/bpf/progs/recursion.c +++ b/tools/testing/selftests/bpf/progs/recursion.c @@ -24,8 +24,8 @@ struct { int pass1 = 0; int pass2 = 0; -SEC("fentry/__htab_map_lookup_elem") -int BPF_PROG(on_lookup, struct bpf_map *map) 
+SEC("fentry/htab_map_delete_elem") +int BPF_PROG(on_delete, struct bpf_map *map) { int key = 0; @@ -35,10 +35,7 @@ int BPF_PROG(on_lookup, struct bpf_map *map) } if (map == (void *)&hash2) { pass2++; - /* htab_map_gen_lookup() will inline below call - * into direct call to __htab_map_lookup_elem() - */ - bpf_map_lookup_elem(&hash2, &key); + bpf_map_delete_elem(&hash2, &key); return 0; } diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index ac5abc34cde8..ea75a44cb7fc 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -18,8 +18,6 @@ #define DST_PORT 4040 #define DST_REWRITE_PORT4 4444 -int _version SEC("version") = 1; - SEC("cgroup/sendmsg4") int sendmsg_v4_prog(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c index 24694b1a8d82..bf9b46b806f6 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c @@ -22,8 +22,6 @@ #define DST_REWRITE_PORT6 6666 -int _version SEC("version") = 1; - SEC("cgroup/sendmsg6") int sendmsg_v6_prog(struct bpf_sock_addr *ctx) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index ca283af80d4e..95d5b941bc1f 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -2,8 +2,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; - SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c index eeaf6e75c9a2..80632954c5a1 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c +++ 
b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c @@ -3,8 +3,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; - SEC("sk_msg1") int bpf_prog1(struct sk_msg_md *msg) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index 73872c535cbb..e2468a6d01a5 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -2,8 +2,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, 20); diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c index c6d428a8d785..9fb241b97291 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -3,7 +3,6 @@ #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; #define SOL_CUSTOM 0xdeadbeef #define CUSTOM_INHERIT1 0 diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 7de534f38c3f..60c93aee2f4a 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -358,7 +358,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, void *payload) { void *location; - uint32_t len; + uint64_t len; data->str_lens[idx] = 0; location = calc_location(&cfg->str_locs[idx], tls_base); @@ -390,7 +390,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; void *location; - uint32_t len; + uint64_t len; int i; descr->tag_len = 0; /* presume no tag is set */ diff --git a/tools/testing/selftests/bpf/progs/tag.c 
b/tools/testing/selftests/bpf/progs/tag.c index b46b1bfac7da..1792f4eda095 100644 --- a/tools/testing/selftests/bpf/progs/tag.c +++ b/tools/testing/selftests/bpf/progs/tag.c @@ -8,9 +8,9 @@ #define __has_attribute(x) 0 #endif -#if __has_attribute(btf_tag) -#define __tag1 __attribute__((btf_tag("tag1"))) -#define __tag2 __attribute__((btf_tag("tag2"))) +#if __has_attribute(btf_decl_tag) +#define __tag1 __attribute__((btf_decl_tag("tag1"))) +#define __tag2 __attribute__((btf_decl_tag("tag2"))) volatile const bool skip_tests __tag1 __tag2 = false; #else #define __tag1 @@ -24,18 +24,23 @@ struct key_t { int c; } __tag1 __tag2; +typedef struct { + int a; + int b; +} value_t __tag1 __tag2; + struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 3); __type(key, struct key_t); - __type(value, __u64); + __type(value, value_t); } hashmap1 SEC(".maps"); static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2 { struct key_t key; - __u64 val = 1; + value_t val = {}; key.a = key.b = key.c = x; bpf_map_update_elem(&hashmap1, &key, &val, 0); diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c index 0cb3204ddb18..0988d79f1587 100644 --- a/tools/testing/selftests/bpf/progs/tcp_rtt.c +++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c @@ -3,7 +3,6 @@ #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; struct tcp_rtt_storage { __u32 invoked; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index 31538c9ed193..160ead6c67b2 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -4,8 +4,6 @@ #include <bpf/bpf_helpers.h> #include "bpf_legacy.h" -int _version SEC("version") = 1; - struct ipv_counts { unsigned int v4; unsigned int v6; diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c 
b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 6c5560162746..1884a5bd10f5 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -4,8 +4,6 @@ #include <bpf/bpf_helpers.h> #include "bpf_legacy.h" -int _version SEC("version") = 1; - struct ipv_counts { unsigned int v4; unsigned int v6; diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c index 506da7fd2da2..15e0f9945fe4 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c @@ -3,8 +3,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - struct ipv_counts { unsigned int v4; unsigned int v6; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c index 8b533db4a7a5..b2ded497572a 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c @@ -42,7 +42,16 @@ struct core_reloc_mods { core_reloc_mods_substruct_t h; }; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) +#else +#define CORE_READ(dst, src) ({ \ + int __sz = sizeof(*(dst)) < sizeof(*(src)) ? 
sizeof(*(dst)) : \ + sizeof(*(src)); \ + bpf_core_read((char *)(dst) + sizeof(*(dst)) - __sz, __sz, \ + (const char *)(src) + sizeof(*(src)) - __sz); \ +}) +#endif SEC("raw_tracepoint/sys_enter") int test_core_mods(void *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c index 01a002ade529..1705097d01d7 100644 --- a/tools/testing/selftests/bpf/progs/test_enable_stats.c +++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c @@ -13,6 +13,6 @@ __u64 count = 0; SEC("raw_tracepoint/sys_enter") int test_enable_stats(void *ctx) { - count += 1; + __sync_fetch_and_add(&count, 1); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_module.c b/tools/testing/selftests/bpf/progs/test_ksyms_module.c index d6a0b3086b90..0650d918c096 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_module.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_module.c @@ -2,24 +2,48 @@ /* Copyright (c) 2021 Facebook */ #include "vmlinux.h" - #include <bpf/bpf_helpers.h> +#define X_0(x) +#define X_1(x) x X_0(x) +#define X_2(x) x X_1(x) +#define X_3(x) x X_2(x) +#define X_4(x) x X_3(x) +#define X_5(x) x X_4(x) +#define X_6(x) x X_5(x) +#define X_7(x) x X_6(x) +#define X_8(x) x X_7(x) +#define X_9(x) x X_8(x) +#define X_10(x) x X_9(x) +#define REPEAT_256(Y) X_2(X_10(X_10(Y))) X_5(X_10(Y)) X_6(Y) + extern const int bpf_testmod_ksym_percpu __ksym; +extern void bpf_testmod_test_mod_kfunc(int i) __ksym; +extern void bpf_testmod_invalid_mod_kfunc(void) __ksym __weak; -int out_mod_ksym_global = 0; -bool triggered = false; +int out_bpf_testmod_ksym = 0; +const volatile int x = 0; -SEC("raw_tp/sys_enter") -int handler(const void *ctx) +SEC("tc") +int load(struct __sk_buff *skb) { - int *val; - __u32 cpu; - - val = (int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); - out_mod_ksym_global = *val; - triggered = true; + /* This will be kept by clang, but removed by verifier. 
Since it is + * marked as __weak, libbpf and gen_loader don't error out if BTF ID + * is not found for it, instead imm and off is set to 0 for it. + */ + if (x) + bpf_testmod_invalid_mod_kfunc(); + bpf_testmod_test_mod_kfunc(42); + out_bpf_testmod_ksym = *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); + return 0; +} +SEC("tc") +int load_256(struct __sk_buff *skb) +{ + /* this will fail if kfunc doesn't reuse its own btf fd index */ + REPEAT_256(bpf_testmod_test_mod_kfunc(42);); + bpf_testmod_test_mod_kfunc(42); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c index 5f8379aadb29..8eadbd4caf7a 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c @@ -38,7 +38,7 @@ int pass_handler(const void *ctx) /* tests existing symbols. */ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0); if (rq) - out__existing_typed = rq->cpu; + out__existing_typed = 0; out__existing_typeless = (__u64)&bpf_prog_active; /* tests non-existent symbols. 
*/ diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c index 33493911d87a..04fee08863cb 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb.c @@ -21,8 +21,6 @@ #include "test_iptunnel_common.h" #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; - static inline __u32 rol32(__u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index a6d91932dcd5..f416032ba858 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -47,5 +47,4 @@ int xdp_mimtest0(struct xdp_md *ctx) return XDP_PASS; } -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index bd37ceec5587..b36857093f71 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -27,6 +27,20 @@ int BPF_PROG(handle_raw_tp_bare, return 0; } +int raw_tp_writable_bare_in_val = 0; +int raw_tp_writable_bare_early_ret = 0; +int raw_tp_writable_bare_out_val = 0; + +SEC("raw_tp.w/bpf_testmod_test_writable_bare") +int BPF_PROG(handle_raw_tp_writable_bare, + struct bpf_testmod_test_writable_ctx *writable) +{ + raw_tp_writable_bare_in_val = writable->val; + writable->early_ret = raw_tp_writable_bare_early_ret; + writable->val = raw_tp_writable_bare_out_val; + return 0; +} + __u32 tp_btf_read_sz = 0; SEC("tp_btf/bpf_testmod_test_read") diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index d37ce29fd393..17d5b67744d5 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ 
b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -7,6 +7,13 @@ #include <bpf/bpf_tracing.h> struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); +} my_pid_map SEC(".maps"); + +struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __type(key, int); __type(value, int); @@ -15,11 +22,20 @@ struct { SEC("tp/raw_syscalls/sys_enter") int handle_sys_enter(void *ctx) { + int zero = 0, *my_pid, cur_pid; int cpu = bpf_get_smp_processor_id(); + my_pid = bpf_map_lookup_elem(&my_pid_map, &zero); + if (!my_pid) + return 1; + + cur_pid = bpf_get_current_pid_tgid() >> 32; + if (cur_pid != *my_pid) + return 1; + bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU, &cpu, sizeof(cpu)); - return 0; + return 1; } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c index 4ef2630292b2..0facea6cbbae 100644 --- a/tools/testing/selftests/bpf/progs/test_pinning.c +++ b/tools/testing/selftests/bpf/progs/test_pinning.c @@ -3,8 +3,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c index 5412e0c732c7..2a56db1094b8 100644 --- a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c +++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c @@ -3,8 +3,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 3cfd88141ddc..0558544e1ff0 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ 
-15,7 +15,6 @@ #include <bpf/bpf_endian.h> #define barrier() __asm__ __volatile__("": : :"memory") -int _version SEC("version") = 1; /* llvm will optimize both subprograms into exactly the same BPF assembly * diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h index 4dd9806ad73b..0fcd3ff0e38a 100644 --- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h +++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h @@ -8,8 +8,6 @@ #include <linux/pkt_cls.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - struct { __uint(type, MAP_TYPE); __uint(max_entries, 32); diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index 0f9bc258225e..7d56ed47cd4d 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -15,8 +15,6 @@ #include <bpf/bpf_helpers.h> #include "test_select_reuseport_common.h" -int _version SEC("version") = 1; - #ifndef offsetof #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 48534d810391..19d2465d9442 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -644,4 +644,3 @@ int multi_prog_redir2(struct bpf_sk_lookup *ctx) } char _license[] SEC("license") = "Dual BSD/GPL"; -__u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c index 552f2090665c..c304cd5b8cad 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c @@ -42,6 +42,4 @@ int log_cgroup_id(struct __sk_buff *skb) 
return TC_ACT_OK; } -int _version SEC("version") = 1; - char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index ba4dab09d19c..1d61b36e6067 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -3,7 +3,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; SEC("skb_ctx") diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c new file mode 100644 index 000000000000..a408ec95cba4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021 Hengqi Chen */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" + +const volatile pid_t my_pid = 0; +char path[256] = {}; + +SEC("fentry/unix_listen") +int BPF_PROG(unix_listen, struct socket *sock, int backlog) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + struct unix_sock *unix_sk; + int i, len; + + if (pid != my_pid) + return 0; + + unix_sk = (struct unix_sock *)bpf_skc_to_unix_sock(sock->sk); + if (!unix_sk) + return 0; + + if (!UNIX_ABSTRACT(unix_sk)) + return 0; + + len = unix_sk->addr->len - sizeof(short); + path[0] = '@'; + for (i = 1; i < len; i++) { + if (i >= sizeof(struct sockaddr_un)) + break; + + path[i] = unix_sk->addr->name->sun_path[i]; + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 441fa1c552c8..1b1187d2967b 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -5,6 +5,8 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#define 
__read_mostly SEC(".data.read_mostly") + struct s { int a; long long b; @@ -40,9 +42,20 @@ int kern_ver = 0; struct s out5 = {}; + +const volatile int in_dynarr_sz SEC(".rodata.dyn"); +const volatile int in_dynarr[4] SEC(".rodata.dyn") = { -1, -2, -3, -4 }; + +int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 }; + +int read_mostly_var __read_mostly; +int out_mostly_var; + SEC("raw_tp/sys_enter") int handler(const void *ctx) { + int i; + out1 = in1; out2 = in2; out3 = in3; @@ -53,6 +66,11 @@ int handler(const void *ctx) bpf_syscall = CONFIG_BPF_SYSCALL; kern_ver = LINUX_KERNEL_VERSION; + for (i = 0; i < in_dynarr_sz; i++) + out_dynarr[i] = in_dynarr[i]; + + out_mostly_var = read_mostly_var; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 1858435de7aa..2966564b8497 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -361,5 +361,4 @@ int bpf_prog10(struct sk_msg_md *msg) return SK_DROP; } -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c index 00f1456aaeda..325c9f193432 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -116,5 +116,4 @@ int prog_reuseport(struct sk_reuseport_md *reuse) return verdict; } -int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index 7449fdb1763b..36a707e7c7a7 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -73,4 +73,3 @@ int oncpu(struct random_urandom_args *args) } char _license[] 
SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c index adc83a54c352..2c5c602c6011 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c @@ -255,4 +255,3 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 94f50f7e94d6..3ded05280757 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -16,7 +16,6 @@ #include "test_tcpbpf.h" struct tcpbpf_globals global = {}; -int _version SEC("version") = 1; /** * SOL_TCP is defined in <netinet/tcp.h> while diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index 24e9344994ef..540181c115a8 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -28,8 +28,6 @@ struct { __type(value, __u32); } perf_event_map SEC(".maps"); -int _version SEC("version") = 1; - SEC("sockops") int bpf_testcb(struct bpf_sock_ops *skops) { diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index 4b825ee122cf..ce6974016f53 100644 --- a/tools/testing/selftests/bpf/progs/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -23,4 +23,3 @@ int oncpu(struct sched_switch_args *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git 
a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index e7b673117436..ef0dde83b85a 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -26,8 +26,6 @@ bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ } while (0) -int _version SEC("version") = 1; - struct geneve_opt { __be16 opt_class; __u8 type; diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index e6aa2fc6ce6b..d7a9a74b7245 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -20,8 +20,6 @@ #include <bpf/bpf_endian.h> #include "test_iptunnel_common.h" -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 256); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c index 27eb52dda92c..c98fb44156f0 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c @@ -16,8 +16,6 @@ #include <bpf/bpf_endian.h> #include "test_iptunnel_common.h" -int _version SEC("version") = 1; - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 256); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c index a5337cd9400b..b778cad45485 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c @@ -12,8 +12,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -int _version SEC("version") = 1; - SEC("redirect_to_111") int xdp_redirect_to_111(struct xdp_md *xdp) { diff --git a/tools/testing/selftests/bpf/progs/twfw.c b/tools/testing/selftests/bpf/progs/twfw.c new file mode 100644 index 000000000000..de1b18a62b46 --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/twfw.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include <linux/bpf.h> +#include <stdint.h> + +#define TWFW_MAX_TIERS (64) +/* + * load is successful + * #define TWFW_MAX_TIERS (64u)$ + */ + +struct twfw_tier_value { + unsigned long mask[1]; +}; + +struct rule { + uint8_t seqnum; +}; + +struct rules_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, struct rule); + __uint(max_entries, 1); +}; + +struct tiers_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, struct twfw_tier_value); + __uint(max_entries, 1); +}; + +struct rules_map rules SEC(".maps"); +struct tiers_map tiers SEC(".maps"); + +SEC("cgroup_skb/ingress") +int twfw_verifier(struct __sk_buff* skb) +{ + const uint32_t key = 0; + const struct twfw_tier_value* tier = bpf_map_lookup_elem(&tiers, &key); + if (!tier) + return 1; + + struct rule* rule = bpf_map_lookup_elem(&rules, &key); + if (!rule) + return 1; + + if (rule && rule->seqnum < TWFW_MAX_TIERS) { + /* rule->seqnum / 64 should always be 0 */ + unsigned long mask = tier->mask[rule->seqnum / 64]; + if (mask) + return 0; + } + return 1; +} diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh index b03a87571592..1453a53ed547 100755 --- a/tools/testing/selftests/bpf/test_bpftool_build.sh +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -90,6 +90,10 @@ echo -e "... through kbuild\n" if [ -f ".config" ] ; then make_and_clean tools/bpf + ## "make tools/bpf" sets $(OUTPUT) to ...tools/bpf/runqslower for + ## runqslower, but the default (used for the "clean" target) is .output. + ## Let's make sure we clean runqslower's directory properly. 
+ make -C tools/bpf/runqslower OUTPUT=${KDIR_ROOT_DIR}/tools/bpf/runqslower/ clean ## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed ## down from toplevel Makefile to bpftool's Makefile. diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 0619e06d745e..32c7a57867da 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -69,7 +69,7 @@ #define BTF_TYPE_FLOAT_ENC(name, sz) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) -#define BTF_TAG_ENC(value, type, component_idx) \ - BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TAG, 0, 0), type), (component_idx) +#define BTF_DECL_TAG_ENC(value, type, component_idx) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) #endif /* _TEST_BTF_H */ diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index 174b72a64a4c..dbd91221727d 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -26,22 +26,22 @@ if [[ -z $(ip netns identify $$) ]]; then type flow_dissector if ! unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/flow_dissector flow_dissector; then + /sys/fs/bpf/flow/_dissect flow_dissector; then echo "Unexpected unsuccessful attach in namespace" >&2 err=1 fi - $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \ + $bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \ flow_dissector if unshare --net $bpftool prog attach pinned \ - /sys/fs/bpf/flow/flow_dissector flow_dissector; then + /sys/fs/bpf/flow/_dissect flow_dissector; then echo "Unexpected successful attach in namespace" >&2 err=1 fi if ! 
$bpftool prog detach pinned \ - /sys/fs/bpf/flow/flow_dissector flow_dissector; then + /sys/fs/bpf/flow/_dissect flow_dissector; then echo "Failed to detach flow dissector" >&2 err=1 fi @@ -95,7 +95,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s flow_dissector +./flow_dissector_load -p bpf_flow.o -s _dissect # Setup tc qdisc add dev lo ingress diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 2ed01f615d20..c65986bd9d07 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -12,6 +12,11 @@ #include <string.h> #include <execinfo.h> /* backtrace */ #include <linux/membarrier.h> +#include <sys/sysinfo.h> /* get_nprocs */ +#include <netinet/in.h> +#include <sys/select.h> +#include <sys/socket.h> +#include <sys/un.h> /* Adapted from perf/util/string.c */ static bool glob_match(const char *str, const char *pat) @@ -45,9 +50,12 @@ struct prog_test_def { const char *test_name; int test_num; void (*run_test)(void); + void (*run_serial_test)(void); bool force_log; int error_cnt; int skip_cnt; + int sub_succ_cnt; + bool should_run; bool tested; bool need_cgroup_cleanup; @@ -97,6 +105,10 @@ static void dump_test_log(const struct prog_test_def *test, bool failed) if (stdout == env.stdout) return; + /* worker always holds log */ + if (env.worker_id != -1) + return; + fflush(stdout); /* exports env.log_buf & env.log_cnt */ if (env.verbosity > VERBOSE_NONE || test->force_log || failed) { @@ -107,8 +119,6 @@ static void dump_test_log(const struct prog_test_def *test, bool failed) fprintf(env.stdout, "\n"); } } - - fseeko(stdout, 0, SEEK_SET); /* rewind */ } static void skip_account(void) @@ -124,7 +134,8 @@ static void stdio_restore(void); /* A bunch of tests set custom affinity per-thread and/or per-process. Reset * it after each test/sub-test. 
*/ -static void reset_affinity() { +static void reset_affinity(void) +{ cpu_set_t cpuset; int i, err; @@ -165,21 +176,21 @@ static void restore_netns(void) } } -void test__end_subtest() +void test__end_subtest(void) { struct prog_test_def *test = env.test; int sub_error_cnt = test->error_cnt - test->old_error_cnt; dump_test_log(test, sub_error_cnt); - fprintf(env.stdout, "#%d/%d %s/%s:%s\n", + fprintf(stdout, "#%d/%d %s/%s:%s\n", test->test_num, test->subtest_num, test->test_name, test->subtest_name, sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); if (sub_error_cnt) - env.fail_cnt++; + test->error_cnt++; else if (test->skip_cnt == 0) - env.sub_succ_cnt++; + test->sub_succ_cnt++; skip_account(); free(test->subtest_name); @@ -217,7 +228,8 @@ bool test__start_subtest(const char *name) return true; } -void test__force_log() { +void test__force_log(void) +{ env.test->force_log = true; } @@ -370,7 +382,7 @@ int extract_build_id(char *build_id, size_t size) if (getline(&line, &len, fp) == -1) goto err; - fclose(fp); + pclose(fp); if (len > size) len = size; @@ -379,7 +391,7 @@ int extract_build_id(char *build_id, size_t size) free(line); return 0; err: - fclose(fp); + pclose(fp); return -1; } @@ -446,14 +458,17 @@ static int load_bpf_testmod(void) } /* extern declarations for test funcs */ -#define DEFINE_TEST(name) extern void test_##name(void); +#define DEFINE_TEST(name) \ + extern void test_##name(void) __weak; \ + extern void serial_test_##name(void) __weak; #include <prog_tests/tests.h> #undef DEFINE_TEST static struct prog_test_def prog_test_defs[] = { -#define DEFINE_TEST(name) { \ - .test_name = #name, \ - .run_test = &test_##name, \ +#define DEFINE_TEST(name) { \ + .test_name = #name, \ + .run_test = &test_##name, \ + .run_serial_test = &serial_test_##name, \ }, #include <prog_tests/tests.h> #undef DEFINE_TEST @@ -474,6 +489,8 @@ enum ARG_KEYS { ARG_LIST_TEST_NAMES = 'l', ARG_TEST_NAME_GLOB_ALLOWLIST = 'a', ARG_TEST_NAME_GLOB_DENYLIST = 'd', + 
ARG_NUM_WORKERS = 'j', + ARG_DEBUG = -1, }; static const struct argp_option opts[] = { @@ -495,6 +512,10 @@ static const struct argp_option opts[] = { "Run tests with name matching the pattern (supports '*' wildcard)." }, { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0, "Don't run tests with name matching the pattern (supports '*' wildcard)." }, + { "workers", ARG_NUM_WORKERS, "WORKERS", OPTION_ARG_OPTIONAL, + "Number of workers to run in parallel, default to number of cpus." }, + { "debug", ARG_DEBUG, NULL, 0, + "print extra debug information for test_progs." }, {}, }; @@ -650,7 +671,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)", errno); - return -1; + return -EINVAL; } } @@ -661,6 +682,20 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case ARG_LIST_TEST_NAMES: env->list_test_names = true; break; + case ARG_NUM_WORKERS: + if (arg) { + env->workers = atoi(arg); + if (!env->workers) { + fprintf(stderr, "Invalid number of worker: %s.", arg); + return -EINVAL; + } + } else { + env->workers = get_nprocs(); + } + break; + case ARG_DEBUG: + env->debug = true; + break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -678,7 +713,7 @@ static void stdio_hijack(void) env.stdout = stdout; env.stderr = stderr; - if (env.verbosity > VERBOSE_NONE) { + if (env.verbosity > VERBOSE_NONE && env.worker_id == -1) { /* nothing to do, output to stdout by default */ return; } @@ -704,10 +739,6 @@ static void stdio_restore(void) return; fclose(stdout); - free(env.log_buf); - - env.log_buf = NULL; - env.log_cnt = 0; stdout = env.stdout; stderr = env.stderr; @@ -747,7 +778,7 @@ int trigger_module_test_read(int read_sz) { int fd, err; - fd = open("/sys/kernel/bpf_testmod", O_RDONLY); + fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY); err = -errno; if (!ASSERT_GE(fd, 0, "testmod_file_open")) return err; @@ -769,7 +800,7 @@ int trigger_module_test_write(int write_sz) 
memset(buf, 'a', write_sz); buf[write_sz-1] = '\0'; - fd = open("/sys/kernel/bpf_testmod", O_WRONLY); + fd = open(BPF_TESTMOD_TEST_FILE, O_WRONLY); err = -errno; if (!ASSERT_GE(fd, 0, "testmod_file_open")) { free(buf); @@ -794,11 +825,498 @@ void crash_handler(int signum) dump_test_log(env.test, true); if (env.stdout) stdio_restore(); - + if (env.worker_id != -1) + fprintf(stderr, "[%d]: ", env.worker_id); fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum); backtrace_symbols_fd(bt, sz, STDERR_FILENO); } +static void sigint_handler(int signum) +{ + int i; + + for (i = 0; i < env.workers; i++) + if (env.worker_socks[i] > 0) + close(env.worker_socks[i]); +} + +static int current_test_idx; +static pthread_mutex_t current_test_lock; +static pthread_mutex_t stdout_output_lock; + +struct test_result { + int error_cnt; + int skip_cnt; + int sub_succ_cnt; + + size_t log_cnt; + char *log_buf; +}; + +static struct test_result test_results[ARRAY_SIZE(prog_test_defs)]; + +static inline const char *str_msg(const struct msg *msg, char *buf) +{ + switch (msg->type) { + case MSG_DO_TEST: + sprintf(buf, "MSG_DO_TEST %d", msg->do_test.test_num); + break; + case MSG_TEST_DONE: + sprintf(buf, "MSG_TEST_DONE %d (log: %d)", + msg->test_done.test_num, + msg->test_done.have_log); + break; + case MSG_TEST_LOG: + sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", + strlen(msg->test_log.log_buf), + msg->test_log.is_last); + break; + case MSG_EXIT: + sprintf(buf, "MSG_EXIT"); + break; + default: + sprintf(buf, "UNKNOWN"); + break; + } + + return buf; +} + +static int send_message(int sock, const struct msg *msg) +{ + char buf[256]; + + if (env.debug) + fprintf(stderr, "Sending msg: %s\n", str_msg(msg, buf)); + return send(sock, msg, sizeof(*msg), 0); +} + +static int recv_message(int sock, struct msg *msg) +{ + int ret; + char buf[256]; + + memset(msg, 0, sizeof(*msg)); + ret = recv(sock, msg, sizeof(*msg), 0); + if (ret >= 0) { + if (env.debug) + fprintf(stderr, "Received msg: 
%s\n", str_msg(msg, buf)); + } + return ret; +} + +static void run_one_test(int test_num) +{ + struct prog_test_def *test = &prog_test_defs[test_num]; + + env.test = test; + + if (test->run_test) + test->run_test(); + else if (test->run_serial_test) + test->run_serial_test(); + + /* ensure last sub-test is finalized properly */ + if (test->subtest_name) + test__end_subtest(); + + test->tested = true; + + dump_test_log(test, test->error_cnt); + + reset_affinity(); + restore_netns(); + if (test->need_cgroup_cleanup) + cleanup_cgroup_environment(); +} + +struct dispatch_data { + int worker_id; + int sock_fd; +}; + +static void *dispatch_thread(void *ctx) +{ + struct dispatch_data *data = ctx; + int sock_fd; + FILE *log_fd = NULL; + + sock_fd = data->sock_fd; + + while (true) { + int test_to_run = -1; + struct prog_test_def *test; + struct test_result *result; + + /* grab a test */ + { + pthread_mutex_lock(¤t_test_lock); + + if (current_test_idx >= prog_test_cnt) { + pthread_mutex_unlock(¤t_test_lock); + goto done; + } + + test = &prog_test_defs[current_test_idx]; + test_to_run = current_test_idx; + current_test_idx++; + + pthread_mutex_unlock(¤t_test_lock); + } + + if (!test->should_run || test->run_serial_test) + continue; + + /* run test through worker */ + { + struct msg msg_do_test; + + msg_do_test.type = MSG_DO_TEST; + msg_do_test.do_test.test_num = test_to_run; + if (send_message(sock_fd, &msg_do_test) < 0) { + perror("Fail to send command"); + goto done; + } + env.worker_current_test[data->worker_id] = test_to_run; + } + + /* wait for test done */ + { + int err; + struct msg msg_test_done; + + err = recv_message(sock_fd, &msg_test_done); + if (err < 0) + goto error; + if (msg_test_done.type != MSG_TEST_DONE) + goto error; + if (test_to_run != msg_test_done.test_done.test_num) + goto error; + + test->tested = true; + result = &test_results[test_to_run]; + + result->error_cnt = msg_test_done.test_done.error_cnt; + result->skip_cnt = 
msg_test_done.test_done.skip_cnt; + result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt; + + /* collect all logs */ + if (msg_test_done.test_done.have_log) { + log_fd = open_memstream(&result->log_buf, &result->log_cnt); + if (!log_fd) + goto error; + + while (true) { + struct msg msg_log; + + if (recv_message(sock_fd, &msg_log) < 0) + goto error; + if (msg_log.type != MSG_TEST_LOG) + goto error; + + fprintf(log_fd, "%s", msg_log.test_log.log_buf); + if (msg_log.test_log.is_last) + break; + } + fclose(log_fd); + log_fd = NULL; + } + /* output log */ + { + pthread_mutex_lock(&stdout_output_lock); + + if (result->log_cnt) { + result->log_buf[result->log_cnt] = '\0'; + fprintf(stdout, "%s", result->log_buf); + if (result->log_buf[result->log_cnt - 1] != '\n') + fprintf(stdout, "\n"); + } + + fprintf(stdout, "#%d %s:%s\n", + test->test_num, test->test_name, + result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK")); + + pthread_mutex_unlock(&stdout_output_lock); + } + + } /* wait for test done */ + } /* while (true) */ +error: + if (env.debug) + fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno)); + + if (log_fd) + fclose(log_fd); +done: + { + struct msg msg_exit; + + msg_exit.type = MSG_EXIT; + if (send_message(sock_fd, &msg_exit) < 0) { + if (env.debug) + fprintf(stderr, "[%d]: send_message msg_exit: %s.\n", + data->worker_id, strerror(errno)); + } + } + return NULL; +} + +static void print_all_error_logs(void) +{ + int i; + + if (env.fail_cnt) + fprintf(stdout, "\nAll error logs:\n"); + + /* print error logs again */ + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test; + struct test_result *result; + + test = &prog_test_defs[i]; + result = &test_results[i]; + + if (!test->tested || !result->error_cnt) + continue; + + fprintf(stdout, "\n#%d %s:%s\n", + test->test_num, test->test_name, + result->error_cnt ? "FAIL" : (result->skip_cnt ? 
"SKIP" : "OK")); + + if (result->log_cnt) { + result->log_buf[result->log_cnt] = '\0'; + fprintf(stdout, "%s", result->log_buf); + if (result->log_buf[result->log_cnt - 1] != '\n') + fprintf(stdout, "\n"); + } + } +} + +static int server_main(void) +{ + pthread_t *dispatcher_threads; + struct dispatch_data *data; + struct sigaction sigact_int = { + .sa_handler = sigint_handler, + .sa_flags = SA_RESETHAND, + }; + int i; + + sigaction(SIGINT, &sigact_int, NULL); + + dispatcher_threads = calloc(sizeof(pthread_t), env.workers); + data = calloc(sizeof(struct dispatch_data), env.workers); + + env.worker_current_test = calloc(sizeof(int), env.workers); + for (i = 0; i < env.workers; i++) { + int rc; + + data[i].worker_id = i; + data[i].sock_fd = env.worker_socks[i]; + rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]); + if (rc < 0) { + perror("Failed to launch dispatcher thread"); + exit(EXIT_ERR_SETUP_INFRA); + } + } + + /* wait for all dispatcher to finish */ + for (i = 0; i < env.workers; i++) { + while (true) { + int ret = pthread_tryjoin_np(dispatcher_threads[i], NULL); + + if (!ret) { + break; + } else if (ret == EBUSY) { + if (env.debug) + fprintf(stderr, "Still waiting for thread %d (test %d).\n", + i, env.worker_current_test[i] + 1); + usleep(1000 * 1000); + continue; + } else { + fprintf(stderr, "Unexpected error joining dispatcher thread: %d", ret); + break; + } + } + } + free(dispatcher_threads); + free(env.worker_current_test); + free(data); + + /* run serial tests */ + save_netns(); + + for (int i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; + struct test_result *result = &test_results[i]; + + if (!test->should_run || !test->run_serial_test) + continue; + + stdio_hijack(); + + run_one_test(i); + + stdio_restore(); + if (env.log_buf) { + result->log_cnt = env.log_cnt; + result->log_buf = strdup(env.log_buf); + + free(env.log_buf); + env.log_buf = NULL; + env.log_cnt = 0; + } + restore_netns(); + 
+ fprintf(stdout, "#%d %s:%s\n", + test->test_num, test->test_name, + test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); + + result->error_cnt = test->error_cnt; + result->skip_cnt = test->skip_cnt; + result->sub_succ_cnt = test->sub_succ_cnt; + } + + /* generate summary */ + fflush(stderr); + fflush(stdout); + + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *current_test; + struct test_result *result; + + current_test = &prog_test_defs[i]; + result = &test_results[i]; + + if (!current_test->tested) + continue; + + env.succ_cnt += result->error_cnt ? 0 : 1; + env.skip_cnt += result->skip_cnt; + if (result->error_cnt) + env.fail_cnt++; + env.sub_succ_cnt += result->sub_succ_cnt; + } + + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + + print_all_error_logs(); + + /* reap all workers */ + for (i = 0; i < env.workers; i++) { + int wstatus, pid; + + pid = waitpid(env.worker_pids[i], &wstatus, 0); + if (pid != env.worker_pids[i]) + perror("Unable to reap worker"); + } + + return 0; +} + +static int worker_main(int sock) +{ + save_netns(); + + while (true) { + /* receive command */ + struct msg msg; + + if (recv_message(sock, &msg) < 0) + goto out; + + switch (msg.type) { + case MSG_EXIT: + if (env.debug) + fprintf(stderr, "[%d]: worker exit.\n", + env.worker_id); + goto out; + case MSG_DO_TEST: { + int test_to_run; + struct prog_test_def *test; + struct msg msg_done; + + test_to_run = msg.do_test.test_num; + test = &prog_test_defs[test_to_run]; + + if (env.debug) + fprintf(stderr, "[%d]: #%d:%s running.\n", + env.worker_id, + test_to_run + 1, + test->test_name); + + stdio_hijack(); + + run_one_test(test_to_run); + + stdio_restore(); + + memset(&msg_done, 0, sizeof(msg_done)); + msg_done.type = MSG_TEST_DONE; + msg_done.test_done.test_num = test_to_run; + msg_done.test_done.error_cnt = test->error_cnt; + msg_done.test_done.skip_cnt = test->skip_cnt; + 
msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt; + msg_done.test_done.have_log = false; + + if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) { + if (env.log_cnt) + msg_done.test_done.have_log = true; + } + if (send_message(sock, &msg_done) < 0) { + perror("Fail to send message done"); + goto out; + } + + /* send logs */ + if (msg_done.test_done.have_log) { + char *src; + size_t slen; + + src = env.log_buf; + slen = env.log_cnt; + while (slen) { + struct msg msg_log; + char *dest; + size_t len; + + memset(&msg_log, 0, sizeof(msg_log)); + msg_log.type = MSG_TEST_LOG; + dest = msg_log.test_log.log_buf; + len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen; + memcpy(dest, src, len); + + src += len; + slen -= len; + if (!slen) + msg_log.test_log.is_last = true; + + assert(send_message(sock, &msg_log) >= 0); + } + } + if (env.log_buf) { + free(env.log_buf); + env.log_buf = NULL; + env.log_cnt = 0; + } + if (env.debug) + fprintf(stderr, "[%d]: #%d:%s done.\n", + env.worker_id, + test_to_run + 1, + test->test_name); + break; + } /* case MSG_DO_TEST */ + default: + if (env.debug) + fprintf(stderr, "[%d]: unknown message.\n", env.worker_id); + return -1; + } + } +out: + return 0; +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -809,7 +1327,7 @@ int main(int argc, char **argv) struct sigaction sigact = { .sa_handler = crash_handler, .sa_flags = SA_RESETHAND, - }; + }; int err, i; sigaction(SIGSEGV, &sigact, NULL); @@ -837,21 +1355,84 @@ int main(int argc, char **argv) return -1; } - save_netns(); - stdio_hijack(); + env.stdout = stdout; + env.stderr = stderr; + env.has_testmod = true; if (!env.list_test_names && load_bpf_testmod()) { fprintf(env.stderr, "WARNING! 
Selftests relying on bpf_testmod.ko will be skipped.\n"); env.has_testmod = false; } + + /* initializing tests */ for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; - env.test = test; test->test_num = i + 1; - - if (!should_run(&env.test_selector, + if (should_run(&env.test_selector, test->test_num, test->test_name)) + test->should_run = true; + else + test->should_run = false; + + if ((test->run_test == NULL && test->run_serial_test == NULL) || + (test->run_test != NULL && test->run_serial_test != NULL)) { + fprintf(stderr, "Test %d:%s must have either test_%s() or serial_test_%sl() defined.\n", + test->test_num, test->test_name, test->test_name, test->test_name); + exit(EXIT_ERR_SETUP_INFRA); + } + } + + /* ignore workers if we are just listing */ + if (env.get_test_cnt || env.list_test_names) + env.workers = 0; + + /* launch workers if requested */ + env.worker_id = -1; /* main process */ + if (env.workers) { + env.worker_pids = calloc(sizeof(__pid_t), env.workers); + env.worker_socks = calloc(sizeof(int), env.workers); + if (env.debug) + fprintf(stdout, "Launching %d workers.\n", env.workers); + for (i = 0; i < env.workers; i++) { + int sv[2]; + pid_t pid; + + if (socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sv) < 0) { + perror("Fail to create worker socket"); + return -1; + } + pid = fork(); + if (pid < 0) { + perror("Failed to fork worker"); + return -1; + } else if (pid != 0) { /* main process */ + close(sv[1]); + env.worker_pids[i] = pid; + env.worker_socks[i] = sv[0]; + } else { /* inside each worker process */ + close(sv[0]); + env.worker_id = i; + return worker_main(sv[1]); + } + } + + if (env.worker_id == -1) { + server_main(); + goto out; + } + } + + /* The rest of the main process */ + + /* on single mode */ + save_netns(); + + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; + struct test_result *result; + + if (!test->should_run) continue; if (env.get_test_cnt) { @@ 
-865,33 +1446,35 @@ int main(int argc, char **argv) continue; } - test->run_test(); - /* ensure last sub-test is finalized properly */ - if (test->subtest_name) - test__end_subtest(); + stdio_hijack(); - test->tested = true; + run_one_test(i); - dump_test_log(test, test->error_cnt); + stdio_restore(); fprintf(env.stdout, "#%d %s:%s\n", test->test_num, test->test_name, test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK")); + result = &test_results[i]; + result->error_cnt = test->error_cnt; + if (env.log_buf) { + result->log_buf = strdup(env.log_buf); + result->log_cnt = env.log_cnt; + + free(env.log_buf); + env.log_buf = NULL; + env.log_cnt = 0; + } + if (test->error_cnt) env.fail_cnt++; else env.succ_cnt++; - skip_account(); - reset_affinity(); - restore_netns(); - if (test->need_cgroup_cleanup) - cleanup_cgroup_environment(); + skip_account(); + env.sub_succ_cnt += test->sub_succ_cnt; } - if (!env.list_test_names && env.has_testmod) - unload_bpf_testmod(); - stdio_restore(); if (env.get_test_cnt) { printf("%d\n", env.succ_cnt); @@ -904,14 +1487,18 @@ int main(int argc, char **argv) fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + print_all_error_logs(); + + close(env.saved_netns_fd); out: + if (!env.list_test_names && env.has_testmod) + unload_bpf_testmod(); free_str_set(&env.test_selector.blacklist); free_str_set(&env.test_selector.whitelist); free(env.test_selector.num_set); free_str_set(&env.subtest_selector.blacklist); free_str_set(&env.subtest_selector.whitelist); free(env.subtest_selector.num_set); - close(env.saved_netns_fd); if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) return EXIT_NO_TEST; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 94bef0aa74cf..93c1ff705533 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -62,6 +62,7 @@ struct test_env { struct 
test_selector test_selector; struct test_selector subtest_selector; bool verifier_stats; + bool debug; enum verbosity verbosity; bool jit_enabled; @@ -69,7 +70,8 @@ struct test_env { bool get_test_cnt; bool list_test_names; - struct prog_test_def *test; + struct prog_test_def *test; /* current running tests */ + FILE *stdout; FILE *stderr; char *log_buf; @@ -82,6 +84,38 @@ struct test_env { int skip_cnt; /* skipped tests */ int saved_netns_fd; + int workers; /* number of worker process */ + int worker_id; /* id number of current worker, main process is -1 */ + pid_t *worker_pids; /* array of worker pids */ + int *worker_socks; /* array of worker socks */ + int *worker_current_test; /* array of current running test for each worker */ +}; + +#define MAX_LOG_TRUNK_SIZE 8192 +enum msg_type { + MSG_DO_TEST = 0, + MSG_TEST_DONE = 1, + MSG_TEST_LOG = 2, + MSG_EXIT = 255, +}; +struct msg { + enum msg_type type; + union { + struct { + int test_num; + } do_test; + struct { + int test_num; + int sub_succ_cnt; + int error_cnt; + int skip_cnt; + bool have_log; + } test_done; + struct { + char log_buf[MAX_LOG_TRUNK_SIZE + 1]; + bool is_last; + } test_log; + }; }; extern struct test_env env; @@ -301,3 +335,5 @@ int trigger_module_test_write(int write_sz); #else #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep" #endif + +#define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod" diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index eefd445b96fc..1ba7e7346afb 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -139,6 +139,7 @@ struct sockmap_options { bool sendpage; bool data_test; bool drop_expected; + bool check_recved_len; int iov_count; int iov_length; int rate; @@ -556,8 +557,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, int err, i, flags = MSG_NOSIGNAL; bool drop = opt->drop_expected; bool data = opt->data_test; + int iov_alloc_length = 
iov_length; - err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx); + if (!tx && opt->check_recved_len) + iov_alloc_length *= 2; + + err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, data, tx); if (err) goto out_errno; if (peek_flag) { @@ -665,6 +670,13 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, s->bytes_recvd += recv; + if (opt->check_recved_len && s->bytes_recvd > total_bytes) { + errno = EMSGSIZE; + fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n", + s->bytes_recvd, total_bytes); + goto out_errno; + } + if (data) { int chunk_sz = opt->sendpage ? iov_length * cnt : @@ -744,7 +756,8 @@ static int sendmsg_test(struct sockmap_options *opt) rxpid = fork(); if (rxpid == 0) { - iov_buf -= (txmsg_pop - txmsg_start_pop + 1); + if (txmsg_pop || txmsg_start_pop) + iov_buf -= (txmsg_pop - txmsg_start_pop + 1); if (opt->drop_expected || txmsg_ktls_skb_drop) _exit(0); @@ -1680,12 +1693,27 @@ static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt) { txmsg_pass = 1; skb_use_parser = 512; + if (ktls == 1) + skb_use_parser = 570; opt->iov_length = 256; opt->iov_count = 1; opt->rate = 2; test_exec(cgrp, opt); } +static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt) +{ + if (ktls == 1) + return; + skb_use_parser = 10; + opt->iov_length = 20; + opt->iov_count = 1; + opt->rate = 1; + opt->check_recved_len = true; + test_exec(cgrp, opt); + opt->check_recved_len = false; +} + char *map_names[] = { "sock_map", "sock_map_txmsg", @@ -1784,7 +1812,8 @@ struct _test test[] = { {"txmsg test pull-data", test_txmsg_pull}, {"txmsg test pop-data", test_txmsg_pop}, {"txmsg test push/pop data", test_txmsg_push_pop}, - {"txmsg text ingress parser", test_txmsg_ingress_parser}, + {"txmsg test ingress parser", test_txmsg_ingress_parser}, + {"txmsg test ingress parser2", test_txmsg_ingress_parser2}, }; static int check_whitelist(struct _test *t, struct sockmap_options *opt) diff --git 
a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index a20a919244c0..a3bb6d399daa 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -124,7 +124,7 @@ static struct sysctl_test tests[] = { .descr = "ctx:write sysctl:write read ok narrow", .insns = { /* u64 w = (u16)write & 1; */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, write)), #else @@ -184,7 +184,7 @@ static struct sysctl_test tests[] = { .descr = "ctx:file_pos sysctl:read read ok narrow", .insns = { /* If (file_pos == X) */ -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, file_pos)), #else diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3a9e332c5e36..25afe423b3f0 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -47,6 +47,10 @@ #include "test_btf.h" #include "../../../include/linux/filter.h" +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 @@ -974,7 +978,7 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, if (err) { switch (saved_errno) { - case 524/*ENOTSUPP*/: + case ENOTSUPP: printf("Did not run the program (not supported) "); return 0; case EPERM: @@ -1119,6 +1123,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto close_fds; } + if (fd_prog < 0 && saved_errno == ENOTSUPP) { + printf("SKIP (program uses an unsupported feature)\n"); + skips++; + goto close_fds; + } + alignment_prevented_execution = 0; if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) { diff --git a/tools/testing/selftests/bpf/trace_helpers.c 
b/tools/testing/selftests/bpf/trace_helpers.c index 5100a169b72b..7b7f918eda77 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -118,42 +118,6 @@ out: return err; } -/* find the address of the next symbol of the same type, this can be used - * to determine the end of a function. - */ -int kallsyms_find_next(const char *sym, unsigned long long *addr) -{ - char type, found_type, name[500]; - unsigned long long value; - bool found = false; - int err = 0; - FILE *f; - - f = fopen("/proc/kallsyms", "r"); - if (!f) - return -EINVAL; - - while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { - /* Different types of symbols in kernel modules are mixed - * in /proc/kallsyms. Only return the next matching type. - * Use tolower() for type so that 'T' matches 't'. - */ - if (found && found_type == tolower(type)) { - *addr = value; - goto out; - } - if (strcmp(name, sym) == 0) { - found = true; - found_type = tolower(type); - } - } - err = -ENOENT; - -out: - fclose(f); - return err; -} - void read_trace_pipe(void) { int trace_fd; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index bc8ed86105d9..d907b445524d 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -16,11 +16,6 @@ long ksym_get_addr(const char *name); /* open kallsyms and find addresses on the fly, faster than load + search. 
*/ int kallsyms_find(const char *sym, unsigned long long *addr); -/* find the address of the next symbol, this can be used to determine the - * end of a function - */ -int kallsyms_find_next(const char *sym, unsigned long long *addr); - void read_trace_pipe(void); ssize_t get_uprobe_offset(const void *addr, ssize_t base); diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index 1b1c798e9248..1b138cd2b187 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -186,7 +186,7 @@ }, .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", - .errstr = "R0 unbounded memory access", + .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index 6e52dfc64415..c22dc83a41fd 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -119,3 +119,41 @@ }, .result = ACCEPT, }, +{ + "Dest pointer in r0 - fail", + .insns = { + /* val = 0; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* r0 = atomic_cmpxchg(&val, r0, 1); */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* if (r0 != 0) exit(1); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "Dest pointer in r0 - succeed", + .insns = { + /* r0 = &val */ + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + /* val = r0; */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + /* r0 = atomic_cmpxchg(&val, r0, 0); */ + BPF_MOV64_IMM(BPF_REG_1, 0), + 
BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* r1 = *r0 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch.c b/tools/testing/selftests/bpf/verifier/atomic_fetch.c new file mode 100644 index 000000000000..3bc9ff7a860b --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_fetch.c @@ -0,0 +1,57 @@ +#define __ATOMIC_FETCH_OP_TEST(src_reg, dst_reg, operand1, op, operand2, expect) \ + { \ + "atomic fetch " #op ", src=" #dst_reg " dst=" #dst_reg, \ + .insns = { \ + /* u64 val = operan1; */ \ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, operand1), \ + /* u64 old = atomic_fetch_add(&val, operand2); */ \ + BPF_MOV64_REG(dst_reg, BPF_REG_10), \ + BPF_MOV64_IMM(src_reg, operand2), \ + BPF_ATOMIC_OP(BPF_DW, op, \ + dst_reg, src_reg, -8), \ + /* if (old != operand1) exit(1); */ \ + BPF_JMP_IMM(BPF_JEQ, src_reg, operand1, 2), \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_EXIT_INSN(), \ + /* if (val != result) exit (2); */ \ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), \ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, expect, 2), \ + BPF_MOV64_IMM(BPF_REG_0, 2), \ + BPF_EXIT_INSN(), \ + /* exit(0); */ \ + BPF_MOV64_IMM(BPF_REG_0, 0), \ + BPF_EXIT_INSN(), \ + }, \ + .result = ACCEPT, \ + } +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 1, BPF_ADD | BPF_FETCH, 2, 3), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_AND | BPF_FETCH, 0x011, 
0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XCHG, 0x011, 0x011), +__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XCHG, 0x011, 0x011), +#undef __ATOMIC_FETCH_OP_TEST diff --git a/tools/testing/selftests/bpf/verifier/atomic_invalid.c 
b/tools/testing/selftests/bpf/verifier/atomic_invalid.c new file mode 100644 index 000000000000..39272720b2f6 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_invalid.c @@ -0,0 +1,25 @@ +#define __INVALID_ATOMIC_ACCESS_TEST(op) \ + { \ + "atomic " #op " access through non-pointer ", \ + .insns = { \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_MOV64_IMM(BPF_REG_1, 0), \ + BPF_ATOMIC_OP(BPF_DW, op, BPF_REG_1, BPF_REG_0, -8), \ + BPF_MOV64_IMM(BPF_REG_0, 0), \ + BPF_EXIT_INSN(), \ + }, \ + .result = REJECT, \ + .errstr = "R1 invalid mem access 'inv'" \ + } +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD), +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD), +__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_AND), +__INVALID_ATOMIC_ACCESS_TEST(BPF_AND | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_OR), +__INVALID_ATOMIC_ACCESS_TEST(BPF_OR | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR), +__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR | BPF_FETCH), +__INVALID_ATOMIC_ACCESS_TEST(BPF_XCHG), +__INVALID_ATOMIC_ACCESS_TEST(BPF_CMPXCHG), diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 336a749673d1..d7b74eb28333 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1,4 +1,27 @@ { + "calls: invalid kfunc call not eliminated", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = REJECT, + .errstr = "invalid kernel function call not eliminated in verifier pass", +}, +{ + "calls: invalid kfunc call unreachable", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 0, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + 
.result = ACCEPT, +}, +{ "calls: basic sanity", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index 9e1a30b94197..83cecfbd6739 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -502,7 +502,7 @@ "check skb->hash byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), #else @@ -537,7 +537,7 @@ "check skb->hash byte load permitted 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 3), #else @@ -646,7 +646,7 @@ "check skb->hash half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), #else @@ -661,7 +661,7 @@ "check skb->hash half load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 2), #else @@ -676,7 +676,7 @@ "check skb->hash half load not permitted, unaligned 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 1), #else @@ -693,7 +693,7 @@ "check skb->hash half load not permitted, unaligned 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 3), 
#else @@ -951,7 +951,7 @@ "check skb->data half load not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, data)), #else diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index eedcb752bf70..79021c30e51e 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -103,6 +103,53 @@ .retval = 2, }, { + "jit: various div tests", + .insns = { + BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL), + BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_LD_IMM64(BPF_REG_3, 0xbeefULL), + BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL), + BPF_LD_IMM64(BPF_REG_3, 0x2bULL), + BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + 
BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ "jit: jsgt, jslt", .insns = { BPF_LD_IMM64(BPF_REG_1, 0x80000000ULL), diff --git a/tools/testing/selftests/bpf/verifier/lwt.c b/tools/testing/selftests/bpf/verifier/lwt.c index 2cab6a3966bb..5c8944d0b091 100644 --- a/tools/testing/selftests/bpf/verifier/lwt.c +++ b/tools/testing/selftests/bpf/verifier/lwt.c @@ -174,7 +174,7 @@ "check skb->tc_classid half load not permitted for lwt prog", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, tc_classid)), #else diff --git a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c index 471c1a5950d8..d8a9b1a1f9a2 100644 --- a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c +++ b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c @@ -2,7 +2,7 @@ "check bpf_perf_event_data->sample_period byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -18,7 +18,7 @@ "check bpf_perf_event_data->sample_period half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -34,7 +34,7 @@ "check bpf_perf_event_data->sample_period word load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else diff --git 
a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 8889b3f55236..027198768fad 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -224,10 +224,10 @@ EOF -nodefaults \ -display none \ -serial mon:stdio \ - -cpu kvm64 \ + -cpu host \ -enable-kvm \ - -smp 4 \ - -m 2G \ + -smp 8 \ + -m 4G \ -drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \ -kernel "${kernel_bzimage}" \ -append "root=/dev/vda rw console=ttyS0,115200" diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index 79a3453dab25..30f12637f4e4 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -187,7 +187,7 @@ int main(int argc, char **argv) return 1; } - map = bpf_map__next(NULL, obj); + map = bpf_object__next_map(obj, NULL); if (map) map_fd = bpf_map__fd(map); if (!map || map_fd < 0) { diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 1d64891e6492..d425688cf59c 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -276,12 +276,12 @@ int seccomp(unsigned int op, unsigned int flags, void *args) } #endif -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) #else -#error "wut? Unknown __BYTE_ORDER?!" +#error "wut? Unknown __BYTE_ORDER__?!" #endif #define SIBLING_EXIT_UNKILLED 0xbadbeef |