Diffstat (limited to 'kernel/bpf/verifier.c')
 kernel/bpf/verifier.c | 543 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 506 insertions(+), 37 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ffc3e53f5300..a0482e1c4a77 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -23,7 +23,7 @@
#include "disasm.h"
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
@@ -171,6 +171,9 @@ struct bpf_verifier_stack_elem {
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
#define BPF_COMPLEXITY_LIMIT_STATES 64
+#define BPF_MAP_KEY_POISON (1ULL << 63)
+#define BPF_MAP_KEY_SEEN (1ULL << 62)
+
#define BPF_MAP_PTR_UNPRIV 1UL
#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
POISON_POINTER_DELTA))
@@ -178,12 +181,12 @@ struct bpf_verifier_stack_elem {
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
- return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
+ return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}
static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
- return aux->map_state & BPF_MAP_PTR_UNPRIV;
+ return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}
static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
@@ -191,8 +194,31 @@ static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
{
BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
unpriv |= bpf_map_ptr_unpriv(aux);
- aux->map_state = (unsigned long)map |
- (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
+ aux->map_ptr_state = (unsigned long)map |
+ (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
+}
+
+static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_key_state & BPF_MAP_KEY_POISON;
+}
+
+static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
+{
+ return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
+}
+
+static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
+}
+
+static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
+{
+ bool poisoned = bpf_map_key_poisoned(aux);
+
+ aux->map_key_state = state | BPF_MAP_KEY_SEEN |
+ (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
struct bpf_call_arg_meta {
@@ -205,8 +231,11 @@ struct bpf_call_arg_meta {
u64 msize_umax_value;
int ref_obj_id;
int func_id;
+ u32 btf_id;
};
+struct btf *btf_vmlinux;
+
static DEFINE_MUTEX(bpf_verifier_lock);
static const struct bpf_line_info *
@@ -243,6 +272,10 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
n = min(log->len_total - log->len_used - 1, n);
log->kbuf[n] = '\0';
+ if (log->level == BPF_LOG_KERNEL) {
+ pr_err("BPF:%s\n", log->kbuf);
+ return;
+ }
if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
log->len_used += n;
else
@@ -280,6 +313,19 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
va_end(args);
}
+__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ if (!bpf_verifier_log_needed(log))
+ return;
+
+ va_start(args, fmt);
+ bpf_verifier_vlog(log, fmt, args);
+ va_end(args);
+}
+
static const char *ltrim(const char *s)
{
while (isspace(*s))
@@ -400,6 +446,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
[PTR_TO_TP_BUFFER] = "tp_buffer",
[PTR_TO_XDP_SOCK] = "xdp_sock",
+ [PTR_TO_BTF_ID] = "ptr_",
};
static char slot_type_char[] = {
@@ -430,6 +477,12 @@ static struct bpf_func_state *func(struct bpf_verifier_env *env,
return cur->frame[reg->frameno];
}
+const char *kernel_type_name(u32 id)
+{
+ return btf_name_by_offset(btf_vmlinux,
+ btf_type_by_id(btf_vmlinux, id)->name_off);
+}
+
static void print_verifier_state(struct bpf_verifier_env *env,
const struct bpf_func_state *state)
{
@@ -454,6 +507,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
+ if (t == PTR_TO_BTF_ID)
+ verbose(env, "%s", kernel_type_name(reg->btf_id));
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
@@ -978,6 +1033,17 @@ static void __reg_bound_offset(struct bpf_reg_state *reg)
reg->umax_value));
}
+static void __reg_bound_offset32(struct bpf_reg_state *reg)
+{
+ u64 mask = 0xffffFFFF;
+ struct tnum range = tnum_range(reg->umin_value & mask,
+ reg->umax_value & mask);
+ struct tnum lo32 = tnum_cast(reg->var_off, 4);
+ struct tnum hi32 = tnum_lshift(tnum_rshift(reg->var_off, 32), 32);
+
+ reg->var_off = tnum_or(hi32, tnum_intersect(lo32, range));
+}
+
/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
@@ -2331,10 +2397,12 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
- enum bpf_access_type t, enum bpf_reg_type *reg_type)
+ enum bpf_access_type t, enum bpf_reg_type *reg_type,
+ u32 *btf_id)
{
struct bpf_insn_access_aux info = {
.reg_type = *reg_type,
+ .log = &env->log,
};
if (env->ops->is_valid_access &&
@@ -2348,7 +2416,10 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
*/
*reg_type = info.reg_type;
- env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+ if (*reg_type == PTR_TO_BTF_ID)
+ *btf_id = info.btf_id;
+ else
+ env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
/* remember the offset of last byte accessed in ctx */
if (env->prog->aux->max_ctx_offset < off + size)
env->prog->aux->max_ctx_offset = off + size;
@@ -2739,6 +2810,88 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
reg->smax_value = reg->umax_value;
}
+static bool bpf_map_is_rdonly(const struct bpf_map *map)
+{
+ return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
+}
+
+static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
+{
+ void *ptr;
+ u64 addr;
+ int err;
+
+ err = map->ops->map_direct_value_addr(map, &addr, off);
+ if (err)
+ return err;
+ ptr = (void *)(long)addr + off;
+
+ switch (size) {
+ case sizeof(u8):
+ *val = (u64)*(u8 *)ptr;
+ break;
+ case sizeof(u16):
+ *val = (u64)*(u16 *)ptr;
+ break;
+ case sizeof(u32):
+ *val = (u64)*(u32 *)ptr;
+ break;
+ case sizeof(u64):
+ *val = *(u64 *)ptr;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs,
+ int regno, int off, int size,
+ enum bpf_access_type atype,
+ int value_regno)
+{
+ struct bpf_reg_state *reg = regs + regno;
+ const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
+ const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ u32 btf_id;
+ int ret;
+
+ if (atype != BPF_READ) {
+ verbose(env, "only read is supported\n");
+ return -EACCES;
+ }
+
+ if (off < 0) {
+ verbose(env,
+ "R%d is ptr_%s invalid negative access: off=%d\n",
+ regno, tname, off);
+ return -EACCES;
+ }
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env,
+ "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
+ regno, tname, off, tn_buf);
+ return -EACCES;
+ }
+
+ ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
+ if (ret < 0)
+ return ret;
+
+ if (ret == SCALAR_VALUE) {
+ mark_reg_unknown(env, regs, value_regno);
+ return 0;
+ }
+ mark_reg_known_zero(env, regs, value_regno);
+ regs[value_regno].type = PTR_TO_BTF_ID;
+ regs[value_regno].btf_id = btf_id;
+ return 0;
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -2776,11 +2929,30 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err)
return err;
err = check_map_access(env, regno, off, size, false);
- if (!err && t == BPF_READ && value_regno >= 0)
- mark_reg_unknown(env, regs, value_regno);
+ if (!err && t == BPF_READ && value_regno >= 0) {
+ struct bpf_map *map = reg->map_ptr;
+
+ /* if map is read-only, track its contents as scalars */
+ if (tnum_is_const(reg->var_off) &&
+ bpf_map_is_rdonly(map) &&
+ map->ops->map_direct_value_addr) {
+ int map_off = off + reg->var_off.value;
+ u64 val = 0;
+
+ err = bpf_map_direct_read(map, map_off, size,
+ &val);
+ if (err)
+ return err;
+ regs[value_regno].type = SCALAR_VALUE;
+ __mark_reg_known(&regs[value_regno], val);
+ } else {
+ mark_reg_unknown(env, regs, value_regno);
+ }
+ }
} else if (reg->type == PTR_TO_CTX) {
enum bpf_reg_type reg_type = SCALAR_VALUE;
+ u32 btf_id = 0;
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
@@ -2792,7 +2964,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err < 0)
return err;
- err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
+ err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
+ if (err)
+ verbose_linfo(env, insn_idx, "; ");
if (!err && t == BPF_READ && value_regno >= 0) {
/* ctx access returns either a scalar, or a
* PTR_TO_PACKET[_META,_END]. In the latter
@@ -2811,6 +2985,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
* a sub-register.
*/
regs[value_regno].subreg_def = DEF_NOT_SUBREG;
+ if (reg_type == PTR_TO_BTF_ID)
+ regs[value_regno].btf_id = btf_id;
}
regs[value_regno].type = reg_type;
}
@@ -2870,6 +3046,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_tp_buffer_access(env, reg, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_BTF_ID) {
+ err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
+ value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
@@ -3298,6 +3477,22 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
expected_type = PTR_TO_SOCKET;
if (type != expected_type)
goto err_type;
+ } else if (arg_type == ARG_PTR_TO_BTF_ID) {
+ expected_type = PTR_TO_BTF_ID;
+ if (type != expected_type)
+ goto err_type;
+ if (reg->btf_id != meta->btf_id) {
+ verbose(env, "Helper has type %s got %s in R%d\n",
+ kernel_type_name(meta->btf_id),
+ kernel_type_name(reg->btf_id), regno);
+
+ return -EACCES;
+ }
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) {
+ verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
+ regno);
+ return -EACCES;
+ }
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
@@ -3445,6 +3640,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
if (func_id != BPF_FUNC_perf_event_read &&
func_id != BPF_FUNC_perf_event_output &&
+ func_id != BPF_FUNC_skb_output &&
func_id != BPF_FUNC_perf_event_read_value)
goto error;
break;
@@ -3532,6 +3728,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_perf_event_read:
case BPF_FUNC_perf_event_output:
case BPF_FUNC_perf_event_read_value:
+ case BPF_FUNC_skb_output:
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
goto error;
break;
@@ -3810,6 +4007,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* only increment it after check_reg_arg() finished */
state->curframe++;
+ if (btf_check_func_arg_match(env, subprog))
+ return -EINVAL;
+
/* and go analyze first insn of the callee */
*insn_idx = target_insn;
@@ -3916,15 +4116,49 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return -EACCES;
}
- if (!BPF_MAP_PTR(aux->map_state))
+ if (!BPF_MAP_PTR(aux->map_ptr_state))
bpf_map_ptr_store(aux, meta->map_ptr,
meta->map_ptr->unpriv_array);
- else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
+ else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
meta->map_ptr->unpriv_array);
return 0;
}
+static int
+record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ int func_id, int insn_idx)
+{
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+ struct bpf_reg_state *regs = cur_regs(env), *reg;
+ struct bpf_map *map = meta->map_ptr;
+ struct tnum range;
+ u64 val;
+
+ if (func_id != BPF_FUNC_tail_call)
+ return 0;
+ if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
+ verbose(env, "kernel subsystem misconfigured verifier\n");
+ return -EINVAL;
+ }
+
+ range = tnum_range(0, map->max_entries - 1);
+ reg = &regs[BPF_REG_3];
+
+ if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
+ return 0;
+ }
+
+ val = reg->var_off.value;
+ if (bpf_map_key_unseen(aux))
+ bpf_map_key_store(aux, val);
+ else if (!bpf_map_key_poisoned(aux) &&
+ bpf_map_key_immediate(aux) != val)
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
+ return 0;
+}
+
static int check_reference_leak(struct bpf_verifier_env *env)
{
struct bpf_func_state *state = cur_func(env);
@@ -3986,23 +4220,20 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
- err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
- if (err)
- return err;
- err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
+ for (i = 0; i < 5; i++) {
+ err = btf_resolve_helper_id(&env->log, fn, i);
+ if (err > 0)
+ meta.btf_id = err;
+ err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta);
+ if (err)
+ return err;
+ }
+
+ err = record_func_map(env, &meta, func_id, insn_idx);
if (err)
return err;
- err = record_func_map(env, &meta, func_id, insn_idx);
+ err = record_func_key(env, &meta, func_id, insn_idx);
if (err)
return err;
@@ -5433,6 +5664,10 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
/* We might have learned some bits from the bounds. */
__reg_bound_offset(false_reg);
__reg_bound_offset(true_reg);
+ if (is_jmp32) {
+ __reg_bound_offset32(false_reg);
+ __reg_bound_offset32(true_reg);
+ }
/* Intersecting with the old var_off might have improved our bounds
* slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
* then new var_off is (0; 0x7f...fc) which improves our umax.
@@ -5542,6 +5777,10 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
/* We might have learned some bits from the bounds. */
__reg_bound_offset(false_reg);
__reg_bound_offset(true_reg);
+ if (is_jmp32) {
+ __reg_bound_offset32(false_reg);
+ __reg_bound_offset32(true_reg);
+ }
/* Intersecting with the old var_off might have improved our bounds
* slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
* then new var_off is (0; 0x7f...fc) which improves our umax.
@@ -6124,6 +6363,11 @@ static int check_return_code(struct bpf_verifier_env *env)
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
break;
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ if (!env->prog->aux->attach_btf_id)
+ return 0;
+ range = tnum_const(0);
+ break;
default:
return 0;
}
@@ -6406,6 +6650,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
u32 i, nfuncs, urec_size, min_size;
u32 krec_size = sizeof(struct bpf_func_info);
struct bpf_func_info *krecord;
+ struct bpf_func_info_aux *info_aux = NULL;
const struct btf_type *type;
struct bpf_prog *prog;
const struct btf *btf;
@@ -6439,6 +6684,9 @@ static int check_btf_func(struct bpf_verifier_env *env,
krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
if (!krecord)
return -ENOMEM;
+ info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
+ if (!info_aux)
+ goto err_free;
for (i = 0; i < nfuncs; i++) {
ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
@@ -6490,29 +6738,31 @@ static int check_btf_func(struct bpf_verifier_env *env,
ret = -EINVAL;
goto err_free;
}
-
prev_offset = krecord[i].insn_off;
urecord += urec_size;
}
prog->aux->func_info = krecord;
prog->aux->func_info_cnt = nfuncs;
+ prog->aux->func_info_aux = info_aux;
return 0;
err_free:
kvfree(krecord);
+ kfree(info_aux);
return ret;
}
static void adjust_btf_func(struct bpf_verifier_env *env)
{
+ struct bpf_prog_aux *aux = env->prog->aux;
int i;
- if (!env->prog->aux->func_info)
+ if (!aux->func_info)
return;
for (i = 0; i < env->subprog_cnt; i++)
- env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
+ aux->func_info[i].insn_off = env->subprog_info[i].start;
}
#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
@@ -7440,6 +7690,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
case PTR_TO_TCP_SOCK:
case PTR_TO_TCP_SOCK_OR_NULL:
case PTR_TO_XDP_SOCK:
+ case PTR_TO_BTF_ID:
return false;
default:
return true;
@@ -7492,6 +7743,9 @@ static int do_check(struct bpf_verifier_env *env)
0 /* frameno */,
0 /* subprogno, zero == main subprog */);
+ if (btf_check_func_arg_match(env, 0))
+ return -EINVAL;
+
for (;;) {
struct bpf_insn *insn;
u8 class;
@@ -8008,11 +8262,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
* will be used by the valid program until it's unloaded
* and all maps are released in free_used_maps()
*/
- map = bpf_map_inc(map, false);
- if (IS_ERR(map)) {
- fdput(f);
- return PTR_ERR(map);
- }
+ bpf_map_inc(map);
aux->map_index = env->used_map_cnt;
env->used_maps[env->used_map_cnt++] = map;
@@ -8581,6 +8831,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
case PTR_TO_XDP_SOCK:
convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
break;
+ case PTR_TO_BTF_ID:
+ if (type == BPF_WRITE) {
+ verbose(env, "Writes through BTF pointers are not allowed\n");
+ return -EINVAL;
+ }
+ insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
+ env->prog->aux->num_exentries++;
+ continue;
default:
continue;
}
@@ -8871,6 +9129,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
static int fixup_bpf_calls(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
+ bool expect_blinding = bpf_jit_blinding_enabled(prog);
struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn;
const int insn_cnt = prog->len;
@@ -8879,7 +9138,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
struct bpf_insn insn_buf[16];
struct bpf_prog *new_prog;
struct bpf_map *map_ptr;
- int i, cnt, delta = 0;
+ int i, ret, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
@@ -9023,6 +9282,26 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->code = BPF_JMP | BPF_TAIL_CALL;
aux = &env->insn_aux_data[i + delta];
+ if (prog->jit_requested && !expect_blinding &&
+ !bpf_map_key_poisoned(aux) &&
+ !bpf_map_ptr_poisoned(aux) &&
+ !bpf_map_ptr_unpriv(aux)) {
+ struct bpf_jit_poke_descriptor desc = {
+ .reason = BPF_POKE_REASON_TAIL_CALL,
+ .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
+ .tail_call.key = bpf_map_key_immediate(aux),
+ };
+
+ ret = bpf_jit_add_poke_descriptor(prog, &desc);
+ if (ret < 0) {
+ verbose(env, "adding tail call poke descriptor failed\n");
+ return ret;
+ }
+
+ insn->imm = ret + 1;
+ continue;
+ }
+
if (!bpf_map_ptr_unpriv(aux))
continue;
@@ -9037,7 +9316,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
return -EINVAL;
}
- map_ptr = BPF_MAP_PTR(aux->map_state);
+ map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
map_ptr->max_entries, 2);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
@@ -9071,7 +9350,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
if (bpf_map_ptr_poisoned(aux))
goto patch_call_imm;
- map_ptr = BPF_MAP_PTR(aux->map_state);
+ map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
ops = map_ptr->ops;
if (insn->imm == BPF_FUNC_map_lookup_elem &&
ops->map_gen_lookup) {
@@ -9151,6 +9430,23 @@ patch_call_imm:
insn->imm = fn->func - __bpf_call_base;
}
+ /* Since poke tab is now finalized, publish aux to tracker. */
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ map_ptr = prog->aux->poke_tab[i].tail_call.map;
+ if (!map_ptr->ops->map_poke_track ||
+ !map_ptr->ops->map_poke_untrack ||
+ !map_ptr->ops->map_poke_run) {
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EINVAL;
+ }
+
+ ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
+ if (ret < 0) {
+ verbose(env, "tracking tail call prog failed\n");
+ return ret;
+ }
+ }
+
return 0;
}
@@ -9208,6 +9504,161 @@ static void print_verification_stats(struct bpf_verifier_env *env)
env->peak_states, env->longest_mark_read_walk);
}
+static int check_attach_btf_id(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog;
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+ u32 btf_id = prog->aux->attach_btf_id;
+ const char prefix[] = "btf_trace_";
+ int ret = 0, subprog = -1, i;
+ struct bpf_trampoline *tr;
+ const struct btf_type *t;
+ bool conservative = true;
+ const char *tname;
+ struct btf *btf;
+ long addr;
+ u64 key;
+
+ if (prog->type != BPF_PROG_TYPE_TRACING)
+ return 0;
+
+ if (!btf_id) {
+ verbose(env, "Tracing programs must provide btf_id\n");
+ return -EINVAL;
+ }
+ btf = bpf_prog_get_target_btf(prog);
+ if (!btf) {
+ verbose(env,
+ "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, btf_id);
+ if (!t) {
+ verbose(env, "attach_btf_id %u is invalid\n", btf_id);
+ return -EINVAL;
+ }
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!tname) {
+ verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
+ return -EINVAL;
+ }
+ if (tgt_prog) {
+ struct bpf_prog_aux *aux = tgt_prog->aux;
+
+ for (i = 0; i < aux->func_info_cnt; i++)
+ if (aux->func_info[i].type_id == btf_id) {
+ subprog = i;
+ break;
+ }
+ if (subprog == -1) {
+ verbose(env, "Subprog %s doesn't exist\n", tname);
+ return -EINVAL;
+ }
+ conservative = aux->func_info_aux[subprog].unreliable;
+ key = ((u64)aux->id) << 32 | btf_id;
+ } else {
+ key = btf_id;
+ }
+
+ switch (prog->expected_attach_type) {
+ case BPF_TRACE_RAW_TP:
+ if (tgt_prog) {
+ verbose(env,
+ "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
+ return -EINVAL;
+ }
+ if (!btf_type_is_typedef(t)) {
+ verbose(env, "attach_btf_id %u is not a typedef\n",
+ btf_id);
+ return -EINVAL;
+ }
+ if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
+ verbose(env, "attach_btf_id %u points to wrong type name %s\n",
+ btf_id, tname);
+ return -EINVAL;
+ }
+ tname += sizeof(prefix) - 1;
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_ptr(t))
+ /* should never happen in valid vmlinux build */
+ return -EINVAL;
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_func_proto(t))
+ /* should never happen in valid vmlinux build */
+ return -EINVAL;
+
+ /* remember two read only pointers that are valid for
+ * the life time of the kernel
+ */
+ prog->aux->attach_func_name = tname;
+ prog->aux->attach_func_proto = t;
+ prog->aux->attach_btf_trace = true;
+ return 0;
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ if (!btf_type_is_func(t)) {
+ verbose(env, "attach_btf_id %u is not a function\n",
+ btf_id);
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_func_proto(t))
+ return -EINVAL;
+ tr = bpf_trampoline_lookup(key);
+ if (!tr)
+ return -ENOMEM;
+ prog->aux->attach_func_name = tname;
+ /* t is either vmlinux type or another program's type */
+ prog->aux->attach_func_proto = t;
+ mutex_lock(&tr->mutex);
+ if (tr->func.addr) {
+ prog->aux->trampoline = tr;
+ goto out;
+ }
+ if (tgt_prog && conservative) {
+ prog->aux->attach_func_proto = NULL;
+ t = NULL;
+ }
+ ret = btf_distill_func_proto(&env->log, btf, t,
+ tname, &tr->func.model);
+ if (ret < 0)
+ goto out;
+ if (tgt_prog) {
+ if (!tgt_prog->jited) {
+ /* for now */
+ verbose(env, "Can trace only JITed BPF progs\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (tgt_prog->type == BPF_PROG_TYPE_TRACING) {
+ /* prevent cycles */
+ verbose(env, "Cannot recursively attach\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
+ } else {
+ addr = kallsyms_lookup_name(tname);
+ if (!addr) {
+ verbose(env,
+ "The address of function %s cannot be found\n",
+ tname);
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+ tr->func.addr = (void *)addr;
+ prog->aux->trampoline = tr;
+out:
+ mutex_unlock(&tr->mutex);
+ if (ret)
+ bpf_trampoline_put(tr);
+ return ret;
+ default:
+ return -EINVAL;
+ }
+}
+
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
union bpf_attr __user *uattr)
{
@@ -9241,6 +9692,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
env->ops = bpf_verifier_ops[env->prog->type];
is_priv = capable(CAP_SYS_ADMIN);
+ if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+ mutex_lock(&bpf_verifier_lock);
+ if (!btf_vmlinux)
+ btf_vmlinux = btf_parse_vmlinux();
+ mutex_unlock(&bpf_verifier_lock);
+ }
+
/* grab the mutex to protect few globals used by verifier */
if (!is_priv)
mutex_lock(&bpf_verifier_lock);
@@ -9260,6 +9718,17 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
goto err_unlock;
}
+ if (IS_ERR(btf_vmlinux)) {
+ /* Either gcc or pahole or kernel are broken. */
+ verbose(env, "in-kernel BTF is malformed\n");
+ ret = PTR_ERR(btf_vmlinux);
+ goto skip_full_check;
+ }
+
+ ret = check_attach_btf_id(env);
+ if (ret)
+ goto skip_full_check;
+
env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
env->strict_alignment = true;