summaryrefslogtreecommitdiffstats
path: root/kernel/bpf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-20 09:57:05 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-20 09:57:05 +0100
commitfa2e1ba3e9e39072fa7a6a9d11ac432c505b4ac7 (patch)
tree9a8eb0d867559af3a55a88856d23036dabdf3f1c /kernel/bpf
parentMerge branch 'akpm' (patches from Andrew) (diff)
parentMerge branch 'ipv4-avoid-pathological-hash-tables' (diff)
downloadlinux-fa2e1ba3e9e39072fa7a6a9d11ac432c505b4ac7.tar.xz
linux-fa2e1ba3e9e39072fa7a6a9d11ac432c505b4ac7.zip
Merge tag 'net-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski: "Including fixes from netfilter, bpf. Quite a handful of old regression fixes but most of those are pre-5.16. Current release - regressions: - fix memory leaks in the skb free deferral scheme if upper layer protocols are used, i.e. in-kernel TCP readers like TLS Current release - new code bugs: - nf_tables: fix NULL check typo in _clone() functions - change the default to y for Vertexcom vendor Kconfig - a couple of fixes to incorrect uses of ref tracking - two fixes for constifying netdev->dev_addr Previous releases - regressions: - bpf: - various verifier fixes mainly around register offset handling when passed to helper functions - fix mount source displayed for bpffs (none -> bpffs) - bonding: - fix extraction of ports for connection hash calculation - fix bond_xmit_broadcast return value when some devices are down - phy: marvell: add Marvell specific PHY loopback - sch_api: don't skip qdisc attach on ingress, prevent ref leak - htb: restore minimal packet size handling in rate control - sfp: fix high power modules without diagnostic monitoring - mscc: ocelot: - don't let phylink re-enable TX PAUSE on the NPI port - don't dereference NULL pointers with shared tc filters - smsc95xx: correct reset handling for LAN9514 - cpsw: avoid alignment faults by taking NET_IP_ALIGN into account - phy: micrel: use kszphy_suspend/_resume for irq aware devices, avoid races with the interrupt Previous releases - always broken: - xdp: check prog type before updating BPF link - smc: resolve various races around abnormal connection termination - sit: allow encapsulated IPv6 traffic to be delivered locally - axienet: fix init/reset handling, add missing barriers, read the right status words, stop queues correctly - add missing dev_put() in sock_timestamping_bind_phc() Misc: - ipv4: prevent accidentally passing RTO_ONLINK to ip_route_output_key_hash() by sanitizing flags - ipv4: avoid quadratic behavior in netns dismantle - stmmac: dwmac-oxnas: add support for OX810SE - fsl: xgmac_mdio: add workaround for erratum A-009885" * tag 'net-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (92 commits) ipv4: add net_hash_mix() dispersion to fib_info_laddrhash keys ipv4: avoid quadratic behavior in netns dismantle net/fsl: xgmac_mdio: Fix incorrect iounmap when removing module powerpc/fsl/dts: Enable WA for erratum A-009885 on fman3l MDIO buses dt-bindings: net: Document fsl,erratum-a009885 net/fsl: xgmac_mdio: Add workaround for erratum A-009885 net: mscc: ocelot: fix using match before it is set net: phy: micrel: use kszphy_suspend()/kszphy_resume for irq aware devices net: cpsw: avoid alignment faults by taking NET_IP_ALIGN into account nfc: llcp: fix NULL error pointer dereference on sendmsg() after failed bind() net: axienet: increase default TX ring size to 128 net: axienet: fix for TX busy handling net: axienet: fix number of TX ring slots for available check net: axienet: Fix TX ring slot available check net: axienet: limit minimum TX ring size net: axienet: add missing memory barriers net: axienet: reset core on initialization prior to MDIO access net: axienet: Wait for PhyRstCmplt after core reset net: axienet: increase reset timeout bpf, selftests: Add ringbuf memory type confusion test ...
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/btf.c2
-rw-r--r--kernel/bpf/inode.c14
-rw-r--r--kernel/bpf/verifier.c81
3 files changed, 69 insertions, 28 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 33bb8ae4a804..e16dafeb2450 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5686,7 +5686,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
i, btf_type_str(t));
return -EINVAL;
}
- if (check_ctx_reg(env, reg, regno))
+ if (check_ptr_off_reg(env, reg, regno))
return -EINVAL;
} else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) {
const struct btf_type *reg_ref_t;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 80da1db47c68..5a8d9f7467bf 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -648,12 +648,22 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
int opt;
opt = fs_parse(fc, bpf_fs_parameters, param, &result);
- if (opt < 0)
+ if (opt < 0) {
/* We might like to report bad mount options here, but
* traditionally we've ignored all mount options, so we'd
* better continue to ignore non-existing options for bpf.
*/
- return opt == -ENOPARAM ? 0 : opt;
+ if (opt == -ENOPARAM) {
+ opt = vfs_parse_fs_param_source(fc, param);
+ if (opt != -ENOPARAM)
+ return opt;
+
+ return 0;
+ }
+
+ if (opt < 0)
+ return opt;
+ }
switch (opt) {
case OPT_MODE:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bfb45381fb3f..a39eedecc93a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -570,6 +570,8 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
if (type & MEM_RDONLY)
strncpy(prefix, "rdonly_", 16);
+ if (type & MEM_ALLOC)
+ strncpy(prefix, "alloc_", 16);
snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
prefix, str[base_type(type)], postfix);
@@ -616,7 +618,7 @@ static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
{
- env->scratched_stack_slots |= 1UL << spi;
+ env->scratched_stack_slots |= 1ULL << spi;
}
static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
@@ -637,14 +639,14 @@ static bool verifier_state_scratched(const struct bpf_verifier_env *env)
static void mark_verifier_state_clean(struct bpf_verifier_env *env)
{
env->scratched_regs = 0U;
- env->scratched_stack_slots = 0UL;
+ env->scratched_stack_slots = 0ULL;
}
/* Used for printing the entire verifier state. */
static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
{
env->scratched_regs = ~0U;
- env->scratched_stack_slots = ~0UL;
+ env->scratched_stack_slots = ~0ULL;
}
/* The reg state of a pointer or a bounded scalar was saved when
@@ -3969,16 +3971,17 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
}
#endif
-int check_ctx_reg(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg, int regno)
+static int __check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno,
+ bool fixed_off_ok)
{
- /* Access to ctx or passing it to a helper is only allowed in
- * its original, unmodified form.
+ /* Access to this pointer-typed register or passing it to a helper
+ * is only allowed in its original, unmodified form.
*/
- if (reg->off) {
- verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
- regno, reg->off);
+ if (!fixed_off_ok && reg->off) {
+ verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
+ reg_type_str(env, reg->type), regno, reg->off);
return -EACCES;
}
@@ -3986,13 +3989,20 @@ int check_ctx_reg(struct bpf_verifier_env *env,
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
+ verbose(env, "variable %s access var_off=%s disallowed\n",
+ reg_type_str(env, reg->type), tn_buf);
return -EACCES;
}
return 0;
}
+int check_ptr_off_reg(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg, int regno)
+{
+ return __check_ptr_off_reg(env, reg, regno, false);
+}
+
static int __check_buffer_access(struct bpf_verifier_env *env,
const char *buf_info,
const struct bpf_reg_state *reg,
@@ -4437,7 +4447,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return -EACCES;
}
- err = check_ctx_reg(env, reg, regno);
+ err = check_ptr_off_reg(env, reg, regno);
if (err < 0)
return err;
@@ -5127,6 +5137,7 @@ static const struct bpf_reg_types mem_types = {
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
PTR_TO_MEM,
+ PTR_TO_MEM | MEM_ALLOC,
PTR_TO_BUF,
},
};
@@ -5144,7 +5155,7 @@ static const struct bpf_reg_types int_ptr_types = {
static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
-static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
+static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM | MEM_ALLOC } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
@@ -5244,12 +5255,6 @@ found:
kernel_type_name(btf_vmlinux, *arg_btf_id));
return -EACCES;
}
-
- if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
- verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
- regno);
- return -EACCES;
- }
}
return 0;
@@ -5304,10 +5309,33 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
if (err)
return err;
- if (type == PTR_TO_CTX) {
- err = check_ctx_reg(env, reg, regno);
+ switch ((u32)type) {
+ case SCALAR_VALUE:
+ /* Pointer types where reg offset is explicitly allowed: */
+ case PTR_TO_PACKET:
+ case PTR_TO_PACKET_META:
+ case PTR_TO_MAP_KEY:
+ case PTR_TO_MAP_VALUE:
+ case PTR_TO_MEM:
+ case PTR_TO_MEM | MEM_RDONLY:
+ case PTR_TO_MEM | MEM_ALLOC:
+ case PTR_TO_BUF:
+ case PTR_TO_BUF | MEM_RDONLY:
+ case PTR_TO_STACK:
+ /* Some of the argument types nevertheless require a
+ * zero register offset.
+ */
+ if (arg_type == ARG_PTR_TO_ALLOC_MEM)
+ goto force_off_check;
+ break;
+ /* All the rest must be rejected: */
+ default:
+force_off_check:
+ err = __check_ptr_off_reg(env, reg, regno,
+ type == PTR_TO_BTF_ID);
if (err < 0)
return err;
+ break;
}
skip_type_check:
@@ -9507,9 +9535,13 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
- if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
- mark_reg_known_zero(env, regs, insn->dst_reg);
+ /* All special src_reg cases are listed below. From this point onwards
+ * we either succeed and assign a corresponding dst_reg->type after
+ * zeroing the offset, or fail and reject the program.
+ */
+ mark_reg_known_zero(env, regs, insn->dst_reg);
+ if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
dst_reg->type = aux->btf_var.reg_type;
switch (base_type(dst_reg->type)) {
case PTR_TO_MEM:
@@ -9547,7 +9579,6 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
map = env->used_maps[aux->map_index];
- mark_reg_known_zero(env, regs, insn->dst_reg);
dst_reg->map_ptr = map;
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
@@ -9651,7 +9682,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return err;
}
- err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
+ err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
if (err < 0)
return err;