summaryrefslogtreecommitdiffstats
path: root/kernel/bpf/syscall.c
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@plumgrid.com>2015-10-08 07:23:21 +0200
committerDavid S. Miller <davem@davemloft.net>2015-10-13 04:13:35 +0200
commit1be7f75d1668d6296b80bf35dcf6762393530afc (patch)
tree319fe845ed6fc5f5f1b30f17983418d77196f313 /kernel/bpf/syscall.c
parentnet: HNS: fix MDIO dependencies (diff)
downloadlinux-1be7f75d1668d6296b80bf35dcf6762393530afc.tar.xz
linux-1be7f75d1668d6296b80bf35dcf6762393530afc.zip
bpf: enable non-root eBPF programs
In order to let unprivileged users load and execute eBPF programs teach verifier to prevent pointer leaks. Verifier will prevent - any arithmetic on pointers (except R10+Imm which is used to compute stack addresses) - comparison of pointers (except if (map_value_ptr == 0) ... ) - passing pointers to helper functions - indirectly passing pointers in stack to helper functions - returning pointer from bpf program - storing pointers into ctx or maps Spill/fill of pointers into stack is allowed, but mangling of pointers stored in the stack or reading them byte by byte is not. Within bpf programs the pointers do exist, since programs need to be able to access maps, pass skb pointer to LD_ABS insns, etc but programs cannot pass such pointer values to the outside or obfuscate them. Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs, so that socket filters (tcpdump), af_packet (quic acceleration) and future kcm can use it. tracing and tc cls/act program types still require root permissions, since tracing actually needs to be able to see all kernel pointers and tc is for root only. For example, the following unprivileged socket filter program is allowed: int bpf_prog1(struct __sk_buff *skb) { u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); u64 *value = bpf_map_lookup_elem(&my_map, &index); if (value) *value += skb->len; return 0; } but the following program is not: int bpf_prog1(struct __sk_buff *skb) { u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); u64 *value = bpf_map_lookup_elem(&my_map, &index); if (value) *value += (u64) skb; return 0; } since it would leak the kernel address into the map. Unprivileged socket filter bpf programs have access to the following helper functions: - map lookup/update/delete (but they cannot store kernel pointers into them) - get_random (it's already exposed to unprivileged user space) - get_smp_processor_id - tail_call into another socket filter program - ktime_get_ns The feature is controlled by sysctl kernel.unprivileged_bpf_disabled. This toggle defaults to off (0), but can be set true (1). Once true, bpf programs and maps cannot be accessed from unprivileged process, and the toggle cannot be set back to false. Signed-off-by: Alexei Starovoitov <ast@plumgrid.com> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf/syscall.c')
-rw-r--r--kernel/bpf/syscall.c11
1 files changed, 6 insertions, 5 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c868cafbc00c..83697bc8e574 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -18,6 +18,8 @@
#include <linux/filter.h>
#include <linux/version.h>
+int sysctl_unprivileged_bpf_disabled __read_mostly;
+
static LIST_HEAD(bpf_map_types);
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
@@ -544,6 +546,9 @@ static int bpf_prog_load(union bpf_attr *attr)
attr->kern_version != LINUX_VERSION_CODE)
return -EINVAL;
+ if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
/* plain bpf_prog allocation */
prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
if (!prog)
@@ -599,11 +604,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
union bpf_attr attr = {};
int err;
- /* the syscall is limited to root temporarily. This restriction will be
- * lifted when security audit is clean. Note that eBPF+tracing must have
- * this restriction, since it may pass kernel data to user space
- */
- if (!capable(CAP_SYS_ADMIN))
+ if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
return -EPERM;
if (!access_ok(VERIFY_READ, uattr, 1))