diff options
author | David S. Miller <davem@davemloft.net> | 2015-11-03 04:48:39 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-11-03 04:48:39 +0100 |
commit | 12d4309636d30770b54985be05ac512131f328b8 (patch) | |
tree | 011745b348dfea74709648487aed31bb6f670329 | |
parent | net: fix percpu memory leaks (diff) | |
parent | bpf: add sample usages for persistent maps/progs (diff) | |
download | linux-12d4309636d30770b54985be05ac512131f328b8.tar.xz linux-12d4309636d30770b54985be05ac512131f328b8.zip |
Merge branch 'bpf-persistent'
Daniel Borkmann says:
====================
BPF updates
This set adds support for persistent maps/progs. Please see
individual patches for further details. A man-page update
to bpf(2) will be sent later on, also a iproute2 patch for
support in tc.
v1 -> v2:
- Reworked most of patch 4 and 5
- Rebased to latest net-next
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/bpf.h | 9 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 45 | ||||
-rw-r--r-- | include/uapi/linux/magic.h | 1 | ||||
-rw-r--r-- | kernel/bpf/Makefile | 4 | ||||
-rw-r--r-- | kernel/bpf/core.c | 3 | ||||
-rw-r--r-- | kernel/bpf/inode.c | 387 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 95 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 3 | ||||
-rw-r--r-- | samples/bpf/Makefile | 3 | ||||
-rw-r--r-- | samples/bpf/fds_example.c | 183 | ||||
-rw-r--r-- | samples/bpf/libbpf.c | 19 | ||||
-rw-r--r-- | samples/bpf/libbpf.h | 3 |
12 files changed, 683 insertions, 72 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 75718fa28260..de464e6683b6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -167,11 +167,18 @@ struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); -struct bpf_map *bpf_map_get(struct fd f); +struct bpf_map *bpf_map_get(u32 ufd); +struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); extern int sysctl_unprivileged_bpf_disabled; +int bpf_map_new_fd(struct bpf_map *map); +int bpf_prog_new_fd(struct bpf_prog *prog); + +int bpf_obj_pin_user(u32 ufd, const char __user *pathname); +int bpf_obj_get_user(const char __user *pathname); + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2e032426cfb7..9ea2d22fa2cb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -63,50 +63,16 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; -/* BPF syscall commands */ +/* BPF syscall commands, see bpf(2) man-page for details. */ enum bpf_cmd { - /* create a map with given type and attributes - * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size) - * returns fd or negative error - * map is deleted when fd is closed - */ BPF_MAP_CREATE, - - /* lookup key in a given map - * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) - * Using attr->map_fd, attr->key, attr->value - * returns zero and stores found elem into value - * or negative error - */ BPF_MAP_LOOKUP_ELEM, - - /* create or update key/value pair in a given map - * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) - * Using attr->map_fd, attr->key, attr->value, attr->flags - * returns zero or negative error - */ BPF_MAP_UPDATE_ELEM, - - /* find and delete elem by key in a given map - * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) - * Using attr->map_fd, attr->key - * returns zero or negative error - */ BPF_MAP_DELETE_ELEM, - - /* lookup key in a given map and return next key - * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) - * Using attr->map_fd, attr->key, attr->next_key - * returns zero and stores next key or negative error - */ BPF_MAP_GET_NEXT_KEY, - - /* verify and load eBPF program - * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size) - * Using attr->prog_type, attr->insns, attr->license - * returns fd or negative error - */ BPF_PROG_LOAD, + BPF_OBJ_PIN, + BPF_OBJ_GET, }; enum bpf_map_type { @@ -160,6 +126,11 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ }; + + struct { /* anonymous struct used by BPF_OBJ_* commands */ + __aligned_u64 pathname; + __u32 bpf_fd; + }; } __attribute__((aligned(8))); /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index 7b1425a6b370..accb036bbc9c 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -75,5 +75,6 @@ #define ANON_INODE_FS_MAGIC 0x09041934 #define BTRFS_TEST_MAGIC 0x73727279 #define NSFS_MAGIC 0x6e736673 +#define BPF_FS_MAGIC 0xcafe4a11 #endif /* __LINUX_MAGIC_H__ */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index e6983be12bd3..13272582eee0 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,2 +1,4 @@ obj-y := core.o -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o helpers.o + +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 80864712d2c4..334b1bdd572c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -92,6 +92,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) fp->pages = size / PAGE_SIZE; fp->aux = aux; + fp->aux->prog = fp; return fp; } @@ -116,6 +117,7 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); fp->pages = size / PAGE_SIZE; + fp->aux->prog = fp; /* We keep fp->aux from fp_old around in the new * reallocated structure. @@ -726,7 +728,6 @@ void bpf_prog_free(struct bpf_prog *fp) struct bpf_prog_aux *aux = fp->aux; INIT_WORK(&aux->work, bpf_prog_free_deferred); - aux->prog = fp; schedule_work(&aux->work); } EXPORT_SYMBOL_GPL(bpf_prog_free); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c new file mode 100644 index 000000000000..be6d726e31c9 --- /dev/null +++ b/kernel/bpf/inode.c @@ -0,0 +1,387 @@ +/* + * Minimal file system backend for holding eBPF maps and programs, + * used by bpf(2) object pinning. + * + * Authors: + * + * Daniel Borkmann <daniel@iogearbox.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/magic.h> +#include <linux/major.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/fs.h> +#include <linux/kdev_t.h> +#include <linux/filter.h> +#include <linux/bpf.h> + +enum bpf_type { + BPF_TYPE_UNSPEC = 0, + BPF_TYPE_PROG, + BPF_TYPE_MAP, +}; + +static void *bpf_any_get(void *raw, enum bpf_type type) +{ + switch (type) { + case BPF_TYPE_PROG: + atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); + break; + case BPF_TYPE_MAP: + atomic_inc(&((struct bpf_map *)raw)->refcnt); + break; + default: + WARN_ON_ONCE(1); + break; + } + + return raw; +} + +static void bpf_any_put(void *raw, enum bpf_type type) +{ + switch (type) { + case BPF_TYPE_PROG: + bpf_prog_put(raw); + break; + case BPF_TYPE_MAP: + bpf_map_put(raw); + break; + default: + WARN_ON_ONCE(1); + break; + } +} + +static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) +{ + void *raw; + + *type = BPF_TYPE_MAP; + raw = bpf_map_get(ufd); + if (IS_ERR(raw)) { + *type = BPF_TYPE_PROG; + raw = bpf_prog_get(ufd); + } + + return raw; +} + +static const struct inode_operations bpf_dir_iops; + +static const struct inode_operations bpf_prog_iops = { }; +static const struct inode_operations bpf_map_iops = { }; + +static struct inode *bpf_get_inode(struct super_block *sb, + const struct inode *dir, + umode_t mode) +{ + struct inode *inode; + + switch (mode & S_IFMT) { + case S_IFDIR: + case S_IFREG: + break; + default: + return ERR_PTR(-EINVAL); + } + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOSPC); + + inode->i_ino = get_next_ino(); + inode->i_atime = CURRENT_TIME; + inode->i_mtime = inode->i_atime; + inode->i_ctime = inode->i_atime; + + inode_init_owner(inode, dir, mode); + + return inode; +} + +static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) +{ + *type = BPF_TYPE_UNSPEC; + if (inode->i_op == &bpf_prog_iops) + *type = BPF_TYPE_PROG; + else if (inode->i_op == &bpf_map_iops) + *type = BPF_TYPE_MAP; + else + return -EACCES; + + return 0; +} + +static bool bpf_dname_reserved(const struct dentry *dentry) +{ + return strchr(dentry->d_name.name, '.'); +} + +static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode; + + if (bpf_dname_reserved(dentry)) + return -EPERM; + + inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &bpf_dir_iops; + inode->i_fop = &simple_dir_operations; + + inc_nlink(inode); + inc_nlink(dir); + + d_instantiate(dentry, inode); + dget(dentry); + + return 0; +} + +static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, + umode_t mode, const struct inode_operations *iops) +{ + struct inode *inode; + + if (bpf_dname_reserved(dentry)) + return -EPERM; + + inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = iops; + inode->i_private = dentry->d_fsdata; + + d_instantiate(dentry, inode); + dget(dentry); + + return 0; +} + +static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, + dev_t devt) +{ + enum bpf_type type = MINOR(devt); + + if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || + dentry->d_fsdata == NULL) + return -EPERM; + + switch (type) { + case BPF_TYPE_PROG: + return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); + case BPF_TYPE_MAP: + return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); + default: + return -EPERM; + } +} + +static const struct inode_operations bpf_dir_iops = { + .lookup = simple_lookup, + .mknod = bpf_mkobj, + .mkdir = bpf_mkdir, + .rmdir = simple_rmdir, + .unlink = simple_unlink, +}; + +static int bpf_obj_do_pin(const struct filename *pathname, void *raw, + enum bpf_type type) +{ + struct dentry *dentry; + struct inode *dir; + struct path path; + umode_t mode; + dev_t devt; + int ret; + + dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); + devt = MKDEV(UNNAMED_MAJOR, type); + + ret = security_path_mknod(&path, dentry, mode, devt); + if (ret) + goto out; + + dir = d_inode(path.dentry); + if (dir->i_op != &bpf_dir_iops) { + ret = -EPERM; + goto out; + } + + dentry->d_fsdata = raw; + ret = vfs_mknod(dir, dentry, mode, devt); + dentry->d_fsdata = NULL; +out: + done_path_create(&path, dentry); + return ret; +} + +int bpf_obj_pin_user(u32 ufd, const char __user *pathname) +{ + struct filename *pname; + enum bpf_type type; + void *raw; + int ret; + + pname = getname(pathname); + if (IS_ERR(pname)) + return PTR_ERR(pname); + + raw = bpf_fd_probe_obj(ufd, &type); + if (IS_ERR(raw)) { + ret = PTR_ERR(raw); + goto out; + } + + ret = bpf_obj_do_pin(pname, raw, type); + if (ret != 0) + bpf_any_put(raw, type); +out: + putname(pname); + return ret; +} + +static void *bpf_obj_do_get(const struct filename *pathname, + enum bpf_type *type) +{ + struct inode *inode; + struct path path; + void *raw; + int ret; + + ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); + if (ret) + return ERR_PTR(ret); + + inode = d_backing_inode(path.dentry); + ret = inode_permission(inode, MAY_WRITE); + if (ret) + goto out; + + ret = bpf_inode_type(inode, type); + if (ret) + goto out; + + raw = bpf_any_get(inode->i_private, *type); + touch_atime(&path); + + path_put(&path); + return raw; +out: + path_put(&path); + return ERR_PTR(ret); +} + +int bpf_obj_get_user(const char __user *pathname) +{ + enum bpf_type type = BPF_TYPE_UNSPEC; + struct filename *pname; + int ret = -ENOENT; + void *raw; + + pname = getname(pathname); + if (IS_ERR(pname)) + return PTR_ERR(pname); + + raw = bpf_obj_do_get(pname, &type); + if (IS_ERR(raw)) { + ret = PTR_ERR(raw); + goto out; + } + + if (type == BPF_TYPE_PROG) + ret = bpf_prog_new_fd(raw); + else if (type == BPF_TYPE_MAP) + ret = bpf_map_new_fd(raw); + else + goto out; + + if (ret < 0) + bpf_any_put(raw, type); +out: + putname(pname); + return ret; +} + +static void bpf_evict_inode(struct inode *inode) +{ + enum bpf_type type; + + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); + + if (!bpf_inode_type(inode, &type)) + bpf_any_put(inode->i_private, type); +} + +static const struct super_operations bpf_super_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .evict_inode = bpf_evict_inode, +}; + +static int bpf_fill_super(struct super_block *sb, void *data, int silent) +{ + static struct tree_descr bpf_rfiles[] = { { "" } }; + struct inode *inode; + int ret; + + ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); + if (ret) + return ret; + + sb->s_op = &bpf_super_ops; + + inode = sb->s_root->d_inode; + inode->i_op = &bpf_dir_iops; + inode->i_mode &= ~S_IALLUGO; + inode->i_mode |= S_ISVTX | S_IRWXUGO; + + return 0; +} + +static struct dentry *bpf_mount(struct file_system_type *type, int flags, + const char *dev_name, void *data) +{ + return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super); +} + +static struct file_system_type bpf_fs_type = { + .owner = THIS_MODULE, + .name = "bpf", + .mount = bpf_mount, + .kill_sb = kill_litter_super, + .fs_flags = FS_USERNS_MOUNT, +}; + +MODULE_ALIAS_FS("bpf"); + +static int __init bpf_init(void) +{ + int ret; + + ret = sysfs_create_mount_point(fs_kobj, "bpf"); + if (ret) + return ret; + + ret = register_filesystem(&bpf_fs_type); + if (ret) + sysfs_remove_mount_point(fs_kobj, "bpf"); + + return ret; +} +fs_initcall(bpf_init); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 687dd6ca574d..0d3313d02a7e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -111,6 +111,12 @@ static const struct file_operations bpf_map_fops = { .release = bpf_map_release, }; +int bpf_map_new_fd(struct bpf_map *map) +{ + return anon_inode_getfd("bpf-map", &bpf_map_fops, map, + O_RDWR | O_CLOEXEC); +} + /* helper macro to check that unused fields 'union bpf_attr' are zero */ #define CHECK_ATTR(CMD) \ memchr_inv((void *) &attr->CMD##_LAST_FIELD + \ @@ -141,8 +147,7 @@ static int map_create(union bpf_attr *attr) if (err) goto free_map; - err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); - + err = bpf_map_new_fd(map); if (err < 0) /* failed to allocate fd */ goto free_map; @@ -157,19 +162,29 @@ free_map: /* if error is returned, fd is released. * On success caller should complete fd access with matching fdput() */ -struct bpf_map *bpf_map_get(struct fd f) +struct bpf_map *__bpf_map_get(struct fd f) { - struct bpf_map *map; - if (!f.file) return ERR_PTR(-EBADF); - if (f.file->f_op != &bpf_map_fops) { fdput(f); return ERR_PTR(-EINVAL); } - map = f.file->private_data; + return f.file->private_data; +} + +struct bpf_map *bpf_map_get(u32 ufd) +{ + struct fd f = fdget(ufd); + struct bpf_map *map; + + map = __bpf_map_get(f); + if (IS_ERR(map)) + return map; + + atomic_inc(&map->refcnt); + fdput(f); return map; } @@ -197,7 +212,7 @@ static int map_lookup_elem(union bpf_attr *attr) return -EINVAL; f = fdget(ufd); - map = bpf_map_get(f); + map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -256,7 +271,7 @@ static int map_update_elem(union bpf_attr *attr) return -EINVAL; f = fdget(ufd); - map = bpf_map_get(f); + map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -309,7 +324,7 @@ static int map_delete_elem(union bpf_attr *attr) return -EINVAL; f = fdget(ufd); - map = bpf_map_get(f); + map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -350,7 +365,7 @@ static int map_get_next_key(union bpf_attr *attr) return -EINVAL; f = fdget(ufd); - map = bpf_map_get(f); + map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); @@ -498,7 +513,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) free_uid(user); } -static void __prog_put_rcu(struct rcu_head *rcu) +static void __prog_put_common(struct rcu_head *rcu) { struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); @@ -510,19 +525,14 @@ static void __prog_put_rcu(struct rcu_head *rcu) /* version of bpf_prog_put() that is called after a grace period */ void bpf_prog_put_rcu(struct bpf_prog *prog) { - if (atomic_dec_and_test(&prog->aux->refcnt)) { - prog->aux->prog = prog; - call_rcu(&prog->aux->rcu, __prog_put_rcu); - } + if (atomic_dec_and_test(&prog->aux->refcnt)) + call_rcu(&prog->aux->rcu, __prog_put_common); } void bpf_prog_put(struct bpf_prog *prog) { - if (atomic_dec_and_test(&prog->aux->refcnt)) { - free_used_maps(prog->aux); - bpf_prog_uncharge_memlock(prog); - bpf_prog_free(prog); - } + if (atomic_dec_and_test(&prog->aux->refcnt)) + __prog_put_common(&prog->aux->rcu); } EXPORT_SYMBOL_GPL(bpf_prog_put); @@ -538,21 +548,22 @@ static const struct file_operations bpf_prog_fops = { .release = bpf_prog_release, }; -static struct bpf_prog *get_prog(struct fd f) +int bpf_prog_new_fd(struct bpf_prog *prog) { - struct bpf_prog *prog; + return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, + O_RDWR | O_CLOEXEC); +} +static struct bpf_prog *__bpf_prog_get(struct fd f) +{ if (!f.file) return ERR_PTR(-EBADF); - if (f.file->f_op != &bpf_prog_fops) { fdput(f); return ERR_PTR(-EINVAL); } - prog = f.file->private_data; - - return prog; + return f.file->private_data; } /* called by sockets/tracing/seccomp before attaching program to an event @@ -563,13 +574,13 @@ struct bpf_prog *bpf_prog_get(u32 ufd) struct fd f = fdget(ufd); struct bpf_prog *prog; - prog = get_prog(f); - + prog = __bpf_prog_get(f); if (IS_ERR(prog)) return prog; atomic_inc(&prog->aux->refcnt); fdput(f); + return prog; } EXPORT_SYMBOL_GPL(bpf_prog_get); @@ -647,7 +658,7 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_used_maps; - err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); + err = bpf_prog_new_fd(prog); if (err < 0) /* failed to allocate fd */ goto free_used_maps; @@ -663,6 +674,24 @@ free_prog_nouncharge: return err; } +#define BPF_OBJ_LAST_FIELD bpf_fd + +static int bpf_obj_pin(const union bpf_attr *attr) +{ + if (CHECK_ATTR(BPF_OBJ)) + return -EINVAL; + + return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname)); +} + +static int bpf_obj_get(const union bpf_attr *attr) +{ + if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0) + return -EINVAL; + + return bpf_obj_get_user(u64_to_ptr(attr->pathname)); +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -723,6 +752,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_PROG_LOAD: err = bpf_prog_load(&attr); break; + case BPF_OBJ_PIN: + err = bpf_obj_pin(&attr); + break; + case BPF_OBJ_GET: + err = bpf_obj_get(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b56cf51f8d42..fdc88c5a60e3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1989,8 +1989,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) } f = fdget(insn->imm); - - map = bpf_map_get(f); + map = __bpf_map_get(f); if (IS_ERR(map)) { verbose("fd %d is not pointing to valid bpf_map\n", insn->imm); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index b30514514e37..79b4596b5f9a 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -4,6 +4,7 @@ obj- := dummy.o # List of programs to build hostprogs-y := test_verifier test_maps hostprogs-y += sock_example +hostprogs-y += fds_example hostprogs-y += sockex1 hostprogs-y += sockex2 hostprogs-y += sockex3 @@ -19,6 +20,7 @@ hostprogs-y += lathist test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o sock_example-objs := sock_example.o libbpf.o +fds_example-objs := bpf_load.o libbpf.o fds_example.o sockex1-objs := bpf_load.o libbpf.o sockex1_user.o sockex2-objs := bpf_load.o libbpf.o sockex2_user.o sockex3-objs := bpf_load.o libbpf.o sockex3_user.o @@ -49,6 +51,7 @@ always += lathist_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable +HOSTLOADLIBES_fds_example += -lelf HOSTLOADLIBES_sockex1 += -lelf HOSTLOADLIBES_sockex2 += -lelf HOSTLOADLIBES_sockex3 += -lelf diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c new file mode 100644 index 000000000000..e2fd16c3d0f0 --- /dev/null +++ b/samples/bpf/fds_example.c @@ -0,0 +1,183 @@ +#include <linux/unistd.h> +#include <linux/bpf.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <assert.h> +#include <errno.h> + +#include <sys/types.h> +#include <sys/socket.h> + +#include "bpf_load.h" +#include "libbpf.h" + +#define BPF_F_PIN (1 << 0) +#define BPF_F_GET (1 << 1) +#define BPF_F_PIN_GET (BPF_F_PIN | BPF_F_GET) + +#define BPF_F_KEY (1 << 2) +#define BPF_F_VAL (1 << 3) +#define BPF_F_KEY_VAL (BPF_F_KEY | BPF_F_VAL) + +#define BPF_M_UNSPEC 0 +#define BPF_M_MAP 1 +#define BPF_M_PROG 2 + +static void usage(void) +{ + printf("Usage: fds_example [...]\n"); + printf(" -F <file> File to pin/get object\n"); + printf(" -P |- pin object\n"); + printf(" -G `- get object\n"); + printf(" -m eBPF map mode\n"); + printf(" -k <key> |- map key\n"); + printf(" -v <value> `- map value\n"); + printf(" -p eBPF prog mode\n"); + printf(" -o <object> `- object file\n"); + printf(" -h Display this help.\n"); +} + +static int bpf_map_create(void) +{ + return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t), + sizeof(uint32_t), 1024); +} + +static int bpf_prog_create(const char *object) +{ + static const struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + + if (object) { + assert(!load_bpf_file((char *)object)); + return prog_fd[0]; + } else { + return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, + insns, sizeof(insns), "GPL", 0); + } +} + +static int bpf_do_map(const char *file, uint32_t flags, uint32_t key, + uint32_t value) +{ + int fd, ret; + + if (flags & BPF_F_PIN) { + fd = bpf_map_create(); + printf("bpf: map fd:%d (%s)\n", fd, strerror(errno)); + assert(fd > 0); + + ret = bpf_obj_pin(fd, file); + printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno)); + assert(ret == 0); + } else { + fd = bpf_obj_get(file); + printf("bpf: get fd:%d (%s)\n", fd, strerror(errno)); + assert(fd > 0); + } + + if ((flags & BPF_F_KEY_VAL) == BPF_F_KEY_VAL) { + ret = bpf_update_elem(fd, &key, &value, 0); + printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", fd, key, value, + ret, strerror(errno)); + assert(ret == 0); + } else if (flags & BPF_F_KEY) { + ret = bpf_lookup_elem(fd, &key, &value); + printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value, + ret, strerror(errno)); + assert(ret == 0); + } + + return 0; +} + +static int bpf_do_prog(const char *file, uint32_t flags, const char *object) +{ + int fd, sock, ret; + + if (flags & BPF_F_PIN) { + fd = bpf_prog_create(object); + printf("bpf: prog fd:%d (%s)\n", fd, strerror(errno)); + assert(fd > 0); + + ret = bpf_obj_pin(fd, file); + printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno)); + assert(ret == 0); + } else { + fd = bpf_obj_get(file); + printf("bpf: get fd:%d (%s)\n", fd, strerror(errno)); + assert(fd > 0); + } + + sock = open_raw_sock("lo"); + assert(sock > 0); + + ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &fd, sizeof(fd)); + printf("bpf: sock:%d <- fd:%d attached ret:(%d,%s)\n", sock, fd, + ret, strerror(errno)); + assert(ret == 0); + + return 0; +} + +int main(int argc, char **argv) +{ + const char *file = NULL, *object = NULL; + uint32_t key = 0, value = 0, flags = 0; + int opt, mode = BPF_M_UNSPEC; + + while ((opt = getopt(argc, argv, "F:PGmk:v:po:")) != -1) { + switch (opt) { + /* General args */ + case 'F': + file = optarg; + break; + case 'P': + flags |= BPF_F_PIN; + break; + case 'G': + flags |= BPF_F_GET; + break; + /* Map-related args */ + case 'm': + mode = BPF_M_MAP; + break; + case 'k': + key = strtoul(optarg, NULL, 0); + flags |= BPF_F_KEY; + break; + case 'v': + value = strtoul(optarg, NULL, 0); + flags |= BPF_F_VAL; + break; + /* Prog-related args */ + case 'p': + mode = BPF_M_PROG; + break; + case 'o': + object = optarg; + break; + default: + goto out; + } + } + + if (!(flags & BPF_F_PIN_GET) || !file) + goto out; + + switch (mode) { + case BPF_M_MAP: + return bpf_do_map(file, flags, key, value); + case BPF_M_PROG: + return bpf_do_prog(file, flags, object); + } +out: + usage(); + return -1; +} diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c index 7e1efa7e2ed7..65a8d48d2799 100644 --- a/samples/bpf/libbpf.c +++ b/samples/bpf/libbpf.c @@ -103,6 +103,25 @@ int bpf_prog_load(enum bpf_prog_type prog_type, return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); } +int bpf_obj_pin(int fd, const char *pathname) +{ + union bpf_attr attr = { + .pathname = ptr_to_u64((void *)pathname), + .bpf_fd = fd, + }; + + return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr)); +} + +int bpf_obj_get(const char *pathname) +{ + union bpf_attr attr = { + .pathname = ptr_to_u64((void *)pathname), + }; + + return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr)); +} + int open_raw_sock(const char *name) { struct sockaddr_ll sll; diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index b7f63c70b4a2..014aacf916e4 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -15,6 +15,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, int insn_len, const char *license, int kern_version); +int bpf_obj_pin(int fd, const char *pathname); +int bpf_obj_get(const char *pathname); + #define LOG_BUF_SIZE 65536 extern char bpf_log_buf[LOG_BUF_SIZE]; |