summaryrefslogtreecommitdiffstats
path: root/kernel/bpf/btf.c
diff options
context:
space:
mode:
authorMartin KaFai Lau <kafai@fb.com>2018-05-04 23:49:51 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2018-05-09 17:25:13 +0200
commit78958fca7ead2f81b60a6827881c4866d1ed0c52 (patch)
tree541a6090ce06d7bc6f708f5a70a0cd4bff187ac2 /kernel/bpf/btf.c
parentbpf: btf: Avoid WARN_ON when CONFIG_REFCOUNT_FULL=y (diff)
downloadlinux-78958fca7ead2f81b60a6827881c4866d1ed0c52.tar.xz
linux-78958fca7ead2f81b60a6827881c4866d1ed0c52.zip
bpf: btf: Introduce BTF ID
This patch gives an ID to each loaded BTF. The ID is allocated by the idr like the existing prog-id and map-id. The bpf_put(map->btf) is moved to __bpf_map_put() so that the userspace can stop seeing the BTF ID ASAP when the last BTF refcnt is gone. It also makes BTF accessible from userspace through the 1. new BPF_BTF_GET_FD_BY_ID command. It is limited to CAP_SYS_ADMIN which is inline with the BPF_BTF_LOAD cmd and the existing BPF_[MAP|PROG]_GET_FD_BY_ID cmd. 2. new btf_id (and btf_key_id + btf_value_id) in "struct bpf_map_info" Once the BTF ID handler is accessible from userspace, freeing a BTF object has to go through a rcu period. The BPF_BTF_GET_FD_BY_ID cmd can then be done under a rcu_read_lock() instead of taking spin_lock. [Note: A similar rcu usage can be done to the existing bpf_prog_get_fd_by_id() in a follow up patch] When processing the BPF_BTF_GET_FD_BY_ID cmd, refcount_inc_not_zero() is needed because the BTF object could be already in the rcu dead row . btf_get() is removed since its usage is currently limited to btf.c alone. refcount_inc() is used directly instead. Signed-off-by: Martin KaFai Lau <kafai@fb.com> Acked-by: Alexei Starovoitov <ast@fb.com> Acked-by: Song Liu <songliubraving@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'kernel/bpf/btf.c')
-rw-r--r--kernel/bpf/btf.c108
1 files changed, 98 insertions, 10 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index fa0dce0452e7..40950b6bf395 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -11,6 +11,7 @@
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
+#include <linux/idr.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
@@ -179,6 +180,9 @@
i < btf_type_vlen(struct_type); \
i++, member++)
+static DEFINE_IDR(btf_idr);
+static DEFINE_SPINLOCK(btf_idr_lock);
+
struct btf {
union {
struct btf_header *hdr;
@@ -193,6 +197,8 @@ struct btf {
u32 types_size;
u32 data_size;
refcount_t refcnt;
+ u32 id;
+ struct rcu_head rcu;
};
enum verifier_phase {
@@ -598,6 +604,42 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
return 0;
}
+static int btf_alloc_id(struct btf *btf)
+{
+ int id;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock_bh(&btf_idr_lock);
+ id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC);
+ if (id > 0)
+ btf->id = id;
+ spin_unlock_bh(&btf_idr_lock);
+ idr_preload_end();
+
+ if (WARN_ON_ONCE(!id))
+ return -ENOSPC;
+
+ return id > 0 ? 0 : id;
+}
+
+static void btf_free_id(struct btf *btf)
+{
+ unsigned long flags;
+
+ /*
+ * In map-in-map, calling map_delete_elem() on outer
+ * map will call bpf_map_put on the inner map.
+ * It will then eventually call btf_free_id()
+ * on the inner map. Some of the map_delete_elem()
+ * implementation may have irq disabled, so
+ * we need to use the _irqsave() version instead
+ * of the _bh() version.
+ */
+ spin_lock_irqsave(&btf_idr_lock, flags);
+ idr_remove(&btf_idr, btf->id);
+ spin_unlock_irqrestore(&btf_idr_lock, flags);
+}
+
static void btf_free(struct btf *btf)
{
kvfree(btf->types);
@@ -607,15 +649,19 @@ static void btf_free(struct btf *btf)
kfree(btf);
}
-static void btf_get(struct btf *btf)
+static void btf_free_rcu(struct rcu_head *rcu)
{
- refcount_inc(&btf->refcnt);
+ struct btf *btf = container_of(rcu, struct btf, rcu);
+
+ btf_free(btf);
}
void btf_put(struct btf *btf)
{
- if (btf && refcount_dec_and_test(&btf->refcnt))
- btf_free(btf);
+ if (btf && refcount_dec_and_test(&btf->refcnt)) {
+ btf_free_id(btf);
+ call_rcu(&btf->rcu, btf_free_rcu);
+ }
}
static int env_resolve_init(struct btf_verifier_env *env)
@@ -2006,10 +2052,15 @@ const struct file_operations btf_fops = {
.release = btf_release,
};
+static int __btf_new_fd(struct btf *btf)
+{
+ return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
+}
+
int btf_new_fd(const union bpf_attr *attr)
{
struct btf *btf;
- int fd;
+ int ret;
btf = btf_parse(u64_to_user_ptr(attr->btf),
attr->btf_size, attr->btf_log_level,
@@ -2018,12 +2069,23 @@ int btf_new_fd(const union bpf_attr *attr)
if (IS_ERR(btf))
return PTR_ERR(btf);
- fd = anon_inode_getfd("btf", &btf_fops, btf,
- O_RDONLY | O_CLOEXEC);
- if (fd < 0)
+ ret = btf_alloc_id(btf);
+ if (ret) {
+ btf_free(btf);
+ return ret;
+ }
+
+ /*
+ * The BTF ID is published to the userspace.
+ * All BTF free must go through call_rcu() from
+ * now on (i.e. free by calling btf_put()).
+ */
+
+ ret = __btf_new_fd(btf);
+ if (ret < 0)
btf_put(btf);
- return fd;
+ return ret;
}
struct btf *btf_get_by_fd(int fd)
@@ -2042,7 +2104,7 @@ struct btf *btf_get_by_fd(int fd)
}
btf = f.file->private_data;
- btf_get(btf);
+ refcount_inc(&btf->refcnt);
fdput(f);
return btf;
@@ -2062,3 +2124,29 @@ int btf_get_info_by_fd(const struct btf *btf,
return 0;
}
+
+int btf_get_fd_by_id(u32 id)
+{
+ struct btf *btf;
+ int fd;
+
+ rcu_read_lock();
+ btf = idr_find(&btf_idr, id);
+ if (!btf || !refcount_inc_not_zero(&btf->refcnt))
+ btf = ERR_PTR(-ENOENT);
+ rcu_read_unlock();
+
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+
+ fd = __btf_new_fd(btf);
+ if (fd < 0)
+ btf_put(btf);
+
+ return fd;
+}
+
+u32 btf_id(const struct btf *btf)
+{
+ return btf->id;
+}