summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/bpf-cgroup.h41
-rw-r--r--include/linux/bpf.h10
-rw-r--r--include/uapi/linux/bpf.h22
-rw-r--r--kernel/bpf/cgroup.c395
-rw-r--r--kernel/bpf/syscall.c119
-rw-r--r--kernel/cgroup/cgroup.c41
-rw-r--r--tools/include/uapi/linux/bpf.h22
-rw-r--r--tools/lib/bpf/bpf.c34
-rw-r--r--tools/lib/bpf/bpf.h19
-rw-r--r--tools/lib/bpf/libbpf.c46
-rw-r--r--tools/lib/bpf/libbpf.h8
-rw-r--r--tools/lib/bpf/libbpf.map4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c244
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup_link.c24
14 files changed, 930 insertions, 99 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a7cd5c7a2509..c11b413d5b1a 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -51,9 +51,18 @@ struct bpf_cgroup_storage {
struct rcu_head rcu;
};
+/* cgroup-specific bpf_link: wraps the generic link together with the cgroup
+ * it is attached to and the attach type. The cgroup pointer is reset to NULL
+ * by bpf_cgroup_link_auto_detach() when the cgroup's BPF state is released.
+ */
+struct bpf_cgroup_link {
+ struct bpf_link link;
+ struct cgroup *cgroup;
+ enum bpf_attach_type type;
+};
+
+extern const struct bpf_link_ops bpf_cgroup_link_lops;
+
struct bpf_prog_list {
struct list_head node;
struct bpf_prog *prog;
+ struct bpf_cgroup_link *link;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};
@@ -84,20 +93,27 @@ struct cgroup_bpf {
int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);
-int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
- struct bpf_prog *replace_prog,
+int __cgroup_bpf_attach(struct cgroup *cgrp,
+ struct bpf_prog *prog, struct bpf_prog *replace_prog,
+ struct bpf_cgroup_link *link,
enum bpf_attach_type type, u32 flags);
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+ struct bpf_cgroup_link *link,
enum bpf_attach_type type);
+int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
+ struct bpf_prog *new_prog);
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr);
/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
-int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
- struct bpf_prog *replace_prog, enum bpf_attach_type type,
+int cgroup_bpf_attach(struct cgroup *cgrp,
+ struct bpf_prog *prog, struct bpf_prog *replace_prog,
+ struct bpf_cgroup_link *link, enum bpf_attach_type type,
u32 flags);
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type, u32 flags);
+ enum bpf_attach_type type);
+int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
+ struct bpf_prog *new_prog);
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr);
@@ -332,11 +348,13 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
enum bpf_prog_type ptype);
+int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);
#else
struct bpf_prog;
+struct bpf_link;
struct cgroup_bpf {};
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
@@ -354,6 +372,19 @@ static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
return -EINVAL;
}
+/* Stub for the #else branch of this header (NOTE(review): presumably the
+ * !CONFIG_CGROUP_BPF build; the opening #ifdef is outside this hunk).
+ * Link creation always fails with -EINVAL.
+ */
+static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+/* Stub for the #else branch of this header (NOTE(review): presumably the
+ * !CONFIG_CGROUP_BPF build; the opening #ifdef is outside this hunk).
+ * Replacing a link's program always fails with -EINVAL.
+ */
+static inline int cgroup_bpf_replace(struct bpf_link *link,
+ struct bpf_prog *old_prog,
+ struct bpf_prog *new_prog)
+{
+ return -EINVAL;
+}
+
static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3bde59a8453b..56254d880293 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1082,15 +1082,23 @@ extern int sysctl_unprivileged_bpf_disabled;
int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);
-struct bpf_link;
+/* Generic BPF link object. This patch moves the definition here from
+ * kernel/bpf/syscall.c so that other code can embed it (see
+ * struct bpf_cgroup_link in include/linux/bpf-cgroup.h).
+ */
+struct bpf_link {
+ atomic64_t refcnt;
+ const struct bpf_link_ops *ops;
+ struct bpf_prog *prog;
+ struct work_struct work;
+};
struct bpf_link_ops {
void (*release)(struct bpf_link *link);
void (*dealloc)(struct bpf_link *link);
+
};
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog);
+void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
+ int link_fd);
void bpf_link_inc(struct bpf_link *link);
void bpf_link_put(struct bpf_link *link);
int bpf_link_new_fd(struct bpf_link *link);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9f786a5a44ac..2e29a671d67e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -111,6 +111,8 @@ enum bpf_cmd {
BPF_MAP_LOOKUP_AND_DELETE_BATCH,
BPF_MAP_UPDATE_BATCH,
BPF_MAP_DELETE_BATCH,
+ BPF_LINK_CREATE,
+ BPF_LINK_UPDATE,
};
enum bpf_map_type {
@@ -541,7 +543,7 @@ union bpf_attr {
__u32 prog_cnt;
} query;
- struct {
+ struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
@@ -569,6 +571,24 @@ union bpf_attr {
__u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */
} task_fd_query;
+
+ struct { /* struct used by BPF_LINK_CREATE command */
+ __u32 prog_fd; /* eBPF program to attach */
+ __u32 target_fd; /* object to attach to */
+ __u32 attach_type; /* attach type */
+ __u32 flags; /* extra flags */
+ } link_create;
+
+ struct { /* struct used by BPF_LINK_UPDATE command */
+ __u32 link_fd; /* link fd */
+ /* new program fd to update link with */
+ __u32 new_prog_fd;
+ __u32 flags; /* extra flags */
+ /* expected link's program fd; is specified only if
+ * BPF_F_REPLACE flag is set in flags */
+ __u32 old_prog_fd;
+ } link_update;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9c8472823a7f..80676fc00d81 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -80,6 +80,17 @@ static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[])
bpf_cgroup_storage_unlink(storages[stype]);
}
+/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
+ * It drops the link's cgroup reference and clears link->cgroup; a NULL
+ * link->cgroup is what bpf_cgroup_link_release() and cgroup_bpf_replace()
+ * check to recognize an already-detached link. The link's bpf_prog is not
+ * touched here. It doesn't free link memory either; that is done by
+ * bpf_cgroup_link_dealloc().
+ */
+static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
+{
+ cgroup_put(link->cgroup);
+ link->cgroup = NULL;
+}
+
/**
* cgroup_bpf_release() - put references of all bpf programs and
* release all cgroup bpf data
@@ -100,7 +111,10 @@ static void cgroup_bpf_release(struct work_struct *work)
list_for_each_entry_safe(pl, tmp, progs, node) {
list_del(&pl->node);
- bpf_prog_put(pl->prog);
+ if (pl->prog)
+ bpf_prog_put(pl->prog);
+ if (pl->link)
+ bpf_cgroup_link_auto_detach(pl->link);
bpf_cgroup_storages_unlink(pl->storage);
bpf_cgroup_storages_free(pl->storage);
kfree(pl);
@@ -134,6 +148,18 @@ static void cgroup_bpf_release_fn(struct percpu_ref *ref)
queue_work(system_wq, &cgrp->bpf.release_work);
}
+/* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
+ * link or direct prog. A directly attached prog takes precedence over a link.
+ * Returns NULL when the entry has neither (e.g. an entry that
+ * __cgroup_bpf_detach() has marked as deleted).
+ */
+static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
+{
+ if (pl->prog)
+ return pl->prog;
+ if (pl->link)
+ return pl->link->link.prog;
+ return NULL;
+}
+
/* count number of elements in the list.
* it's slow but the list cannot be long
*/
@@ -143,7 +169,7 @@ static u32 prog_list_length(struct list_head *head)
u32 cnt = 0;
list_for_each_entry(pl, head, node) {
- if (!pl->prog)
+ if (!prog_list_prog(pl))
continue;
cnt++;
}
@@ -212,11 +238,11 @@ static int compute_effective_progs(struct cgroup *cgrp,
continue;
list_for_each_entry(pl, &p->bpf.progs[type], node) {
- if (!pl->prog)
+ if (!prog_list_prog(pl))
continue;
item = &progs->items[cnt];
- item->prog = pl->prog;
+ item->prog = prog_list_prog(pl);
bpf_cgroup_storages_assign(item->cgroup_storage,
pl->storage);
cnt++;
@@ -333,19 +359,60 @@ cleanup:
#define BPF_CGROUP_MAX_PROGS 64
+/* Pick the bpf_prog_list entry that an attach operation should reuse.
+ *
+ * Returns:
+ *  - NULL if no existing entry is to be replaced (caller adds a new one);
+ *  - the entry to overwrite: the first list entry in single-attach mode, or
+ *    the entry holding @replace_prog in multi-attach mode;
+ *  - ERR_PTR(-EINVAL) if @prog or @link is already on the list (multi mode);
+ *  - ERR_PTR(-ENOENT) if @replace_prog is given but not found.
+ */
+static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
+ struct bpf_prog *prog,
+ struct bpf_cgroup_link *link,
+ struct bpf_prog *replace_prog,
+ bool allow_multi)
+{
+ struct bpf_prog_list *pl;
+
+ /* single-attach case */
+ if (!allow_multi) {
+ if (list_empty(progs))
+ return NULL;
+ return list_first_entry(progs, typeof(*pl), node);
+ }
+
+ list_for_each_entry(pl, progs, node) {
+ if (prog && pl->prog == prog)
+ /* disallow attaching the same prog twice */
+ return ERR_PTR(-EINVAL);
+ if (link && pl->link == link)
+ /* disallow attaching the same link twice */
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* direct prog multi-attach w/ replacement case */
+ if (replace_prog) {
+ list_for_each_entry(pl, progs, node) {
+ if (pl->prog == replace_prog)
+ /* a match found */
+ return pl;
+ }
+ /* prog to replace not found for cgroup */
+ return ERR_PTR(-ENOENT);
+ }
+
+ return NULL;
+}
+
/**
- * __cgroup_bpf_attach() - Attach the program to a cgroup, and
+ * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
* propagate the change to descendants
* @cgrp: The cgroup which descendants to traverse
* @prog: A program to attach
+ * @link: A link to attach
* @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
* @type: Type of attach operation
* @flags: Option flags
*
+ * Exactly one of @prog or @link can be non-null.
* Must be called with cgroup_mutex held.
*/
-int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
- struct bpf_prog *replace_prog,
+int __cgroup_bpf_attach(struct cgroup *cgrp,
+ struct bpf_prog *prog, struct bpf_prog *replace_prog,
+ struct bpf_cgroup_link *link,
enum bpf_attach_type type, u32 flags)
{
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
@@ -353,13 +420,19 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_prog *old_prog = NULL;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
- struct bpf_prog_list *pl, *replace_pl = NULL;
+ struct bpf_prog_list *pl;
int err;
if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
/* invalid combination */
return -EINVAL;
+ if (link && (prog || replace_prog))
+ /* only either link or prog/replace_prog can be specified */
+ return -EINVAL;
+ if (!!replace_prog != !!(flags & BPF_F_REPLACE))
+ /* replace_prog implies BPF_F_REPLACE, and vice versa */
+ return -EINVAL;
if (!hierarchy_allows_attach(cgrp, type))
return -EPERM;
@@ -374,26 +447,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
return -E2BIG;
- if (flags & BPF_F_ALLOW_MULTI) {
- list_for_each_entry(pl, progs, node) {
- if (pl->prog == prog)
- /* disallow attaching the same prog twice */
- return -EINVAL;
- if (pl->prog == replace_prog)
- replace_pl = pl;
- }
- if ((flags & BPF_F_REPLACE) && !replace_pl)
- /* prog to replace not found for cgroup */
- return -ENOENT;
- } else if (!list_empty(progs)) {
- replace_pl = list_first_entry(progs, typeof(*pl), node);
- }
+ pl = find_attach_entry(progs, prog, link, replace_prog,
+ flags & BPF_F_ALLOW_MULTI);
+ if (IS_ERR(pl))
+ return PTR_ERR(pl);
- if (bpf_cgroup_storages_alloc(storage, prog))
+ if (bpf_cgroup_storages_alloc(storage, prog ? : link->link.prog))
return -ENOMEM;
- if (replace_pl) {
- pl = replace_pl;
+ if (pl) {
old_prog = pl->prog;
bpf_cgroup_storages_unlink(pl->storage);
bpf_cgroup_storages_assign(old_storage, pl->storage);
@@ -407,6 +469,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
}
pl->prog = prog;
+ pl->link = link;
bpf_cgroup_storages_assign(pl->storage, storage);
cgrp->bpf.flags[type] = saved_flags;
@@ -414,80 +477,173 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
if (err)
goto cleanup;
- static_branch_inc(&cgroup_bpf_enabled_key);
bpf_cgroup_storages_free(old_storage);
- if (old_prog) {
+ if (old_prog)
bpf_prog_put(old_prog);
- static_branch_dec(&cgroup_bpf_enabled_key);
- }
- bpf_cgroup_storages_link(storage, cgrp, type);
+ else
+ static_branch_inc(&cgroup_bpf_enabled_key);
+ bpf_cgroup_storages_link(pl->storage, cgrp, type);
return 0;
cleanup:
- /* and cleanup the prog list */
- pl->prog = old_prog;
+ if (old_prog) {
+ pl->prog = old_prog;
+ pl->link = NULL;
+ }
bpf_cgroup_storages_free(pl->storage);
bpf_cgroup_storages_assign(pl->storage, old_storage);
bpf_cgroup_storages_link(pl->storage, cgrp, type);
- if (!replace_pl) {
+ if (!old_prog) {
list_del(&pl->node);
kfree(pl);
}
return err;
}
+/* Swap updated BPF program for given link in effective program arrays across
+ * all descendant cgroups. This function is guaranteed to succeed.
+ *
+ * For every descendant of @cgrp (visited via css_for_each_descendant_pre())
+ * the link's slot index in the effective array is recomputed by walking from
+ * the descendant up through its parents and counting live entries, then the
+ * slot's prog pointer is overwritten with link->link.prog. The caller must
+ * hold cgroup_mutex (asserted through lockdep_is_held() below) and must have
+ * already stored the new program in link->link.prog.
+ */
+static void replace_effective_prog(struct cgroup *cgrp,
+ enum bpf_attach_type type,
+ struct bpf_cgroup_link *link)
+{
+ struct bpf_prog_array_item *item;
+ struct cgroup_subsys_state *css;
+ struct bpf_prog_array *progs;
+ struct bpf_prog_list *pl;
+ struct list_head *head;
+ struct cgroup *cg;
+ int pos;
+
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ if (percpu_ref_is_zero(&desc->bpf.refcnt))
+ continue;
+
+ /* find position of link in effective progs array */
+ for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
+ if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+ continue;
+
+ head = &cg->bpf.progs[type];
+ list_for_each_entry(pl, head, node) {
+ if (!prog_list_prog(pl))
+ continue;
+ if (pl->link == link)
+ goto found;
+ pos++;
+ }
+ }
+found:
+ /* cg is NULL only if the walk ran off the root without a match */
+ BUG_ON(!cg);
+ progs = rcu_dereference_protected(
+ desc->bpf.effective[type],
+ lockdep_is_held(&cgroup_mutex));
+ item = &progs->items[pos];
+ WRITE_ONCE(item->prog, link->link.prog);
+ }
+}
+
+/**
+ * __cgroup_bpf_replace() - Replace link's program and propagate the change
+ * to descendants
+ * @cgrp: The cgroup which descendants to traverse
+ * @link: A link for which to replace BPF program
+ * @new_prog: The program to install in place of the link's current one
+ *
+ * The attach type is taken from @link->type.
+ *
+ * Return: 0 on success, -EINVAL if @new_prog's program type differs from the
+ * link's current program, -ENOENT if @link is not on @cgrp's program list.
+ * On success the reference on the old program is dropped.
+ *
+ * Must be called with cgroup_mutex held.
+ */
+int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
+ struct bpf_prog *new_prog)
+{
+ struct list_head *progs = &cgrp->bpf.progs[link->type];
+ struct bpf_prog *old_prog;
+ struct bpf_prog_list *pl;
+ bool found = false;
+
+ if (link->link.prog->type != new_prog->type)
+ return -EINVAL;
+
+ list_for_each_entry(pl, progs, node) {
+ if (pl->link == link) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return -ENOENT;
+
+ /* publish the new prog on the link first; replace_effective_prog()
+ * reads link->link.prog when patching the effective arrays
+ */
+ old_prog = xchg(&link->link.prog, new_prog);
+ replace_effective_prog(cgrp, link->type, link);
+ bpf_prog_put(old_prog);
+ return 0;
+}
+
+/* Find the bpf_prog_list entry that a detach operation should remove.
+ *
+ * In single-attach mode the first list entry is returned regardless of @prog
+ * and @link, or ERR_PTR(-ENOENT) if the list is empty. In multi-attach mode
+ * the entry must match both @prog and @link exactly; ERR_PTR(-EINVAL) is
+ * returned if neither is given and ERR_PTR(-ENOENT) if no entry matches.
+ */
+static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
+ struct bpf_prog *prog,
+ struct bpf_cgroup_link *link,
+ bool allow_multi)
+{
+ struct bpf_prog_list *pl;
+
+ if (!allow_multi) {
+ if (list_empty(progs))
+ /* report error when trying to detach and nothing is attached */
+ return ERR_PTR(-ENOENT);
+
+ /* to maintain backward compatibility NONE and OVERRIDE cgroups
+ * allow detaching with invalid FD (prog==NULL) in legacy mode
+ */
+ return list_first_entry(progs, typeof(*pl), node);
+ }
+
+ if (!prog && !link)
+ /* to detach MULTI prog the user has to specify valid FD
+ * of the program or link to be detached
+ */
+ return ERR_PTR(-EINVAL);
+
+ /* find the prog or link and detach it */
+ list_for_each_entry(pl, progs, node) {
+ if (pl->prog == prog && pl->link == link)
+ return pl;
+ }
+ return ERR_PTR(-ENOENT);
+}
+
/**
- * __cgroup_bpf_detach() - Detach the program from a cgroup, and
+ * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
* propagate the change to descendants
* @cgrp: The cgroup which descendants to traverse
* @prog: A program to detach or NULL
+ * @link: A link to detach or NULL
* @type: Type of detach operation
*
+ * At most one of @prog or @link can be non-NULL.
* Must be called with cgroup_mutex held.
*/
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type)
+ struct bpf_cgroup_link *link, enum bpf_attach_type type)
{
struct list_head *progs = &cgrp->bpf.progs[type];
u32 flags = cgrp->bpf.flags[type];
- struct bpf_prog *old_prog = NULL;
struct bpf_prog_list *pl;
+ struct bpf_prog *old_prog;
int err;
- if (flags & BPF_F_ALLOW_MULTI) {
- if (!prog)
- /* to detach MULTI prog the user has to specify valid FD
- * of the program to be detached
- */
- return -EINVAL;
- } else {
- if (list_empty(progs))
- /* report error when trying to detach and nothing is attached */
- return -ENOENT;
- }
+ if (prog && link)
+ /* only one of prog or link can be specified */
+ return -EINVAL;
- if (flags & BPF_F_ALLOW_MULTI) {
- /* find the prog and detach it */
- list_for_each_entry(pl, progs, node) {
- if (pl->prog != prog)
- continue;
- old_prog = prog;
- /* mark it deleted, so it's ignored while
- * recomputing effective
- */
- pl->prog = NULL;
- break;
- }
- if (!old_prog)
- return -ENOENT;
- } else {
- /* to maintain backward compatibility NONE and OVERRIDE cgroups
- * allow detaching with invalid FD (prog==NULL)
- */
- pl = list_first_entry(progs, typeof(*pl), node);
- old_prog = pl->prog;
- pl->prog = NULL;
- }
+ pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI);
+ if (IS_ERR(pl))
+ return PTR_ERR(pl);
+
+ /* mark it deleted, so it's ignored while recomputing effective */
+ old_prog = pl->prog;
+ pl->prog = NULL;
+ pl->link = NULL;
err = update_effective_progs(cgrp, type);
if (err)
@@ -501,14 +657,15 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
if (list_empty(progs))
/* last program was detached, reset flags to zero */
cgrp->bpf.flags[type] = 0;
-
- bpf_prog_put(old_prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key);
return 0;
cleanup:
- /* and restore back old_prog */
+ /* restore back prog or link */
pl->prog = old_prog;
+ pl->link = link;
return err;
}
@@ -521,6 +678,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
struct list_head *progs = &cgrp->bpf.progs[type];
u32 flags = cgrp->bpf.flags[type];
struct bpf_prog_array *effective;
+ struct bpf_prog *prog;
int cnt, ret = 0, i;
effective = rcu_dereference_protected(cgrp->bpf.effective[type],
@@ -551,7 +709,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
i = 0;
list_for_each_entry(pl, progs, node) {
- id = pl->prog->aux->id;
+ prog = prog_list_prog(pl);
+ id = prog->aux->id;
if (copy_to_user(prog_ids + i, &id, sizeof(id)))
return -EFAULT;
if (++i == cnt)
@@ -581,8 +740,8 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
}
}
- ret = cgroup_bpf_attach(cgrp, prog, replace_prog, attr->attach_type,
- attr->attach_flags);
+ ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
+ attr->attach_type, attr->attach_flags);
if (replace_prog)
bpf_prog_put(replace_prog);
@@ -604,7 +763,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
if (IS_ERR(prog))
prog = NULL;
- ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
+ ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
if (prog)
bpf_prog_put(prog);
@@ -612,6 +771,90 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
return ret;
}
+/* bpf_link_ops.release callback for cgroup links (see bpf_cgroup_link_lops).
+ * Detaches the link from its cgroup under cgroup_mutex and then drops the
+ * cgroup reference stored in the link. Does nothing if the link has already
+ * been auto-detached (cg_link->cgroup == NULL).
+ */
+static void bpf_cgroup_link_release(struct bpf_link *link)
+{
+ struct bpf_cgroup_link *cg_link =
+ container_of(link, struct bpf_cgroup_link, link);
+
+ /* link might have been auto-detached by dying cgroup already,
+ * in that case our work is done here
+ */
+ if (!cg_link->cgroup)
+ return;
+
+ mutex_lock(&cgroup_mutex);
+
+ /* re-check cgroup under lock again */
+ if (!cg_link->cgroup) {
+ mutex_unlock(&cgroup_mutex);
+ return;
+ }
+
+ /* a detach failure is only reported via WARN_ON; it is not propagated */
+ WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
+ cg_link->type));
+
+ mutex_unlock(&cgroup_mutex);
+ cgroup_put(cg_link->cgroup);
+}
+
+/* bpf_link_ops.dealloc callback: frees the bpf_cgroup_link that embeds @link. */
+static void bpf_cgroup_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_cgroup_link *cg_link =
+ container_of(link, struct bpf_cgroup_link, link);
+
+ kfree(cg_link);
+}
+
+/* Link operations for cgroup links. The address of this table is also used
+ * to identify a bpf_link as a cgroup link (see link_update() and
+ * cgroup_bpf_replace()).
+ */
+const struct bpf_link_ops bpf_cgroup_link_lops = {
+ .release = bpf_cgroup_link_release,
+ .dealloc = bpf_cgroup_link_dealloc,
+};
+
+/* Create a cgroup bpf_link for @prog and attach it to the cgroup named by
+ * attr->link_create.target_fd, using attr->link_create.attach_type.
+ *
+ * Returns the new link FD on success or a negative errno: -EINVAL if
+ * link_create.flags is non-zero, -ENOMEM on allocation failure, or the error
+ * from cgroup_get_from_fd(), bpf_link_new_file() or cgroup_bpf_attach().
+ * On every failure after the cgroup lookup the cgroup reference is dropped.
+ */
+int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct bpf_cgroup_link *link;
+ struct file *link_file;
+ struct cgroup *cgrp;
+ int err, link_fd;
+
+ if (attr->link_create.flags)
+ return -EINVAL;
+
+ cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out_put_cgroup;
+ }
+ bpf_link_init(&link->link, &bpf_cgroup_link_lops, prog);
+ link->cgroup = cgrp;
+ link->type = attr->link_create.attach_type;
+
+ link_file = bpf_link_new_file(&link->link, &link_fd);
+ if (IS_ERR(link_file)) {
+ kfree(link);
+ err = PTR_ERR(link_file);
+ goto out_put_cgroup;
+ }
+
+ /* links are always attached in multi mode */
+ err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
+ BPF_F_ALLOW_MULTI);
+ if (err) {
+ /* link is not kfree()d here; it is handed to bpf_link_cleanup() */
+ bpf_link_cleanup(&link->link, link_file, link_fd);
+ goto out_put_cgroup;
+ }
+
+ fd_install(link_fd, link_file);
+ return link_fd;
+
+out_put_cgroup:
+ cgroup_put(cgrp);
+ return err;
+}
+
int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a616b63f23b4..e0a3b34d7039 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2175,13 +2175,6 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
-struct bpf_link {
- atomic64_t refcnt;
- const struct bpf_link_ops *ops;
- struct bpf_prog *prog;
- struct work_struct work;
-};
-
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog)
{
@@ -2195,8 +2188,8 @@ void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
* anon_inode's release() call. This helper manages marking bpf_link as
* defunct, releases anon_inode file and puts reserved FD.
*/
-static void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
- int link_fd)
+void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
+ int link_fd)
{
link->prog = NULL;
fput(link_file);
@@ -2266,6 +2259,10 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
link_type = "raw_tracepoint";
else if (link->ops == &bpf_tracing_link_lops)
link_type = "tracing";
+#ifdef CONFIG_CGROUP_BPF
+ else if (link->ops == &bpf_cgroup_link_lops)
+ link_type = "cgroup";
+#endif
else
link_type = "unknown";
@@ -3553,6 +3550,104 @@ err_put:
return err;
}
+#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
+/* Handle the BPF_LINK_CREATE command.
+ *
+ * Requires CAP_NET_ADMIN. Resolves the program type from
+ * link_create.attach_type, fetches the program from link_create.prog_fd,
+ * checks the attach type against it and, for the cgroup program types listed
+ * below, hands off to cgroup_bpf_link_attach(). Any other program type yields
+ * -EINVAL.
+ *
+ * Returns the value from cgroup_bpf_link_attach() (a link FD on success) or a
+ * negative errno. The program reference is dropped here only on failure.
+ */
+static int link_create(union bpf_attr *attr)
+{
+ enum bpf_prog_type ptype;
+ struct bpf_prog *prog;
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (CHECK_ATTR(BPF_LINK_CREATE))
+ return -EINVAL;
+
+ ptype = attach_type_to_prog_type(attr->link_create.attach_type);
+ if (ptype == BPF_PROG_TYPE_UNSPEC)
+ return -EINVAL;
+
+ prog = bpf_prog_get_type(attr->link_create.prog_fd, ptype);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ ret = bpf_prog_attach_check_attach_type(prog,
+ attr->link_create.attach_type);
+ if (ret)
+ goto err_out;
+
+ switch (ptype) {
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ case BPF_PROG_TYPE_SOCK_OPS:
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ ret = cgroup_bpf_link_attach(attr, prog);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+err_out:
+ /* ret >= 0 is a link FD; the prog reference is kept in that case */
+ if (ret < 0)
+ bpf_prog_put(prog);
+ return ret;
+}
+
+#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
+
+/* Handle the BPF_LINK_UPDATE command: replace the program behind a link.
+ *
+ * Requires CAP_NET_ADMIN. Only BPF_F_REPLACE is accepted in
+ * link_update.flags; when it is set, link_update.old_prog_fd names the
+ * program the link is expected to currently hold. Only cgroup links are
+ * supported; any other link type yields -EINVAL.
+ *
+ * Returns 0 on success or a negative errno. On success the reference on
+ * new_prog is kept for the link; on failure it is dropped. The reference on
+ * the link taken by bpf_link_get_from_fd() is always dropped before
+ * returning, including when the new_prog lookup fails.
+ */
+static int link_update(union bpf_attr *attr)
+{
+	struct bpf_prog *old_prog = NULL, *new_prog;
+	struct bpf_link *link;
+	u32 flags;
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (CHECK_ATTR(BPF_LINK_UPDATE))
+		return -EINVAL;
+
+	flags = attr->link_update.flags;
+	if (flags & ~BPF_F_REPLACE)
+		return -EINVAL;
+
+	link = bpf_link_get_from_fd(attr->link_update.link_fd);
+	if (IS_ERR(link))
+		return PTR_ERR(link);
+
+	new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
+	if (IS_ERR(new_prog)) {
+		/* no prog references held yet, only the link needs a put */
+		ret = PTR_ERR(new_prog);
+		goto out_put_link;
+	}
+
+	if (flags & BPF_F_REPLACE) {
+		old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
+		if (IS_ERR(old_prog)) {
+			ret = PTR_ERR(old_prog);
+			old_prog = NULL;
+			goto out_put_progs;
+		}
+	}
+
+#ifdef CONFIG_CGROUP_BPF
+	if (link->ops == &bpf_cgroup_link_lops) {
+		ret = cgroup_bpf_replace(link, old_prog, new_prog);
+		goto out_put_progs;
+	}
+#endif
+	ret = -EINVAL;
+
+out_put_progs:
+	if (old_prog)
+		bpf_prog_put(old_prog);
+	if (ret)
+		bpf_prog_put(new_prog);
+out_put_link:
+	/* drop the reference taken by bpf_link_get_from_fd() */
+	bpf_link_put(link);
+	return ret;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -3663,6 +3758,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_MAP_DELETE_BATCH:
err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
break;
+ case BPF_LINK_CREATE:
+ err = link_create(&attr);
+ break;
+ case BPF_LINK_UPDATE:
+ err = link_update(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 3dead0416b91..915dda3f7f19 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6303,27 +6303,58 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
#endif /* CONFIG_SOCK_CGROUP_DATA */
#ifdef CONFIG_CGROUP_BPF
-int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
- struct bpf_prog *replace_prog, enum bpf_attach_type type,
+int cgroup_bpf_attach(struct cgroup *cgrp,
+ struct bpf_prog *prog, struct bpf_prog *replace_prog,
+ struct bpf_cgroup_link *link,
+ enum bpf_attach_type type,
u32 flags)
{
int ret;
mutex_lock(&cgroup_mutex);
- ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags);
+ ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
mutex_unlock(&cgroup_mutex);
return ret;
}
+
+/* cgroup_mutex-protected wrapper around __cgroup_bpf_replace().
+ *
+ * Returns -EINVAL if @link is not a cgroup link or has already been detached
+ * from its cgroup, -EPERM if @old_prog is given and is not the link's current
+ * program, otherwise the result of __cgroup_bpf_replace().
+ */
+int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
+ struct bpf_prog *new_prog)
+{
+ struct bpf_cgroup_link *cg_link;
+ int ret;
+
+ if (link->ops != &bpf_cgroup_link_lops)
+ return -EINVAL;
+
+ cg_link = container_of(link, struct bpf_cgroup_link, link);
+
+ mutex_lock(&cgroup_mutex);
+ /* link might have been auto-released by dying cgroup, so fail */
+ if (!cg_link->cgroup) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ /* old_prog == NULL means the caller did not ask for a compare */
+ if (old_prog && link->prog != old_prog) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+ ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
+out_unlock:
+ mutex_unlock(&cgroup_mutex);
+ return ret;
+}
+
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type, u32 flags)
+ enum bpf_attach_type type)
{
int ret;
mutex_lock(&cgroup_mutex);
- ret = __cgroup_bpf_detach(cgrp, prog, type);
+ ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
mutex_unlock(&cgroup_mutex);
return ret;
}
+
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9f786a5a44ac..2e29a671d67e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -111,6 +111,8 @@ enum bpf_cmd {
BPF_MAP_LOOKUP_AND_DELETE_BATCH,
BPF_MAP_UPDATE_BATCH,
BPF_MAP_DELETE_BATCH,
+ BPF_LINK_CREATE,
+ BPF_LINK_UPDATE,
};
enum bpf_map_type {
@@ -541,7 +543,7 @@ union bpf_attr {
__u32 prog_cnt;
} query;
- struct {
+ struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
@@ -569,6 +571,24 @@ union bpf_attr {
__u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */
} task_fd_query;
+
+ struct { /* struct used by BPF_LINK_CREATE command */
+ __u32 prog_fd; /* eBPF program to attach */
+ __u32 target_fd; /* object to attach to */
+ __u32 attach_type; /* attach type */
+ __u32 flags; /* extra flags */
+ } link_create;
+
+ struct { /* struct used by BPF_LINK_UPDATE command */
+ __u32 link_fd; /* link fd */
+ /* new program fd to update link with */
+ __u32 new_prog_fd;
+ __u32 flags; /* extra flags */
+ /* expected link's program fd; is specified only if
+ * BPF_F_REPLACE flag is set in flags */
+ __u32 old_prog_fd;
+ } link_update;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 73220176728d..5cc1b0785d18 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -585,6 +585,40 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
}
+/* Thin wrapper around the BPF_LINK_CREATE command: fills the link_create
+ * part of bpf_attr with @prog_fd, @target_fd and @attach_type and issues the
+ * syscall. @opts is only validated; no option fields are read, and
+ * link_create.flags is left at zero.
+ *
+ * Returns -EINVAL if @opts fails OPTS_VALID(), otherwise the sys_bpf() result.
+ */
+int bpf_link_create(int prog_fd, int target_fd,
+ enum bpf_attach_type attach_type,
+ const struct bpf_link_create_opts *opts)
+{
+ union bpf_attr attr;
+
+ if (!OPTS_VALID(opts, bpf_link_create_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_create.prog_fd = prog_fd;
+ attr.link_create.target_fd = target_fd;
+ attr.link_create.attach_type = attach_type;
+
+ return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+}
+
+/* Thin wrapper around the BPF_LINK_UPDATE command: asks the kernel to swap
+ * the program behind @link_fd for @new_prog_fd. flags and old_prog_fd are
+ * taken from @opts and default to 0 via OPTS_GET().
+ *
+ * Returns -EINVAL if @opts fails OPTS_VALID(), otherwise the sys_bpf() result.
+ */
+int bpf_link_update(int link_fd, int new_prog_fd,
+ const struct bpf_link_update_opts *opts)
+{
+ union bpf_attr attr;
+
+ if (!OPTS_VALID(opts, bpf_link_update_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_update.link_fd = link_fd;
+ attr.link_update.new_prog_fd = new_prog_fd;
+ attr.link_update.flags = OPTS_GET(opts, flags, 0);
+ attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+
+ return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+}
+
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
__u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index b976e77316cc..46d47afdd887 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -168,6 +168,25 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
+/* Options for bpf_link_create(); currently carries no fields besides sz. */
+struct bpf_link_create_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+};
+#define bpf_link_create_opts__last_field sz
+
+LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
+ enum bpf_attach_type attach_type,
+ const struct bpf_link_create_opts *opts);
+
+/* Options for bpf_link_update(); flags and old_prog_fd are copied into the
+ * link_update part of bpf_attr.
+ */
+struct bpf_link_update_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags; /* extra flags */
+ __u32 old_prog_fd; /* expected old program FD */
+};
+#define bpf_link_update_opts__last_field old_prog_fd
+
+LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
+ const struct bpf_link_update_opts *opts);
+
struct bpf_prog_test_run_attr {
int prog_fd;
int repeat;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0638e717f502..ff9174282a8c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6978,6 +6978,12 @@ struct bpf_link {
bool disconnected;
};
+/* Replace link's underlying BPF program with the new one.
+ * Calls bpf_link_update() with the link's and program's FDs and NULL opts,
+ * so no flags and no expected old program are passed. Returns that call's
+ * result.
+ */
+int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
+{
+ return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+}
+
/* Release "ownership" of underlying BPF resource (typically, BPF program
* attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
* link, when destructed through bpf_link__destroy() call won't attempt to
@@ -7533,6 +7539,46 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
return bpf_program__attach_lsm(prog);
}
+struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) /* attach prog to cgroup_fd through a new BPF link; returns the link or ERR_PTR(negative error) */
+{
+ const struct bpf_sec_def *sec_def;
+ enum bpf_attach_type attach_type;
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link *link;
+ int prog_fd, link_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) { /* no usable program FD: refuse to attach */
+ pr_warn("program '%s': can't attach before loaded\n",
+ bpf_program__title(prog, false));
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->detach = &bpf_link__detach_fd; /* NOTE(review): presumably releases link->fd when the link is destroyed - confirm against bpf_link__detach_fd() */
+
+ attach_type = bpf_program__get_expected_attach_type(prog);
+ if (!attach_type) { /* expected attach type is 0: fall back to the attach type of the program's section definition, if one is found */
+ sec_def = find_sec_def(bpf_program__title(prog, false));
+ if (sec_def)
+ attach_type = sec_def->attach_type;
+ }
+ link_fd = bpf_link_create(prog_fd, cgroup_fd, attach_type, NULL);
+ if (link_fd < 0) {
+ link_fd = -errno; /* read errno right away, before free() and pr_warn() run */
+ free(link);
+ pr_warn("program '%s': failed to attach to cgroup: %s\n",
+ bpf_program__title(prog, false),
+ libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(link_fd);
+ }
+ link->fd = link_fd; /* success: store the new link FD in the returned link */
+ return link;
+}
+
struct bpf_link *bpf_program__attach(struct bpf_program *prog)
{
const struct bpf_sec_def *sec_def;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 55348724c355..44df1d3e7287 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -224,6 +224,8 @@ LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
+LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
+ struct bpf_program *prog);
LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
@@ -245,13 +247,17 @@ bpf_program__attach_tracepoint(struct bpf_program *prog,
LIBBPF_API struct bpf_link *
bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name);
-
LIBBPF_API struct bpf_link *
bpf_program__attach_trace(struct bpf_program *prog);
LIBBPF_API struct bpf_link *
bpf_program__attach_lsm(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
+
struct bpf_map;
+
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
+
struct bpf_insn;
/*
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index eabd3d3e689f..bb8831605b25 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -243,7 +243,11 @@ LIBBPF_0.0.8 {
bpf_link__pin;
bpf_link__pin_path;
bpf_link__unpin;
+ bpf_link__update_program;
+ bpf_link_create;
+ bpf_link_update;
bpf_map__set_initial_value;
+ bpf_program__attach_cgroup;
bpf_program__attach_lsm;
bpf_program__is_lsm;
bpf_program__set_attach_target;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
new file mode 100644
index 000000000000..6e04f8d1d15b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cgroup_link.skel.h"
+
+static __u32 duration = 0; /* NOTE(review): not referenced directly in this file; presumably consumed by the CHECK() macro - confirm */
+#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+static struct test_cgroup_link *skel = NULL; /* skeleton shared by ping_and_check() and test_cgroup_link() */
+
+int ping_and_check(int exp_calls, int exp_alt_calls) /* zero both counters, run PING_CMD once, compare counters with the expected values; 0 on match, -EINVAL on mismatch */
+{
+ skel->bss->calls = 0;
+ skel->bss->alt_calls = 0;
+ CHECK_FAIL(system(PING_CMD)); /* ping's exit status only goes to CHECK_FAIL(); the counter checks below run regardless */
+ if (CHECK(skel->bss->calls != exp_calls, "call_cnt",
+ "exp %d, got %d\n", exp_calls, skel->bss->calls))
+ return -EINVAL;
+ if (CHECK(skel->bss->alt_calls != exp_alt_calls, "alt_call_cnt",
+ "exp %d, got %d\n", exp_alt_calls, skel->bss->alt_calls))
+ return -EINVAL;
+ return 0;
+}
+
+void test_cgroup_link(void) /* end-to-end test of link-based cgroup egress attachment: attach, query, mix with legacy attach, update program, auto-detach */
+{
+ struct {
+ const char *path;
+ int fd;
+ } cgs[] = {
+ { "/cg1" },
+ { "/cg1/cg2" },
+ { "/cg1/cg2/cg3" },
+ { "/cg1/cg2/cg3/cg4" },
+ };
+ int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
+ struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
+ __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
+ int i = 0, err, prog_fd;
+ bool detach_legacy = false;
+
+ skel = test_cgroup_link__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "failed to open/load skeleton\n"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.egress);
+
+ err = setup_cgroup_environment();
+ if (CHECK(err, "cg_init", "failed: %d\n", err))
+ goto cleanup;
+
+ for (i = 0; i < cg_nr; i++) {
+ cgs[i].fd = create_and_get_cgroup(cgs[i].path);
+ if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd))
+ goto cleanup;
+ }
+
+ err = join_cgroup(cgs[last_cg].path);
+ if (CHECK(err, "cg_join", "fail: %d\n", err))
+ goto cleanup;
+
+ for (i = 0; i < cg_nr; i++) {
+ links[i] = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[i].fd);
+ if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
+ i, PTR_ERR(links[i])))
+ goto cleanup;
+ }
+
+ ping_and_check(cg_nr, 0);
+
+ /* query the number of effective progs and attach flags in root cg */
+ err = bpf_prog_query(cgs[0].fd, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL,
+ &prog_cnt);
+ CHECK_FAIL(err);
+ CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+ if (CHECK(prog_cnt != 1, "effect_cnt", "exp %d, got %d\n", 1, prog_cnt))
+ goto cleanup;
+
+ /* query the number of effective progs and attach flags in last cg */
+ err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL,
+ &prog_cnt);
+ CHECK_FAIL(err);
+ CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI); /* now checks this query's flags, not the stale root-cg value */
+ if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
+ cg_nr, prog_cnt))
+ goto cleanup;
+
+ /* query the effective prog IDs in last cg */
+ err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags,
+ prog_ids, &prog_cnt);
+ CHECK_FAIL(err);
+ CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+ if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
+ cg_nr, prog_cnt))
+ goto cleanup;
+ for (i = 1; i < prog_cnt; i++) { /* the same program is attached at every level, so all IDs must be equal */
+ CHECK(prog_ids[i - 1] != prog_ids[i], "prog_id_check",
+ "idx %d, prev id %d, cur id %d\n",
+ i, prog_ids[i - 1], prog_ids[i]);
+ }
+
+ /* detach bottom program and ping again */
+ bpf_link__destroy(links[last_cg]);
+ links[last_cg] = NULL;
+
+ ping_and_check(cg_nr - 1, 0);
+
+ /* mix in with non link-based multi-attachments */
+ err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS, BPF_F_ALLOW_MULTI);
+ if (CHECK(err, "cg_attach_legacy", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = true;
+
+ links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[last_cg].fd);
+ if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
+ PTR_ERR(links[last_cg])))
+ goto cleanup;
+
+ ping_and_check(cg_nr + 1, 0);
+
+ /* detach link */
+ bpf_link__destroy(links[last_cg]);
+ links[last_cg] = NULL;
+
+ /* detach legacy */
+ err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+ if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = false;
+
+ /* attach legacy exclusive prog attachment */
+ err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS, 0);
+ if (CHECK(err, "cg_attach_exclusive", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = true;
+
+ /* attempt to mix in with multi-attach bpf_link */
+ tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[last_cg].fd);
+ if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+ bpf_link__destroy(tmp_link);
+ goto cleanup;
+ }
+
+ ping_and_check(cg_nr, 0);
+
+ /* detach */
+ err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+ if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = false;
+
+ ping_and_check(cg_nr - 1, 0);
+
+ /* attach back link-based one */
+ links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[last_cg].fd);
+ if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
+ PTR_ERR(links[last_cg])))
+ goto cleanup;
+
+ ping_and_check(cg_nr, 0);
+
+ /* check legacy exclusive prog can't be attached */
+ err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS, 0);
+ if (CHECK(!err, "cg_attach_exclusive", "unexpected success\n")) {
+ bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+ goto cleanup;
+ }
+
+ /* replace BPF programs inside their links for all but first link */
+ for (i = 1; i < cg_nr; i++) {
+ err = bpf_link__update_program(links[i], skel->progs.egress_alt);
+ if (CHECK(err, "prog_upd", "link #%d\n", i))
+ goto cleanup;
+ }
+
+ ping_and_check(1, cg_nr - 1);
+
+ /* Attempt program update with wrong expected BPF program */
+ link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress_alt);
+ link_upd_opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(links[0]),
+ bpf_program__fd(skel->progs.egress_alt),
+ &link_upd_opts);
+ if (CHECK(err == 0 || errno != EPERM, "prog_cmpxchg1",
+ "unexpectedly succeeded, err %d, errno %d\n", err, -errno))
+ goto cleanup;
+
+ /* Compare-exchange single link program from egress to egress_alt */
+ link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress);
+ link_upd_opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(links[0]),
+ bpf_program__fd(skel->progs.egress_alt),
+ &link_upd_opts);
+ if (CHECK(err, "prog_cmpxchg2", "errno %d\n", -errno))
+ goto cleanup;
+
+ /* ping */
+ ping_and_check(0, cg_nr);
+
+ /* close cgroup FDs before detaching links */
+ for (i = 0; i < cg_nr; i++) {
+ if (cgs[i].fd > 0) {
+ close(cgs[i].fd);
+ cgs[i].fd = -1;
+ }
+ }
+
+ /* BPF programs should still get called */
+ ping_and_check(0, cg_nr);
+
+ /* leave cgroup and remove them, don't detach programs */
+ cleanup_cgroup_environment();
+
+ /* BPF programs should have been auto-detached */
+ ping_and_check(0, 0);
+
+cleanup:
+ if (detach_legacy)
+ bpf_prog_detach2(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS);
+
+ for (i = 0; i < cg_nr; i++) {
+ if (!IS_ERR(links[i])) /* skip ERR_PTR entries left behind by a failed attach */
+ bpf_link__destroy(links[i]);
+ }
+ test_cgroup_link__destroy(skel);
+
+ for (i = 0; i < cg_nr; i++) {
+ if (cgs[i].fd > 0) /* 0 is the never-created default from the initializer, -1 marks an FD already closed above */
+ close(cgs[i].fd);
+ }
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_link.c b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
new file mode 100644
index 000000000000..77e47b9e4446
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int calls = 0; /* bumped by egress(); reset and read from user space through skel->bss->calls */
+int alt_calls = 0; /* bumped by egress_alt(); reset and read from user space through skel->bss->alt_calls */
+
+SEC("cgroup_skb/egress1")
+int egress(struct __sk_buff *skb) /* counts each invocation in calls; skb itself is not inspected */
+{
+ __sync_fetch_and_add(&calls, 1);
+ return 1; /* NOTE(review): presumably 1 lets the packet through for cgroup_skb programs - confirm */
+}
+
+SEC("cgroup_skb/egress2")
+int egress_alt(struct __sk_buff *skb) /* same as egress() but counts in alt_calls, so the test can tell which program ran */
+{
+ __sync_fetch_and_add(&alt_calls, 1);
+ return 1; /* NOTE(review): presumably 1 lets the packet through for cgroup_skb programs - confirm */
+}
+
+char _license[] SEC("license") = "GPL";
+