summaryrefslogtreecommitdiffstats
path: root/kernel/bpf/cgroup.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-16 00:04:25 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-16 00:04:25 +0200
commit9a76aba02a37718242d7cdc294f0a3901928aa57 (patch)
tree2040d038f85d2120f21af83b0793efd5af1864e3 /kernel/bpf/cgroup.c
parentx86: i8259: Add missing include file (diff)
parentbpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT" (diff)
downloadlinux-9a76aba02a37718242d7cdc294f0a3901928aa57.tar.xz
linux-9a76aba02a37718242d7cdc294f0a3901928aa57.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: - Gustavo A. R. Silva keeps working on the implicit switch fallthru changes. - Support 802.11ax High-Efficiency wireless in cfg80211 et al, From Luca Coelho. - Re-enable ASPM in r8169, from Kai-Heng Feng. - Add virtual XFRM interfaces, which avoids all of the limitations of existing IPSEC tunnels. From Steffen Klassert. - Convert GRO over to use a hash table, so that when we have many flows active we don't traverse a long list during accumluation. - Many new self tests for routing, TC, tunnels, etc. Too many contributors to mention them all, but I'm really happy to keep seeing this stuff. - Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu. - Lots of cleanups and fixes in L2TP code from Guillaume Nault. - Add IPSEC offload support to netdevsim, from Shannon Nelson. - Add support for slotting with non-uniform distribution to netem packet scheduler, from Yousuk Seung. - Add UDP GSO support to mlx5e, from Boris Pismenny. - Support offloading of Team LAG in NFP, from John Hurley. - Allow to configure TX queue selection based upon RX queue, from Amritha Nambiar. - Support ethtool ring size configuration in aquantia, from Anton Mikaev. - Support DSCP and flowlabel per-transport in SCTP, from Xin Long. - Support list based batching and stack traversal of SKBs, this is very exciting work. From Edward Cree. - Busyloop optimizations in vhost_net, from Toshiaki Makita. - Introduce the ETF qdisc, which allows time based transmissions. IGB can offload this in hardware. From Vinicius Costa Gomes. - Add parameter support to devlink, from Moshe Shemesh. - Several multiplication and division optimizations for BPF JIT in nfp driver, from Jiong Wang. - Lots of prepatory work to make more of the packet scheduler layer lockless, when possible, from Vlad Buslov. - Add ACK filter and NAT awareness to sch_cake packet scheduler, from Toke Høiland-Jørgensen. - Support regions and region snapshots in devlink, from Alex Vesker. - Allow to attach XDP programs to both HW and SW at the same time on a given device, with initial support in nfp. From Jakub Kicinski. - Add TLS RX offload and support in mlx5, from Ilya Lesokhin. - Use PHYLIB in r8169 driver, from Heiner Kallweit. - All sorts of changes to support Spectrum 2 in mlxsw driver, from Ido Schimmel. - PTP support in mv88e6xxx DSA driver, from Andrew Lunn. - Make TCP_USER_TIMEOUT socket option more accurate, from Jon Maxwell. - Support for templates in packet scheduler classifier, from Jiri Pirko. - IPV6 support in RDS, from Ka-Cheong Poon. - Native tproxy support in nf_tables, from Máté Eckl. - Maintain IP fragment queue in an rbtree, but optimize properly for in-order frags. From Peter Oskolkov. - Improvde handling of ACKs on hole repairs, from Yuchung Cheng" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits) bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT" hv/netvsc: Fix NULL dereference at single queue mode fallback net: filter: mark expected switch fall-through xen-netfront: fix warn message as irq device name has '/' cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0 net: dsa: mv88e6xxx: missing unlock on error path rds: fix building with IPV6=m inet/connection_sock: prefer _THIS_IP_ to current_text_addr net: dsa: mv88e6xxx: bitwise vs logical bug net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd() ieee802154: hwsim: using right kind of iteration net: hns3: Add vlan filter setting by ethtool command -K net: hns3: Set tx ring' tc info when netdev is up net: hns3: Remove tx ring BD len register in hns3_enet net: hns3: Fix desc num set to default when setting channel net: hns3: Fix for phy link issue when using marvell phy driver net: hns3: Fix for information of phydev lost problem when down/up net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero net: hns3: Add support for serdes loopback selftest bnxt_en: take coredump_record structure off stack ...
Diffstat (limited to 'kernel/bpf/cgroup.c')
-rw-r--r--kernel/bpf/cgroup.c162
1 files changed, 92 insertions, 70 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 3d83ee7df381..6a7d931bbc55 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -34,6 +34,8 @@ void cgroup_bpf_put(struct cgroup *cgrp)
list_for_each_entry_safe(pl, tmp, progs, node) {
list_del(&pl->node);
bpf_prog_put(pl->prog);
+ bpf_cgroup_storage_unlink(pl->storage);
+ bpf_cgroup_storage_free(pl->storage);
kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key);
}
@@ -95,7 +97,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type,
struct bpf_prog_array __rcu **array)
{
- struct bpf_prog_array __rcu *progs;
+ struct bpf_prog_array *progs;
struct bpf_prog_list *pl;
struct cgroup *p = cgrp;
int cnt = 0;
@@ -115,18 +117,20 @@ static int compute_effective_progs(struct cgroup *cgrp,
cnt = 0;
p = cgrp;
do {
- if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
- list_for_each_entry(pl,
- &p->bpf.progs[type], node) {
- if (!pl->prog)
- continue;
- rcu_dereference_protected(progs, 1)->
- progs[cnt++] = pl->prog;
- }
- p = cgroup_parent(p);
- } while (p);
+ if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+ continue;
+
+ list_for_each_entry(pl, &p->bpf.progs[type], node) {
+ if (!pl->prog)
+ continue;
- *array = progs;
+ progs->items[cnt].prog = pl->prog;
+ progs->items[cnt].cgroup_storage = pl->storage;
+ cnt++;
+ }
+ } while ((p = cgroup_parent(p)));
+
+ rcu_assign_pointer(*array, progs);
return 0;
}
@@ -173,6 +177,45 @@ cleanup:
return -ENOMEM;
}
+static int update_effective_progs(struct cgroup *cgrp,
+ enum bpf_attach_type type)
+{
+ struct cgroup_subsys_state *css;
+ int err;
+
+ /* allocate and recompute effective prog arrays */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ err = compute_effective_progs(desc, type, &desc->bpf.inactive);
+ if (err)
+ goto cleanup;
+ }
+
+ /* all allocations were successful. Activate all prog arrays */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ activate_effective_progs(desc, type, desc->bpf.inactive);
+ desc->bpf.inactive = NULL;
+ }
+
+ return 0;
+
+cleanup:
+ /* oom while computing effective. Free all computed effective arrays
+ * since they were not activated
+ */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ bpf_prog_array_free(desc->bpf.inactive);
+ desc->bpf.inactive = NULL;
+ }
+
+ return err;
+}
+
#define BPF_CGROUP_MAX_PROGS 64
/**
@@ -189,7 +232,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
{
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
- struct cgroup_subsys_state *css;
+ struct bpf_cgroup_storage *storage, *old_storage = NULL;
struct bpf_prog_list *pl;
bool pl_was_allocated;
int err;
@@ -211,72 +254,71 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
return -E2BIG;
+ storage = bpf_cgroup_storage_alloc(prog);
+ if (IS_ERR(storage))
+ return -ENOMEM;
+
if (flags & BPF_F_ALLOW_MULTI) {
- list_for_each_entry(pl, progs, node)
- if (pl->prog == prog)
+ list_for_each_entry(pl, progs, node) {
+ if (pl->prog == prog) {
/* disallow attaching the same prog twice */
+ bpf_cgroup_storage_free(storage);
return -EINVAL;
+ }
+ }
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
- if (!pl)
+ if (!pl) {
+ bpf_cgroup_storage_free(storage);
return -ENOMEM;
+ }
+
pl_was_allocated = true;
pl->prog = prog;
+ pl->storage = storage;
list_add_tail(&pl->node, progs);
} else {
if (list_empty(progs)) {
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
- if (!pl)
+ if (!pl) {
+ bpf_cgroup_storage_free(storage);
return -ENOMEM;
+ }
pl_was_allocated = true;
list_add_tail(&pl->node, progs);
} else {
pl = list_first_entry(progs, typeof(*pl), node);
old_prog = pl->prog;
+ old_storage = pl->storage;
+ bpf_cgroup_storage_unlink(old_storage);
pl_was_allocated = false;
}
pl->prog = prog;
+ pl->storage = storage;
}
cgrp->bpf.flags[type] = flags;
- /* allocate and recompute effective prog arrays */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- err = compute_effective_progs(desc, type, &desc->bpf.inactive);
- if (err)
- goto cleanup;
- }
-
- /* all allocations were successful. Activate all prog arrays */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- activate_effective_progs(desc, type, desc->bpf.inactive);
- desc->bpf.inactive = NULL;
- }
+ err = update_effective_progs(cgrp, type);
+ if (err)
+ goto cleanup;
static_branch_inc(&cgroup_bpf_enabled_key);
+ if (old_storage)
+ bpf_cgroup_storage_free(old_storage);
if (old_prog) {
bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key);
}
+ bpf_cgroup_storage_link(storage, cgrp, type);
return 0;
cleanup:
- /* oom while computing effective. Free all computed effective arrays
- * since they were not activated
- */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- bpf_prog_array_free(desc->bpf.inactive);
- desc->bpf.inactive = NULL;
- }
-
/* and cleanup the prog list */
pl->prog = old_prog;
+ bpf_cgroup_storage_free(pl->storage);
+ pl->storage = old_storage;
+ bpf_cgroup_storage_link(old_storage, cgrp, type);
if (pl_was_allocated) {
list_del(&pl->node);
kfree(pl);
@@ -299,7 +341,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct list_head *progs = &cgrp->bpf.progs[type];
u32 flags = cgrp->bpf.flags[type];
struct bpf_prog *old_prog = NULL;
- struct cgroup_subsys_state *css;
struct bpf_prog_list *pl;
int err;
@@ -338,25 +379,14 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
pl->prog = NULL;
}
- /* allocate and recompute effective prog arrays */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- err = compute_effective_progs(desc, type, &desc->bpf.inactive);
- if (err)
- goto cleanup;
- }
-
- /* all allocations were successful. Activate all prog arrays */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- activate_effective_progs(desc, type, desc->bpf.inactive);
- desc->bpf.inactive = NULL;
- }
+ err = update_effective_progs(cgrp, type);
+ if (err)
+ goto cleanup;
/* now can actually delete it from this cgroup list */
list_del(&pl->node);
+ bpf_cgroup_storage_unlink(pl->storage);
+ bpf_cgroup_storage_free(pl->storage);
kfree(pl);
if (list_empty(progs))
/* last program was detached, reset flags to zero */
@@ -367,16 +397,6 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
return 0;
cleanup:
- /* oom while computing effective. Free all computed effective arrays
- * since they were not activated
- */
- css_for_each_descendant_pre(css, &cgrp->self) {
- struct cgroup *desc = container_of(css, struct cgroup, self);
-
- bpf_prog_array_free(desc->bpf.inactive);
- desc->bpf.inactive = NULL;
- }
-
/* and restore back old_prog */
pl->prog = old_prog;
return err;
@@ -655,6 +675,8 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_map_delete_elem_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
+ case BPF_FUNC_get_local_storage:
+ return &bpf_get_local_storage_proto;
case BPF_FUNC_trace_printk:
if (capable(CAP_SYS_ADMIN))
return bpf_get_trace_printk_proto();