diff options
author | Lennart Poettering <lennart@poettering.net> | 2021-06-09 09:47:23 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-09 09:47:23 +0200 |
commit | bead169fe0f182a98cc8b55a63b3a12a5049ab45 (patch) | |
tree | 5425eea35c12cc910768ed8907fe89f6cc366ff6 /src | |
parent | Merge pull request #19852 from yuwata/network-stable-secret (diff) | |
parent | bpf-firewall: close gap when updating the firewall (diff) | |
download | systemd-bead169fe0f182a98cc8b55a63b3a12a5049ab45.tar.xz systemd-bead169fe0f182a98cc8b55a63b3a12a5049ab45.zip |
Merge pull request #19851 from poettering/bpf-firewall-tweaks
close bpf firewall reload gap
Diffstat (limited to 'src')
-rw-r--r-- | src/core/bpf-firewall.c | 52 | ||||
-rw-r--r-- | src/core/bpf-firewall.h | 2 | ||||
-rw-r--r-- | src/core/bpf-socket-bind.c (renamed from src/core/socket-bind.c) | 93 | ||||
-rw-r--r-- | src/core/bpf-socket-bind.h | 15 | ||||
-rw-r--r-- | src/core/cgroup.c | 6 | ||||
-rw-r--r-- | src/core/load-fragment.c | 2 | ||||
-rw-r--r-- | src/core/meson.build | 8 | ||||
-rw-r--r-- | src/core/socket-bind.h | 15 | ||||
-rw-r--r-- | src/core/unit-serialize.c | 29 | ||||
-rw-r--r-- | src/core/unit.c | 26 | ||||
-rw-r--r-- | src/core/unit.h | 5 | ||||
-rw-r--r-- | src/shared/bpf-dlopen.c | 3 | ||||
-rw-r--r-- | src/shared/bpf-link.c | 16 | ||||
-rw-r--r-- | src/shared/bpf-link.h | 4 | ||||
-rw-r--r-- | src/shared/bpf-program.c | 150 | ||||
-rw-r--r-- | src/shared/bpf-program.h | 18 | ||||
-rw-r--r-- | src/test/test-socket-bind.c | 4 |
17 files changed, 322 insertions, 126 deletions
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c index 2a41bffee6..9317edeb4c 100644 --- a/src/core/bpf-firewall.c +++ b/src/core/bpf-firewall.c @@ -587,8 +587,6 @@ int bpf_firewall_compile(Unit *u) { return 0; } -DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(filter_prog_hash_ops, void, trivial_hash_func, trivial_compare_func, BPFProgram, bpf_program_unref); - static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) { char **bpf_fs_path; @@ -606,7 +604,7 @@ static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set if (r < 0) return log_unit_error_errno(u, r, "Loading of ingress BPF program %s failed: %m", *bpf_fs_path); - r = set_ensure_consume(set, &filter_prog_hash_ops, TAKE_PTR(prog)); + r = set_ensure_consume(set, &bpf_program_hash_ops, TAKE_PTR(prog)); if (r < 0) return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m"); } @@ -658,9 +656,10 @@ static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, S return log_unit_error_errno(u, r, "Attaching custom egress BPF program to cgroup %s failed: %m", path); /* Remember that these BPF programs are installed now. */ - r = set_ensure_put(set_installed, &filter_prog_hash_ops, prog); + r = set_ensure_put(set_installed, &bpf_program_hash_ops, prog); if (r < 0) return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m"); + bpf_program_ref(prog); } @@ -668,6 +667,7 @@ static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, S } int bpf_firewall_install(Unit *u) { + _cleanup_(bpf_program_unrefp) BPFProgram *ip_bpf_ingress_uninstall = NULL, *ip_bpf_egress_uninstall = NULL; _cleanup_free_ char *path = NULL; CGroupContext *cc; int r, supported; @@ -700,10 +700,20 @@ int bpf_firewall_install(Unit *u) { flags = supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI ? BPF_F_ALLOW_MULTI : 0; - /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program, to - * minimize the time window when we don't account for IP traffic. */ - u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed); - u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed); + if (FLAGS_SET(flags, BPF_F_ALLOW_MULTI)) { + /* If we have BPF_F_ALLOW_MULTI, then let's clear the fields, but destroy the programs only + * after attaching the new programs, so that there's no time window where neither program is + * attached. (There will be a program where both are attached, but that's OK, since this is a + * security feature where we rather want to lock down too much than too little */ + ip_bpf_egress_uninstall = TAKE_PTR(u->ip_bpf_egress_installed); + ip_bpf_ingress_uninstall = TAKE_PTR(u->ip_bpf_ingress_installed); + } else { + /* If we don't have BPF_F_ALLOW_MULTI then unref the old BPF programs (which will implicitly + * detach them) right before attaching the new program, to minimize the time window when we + * don't account for IP traffic. */ + u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed); + u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed); + } if (u->ip_bpf_egress) { r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags); @@ -722,6 +732,10 @@ int bpf_firewall_install(Unit *u) { u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress); } + /* And now, definitely get rid of the old programs, and detach them */ + ip_bpf_egress_uninstall = bpf_program_unref(ip_bpf_egress_uninstall); + ip_bpf_ingress_uninstall = bpf_program_unref(ip_bpf_ingress_uninstall); + r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed); if (r < 0) return r; @@ -902,3 +916,25 @@ void emit_bpf_firewall_warning(Unit *u) { warned = true; } } + +void bpf_firewall_close(Unit *u) { + assert(u); + + u->ip_accounting_ingress_map_fd = safe_close(u->ip_accounting_ingress_map_fd); + u->ip_accounting_egress_map_fd = safe_close(u->ip_accounting_egress_map_fd); + + u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd); + u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd); + u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd); + u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd); + + u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress); + u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed); + u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress); + u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed); + + u->ip_bpf_custom_ingress = set_free(u->ip_bpf_custom_ingress); + u->ip_bpf_custom_egress = set_free(u->ip_bpf_custom_egress); + u->ip_bpf_custom_ingress_installed = set_free(u->ip_bpf_custom_ingress_installed); + u->ip_bpf_custom_egress_installed = set_free(u->ip_bpf_custom_egress_installed); +} diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h index 08d7742193..58b401f834 100644 --- a/src/core/bpf-firewall.h +++ b/src/core/bpf-firewall.h @@ -21,3 +21,5 @@ int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_ int bpf_firewall_reset_accounting(int map_fd); void emit_bpf_firewall_warning(Unit *u); + +void bpf_firewall_close(Unit *u); diff --git a/src/core/socket-bind.c b/src/core/bpf-socket-bind.c index 352b47ad9f..66c82d5469 100644 --- a/src/core/socket-bind.c +++ b/src/core/bpf-socket-bind.c @@ -5,7 +5,7 @@ #endif #include "fd-util.h" -#include "socket-bind.h" +#include "bpf-socket-bind.h" #if BPF_FRAMEWORK /* libbpf, clang, llvm and bpftool compile time dependencies are satisfied */ @@ -24,20 +24,23 @@ static struct socket_bind_bpf *socket_bind_bpf_free(struct socket_bind_bpf *obj) DEFINE_TRIVIAL_CLEANUP_FUNC(struct socket_bind_bpf *, socket_bind_bpf_free); static int update_rules_map( - int map_fd, CGroupSocketBindItem *head) { + int map_fd, + CGroupSocketBindItem *head) { + CGroupSocketBindItem *item; uint32_t i = 0; assert(map_fd >= 0); LIST_FOREACH(socket_bind_items, item, head) { - const uint32_t key = i++; struct socket_bind_rule val = { .address_family = (uint32_t) item->address_family, .nr_ports = item->nr_ports, .port_min = item->port_min, }; + uint32_t key = i++; + if (sym_bpf_map_update_elem(map_fd, &key, &val, BPF_ANY) != 0) return -errno; } @@ -46,15 +49,19 @@ static int update_rules_map( } static int prepare_socket_bind_bpf( - Unit *u, CGroupSocketBindItem *allow, CGroupSocketBindItem *deny, struct socket_bind_bpf **ret_obj) { - _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = 0; - uint32_t allow_count = 0, deny_count = 0; + Unit *u, + CGroupSocketBindItem *allow, + CGroupSocketBindItem *deny, + struct socket_bind_bpf **ret_obj) { + + _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL; + size_t allow_count = 0, deny_count = 0; int allow_map_fd, deny_map_fd, r; CGroupSocketBindItem *item; assert(ret_obj); - LIST_FOREACH(socket_bind_items, item, allow) + LIST_FOREACH(socket_bind_items, item, allow) allow_count++; LIST_FOREACH(socket_bind_items, item, deny) @@ -107,41 +114,36 @@ static int prepare_socket_bind_bpf( return 0; } -int socket_bind_supported(void) { +int bpf_socket_bind_supported(void) { _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL; + int r; - int r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); + r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); if (r < 0) - return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m"); - + return log_debug_errno(r, "Can't determine whether the unified hierarchy is used: %m"); if (r == 0) { - log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), - "Not running with unified cgroup hierarchy, BPF is not supported"); - return 0; + log_debug("Not running with unified cgroup hierarchy, BPF is not supported"); + return false; } - r = dlopen_bpf(); - if (r < 0) { - log_info_errno(r, "Could not load libbpf: %m"); - return 0; - } + if (dlopen_bpf() < 0) + return false; if (!sym_bpf_probe_prog_type(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, /*ifindex=*/0)) { - log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), - "BPF program type cgroup_sock_addr is not supported"); - return 0; + log_debug("BPF program type cgroup_sock_addr is not supported"); + return false; } r = prepare_socket_bind_bpf(/*unit=*/NULL, /*allow_rules=*/NULL, /*deny_rules=*/NULL, &obj); if (r < 0) { log_debug_errno(r, "BPF based socket_bind is not supported: %m"); - return 0; + return false; } - return can_link_bpf_program(obj->progs.sd_bind4); + return bpf_can_link_program(obj->progs.sd_bind4); } -int socket_bind_add_initial_link_fd(Unit *u, int fd) { +int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd) { int r; assert(u); @@ -167,6 +169,8 @@ static int socket_bind_install_impl(Unit *u) { CGroupContext *cc; int r; + assert(u); + cc = unit_get_cgroup_context(u); if (!cc) return 0; @@ -184,20 +188,19 @@ static int socket_bind_install_impl(Unit *u) { cgroup_fd = open(cgroup_path, O_RDONLY | O_CLOEXEC, 0); if (cgroup_fd < 0) - return log_unit_error_errno( - u, errno, "Failed to open cgroup=%s for reading", cgroup_path); + return log_unit_error_errno(u, errno, "Failed to open cgroup=%s for reading: %m", cgroup_path); ipv4 = sym_bpf_program__attach_cgroup(obj->progs.sd_bind4, cgroup_fd); r = sym_libbpf_get_error(ipv4); if (r != 0) - return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program", - sym_bpf_program__name(obj->progs.sd_bind4)); + return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program: %m", + sym_bpf_program__name(obj->progs.sd_bind4)); ipv6 = sym_bpf_program__attach_cgroup(obj->progs.sd_bind6, cgroup_fd); r = sym_libbpf_get_error(ipv6); if (r != 0) - return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program", - sym_bpf_program__name(obj->progs.sd_bind6)); + return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program: %m", + sym_bpf_program__name(obj->progs.sd_bind6)); u->ipv4_socket_bind_link = TAKE_PTR(ipv4); u->ipv6_socket_bind_link = TAKE_PTR(ipv6); @@ -205,43 +208,45 @@ static int socket_bind_install_impl(Unit *u) { return 0; } -int socket_bind_install(Unit *u) { - int r = socket_bind_install_impl(u); +int bpf_socket_bind_install(Unit *u) { + int r; + + assert(u); + + r = socket_bind_install_impl(u); if (r == -ENOMEM) return r; fdset_close(u->initial_socket_bind_link_fds); - return r; } -int serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) { +int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) { int r; assert(u); - r = serialize_bpf_link(f, fds, "ipv4-socket-bind-bpf-link", u->ipv4_socket_bind_link); + r = bpf_serialize_link(f, fds, "ipv4-socket-bind-bpf-link", u->ipv4_socket_bind_link); if (r < 0) return r; - return serialize_bpf_link(f, fds, "ipv6-socket-bind-bpf-link", u->ipv6_socket_bind_link); + return bpf_serialize_link(f, fds, "ipv6-socket-bind-bpf-link", u->ipv6_socket_bind_link); } #else /* ! BPF_FRAMEWORK */ -int socket_bind_supported(void) { - return 0; +int bpf_socket_bind_supported(void) { + return false; } -int socket_bind_add_initial_link_fd(Unit *u, int fd) { +int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd) { return 0; } -int socket_bind_install(Unit *u) { - log_unit_debug(u, "Failed to install socket bind: BPF framework is not supported"); - return 0; +int bpf_socket_bind_install(Unit *u) { + return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "Failed to install socket bind: BPF framework is not supported"); } -int serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) { +int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) { return 0; } #endif diff --git a/src/core/bpf-socket-bind.h b/src/core/bpf-socket-bind.h new file mode 100644 index 0000000000..c8c75adaf6 --- /dev/null +++ b/src/core/bpf-socket-bind.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +#include "fdset.h" +#include "unit.h" + +int bpf_socket_bind_supported(void); + +/* Add BPF link fd created before daemon-reload or daemon-reexec. FDs will be closed at the end of + * socket_bind_install. */ +int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd); + +int bpf_socket_bind_install(Unit *u); + +int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 7fde1efce4..3cec8a5786 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -10,6 +10,7 @@ #include "bpf-devices.h" #include "bpf-firewall.h" #include "bpf-foreign.h" +#include "bpf-socket-bind.h" #include "btrfs-util.h" #include "bus-error.h" #include "cgroup-setup.h" @@ -26,7 +27,6 @@ #include "percent-util.h" #include "process-util.h" #include "procfs-util.h" -#include "socket-bind.h" #include "special.h" #include "stat-util.h" #include "stdio-util.h" @@ -1096,7 +1096,7 @@ static void cgroup_apply_firewall(Unit *u) { static void cgroup_apply_socket_bind(Unit *u) { assert(u); - (void) socket_bind_install(u); + (void) bpf_socket_bind_install(u); } static int cgroup_apply_devices(Unit *u) { @@ -3126,7 +3126,7 @@ static int cg_bpf_mask_supported(CGroupMask *ret) { mask |= CGROUP_MASK_BPF_FOREIGN; /* BPF-based bind{4|6} hooks */ - r = socket_bind_supported(); + r = bpf_socket_bind_supported(); if (r > 0) mask |= CGROUP_MASK_BPF_SOCKET_BIND; diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index c6fca7135c..46b6549d16 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -20,6 +20,7 @@ #include "alloc-util.h" #include "bpf-firewall.h" #include "bpf-program.h" +#include "bpf-socket-bind.h" #include "bus-error.h" #include "bus-internal.h" #include "bus-util.h" @@ -55,7 +56,6 @@ #endif #include "securebits-util.h" #include "signal-util.h" -#include "socket-bind.h" #include "socket-netlink.h" #include "specifier.h" #include "stat-util.h" diff --git a/src/core/meson.build b/src/core/meson.build index e696d27727..f0d2c6f642 100644 --- a/src/core/meson.build +++ b/src/core/meson.build @@ -13,6 +13,8 @@ libcore_sources = ''' bpf-firewall.h bpf-foreign.c bpf-foreign.h + bpf-socket-bind.c + bpf-socket-bind.h cgroup.c cgroup.h core-varlink.c @@ -83,10 +85,10 @@ libcore_sources = ''' load-fragment.h locale-setup.c locale-setup.h - manager.c - manager.h manager-dump.c manager-dump.h + manager.c + manager.h mount.c mount.h namespace.c @@ -107,8 +109,6 @@ libcore_sources = ''' slice.h smack-setup.c smack-setup.h - socket-bind.c - socket-bind.h socket.c socket.h swap.c diff --git a/src/core/socket-bind.h b/src/core/socket-bind.h deleted file mode 100644 index 2a6e71a9b9..0000000000 --- a/src/core/socket-bind.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1+ */ -#pragma once - -#include "fdset.h" -#include "unit.h" - -int socket_bind_supported(void); - -/* Add BPF link fd created before daemon-reload or daemon-reexec. - * FDs will be closed at the end of socket_bind_install. */ -int socket_bind_add_initial_link_fd(Unit *u, int fd); - -int socket_bind_install(Unit *u); - -int serialize_socket_bind(Unit *u, FILE *f, FDSet *fds); diff --git a/src/core/unit-serialize.c b/src/core/unit-serialize.c index 4da69769a6..daf7c59cc1 100644 --- a/src/core/unit-serialize.c +++ b/src/core/unit-serialize.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include "bpf-socket-bind.h" #include "bus-util.h" #include "dbus.h" #include "fileio-label.h" @@ -7,7 +8,6 @@ #include "format-util.h" #include "parse-util.h" #include "serialize.h" -#include "socket-bind.h" #include "string-table.h" #include "unit-serialize.h" #include "user-util.h" @@ -164,7 +164,12 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool switching_root) { (void) serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask); (void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask); - (void) serialize_socket_bind(u, f, fds); + (void) bpf_serialize_socket_bind(u, f, fds); + + (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-ingress-installed", u->ip_bpf_ingress_installed); + (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-egress-installed", u->ip_bpf_egress_installed); + (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-ingress-installed", u->ip_bpf_custom_ingress_installed); + (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-egress-installed", u->ip_bpf_custom_egress_installed); if (uid_is_valid(u->ref_uid)) (void) serialize_item_format(f, "ref-uid", UID_FMT, u->ref_uid); @@ -385,16 +390,28 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { else { if (fdset_remove(fds, fd) < 0) { log_unit_debug(u, "Failed to remove %s value=%d from fdset", l, fd); - continue; } - (void) socket_bind_add_initial_link_fd(u, fd); + (void) bpf_socket_bind_add_initial_link_fd(u, fd); } continue; - } - else if (streq(l, "ref-uid")) { + } else if (streq(l, "ip-bpf-ingress-installed")) { + (void) bpf_program_deserialize_attachment(v, fds, &u->ip_bpf_ingress_installed); + continue; + } else if (streq(l, "ip-bpf-egress-installed")) { + (void) bpf_program_deserialize_attachment(v, fds, &u->ip_bpf_egress_installed); + continue; + + } else if (streq(l, "ip-bpf-custom-ingress-installed")) { + (void) bpf_program_deserialize_attachment_set(v, fds, &u->ip_bpf_custom_ingress_installed); + continue; + } else if (streq(l, "ip-bpf-custom-egress-installed")) { + (void) bpf_program_deserialize_attachment_set(v, fds, &u->ip_bpf_custom_egress_installed); + continue; + + } else if (streq(l, "ref-uid")) { uid_t uid; r = parse_uid(v, &uid); diff --git a/src/core/unit.c b/src/core/unit.c index c6b17afa51..de407d20a8 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -12,6 +12,7 @@ #include "alloc-util.h" #include "bpf-firewall.h" #include "bpf-foreign.h" +#include "bpf-socket-bind.h" #include "bus-common-errors.h" #include "bus-util.h" #include "cgroup-setup.h" @@ -41,7 +42,6 @@ #include "rm-rf.h" #include "set.h" #include "signal-util.h" -#include "socket-bind.h" #include "sparse-endian.h" #include "special.h" #include "specifier.h" @@ -114,6 +114,9 @@ Unit* unit_new(Manager *m, size_t size) { u->ip_accounting_ingress_map_fd = -1; u->ip_accounting_egress_map_fd = -1; + for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++) + u->io_accounting_last[i] = UINT64_MAX; + u->ipv4_allow_map_fd = -1; u->ipv6_allow_map_fd = -1; u->ipv4_deny_map_fd = -1; @@ -124,9 +127,6 @@ Unit* unit_new(Manager *m, size_t size) { u->start_ratelimit = (RateLimit) { m->default_start_limit_interval, m->default_start_limit_burst }; u->auto_start_stop_ratelimit = (RateLimit) { 10 * USEC_PER_SEC, 16 }; - for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++) - u->io_accounting_last[i] = UINT64_MAX; - return u; } @@ -757,23 +757,7 @@ Unit* unit_free(Unit *u) { if (u->in_stop_when_bound_queue) LIST_REMOVE(stop_when_bound_queue, u->manager->stop_when_bound_queue, u); - safe_close(u->ip_accounting_ingress_map_fd); - safe_close(u->ip_accounting_egress_map_fd); - - safe_close(u->ipv4_allow_map_fd); - safe_close(u->ipv6_allow_map_fd); - safe_close(u->ipv4_deny_map_fd); - safe_close(u->ipv6_deny_map_fd); - - bpf_program_unref(u->ip_bpf_ingress); - bpf_program_unref(u->ip_bpf_ingress_installed); - bpf_program_unref(u->ip_bpf_egress); - bpf_program_unref(u->ip_bpf_egress_installed); - - set_free(u->ip_bpf_custom_ingress); - set_free(u->ip_bpf_custom_egress); - set_free(u->ip_bpf_custom_ingress_installed); - set_free(u->ip_bpf_custom_egress_installed); + bpf_firewall_close(u); hashmap_free(u->bpf_foreign_by_key); diff --git a/src/core/unit.h b/src/core/unit.h index 8818392731..52feb3693b 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -308,14 +308,15 @@ typedef struct Unit { /* IP BPF Firewalling/accounting */ int ip_accounting_ingress_map_fd; int ip_accounting_egress_map_fd; + uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX]; int ipv4_allow_map_fd; int ipv6_allow_map_fd; int ipv4_deny_map_fd; int ipv6_deny_map_fd; - BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed; BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed; + Set *ip_bpf_custom_ingress; Set *ip_bpf_custom_ingress_installed; Set *ip_bpf_custom_egress; @@ -334,8 +335,6 @@ typedef struct Unit { struct bpf_link *ipv6_socket_bind_link; #endif - uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX]; - /* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new * ones which might have appeared. */ sd_event_source *rewatch_pids_event_source; diff --git a/src/shared/bpf-dlopen.c b/src/shared/bpf-dlopen.c index 64120f17c5..0556148458 100644 --- a/src/shared/bpf-dlopen.c +++ b/src/shared/bpf-dlopen.c @@ -37,7 +37,7 @@ int dlopen_bpf(void) { r = dlsym_many_and_warn( dl, - LOG_ERR, + LOG_DEBUG, DLSYM_ARG(bpf_link__destroy), DLSYM_ARG(bpf_link__fd), DLSYM_ARG(bpf_map__fd), @@ -60,7 +60,6 @@ int dlopen_bpf(void) { /* Note that we never release the reference here, because there's no real reason to, after all this * was traditionally a regular shared library dependency which lives forever too. */ bpf_dl = TAKE_PTR(dl); - return 1; } diff --git a/src/shared/bpf-link.c b/src/shared/bpf-link.c index 405874374c..720ed40395 100644 --- a/src/shared/bpf-link.c +++ b/src/shared/bpf-link.c @@ -4,17 +4,13 @@ #include "bpf-link.h" #include "serialize.h" -bool can_link_bpf_program(struct bpf_program *prog) { +bool bpf_can_link_program(struct bpf_program *prog) { _cleanup_(bpf_link_freep) struct bpf_link *link = NULL; - int r; assert(prog); - r = dlopen_bpf(); - if (r < 0) { - log_debug_errno(r, "Could not load libbpf: %m"); + if (dlopen_bpf() < 0) return false; - } /* Pass invalid cgroup fd intentionally. */ link = sym_bpf_program__attach_cgroup(prog, /*cgroup_fd=*/-1); @@ -23,9 +19,7 @@ bool can_link_bpf_program(struct bpf_program *prog) { return sym_libbpf_get_error(link) == -EBADF; } -int serialize_bpf_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link) { - int fd; - +int bpf_serialize_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link) { assert(key); if (!link) @@ -34,11 +28,11 @@ int serialize_bpf_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *li if (sym_libbpf_get_error(link) != 0) return -EINVAL; - fd = sym_bpf_link__fd(link); - return serialize_fd(f, fds, key, fd); + return serialize_fd(f, fds, key, sym_bpf_link__fd(link)); } struct bpf_link *bpf_link_free(struct bpf_link *link) { + /* Avoid a useless dlopen() if link == NULL */ if (!link) return NULL; diff --git a/src/shared/bpf-link.h b/src/shared/bpf-link.h index 095465b07c..bb6ac60ac9 100644 --- a/src/shared/bpf-link.h +++ b/src/shared/bpf-link.h @@ -8,9 +8,9 @@ #include "fdset.h" #include "macro.h" -bool can_link_bpf_program(struct bpf_program *prog); +bool bpf_can_link_program(struct bpf_program *prog); -int serialize_bpf_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link); +int bpf_serialize_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link); struct bpf_link *bpf_link_free(struct bpf_link *p); DEFINE_TRIVIAL_CLEANUP_FUNC(struct bpf_link *, bpf_link_free); diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c index ec8437d583..0f865a7168 100644 --- a/src/shared/bpf-program.c +++ b/src/shared/bpf-program.c @@ -7,10 +7,12 @@ #include "alloc-util.h" #include "bpf-program.h" +#include "escape.h" #include "fd-util.h" #include "memory-util.h" #include "missing_syscall.h" #include "path-util.h" +#include "serialize.h" #include "string-table.h" static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = { @@ -36,6 +38,8 @@ static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = { DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int); +DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(bpf_program_hash_ops, void, trivial_hash_func, trivial_compare_func, bpf_program_unref); + /* struct bpf_prog_info info must be initialized since its value is both input and output * for BPF_OBJ_GET_INFO_BY_FD syscall. */ static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, uint32_t info_len) { @@ -59,13 +63,15 @@ static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, u int bpf_program_new(uint32_t prog_type, BPFProgram **ret) { _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL; - p = new0(BPFProgram, 1); + p = new(BPFProgram, 1); if (!p) return -ENOMEM; - p->n_ref = 1; - p->prog_type = prog_type; - p->kernel_fd = -1; + *p = (BPFProgram) { + .n_ref = 1, + .prog_type = prog_type, + .kernel_fd = -1, + }; *ret = TAKE_PTR(p); @@ -358,3 +364,139 @@ int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id) { return 0; }; + +int bpf_program_serialize_attachment( + FILE *f, + FDSet *fds, + const char *key, + BPFProgram *p) { + + _cleanup_free_ char *escaped = NULL; + int copy, r; + + if (!p || !p->attached_path) + return 0; + + assert(p->kernel_fd >= 0); + + escaped = cescape(p->attached_path); + if (!escaped) + return -ENOMEM; + + copy = fdset_put_dup(fds, p->kernel_fd); + if (copy < 0) + return log_error_errno(copy, "Failed to add BPF kernel fd to serialize: %m"); + + r = serialize_item_format( + f, + key, + "%i %s %s", + copy, + bpf_cgroup_attach_type_to_string(p->attached_type), + escaped); + if (r < 0) + return r; + + /* After serialization, let's forget the fact that this program is attached. The attachment — if you + * so will — is now 'owned' by the serialization, and not us anymore. Why does that matter? Because + * of BPF's less-than-ideal lifecycle handling: to detach a program from a cgroup we have to + * explicitly do so, it's not done implicitly on close(). Now, since we are serializing here we don't + * want the program to be detached while freeing things, so that the attachment can be retained after + * deserializing again. bpf_program_free() implicitly detaches things, if attached_path is non-NULL, + * hence we set it to NULL here. */ + + p->attached_path = mfree(p->attached_path); + return 0; +} + +int bpf_program_serialize_attachment_set(FILE *f, FDSet *fds, const char *key, Set *set) { + BPFProgram *p; + int r; + + SET_FOREACH(p, set) { + r = bpf_program_serialize_attachment(f, fds, key, p); + if (r < 0) + return r; + } + + return 0; +} + +int bpf_program_deserialize_attachment(const char *v, FDSet *fds, BPFProgram **bpfp) { + _cleanup_free_ char *sfd = NULL, *sat = NULL, *unescaped = NULL; + _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL; + _cleanup_close_ int fd = -1; + int ifd, at, r; + + assert(v); + assert(bpfp); + + /* Extract first word: the fd number */ + r = extract_first_word(&v, &sfd, NULL, 0); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + r = safe_atoi(sfd, &ifd); + if (r < 0) + return r; + if (ifd < 0) + return -EBADF; + + /* Extract second word: the attach type */ + r = extract_first_word(&v, &sat, NULL, 0); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; + + at = bpf_cgroup_attach_type_from_string(sat); + if (at < 0) + return at; + + /* The rest is the path */ + r = cunescape(v, 0, &unescaped); + if (r < 0) + return r; + + fd = fdset_remove(fds, ifd); + if (fd < 0) + return fd; + + p = new(BPFProgram, 1); + if (!p) + return -ENOMEM; + + *p = (BPFProgram) { + .n_ref = 1, + .kernel_fd = TAKE_FD(fd), + .prog_type = BPF_PROG_TYPE_UNSPEC, + .attached_path = TAKE_PTR(unescaped), + .attached_type = at, + }; + + if (*bpfp) + bpf_program_unref(*bpfp); + + *bpfp = TAKE_PTR(p); + return 0; +} + +int bpf_program_deserialize_attachment_set(const char *v, FDSet *fds, Set **bpfsetp) { + BPFProgram *p = NULL; + int r; + + assert(v); + assert(bpfsetp); + + r = bpf_program_deserialize_attachment(v, fds, &p); + if (r < 0) + return r; + + r = set_ensure_consume(bpfsetp, &bpf_program_hash_ops, p); + if (r < 0) + return r; + + return 0; +} diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h index edde86c119..908af1a1b2 100644 --- a/src/shared/bpf-program.h +++ b/src/shared/bpf-program.h @@ -3,22 +3,32 @@ #include <linux/bpf.h> #include <stdint.h> +#include <stdio.h> #include <sys/syscall.h> +#include "fdset.h" #include "list.h" #include "macro.h" typedef struct BPFProgram BPFProgram; +/* This encapsulates three different concepts: the loaded BPF program, the BPF code, and the attachment to a + * cgroup. Typically our BPF programs go through all three stages: we build the code, we load it, and finally + * we attach it, but it might happen that we operate with programs that aren't loaded or aren't attached, or + * where we don't have the code. */ struct BPFProgram { unsigned n_ref; + /* The loaded BPF program, if loaded */ int kernel_fd; uint32_t prog_type; + /* The code of it BPF program, if known */ size_t n_instructions; struct bpf_insn *instructions; + /* The cgroup path the program is attached to, if it is attached. If non-NULL bpf_program_unref() + * will detach on destruction. */ char *attached_path; int attached_type; uint32_t attached_flags; @@ -35,9 +45,17 @@ int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path); int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags); int bpf_program_cgroup_detach(BPFProgram *p); + int bpf_program_pin(int prog_fd, const char *bpffs_path); int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id); +int bpf_program_serialize_attachment(FILE *f, FDSet *fds, const char *key, BPFProgram *p); +int bpf_program_serialize_attachment_set(FILE *f, FDSet *fds, const char *key, Set *set); +int bpf_program_deserialize_attachment(const char *v, FDSet *fds, BPFProgram **bpfp); +int bpf_program_deserialize_attachment_set(const char *v, FDSet *fds, Set **bpfsetp); + +extern const struct hash_ops bpf_program_hash_ops; + int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags); int bpf_map_update_element(int fd, const void *key, void *value); int bpf_map_lookup_element(int fd, const void *key, void *value); diff --git a/src/test/test-socket-bind.c b/src/test/test-socket-bind.c index 16cfea7779..996aeebbcc 100644 --- a/src/test/test-socket-bind.c +++ b/src/test/test-socket-bind.c @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include "bpf-socket-bind.h" #include "load-fragment.h" #include "manager.h" #include "process-util.h" #include "rlimit-util.h" #include "rm-rf.h" #include "service.h" -#include "socket-bind.h" #include "strv.h" #include "tests.h" #include "unit.h" @@ -122,7 +122,7 @@ int main(int argc, char *argv[]) { if (!can_memlock()) return log_tests_skipped("Can't use mlock(), skipping."); - r = socket_bind_supported(); + r = bpf_socket_bind_supported(); if (r <= 0) return log_tests_skipped("socket-bind is not supported, skipping."); |