summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Lennart Poettering <lennart@poettering.net> 2021-06-09 09:47:23 +0200
committer GitHub <noreply@github.com> 2021-06-09 09:47:23 +0200
commit bead169fe0f182a98cc8b55a63b3a12a5049ab45 (patch)
tree 5425eea35c12cc910768ed8907fe89f6cc366ff6 /src
parent Merge pull request #19852 from yuwata/network-stable-secret (diff)
parent bpf-firewall: close gap when updating the firewall (diff)
download systemd-bead169fe0f182a98cc8b55a63b3a12a5049ab45.tar.xz
systemd-bead169fe0f182a98cc8b55a63b3a12a5049ab45.zip
Merge pull request #19851 from poettering/bpf-firewall-tweaks
close bpf firewall reload gap
Diffstat (limited to 'src')
-rw-r--r-- src/core/bpf-firewall.c 52
-rw-r--r-- src/core/bpf-firewall.h 2
-rw-r--r-- src/core/bpf-socket-bind.c (renamed from src/core/socket-bind.c) 93
-rw-r--r-- src/core/bpf-socket-bind.h 15
-rw-r--r-- src/core/cgroup.c 6
-rw-r--r-- src/core/load-fragment.c 2
-rw-r--r-- src/core/meson.build 8
-rw-r--r-- src/core/socket-bind.h 15
-rw-r--r-- src/core/unit-serialize.c 29
-rw-r--r-- src/core/unit.c 26
-rw-r--r-- src/core/unit.h 5
-rw-r--r-- src/shared/bpf-dlopen.c 3
-rw-r--r-- src/shared/bpf-link.c 16
-rw-r--r-- src/shared/bpf-link.h 4
-rw-r--r-- src/shared/bpf-program.c 150
-rw-r--r-- src/shared/bpf-program.h 18
-rw-r--r-- src/test/test-socket-bind.c 4
17 files changed, 322 insertions, 126 deletions
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
index 2a41bffee6..9317edeb4c 100644
--- a/src/core/bpf-firewall.c
+++ b/src/core/bpf-firewall.c
@@ -587,8 +587,6 @@ int bpf_firewall_compile(Unit *u) {
return 0;
}
-DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(filter_prog_hash_ops, void, trivial_hash_func, trivial_compare_func, BPFProgram, bpf_program_unref);
-
static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) {
char **bpf_fs_path;
@@ -606,7 +604,7 @@ static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set
if (r < 0)
return log_unit_error_errno(u, r, "Loading of ingress BPF program %s failed: %m", *bpf_fs_path);
- r = set_ensure_consume(set, &filter_prog_hash_ops, TAKE_PTR(prog));
+ r = set_ensure_consume(set, &bpf_program_hash_ops, TAKE_PTR(prog));
if (r < 0)
return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
}
@@ -658,9 +656,10 @@ static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, S
return log_unit_error_errno(u, r, "Attaching custom egress BPF program to cgroup %s failed: %m", path);
/* Remember that these BPF programs are installed now. */
- r = set_ensure_put(set_installed, &filter_prog_hash_ops, prog);
+ r = set_ensure_put(set_installed, &bpf_program_hash_ops, prog);
if (r < 0)
return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
+
bpf_program_ref(prog);
}
@@ -668,6 +667,7 @@ static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, S
}
int bpf_firewall_install(Unit *u) {
+ _cleanup_(bpf_program_unrefp) BPFProgram *ip_bpf_ingress_uninstall = NULL, *ip_bpf_egress_uninstall = NULL;
_cleanup_free_ char *path = NULL;
CGroupContext *cc;
int r, supported;
@@ -700,10 +700,20 @@ int bpf_firewall_install(Unit *u) {
flags = supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI ? BPF_F_ALLOW_MULTI : 0;
- /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program, to
- * minimize the time window when we don't account for IP traffic. */
- u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
- u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
+ if (FLAGS_SET(flags, BPF_F_ALLOW_MULTI)) {
+ /* If we have BPF_F_ALLOW_MULTI, then let's clear the fields, but destroy the programs only
+ * after attaching the new programs, so that there's no time window where neither program is
+ * attached. (There will be a period where both are attached, but that's OK, since this is a
+ * security feature where we rather want to lock down too much than too little.) */
+ ip_bpf_egress_uninstall = TAKE_PTR(u->ip_bpf_egress_installed);
+ ip_bpf_ingress_uninstall = TAKE_PTR(u->ip_bpf_ingress_installed);
+ } else {
+ /* If we don't have BPF_F_ALLOW_MULTI then unref the old BPF programs (which will implicitly
+ * detach them) right before attaching the new program, to minimize the time window when we
+ * don't account for IP traffic. */
+ u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
+ u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
+ }
if (u->ip_bpf_egress) {
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
@@ -722,6 +732,10 @@ int bpf_firewall_install(Unit *u) {
u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
}
+ /* And now, definitely get rid of the old programs, and detach them */
+ ip_bpf_egress_uninstall = bpf_program_unref(ip_bpf_egress_uninstall);
+ ip_bpf_ingress_uninstall = bpf_program_unref(ip_bpf_ingress_uninstall);
+
r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
if (r < 0)
return r;
@@ -902,3 +916,25 @@ void emit_bpf_firewall_warning(Unit *u) {
warned = true;
}
}
+
+void bpf_firewall_close(Unit *u) {
+ assert(u);
+
+ u->ip_accounting_ingress_map_fd = safe_close(u->ip_accounting_ingress_map_fd);
+ u->ip_accounting_egress_map_fd = safe_close(u->ip_accounting_egress_map_fd);
+
+ u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
+ u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
+ u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+ u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+
+ u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
+ u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
+ u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);
+ u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
+
+ u->ip_bpf_custom_ingress = set_free(u->ip_bpf_custom_ingress);
+ u->ip_bpf_custom_egress = set_free(u->ip_bpf_custom_egress);
+ u->ip_bpf_custom_ingress_installed = set_free(u->ip_bpf_custom_ingress_installed);
+ u->ip_bpf_custom_egress_installed = set_free(u->ip_bpf_custom_egress_installed);
+}
diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h
index 08d7742193..58b401f834 100644
--- a/src/core/bpf-firewall.h
+++ b/src/core/bpf-firewall.h
@@ -21,3 +21,5 @@ int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_
int bpf_firewall_reset_accounting(int map_fd);
void emit_bpf_firewall_warning(Unit *u);
+
+void bpf_firewall_close(Unit *u);
diff --git a/src/core/socket-bind.c b/src/core/bpf-socket-bind.c
index 352b47ad9f..66c82d5469 100644
--- a/src/core/socket-bind.c
+++ b/src/core/bpf-socket-bind.c
@@ -5,7 +5,7 @@
#endif
#include "fd-util.h"
-#include "socket-bind.h"
+#include "bpf-socket-bind.h"
#if BPF_FRAMEWORK
/* libbpf, clang, llvm and bpftool compile time dependencies are satisfied */
@@ -24,20 +24,23 @@ static struct socket_bind_bpf *socket_bind_bpf_free(struct socket_bind_bpf *obj)
DEFINE_TRIVIAL_CLEANUP_FUNC(struct socket_bind_bpf *, socket_bind_bpf_free);
static int update_rules_map(
- int map_fd, CGroupSocketBindItem *head) {
+ int map_fd,
+ CGroupSocketBindItem *head) {
+
CGroupSocketBindItem *item;
uint32_t i = 0;
assert(map_fd >= 0);
LIST_FOREACH(socket_bind_items, item, head) {
- const uint32_t key = i++;
struct socket_bind_rule val = {
.address_family = (uint32_t) item->address_family,
.nr_ports = item->nr_ports,
.port_min = item->port_min,
};
+ uint32_t key = i++;
+
if (sym_bpf_map_update_elem(map_fd, &key, &val, BPF_ANY) != 0)
return -errno;
}
@@ -46,15 +49,19 @@ static int update_rules_map(
}
static int prepare_socket_bind_bpf(
- Unit *u, CGroupSocketBindItem *allow, CGroupSocketBindItem *deny, struct socket_bind_bpf **ret_obj) {
- _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = 0;
- uint32_t allow_count = 0, deny_count = 0;
+ Unit *u,
+ CGroupSocketBindItem *allow,
+ CGroupSocketBindItem *deny,
+ struct socket_bind_bpf **ret_obj) {
+
+ _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL;
+ size_t allow_count = 0, deny_count = 0;
int allow_map_fd, deny_map_fd, r;
CGroupSocketBindItem *item;
assert(ret_obj);
- LIST_FOREACH(socket_bind_items, item, allow)
+ LIST_FOREACH(socket_bind_items, item, allow)
allow_count++;
LIST_FOREACH(socket_bind_items, item, deny)
@@ -107,41 +114,36 @@ static int prepare_socket_bind_bpf(
return 0;
}
-int socket_bind_supported(void) {
+int bpf_socket_bind_supported(void) {
_cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL;
+ int r;
- int r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+ r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
if (r < 0)
- return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
-
+ return log_debug_errno(r, "Can't determine whether the unified hierarchy is used: %m");
if (r == 0) {
- log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
- "Not running with unified cgroup hierarchy, BPF is not supported");
- return 0;
+ log_debug("Not running with unified cgroup hierarchy, BPF is not supported");
+ return false;
}
- r = dlopen_bpf();
- if (r < 0) {
- log_info_errno(r, "Could not load libbpf: %m");
- return 0;
- }
+ if (dlopen_bpf() < 0)
+ return false;
if (!sym_bpf_probe_prog_type(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, /*ifindex=*/0)) {
- log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
- "BPF program type cgroup_sock_addr is not supported");
- return 0;
+ log_debug("BPF program type cgroup_sock_addr is not supported");
+ return false;
}
r = prepare_socket_bind_bpf(/*unit=*/NULL, /*allow_rules=*/NULL, /*deny_rules=*/NULL, &obj);
if (r < 0) {
log_debug_errno(r, "BPF based socket_bind is not supported: %m");
- return 0;
+ return false;
}
- return can_link_bpf_program(obj->progs.sd_bind4);
+ return bpf_can_link_program(obj->progs.sd_bind4);
}
-int socket_bind_add_initial_link_fd(Unit *u, int fd) {
+int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd) {
int r;
assert(u);
@@ -167,6 +169,8 @@ static int socket_bind_install_impl(Unit *u) {
CGroupContext *cc;
int r;
+ assert(u);
+
cc = unit_get_cgroup_context(u);
if (!cc)
return 0;
@@ -184,20 +188,19 @@ static int socket_bind_install_impl(Unit *u) {
cgroup_fd = open(cgroup_path, O_RDONLY | O_CLOEXEC, 0);
if (cgroup_fd < 0)
- return log_unit_error_errno(
- u, errno, "Failed to open cgroup=%s for reading", cgroup_path);
+ return log_unit_error_errno(u, errno, "Failed to open cgroup=%s for reading: %m", cgroup_path);
ipv4 = sym_bpf_program__attach_cgroup(obj->progs.sd_bind4, cgroup_fd);
r = sym_libbpf_get_error(ipv4);
if (r != 0)
- return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program",
- sym_bpf_program__name(obj->progs.sd_bind4));
+ return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program: %m",
+ sym_bpf_program__name(obj->progs.sd_bind4));
ipv6 = sym_bpf_program__attach_cgroup(obj->progs.sd_bind6, cgroup_fd);
r = sym_libbpf_get_error(ipv6);
if (r != 0)
- return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program",
- sym_bpf_program__name(obj->progs.sd_bind6));
+ return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program: %m",
+ sym_bpf_program__name(obj->progs.sd_bind6));
u->ipv4_socket_bind_link = TAKE_PTR(ipv4);
u->ipv6_socket_bind_link = TAKE_PTR(ipv6);
@@ -205,43 +208,45 @@ static int socket_bind_install_impl(Unit *u) {
return 0;
}
-int socket_bind_install(Unit *u) {
- int r = socket_bind_install_impl(u);
+int bpf_socket_bind_install(Unit *u) {
+ int r;
+
+ assert(u);
+
+ r = socket_bind_install_impl(u);
if (r == -ENOMEM)
return r;
fdset_close(u->initial_socket_bind_link_fds);
-
return r;
}
-int serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
+int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
int r;
assert(u);
- r = serialize_bpf_link(f, fds, "ipv4-socket-bind-bpf-link", u->ipv4_socket_bind_link);
+ r = bpf_serialize_link(f, fds, "ipv4-socket-bind-bpf-link", u->ipv4_socket_bind_link);
if (r < 0)
return r;
- return serialize_bpf_link(f, fds, "ipv6-socket-bind-bpf-link", u->ipv6_socket_bind_link);
+ return bpf_serialize_link(f, fds, "ipv6-socket-bind-bpf-link", u->ipv6_socket_bind_link);
}
#else /* ! BPF_FRAMEWORK */
-int socket_bind_supported(void) {
- return 0;
+int bpf_socket_bind_supported(void) {
+ return false;
}
-int socket_bind_add_initial_link_fd(Unit *u, int fd) {
+int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd) {
return 0;
}
-int socket_bind_install(Unit *u) {
- log_unit_debug(u, "Failed to install socket bind: BPF framework is not supported");
- return 0;
+int bpf_socket_bind_install(Unit *u) {
+ return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "Failed to install socket bind: BPF framework is not supported");
}
-int serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
+int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
return 0;
}
#endif
diff --git a/src/core/bpf-socket-bind.h b/src/core/bpf-socket-bind.h
new file mode 100644
index 0000000000..c8c75adaf6
--- /dev/null
+++ b/src/core/bpf-socket-bind.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "fdset.h"
+#include "unit.h"
+
+int bpf_socket_bind_supported(void);
+
+/* Add BPF link fd created before daemon-reload or daemon-reexec. FDs will be closed at the end of
+ * bpf_socket_bind_install(). */
+int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd);
+
+int bpf_socket_bind_install(Unit *u);
+
+int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds);
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 7fde1efce4..3cec8a5786 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -10,6 +10,7 @@
#include "bpf-devices.h"
#include "bpf-firewall.h"
#include "bpf-foreign.h"
+#include "bpf-socket-bind.h"
#include "btrfs-util.h"
#include "bus-error.h"
#include "cgroup-setup.h"
@@ -26,7 +27,6 @@
#include "percent-util.h"
#include "process-util.h"
#include "procfs-util.h"
-#include "socket-bind.h"
#include "special.h"
#include "stat-util.h"
#include "stdio-util.h"
@@ -1096,7 +1096,7 @@ static void cgroup_apply_firewall(Unit *u) {
static void cgroup_apply_socket_bind(Unit *u) {
assert(u);
- (void) socket_bind_install(u);
+ (void) bpf_socket_bind_install(u);
}
static int cgroup_apply_devices(Unit *u) {
@@ -3126,7 +3126,7 @@ static int cg_bpf_mask_supported(CGroupMask *ret) {
mask |= CGROUP_MASK_BPF_FOREIGN;
/* BPF-based bind{4|6} hooks */
- r = socket_bind_supported();
+ r = bpf_socket_bind_supported();
if (r > 0)
mask |= CGROUP_MASK_BPF_SOCKET_BIND;
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index c6fca7135c..46b6549d16 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -20,6 +20,7 @@
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bpf-program.h"
+#include "bpf-socket-bind.h"
#include "bus-error.h"
#include "bus-internal.h"
#include "bus-util.h"
@@ -55,7 +56,6 @@
#endif
#include "securebits-util.h"
#include "signal-util.h"
-#include "socket-bind.h"
#include "socket-netlink.h"
#include "specifier.h"
#include "stat-util.h"
diff --git a/src/core/meson.build b/src/core/meson.build
index e696d27727..f0d2c6f642 100644
--- a/src/core/meson.build
+++ b/src/core/meson.build
@@ -13,6 +13,8 @@ libcore_sources = '''
bpf-firewall.h
bpf-foreign.c
bpf-foreign.h
+ bpf-socket-bind.c
+ bpf-socket-bind.h
cgroup.c
cgroup.h
core-varlink.c
@@ -83,10 +85,10 @@ libcore_sources = '''
load-fragment.h
locale-setup.c
locale-setup.h
- manager.c
- manager.h
manager-dump.c
manager-dump.h
+ manager.c
+ manager.h
mount.c
mount.h
namespace.c
@@ -107,8 +109,6 @@ libcore_sources = '''
slice.h
smack-setup.c
smack-setup.h
- socket-bind.c
- socket-bind.h
socket.c
socket.h
swap.c
diff --git a/src/core/socket-bind.h b/src/core/socket-bind.h
deleted file mode 100644
index 2a6e71a9b9..0000000000
--- a/src/core/socket-bind.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1+ */
-#pragma once
-
-#include "fdset.h"
-#include "unit.h"
-
-int socket_bind_supported(void);
-
-/* Add BPF link fd created before daemon-reload or daemon-reexec.
- * FDs will be closed at the end of socket_bind_install. */
-int socket_bind_add_initial_link_fd(Unit *u, int fd);
-
-int socket_bind_install(Unit *u);
-
-int serialize_socket_bind(Unit *u, FILE *f, FDSet *fds);
diff --git a/src/core/unit-serialize.c b/src/core/unit-serialize.c
index 4da69769a6..daf7c59cc1 100644
--- a/src/core/unit-serialize.c
+++ b/src/core/unit-serialize.c
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include "bpf-socket-bind.h"
#include "bus-util.h"
#include "dbus.h"
#include "fileio-label.h"
@@ -7,7 +8,6 @@
#include "format-util.h"
#include "parse-util.h"
#include "serialize.h"
-#include "socket-bind.h"
#include "string-table.h"
#include "unit-serialize.h"
#include "user-util.h"
@@ -164,7 +164,12 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool switching_root) {
(void) serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
(void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask);
- (void) serialize_socket_bind(u, f, fds);
+ (void) bpf_serialize_socket_bind(u, f, fds);
+
+ (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-ingress-installed", u->ip_bpf_ingress_installed);
+ (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-egress-installed", u->ip_bpf_egress_installed);
+ (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-ingress-installed", u->ip_bpf_custom_ingress_installed);
+ (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-egress-installed", u->ip_bpf_custom_egress_installed);
if (uid_is_valid(u->ref_uid))
(void) serialize_item_format(f, "ref-uid", UID_FMT, u->ref_uid);
@@ -385,16 +390,28 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
else {
if (fdset_remove(fds, fd) < 0) {
log_unit_debug(u, "Failed to remove %s value=%d from fdset", l, fd);
-
continue;
}
- (void) socket_bind_add_initial_link_fd(u, fd);
+ (void) bpf_socket_bind_add_initial_link_fd(u, fd);
}
continue;
- }
- else if (streq(l, "ref-uid")) {
+ } else if (streq(l, "ip-bpf-ingress-installed")) {
+ (void) bpf_program_deserialize_attachment(v, fds, &u->ip_bpf_ingress_installed);
+ continue;
+ } else if (streq(l, "ip-bpf-egress-installed")) {
+ (void) bpf_program_deserialize_attachment(v, fds, &u->ip_bpf_egress_installed);
+ continue;
+
+ } else if (streq(l, "ip-bpf-custom-ingress-installed")) {
+ (void) bpf_program_deserialize_attachment_set(v, fds, &u->ip_bpf_custom_ingress_installed);
+ continue;
+ } else if (streq(l, "ip-bpf-custom-egress-installed")) {
+ (void) bpf_program_deserialize_attachment_set(v, fds, &u->ip_bpf_custom_egress_installed);
+ continue;
+
+ } else if (streq(l, "ref-uid")) {
uid_t uid;
r = parse_uid(v, &uid);
diff --git a/src/core/unit.c b/src/core/unit.c
index c6b17afa51..de407d20a8 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -12,6 +12,7 @@
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bpf-foreign.h"
+#include "bpf-socket-bind.h"
#include "bus-common-errors.h"
#include "bus-util.h"
#include "cgroup-setup.h"
@@ -41,7 +42,6 @@
#include "rm-rf.h"
#include "set.h"
#include "signal-util.h"
-#include "socket-bind.h"
#include "sparse-endian.h"
#include "special.h"
#include "specifier.h"
@@ -114,6 +114,9 @@ Unit* unit_new(Manager *m, size_t size) {
u->ip_accounting_ingress_map_fd = -1;
u->ip_accounting_egress_map_fd = -1;
+ for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++)
+ u->io_accounting_last[i] = UINT64_MAX;
+
u->ipv4_allow_map_fd = -1;
u->ipv6_allow_map_fd = -1;
u->ipv4_deny_map_fd = -1;
@@ -124,9 +127,6 @@ Unit* unit_new(Manager *m, size_t size) {
u->start_ratelimit = (RateLimit) { m->default_start_limit_interval, m->default_start_limit_burst };
u->auto_start_stop_ratelimit = (RateLimit) { 10 * USEC_PER_SEC, 16 };
- for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++)
- u->io_accounting_last[i] = UINT64_MAX;
-
return u;
}
@@ -757,23 +757,7 @@ Unit* unit_free(Unit *u) {
if (u->in_stop_when_bound_queue)
LIST_REMOVE(stop_when_bound_queue, u->manager->stop_when_bound_queue, u);
- safe_close(u->ip_accounting_ingress_map_fd);
- safe_close(u->ip_accounting_egress_map_fd);
-
- safe_close(u->ipv4_allow_map_fd);
- safe_close(u->ipv6_allow_map_fd);
- safe_close(u->ipv4_deny_map_fd);
- safe_close(u->ipv6_deny_map_fd);
-
- bpf_program_unref(u->ip_bpf_ingress);
- bpf_program_unref(u->ip_bpf_ingress_installed);
- bpf_program_unref(u->ip_bpf_egress);
- bpf_program_unref(u->ip_bpf_egress_installed);
-
- set_free(u->ip_bpf_custom_ingress);
- set_free(u->ip_bpf_custom_egress);
- set_free(u->ip_bpf_custom_ingress_installed);
- set_free(u->ip_bpf_custom_egress_installed);
+ bpf_firewall_close(u);
hashmap_free(u->bpf_foreign_by_key);
diff --git a/src/core/unit.h b/src/core/unit.h
index 8818392731..52feb3693b 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -308,14 +308,15 @@ typedef struct Unit {
/* IP BPF Firewalling/accounting */
int ip_accounting_ingress_map_fd;
int ip_accounting_egress_map_fd;
+ uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
int ipv4_allow_map_fd;
int ipv6_allow_map_fd;
int ipv4_deny_map_fd;
int ipv6_deny_map_fd;
-
BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
+
Set *ip_bpf_custom_ingress;
Set *ip_bpf_custom_ingress_installed;
Set *ip_bpf_custom_egress;
@@ -334,8 +335,6 @@ typedef struct Unit {
struct bpf_link *ipv6_socket_bind_link;
#endif
- uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
-
/* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new
* ones which might have appeared. */
sd_event_source *rewatch_pids_event_source;
diff --git a/src/shared/bpf-dlopen.c b/src/shared/bpf-dlopen.c
index 64120f17c5..0556148458 100644
--- a/src/shared/bpf-dlopen.c
+++ b/src/shared/bpf-dlopen.c
@@ -37,7 +37,7 @@ int dlopen_bpf(void) {
r = dlsym_many_and_warn(
dl,
- LOG_ERR,
+ LOG_DEBUG,
DLSYM_ARG(bpf_link__destroy),
DLSYM_ARG(bpf_link__fd),
DLSYM_ARG(bpf_map__fd),
@@ -60,7 +60,6 @@ int dlopen_bpf(void) {
/* Note that we never release the reference here, because there's no real reason to, after all this
* was traditionally a regular shared library dependency which lives forever too. */
bpf_dl = TAKE_PTR(dl);
-
return 1;
}
diff --git a/src/shared/bpf-link.c b/src/shared/bpf-link.c
index 405874374c..720ed40395 100644
--- a/src/shared/bpf-link.c
+++ b/src/shared/bpf-link.c
@@ -4,17 +4,13 @@
#include "bpf-link.h"
#include "serialize.h"
-bool can_link_bpf_program(struct bpf_program *prog) {
+bool bpf_can_link_program(struct bpf_program *prog) {
_cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
- int r;
assert(prog);
- r = dlopen_bpf();
- if (r < 0) {
- log_debug_errno(r, "Could not load libbpf: %m");
+ if (dlopen_bpf() < 0)
return false;
- }
/* Pass invalid cgroup fd intentionally. */
link = sym_bpf_program__attach_cgroup(prog, /*cgroup_fd=*/-1);
@@ -23,9 +19,7 @@ bool can_link_bpf_program(struct bpf_program *prog) {
return sym_libbpf_get_error(link) == -EBADF;
}
-int serialize_bpf_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link) {
- int fd;
-
+int bpf_serialize_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link) {
assert(key);
if (!link)
@@ -34,11 +28,11 @@ int serialize_bpf_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *li
if (sym_libbpf_get_error(link) != 0)
return -EINVAL;
- fd = sym_bpf_link__fd(link);
- return serialize_fd(f, fds, key, fd);
+ return serialize_fd(f, fds, key, sym_bpf_link__fd(link));
}
struct bpf_link *bpf_link_free(struct bpf_link *link) {
+
/* Avoid a useless dlopen() if link == NULL */
if (!link)
return NULL;
diff --git a/src/shared/bpf-link.h b/src/shared/bpf-link.h
index 095465b07c..bb6ac60ac9 100644
--- a/src/shared/bpf-link.h
+++ b/src/shared/bpf-link.h
@@ -8,9 +8,9 @@
#include "fdset.h"
#include "macro.h"
-bool can_link_bpf_program(struct bpf_program *prog);
+bool bpf_can_link_program(struct bpf_program *prog);
-int serialize_bpf_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link);
+int bpf_serialize_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link);
struct bpf_link *bpf_link_free(struct bpf_link *p);
DEFINE_TRIVIAL_CLEANUP_FUNC(struct bpf_link *, bpf_link_free);
diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c
index ec8437d583..0f865a7168 100644
--- a/src/shared/bpf-program.c
+++ b/src/shared/bpf-program.c
@@ -7,10 +7,12 @@
#include "alloc-util.h"
#include "bpf-program.h"
+#include "escape.h"
#include "fd-util.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "path-util.h"
+#include "serialize.h"
#include "string-table.h"
static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = {
@@ -36,6 +38,8 @@ static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = {
DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int);
+DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(bpf_program_hash_ops, void, trivial_hash_func, trivial_compare_func, bpf_program_unref);
+
/* struct bpf_prog_info info must be initialized since its value is both input and output
* for BPF_OBJ_GET_INFO_BY_FD syscall. */
static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, uint32_t info_len) {
@@ -59,13 +63,15 @@ static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, u
int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
- p = new0(BPFProgram, 1);
+ p = new(BPFProgram, 1);
if (!p)
return -ENOMEM;
- p->n_ref = 1;
- p->prog_type = prog_type;
- p->kernel_fd = -1;
+ *p = (BPFProgram) {
+ .n_ref = 1,
+ .prog_type = prog_type,
+ .kernel_fd = -1,
+ };
*ret = TAKE_PTR(p);
@@ -358,3 +364,139 @@ int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id) {
return 0;
};
+
+int bpf_program_serialize_attachment(
+ FILE *f,
+ FDSet *fds,
+ const char *key,
+ BPFProgram *p) {
+
+ _cleanup_free_ char *escaped = NULL;
+ int copy, r;
+
+ if (!p || !p->attached_path)
+ return 0;
+
+ assert(p->kernel_fd >= 0);
+
+ escaped = cescape(p->attached_path);
+ if (!escaped)
+ return -ENOMEM;
+
+ copy = fdset_put_dup(fds, p->kernel_fd);
+ if (copy < 0)
+ return log_error_errno(copy, "Failed to add BPF kernel fd to serialize: %m");
+
+ r = serialize_item_format(
+ f,
+ key,
+ "%i %s %s",
+ copy,
+ bpf_cgroup_attach_type_to_string(p->attached_type),
+ escaped);
+ if (r < 0)
+ return r;
+
+ /* After serialization, let's forget the fact that this program is attached. The attachment — if you
+ * so will — is now 'owned' by the serialization, and not us anymore. Why does that matter? Because
+ * of BPF's less-than-ideal lifecycle handling: to detach a program from a cgroup we have to
+ * explicitly do so, it's not done implicitly on close(). Now, since we are serializing here we don't
+ * want the program to be detached while freeing things, so that the attachment can be retained after
+ * deserializing again. bpf_program_free() implicitly detaches things, if attached_path is non-NULL,
+ * hence we set it to NULL here. */
+
+ p->attached_path = mfree(p->attached_path);
+ return 0;
+}
+
+int bpf_program_serialize_attachment_set(FILE *f, FDSet *fds, const char *key, Set *set) {
+ BPFProgram *p;
+ int r;
+
+ SET_FOREACH(p, set) {
+ r = bpf_program_serialize_attachment(f, fds, key, p);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
+int bpf_program_deserialize_attachment(const char *v, FDSet *fds, BPFProgram **bpfp) {
+ _cleanup_free_ char *sfd = NULL, *sat = NULL, *unescaped = NULL;
+ _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+ _cleanup_close_ int fd = -1;
+ int ifd, at, r;
+
+ assert(v);
+ assert(bpfp);
+
+ /* Extract first word: the fd number */
+ r = extract_first_word(&v, &sfd, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ r = safe_atoi(sfd, &ifd);
+ if (r < 0)
+ return r;
+ if (ifd < 0)
+ return -EBADF;
+
+ /* Extract second word: the attach type */
+ r = extract_first_word(&v, &sat, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r == 0)
+ return -EINVAL;
+
+ at = bpf_cgroup_attach_type_from_string(sat);
+ if (at < 0)
+ return at;
+
+ /* The rest is the path */
+ r = cunescape(v, 0, &unescaped);
+ if (r < 0)
+ return r;
+
+ fd = fdset_remove(fds, ifd);
+ if (fd < 0)
+ return fd;
+
+ p = new(BPFProgram, 1);
+ if (!p)
+ return -ENOMEM;
+
+ *p = (BPFProgram) {
+ .n_ref = 1,
+ .kernel_fd = TAKE_FD(fd),
+ .prog_type = BPF_PROG_TYPE_UNSPEC,
+ .attached_path = TAKE_PTR(unescaped),
+ .attached_type = at,
+ };
+
+ if (*bpfp)
+ bpf_program_unref(*bpfp);
+
+ *bpfp = TAKE_PTR(p);
+ return 0;
+}
+
+int bpf_program_deserialize_attachment_set(const char *v, FDSet *fds, Set **bpfsetp) {
+ BPFProgram *p = NULL;
+ int r;
+
+ assert(v);
+ assert(bpfsetp);
+
+ r = bpf_program_deserialize_attachment(v, fds, &p);
+ if (r < 0)
+ return r;
+
+ r = set_ensure_consume(bpfsetp, &bpf_program_hash_ops, p);
+ if (r < 0)
+ return r;
+
+ return 0;
+}
diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h
index edde86c119..908af1a1b2 100644
--- a/src/shared/bpf-program.h
+++ b/src/shared/bpf-program.h
@@ -3,22 +3,32 @@
#include <linux/bpf.h>
#include <stdint.h>
+#include <stdio.h>
#include <sys/syscall.h>
+#include "fdset.h"
#include "list.h"
#include "macro.h"
typedef struct BPFProgram BPFProgram;
+/* This encapsulates three different concepts: the loaded BPF program, the BPF code, and the attachment to a
+ * cgroup. Typically our BPF programs go through all three stages: we build the code, we load it, and finally
+ * we attach it, but it might happen that we operate with programs that aren't loaded or aren't attached, or
+ * where we don't have the code. */
struct BPFProgram {
unsigned n_ref;
+ /* The loaded BPF program, if loaded */
int kernel_fd;
uint32_t prog_type;
+ /* The code of the BPF program, if known */
size_t n_instructions;
struct bpf_insn *instructions;
+ /* The cgroup path the program is attached to, if it is attached. If non-NULL bpf_program_unref()
+ * will detach on destruction. */
char *attached_path;
int attached_type;
uint32_t attached_flags;
@@ -35,9 +45,17 @@ int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path);
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
int bpf_program_cgroup_detach(BPFProgram *p);
+
int bpf_program_pin(int prog_fd, const char *bpffs_path);
int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id);
+int bpf_program_serialize_attachment(FILE *f, FDSet *fds, const char *key, BPFProgram *p);
+int bpf_program_serialize_attachment_set(FILE *f, FDSet *fds, const char *key, Set *set);
+int bpf_program_deserialize_attachment(const char *v, FDSet *fds, BPFProgram **bpfp);
+int bpf_program_deserialize_attachment_set(const char *v, FDSet *fds, Set **bpfsetp);
+
+extern const struct hash_ops bpf_program_hash_ops;
+
int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
int bpf_map_update_element(int fd, const void *key, void *value);
int bpf_map_lookup_element(int fd, const void *key, void *value);
diff --git a/src/test/test-socket-bind.c b/src/test/test-socket-bind.c
index 16cfea7779..996aeebbcc 100644
--- a/src/test/test-socket-bind.c
+++ b/src/test/test-socket-bind.c
@@ -1,12 +1,12 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include "bpf-socket-bind.h"
#include "load-fragment.h"
#include "manager.h"
#include "process-util.h"
#include "rlimit-util.h"
#include "rm-rf.h"
#include "service.h"
-#include "socket-bind.h"
#include "strv.h"
#include "tests.h"
#include "unit.h"
@@ -122,7 +122,7 @@ int main(int argc, char *argv[]) {
if (!can_memlock())
return log_tests_skipped("Can't use mlock(), skipping.");
- r = socket_bind_supported();
+ r = bpf_socket_bind_supported();
if (r <= 0)
return log_tests_skipped("socket-bind is not supported, skipping.");