Merge pull request #19851 from poettering/bpf-firewall-tweaks

close bpf firewall reload gap
author: Lennart Poettering <lennart@poettering.net> 2021-06-09 09:47:23 +0200
committer: GitHub <noreply@github.com> 2021-06-09 09:47:23 +0200
commit: bead169fe0f182a98cc8b55a63b3a12a5049ab45 (patch)
tree: 5425eea35c12cc910768ed8907fe89f6cc366ff6 /src
parent: Merge pull request #19852 from yuwata/network-stable-secret (diff)
parent: bpf-firewall: close gap when updating the firewall (diff)
download: systemd-bead169fe0f182a98cc8b55a63b3a12a5049ab45.tar.xz
systemd-bead169fe0f182a98cc8b55a63b3a12a5049ab45.zip
17 files changed, 322 insertions, 126 deletions
diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
index 2a41bffee6..9317edeb4c 100644
--- a/src/core/bpf-firewall.c
+++ b/src/core/bpf-firewall.c
@@ -587,8 +587,6 @@ int bpf_firewall_compile(Unit *u) {
         return 0;
 }
 
-DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(filter_prog_hash_ops, void, trivial_hash_func, trivial_compare_func, BPFProgram, bpf_program_unref);
-
 static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) {
         char **bpf_fs_path;
 
@@ -606,7 +604,7 @@ static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set
                 if (r < 0)
                         return log_unit_error_errno(u, r, "Loading of ingress BPF program %s failed: %m", *bpf_fs_path);
 
-                r = set_ensure_consume(set, &filter_prog_hash_ops, TAKE_PTR(prog));
+                r = set_ensure_consume(set, &bpf_program_hash_ops, TAKE_PTR(prog));
                 if (r < 0)
                         return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
         }
@@ -658,9 +656,10 @@ static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, S
                         return log_unit_error_errno(u, r, "Attaching custom egress BPF program to cgroup %s failed: %m", path);
 
                 /* Remember that these BPF programs are installed now. */
-                r = set_ensure_put(set_installed, &filter_prog_hash_ops, prog);
+                r = set_ensure_put(set_installed, &bpf_program_hash_ops, prog);
                 if (r < 0)
                         return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m");
+
                 bpf_program_ref(prog);
         }
 
@@ -668,6 +667,7 @@ static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, S
 }
 
 int bpf_firewall_install(Unit *u) {
+        _cleanup_(bpf_program_unrefp) BPFProgram *ip_bpf_ingress_uninstall = NULL, *ip_bpf_egress_uninstall = NULL;
         _cleanup_free_ char *path = NULL;
         CGroupContext *cc;
         int r, supported;
@@ -700,10 +700,20 @@ int bpf_firewall_install(Unit *u) {
 
         flags = supported == BPF_FIREWALL_SUPPORTED_WITH_MULTI ? BPF_F_ALLOW_MULTI : 0;
 
-        /* Unref the old BPF program (which will implicitly detach it) right before attaching the new program, to
-         * minimize the time window when we don't account for IP traffic. */
-        u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
-        u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
+        if (FLAGS_SET(flags, BPF_F_ALLOW_MULTI)) {
+                /* If we have BPF_F_ALLOW_MULTI, then let's clear the fields, but destroy the programs only
+                 * after attaching the new programs, so that there's no time window where neither program is
+                 * attached. (There will be a time window where both are attached, but that's OK, since this is
+                 * a security feature where we rather want to lock down too much than too little.) */
+                ip_bpf_egress_uninstall = TAKE_PTR(u->ip_bpf_egress_installed);
+                ip_bpf_ingress_uninstall = TAKE_PTR(u->ip_bpf_ingress_installed);
+        } else {
+                /* If we don't have BPF_F_ALLOW_MULTI then unref the old BPF programs (which will implicitly
+                 * detach them) right before attaching the new program, to minimize the time window when we
+                 * don't account for IP traffic. */
+                u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
+                u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
+        }
 
         if (u->ip_bpf_egress) {
                 r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
@@ -722,6 +732,10 @@ int bpf_firewall_install(Unit *u) {
                 u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress);
         }
 
+        /* And now, definitely get rid of the old programs, and detach them */
+        ip_bpf_egress_uninstall = bpf_program_unref(ip_bpf_egress_uninstall);
+        ip_bpf_ingress_uninstall = bpf_program_unref(ip_bpf_ingress_uninstall);
+
         r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
         if (r < 0)
                 return r;
@@ -902,3 +916,25 @@ void emit_bpf_firewall_warning(Unit *u) {
                 warned = true;
         }
 }
+
+void bpf_firewall_close(Unit *u) {
+        assert(u);
+
+        u->ip_accounting_ingress_map_fd = safe_close(u->ip_accounting_ingress_map_fd);
+        u->ip_accounting_egress_map_fd = safe_close(u->ip_accounting_egress_map_fd);
+
+        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
+        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
+        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+
+        u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
+        u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed);
+        u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);
+        u->ip_bpf_egress_installed = bpf_program_unref(u->ip_bpf_egress_installed);
+
+        u->ip_bpf_custom_ingress = set_free(u->ip_bpf_custom_ingress);
+        u->ip_bpf_custom_egress = set_free(u->ip_bpf_custom_egress);
+        u->ip_bpf_custom_ingress_installed = set_free(u->ip_bpf_custom_ingress_installed);
+        u->ip_bpf_custom_egress_installed = set_free(u->ip_bpf_custom_egress_installed);
+}
diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h
index 08d7742193..58b401f834 100644
--- a/src/core/bpf-firewall.h
+++ b/src/core/bpf-firewall.h
@@ -21,3 +21,5 @@ int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_
 int bpf_firewall_reset_accounting(int map_fd);
 
 void emit_bpf_firewall_warning(Unit *u);
+
+void bpf_firewall_close(Unit *u);
diff --git a/src/core/socket-bind.c b/src/core/bpf-socket-bind.c
index 352b47ad9f..66c82d5469 100644
--- a/src/core/socket-bind.c
+++ b/src/core/bpf-socket-bind.c
@@ -5,7 +5,7 @@
 #endif
 
 #include "fd-util.h"
-#include "socket-bind.h"
+#include "bpf-socket-bind.h"
 
 #if BPF_FRAMEWORK
 /* libbpf, clang, llvm and bpftool compile time dependencies are satisfied */
@@ -24,20 +24,23 @@ static struct socket_bind_bpf *socket_bind_bpf_free(struct socket_bind_bpf *obj)
 DEFINE_TRIVIAL_CLEANUP_FUNC(struct socket_bind_bpf *, socket_bind_bpf_free);
 
 static int update_rules_map(
-                int map_fd, CGroupSocketBindItem *head) {
+                int map_fd,
+                CGroupSocketBindItem *head) {
+
         CGroupSocketBindItem *item;
         uint32_t i = 0;
 
         assert(map_fd >= 0);
 
         LIST_FOREACH(socket_bind_items, item, head) {
-                const uint32_t key = i++;
                 struct socket_bind_rule val = {
                         .address_family = (uint32_t) item->address_family,
                         .nr_ports = item->nr_ports,
                         .port_min = item->port_min,
                 };
 
+                uint32_t key = i++;
+
                 if (sym_bpf_map_update_elem(map_fd, &key, &val, BPF_ANY) != 0)
                         return -errno;
         }
@@ -46,15 +49,19 @@ static int update_rules_map(
 }
 
 static int prepare_socket_bind_bpf(
-                Unit *u, CGroupSocketBindItem *allow, CGroupSocketBindItem *deny, struct socket_bind_bpf **ret_obj) {
-        _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = 0;
-        uint32_t allow_count = 0, deny_count = 0;
+                Unit *u,
+                CGroupSocketBindItem *allow,
+                CGroupSocketBindItem *deny,
+                struct socket_bind_bpf **ret_obj) {
+
+        _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL;
+        size_t allow_count = 0, deny_count = 0;
         int allow_map_fd, deny_map_fd, r;
         CGroupSocketBindItem *item;
 
         assert(ret_obj);
 
-        LIST_FOREACH(socket_bind_items, item,  allow)
+        LIST_FOREACH(socket_bind_items, item, allow)
                 allow_count++;
 
         LIST_FOREACH(socket_bind_items, item, deny)
@@ -107,41 +114,36 @@ static int prepare_socket_bind_bpf(
         return 0;
 }
 
-int socket_bind_supported(void) {
+int bpf_socket_bind_supported(void) {
         _cleanup_(socket_bind_bpf_freep) struct socket_bind_bpf *obj = NULL;
+        int r;
 
-        int r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
         if (r < 0)
-                return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
-
+                return log_debug_errno(r, "Can't determine whether the unified hierarchy is used: %m");
         if (r == 0) {
-                log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
-                                "Not running with unified cgroup hierarchy, BPF is not supported");
-                return 0;
+                log_debug("Not running with unified cgroup hierarchy, BPF is not supported");
+                return false;
         }
 
-        r = dlopen_bpf();
-        if (r < 0) {
-                log_info_errno(r, "Could not load libbpf: %m");
-                return 0;
-        }
+        if (dlopen_bpf() < 0)
+                return false;
 
         if (!sym_bpf_probe_prog_type(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, /*ifindex=*/0)) {
-                log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP),
-                                "BPF program type cgroup_sock_addr is not supported");
-                return 0;
+                log_debug("BPF program type cgroup_sock_addr is not supported");
+                return false;
         }
 
         r = prepare_socket_bind_bpf(/*unit=*/NULL, /*allow_rules=*/NULL, /*deny_rules=*/NULL, &obj);
         if (r < 0) {
                 log_debug_errno(r, "BPF based socket_bind is not supported: %m");
-                return 0;
+                return false;
         }
 
-        return can_link_bpf_program(obj->progs.sd_bind4);
+        return bpf_can_link_program(obj->progs.sd_bind4);
 }
 
-int socket_bind_add_initial_link_fd(Unit *u, int fd) {
+int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd) {
         int r;
 
         assert(u);
@@ -167,6 +169,8 @@ static int socket_bind_install_impl(Unit *u) {
         CGroupContext *cc;
         int r;
 
+        assert(u);
+
         cc = unit_get_cgroup_context(u);
         if (!cc)
                 return 0;
@@ -184,20 +188,19 @@ static int socket_bind_install_impl(Unit *u) {
 
         cgroup_fd = open(cgroup_path, O_RDONLY | O_CLOEXEC, 0);
         if (cgroup_fd < 0)
-                return log_unit_error_errno(
-                                u, errno, "Failed to open cgroup=%s for reading", cgroup_path);
+                return log_unit_error_errno(u, errno, "Failed to open cgroup=%s for reading: %m", cgroup_path);
 
         ipv4 = sym_bpf_program__attach_cgroup(obj->progs.sd_bind4, cgroup_fd);
         r = sym_libbpf_get_error(ipv4);
         if (r != 0)
-                return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program",
-                                sym_bpf_program__name(obj->progs.sd_bind4));
+                return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program: %m",
+                                            sym_bpf_program__name(obj->progs.sd_bind4));
 
         ipv6 = sym_bpf_program__attach_cgroup(obj->progs.sd_bind6, cgroup_fd);
         r = sym_libbpf_get_error(ipv6);
         if (r != 0)
-                return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program",
-                                sym_bpf_program__name(obj->progs.sd_bind6));
+                return log_unit_error_errno(u, r, "Failed to link '%s' cgroup-bpf program: %m",
+                                            sym_bpf_program__name(obj->progs.sd_bind6));
 
         u->ipv4_socket_bind_link = TAKE_PTR(ipv4);
         u->ipv6_socket_bind_link = TAKE_PTR(ipv6);
@@ -205,43 +208,45 @@ static int socket_bind_install_impl(Unit *u) {
         return 0;
 }
 
-int socket_bind_install(Unit *u) {
-        int r = socket_bind_install_impl(u);
+int bpf_socket_bind_install(Unit *u) {
+        int r;
+
+        assert(u);
+
+        r = socket_bind_install_impl(u);
         if (r == -ENOMEM)
                 return r;
 
         fdset_close(u->initial_socket_bind_link_fds);
-
         return r;
 }
 
-int serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
+int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
         int r;
 
         assert(u);
 
-        r = serialize_bpf_link(f, fds, "ipv4-socket-bind-bpf-link", u->ipv4_socket_bind_link);
+        r = bpf_serialize_link(f, fds, "ipv4-socket-bind-bpf-link", u->ipv4_socket_bind_link);
         if (r < 0)
                 return r;
 
-        return serialize_bpf_link(f, fds, "ipv6-socket-bind-bpf-link", u->ipv6_socket_bind_link);
+        return bpf_serialize_link(f, fds, "ipv6-socket-bind-bpf-link", u->ipv6_socket_bind_link);
 }
 
 #else /* ! BPF_FRAMEWORK */
-int socket_bind_supported(void) {
-        return 0;
+int bpf_socket_bind_supported(void) {
+        return false;
 }
 
-int socket_bind_add_initial_link_fd(Unit *u, int fd) {
+int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd) {
         return 0;
 }
 
-int socket_bind_install(Unit *u) {
-        log_unit_debug(u, "Failed to install socket bind: BPF framework is not supported");
-        return 0;
+int bpf_socket_bind_install(Unit *u) {
+        return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "Failed to install socket bind: BPF framework is not supported");
 }
 
-int serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
+int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds) {
         return 0;
 }
 #endif
diff --git a/src/core/bpf-socket-bind.h b/src/core/bpf-socket-bind.h
new file mode 100644
index 0000000000..c8c75adaf6
--- /dev/null
+++ b/src/core/bpf-socket-bind.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+#pragma once
+
+#include "fdset.h"
+#include "unit.h"
+
+int bpf_socket_bind_supported(void);
+
+/* Add BPF link fd created before daemon-reload or daemon-reexec.  FDs will be closed at the end of
+ * bpf_socket_bind_install(). */
+int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd);
+
+int bpf_socket_bind_install(Unit *u);
+
+int bpf_serialize_socket_bind(Unit *u, FILE *f, FDSet *fds);
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 7fde1efce4..3cec8a5786 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -10,6 +10,7 @@
 #include "bpf-devices.h"
 #include "bpf-firewall.h"
 #include "bpf-foreign.h"
+#include "bpf-socket-bind.h"
 #include "btrfs-util.h"
 #include "bus-error.h"
 #include "cgroup-setup.h"
@@ -26,7 +27,6 @@
 #include "percent-util.h"
 #include "process-util.h"
 #include "procfs-util.h"
-#include "socket-bind.h"
 #include "special.h"
 #include "stat-util.h"
 #include "stdio-util.h"
@@ -1096,7 +1096,7 @@ static void cgroup_apply_firewall(Unit *u) {
 static void cgroup_apply_socket_bind(Unit *u) {
         assert(u);
 
-        (void) socket_bind_install(u);
+        (void) bpf_socket_bind_install(u);
 }
 
 static int cgroup_apply_devices(Unit *u) {
@@ -3126,7 +3126,7 @@ static int cg_bpf_mask_supported(CGroupMask *ret) {
                 mask |= CGROUP_MASK_BPF_FOREIGN;
 
         /* BPF-based bind{4|6} hooks */
-        r = socket_bind_supported();
+        r = bpf_socket_bind_supported();
         if (r > 0)
                 mask |= CGROUP_MASK_BPF_SOCKET_BIND;
 
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index c6fca7135c..46b6549d16 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -20,6 +20,7 @@
 #include "alloc-util.h"
 #include "bpf-firewall.h"
 #include "bpf-program.h"
+#include "bpf-socket-bind.h"
 #include "bus-error.h"
 #include "bus-internal.h"
 #include "bus-util.h"
@@ -55,7 +56,6 @@
 #endif
 #include "securebits-util.h"
 #include "signal-util.h"
-#include "socket-bind.h"
 #include "socket-netlink.h"
 #include "specifier.h"
 #include "stat-util.h"
diff --git a/src/core/meson.build b/src/core/meson.build
index e696d27727..f0d2c6f642 100644
--- a/src/core/meson.build
+++ b/src/core/meson.build
@@ -13,6 +13,8 @@ libcore_sources = '''
         bpf-firewall.h
         bpf-foreign.c
         bpf-foreign.h
+        bpf-socket-bind.c
+        bpf-socket-bind.h
         cgroup.c
         cgroup.h
         core-varlink.c
@@ -83,10 +85,10 @@ libcore_sources = '''
         load-fragment.h
         locale-setup.c
         locale-setup.h
-        manager.c
-        manager.h
         manager-dump.c
         manager-dump.h
+        manager.c
+        manager.h
         mount.c
         mount.h
         namespace.c
@@ -107,8 +109,6 @@ libcore_sources = '''
         slice.h
         smack-setup.c
         smack-setup.h
-        socket-bind.c
-        socket-bind.h
         socket.c
         socket.h
         swap.c
diff --git a/src/core/socket-bind.h b/src/core/socket-bind.h
deleted file mode 100644
index 2a6e71a9b9..0000000000
--- a/src/core/socket-bind.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1+ */
-#pragma once
-
-#include "fdset.h"
-#include "unit.h"
-
-int socket_bind_supported(void);
-
-/* Add BPF link fd created before daemon-reload or daemon-reexec.
- * FDs will be closed at the end of socket_bind_install. */
-int socket_bind_add_initial_link_fd(Unit *u, int fd);
-
-int socket_bind_install(Unit *u);
-
-int serialize_socket_bind(Unit *u, FILE *f, FDSet *fds);
diff --git a/src/core/unit-serialize.c b/src/core/unit-serialize.c
index 4da69769a6..daf7c59cc1 100644
--- a/src/core/unit-serialize.c
+++ b/src/core/unit-serialize.c
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 
+#include "bpf-socket-bind.h"
 #include "bus-util.h"
 #include "dbus.h"
 #include "fileio-label.h"
@@ -7,7 +8,6 @@
 #include "format-util.h"
 #include "parse-util.h"
 #include "serialize.h"
-#include "socket-bind.h"
 #include "string-table.h"
 #include "unit-serialize.h"
 #include "user-util.h"
@@ -164,7 +164,12 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool switching_root) {
         (void) serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
         (void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask);
 
-        (void) serialize_socket_bind(u, f, fds);
+        (void) bpf_serialize_socket_bind(u, f, fds);
+
+        (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-ingress-installed", u->ip_bpf_ingress_installed);
+        (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-egress-installed", u->ip_bpf_egress_installed);
+        (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-ingress-installed", u->ip_bpf_custom_ingress_installed);
+        (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-egress-installed", u->ip_bpf_custom_egress_installed);
 
         if (uid_is_valid(u->ref_uid))
                 (void) serialize_item_format(f, "ref-uid", UID_FMT, u->ref_uid);
@@ -385,16 +390,28 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                         else {
                                 if (fdset_remove(fds, fd) < 0) {
                                         log_unit_debug(u, "Failed to remove %s value=%d from fdset", l, fd);
-
                                         continue;
                                 }
 
-                                (void) socket_bind_add_initial_link_fd(u, fd);
+                                (void) bpf_socket_bind_add_initial_link_fd(u, fd);
                         }
                         continue;
-                }
 
-                else if (streq(l, "ref-uid")) {
+                } else if (streq(l, "ip-bpf-ingress-installed")) {
+                         (void) bpf_program_deserialize_attachment(v, fds, &u->ip_bpf_ingress_installed);
+                         continue;
+                } else if (streq(l, "ip-bpf-egress-installed")) {
+                         (void) bpf_program_deserialize_attachment(v, fds, &u->ip_bpf_egress_installed);
+                         continue;
+
+                } else if (streq(l, "ip-bpf-custom-ingress-installed")) {
+                         (void) bpf_program_deserialize_attachment_set(v, fds, &u->ip_bpf_custom_ingress_installed);
+                         continue;
+                } else if (streq(l, "ip-bpf-custom-egress-installed")) {
+                         (void) bpf_program_deserialize_attachment_set(v, fds, &u->ip_bpf_custom_egress_installed);
+                         continue;
+
+                } else if (streq(l, "ref-uid")) {
                         uid_t uid;
 
                         r = parse_uid(v, &uid);
diff --git a/src/core/unit.c b/src/core/unit.c
index c6b17afa51..de407d20a8 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -12,6 +12,7 @@
 #include "alloc-util.h"
 #include "bpf-firewall.h"
 #include "bpf-foreign.h"
+#include "bpf-socket-bind.h"
 #include "bus-common-errors.h"
 #include "bus-util.h"
 #include "cgroup-setup.h"
@@ -41,7 +42,6 @@
 #include "rm-rf.h"
 #include "set.h"
 #include "signal-util.h"
-#include "socket-bind.h"
 #include "sparse-endian.h"
 #include "special.h"
 #include "specifier.h"
@@ -114,6 +114,9 @@ Unit* unit_new(Manager *m, size_t size) {
 
         u->ip_accounting_ingress_map_fd = -1;
         u->ip_accounting_egress_map_fd = -1;
+        for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++)
+                u->io_accounting_last[i] = UINT64_MAX;
+
         u->ipv4_allow_map_fd = -1;
         u->ipv6_allow_map_fd = -1;
         u->ipv4_deny_map_fd = -1;
@@ -124,9 +127,6 @@ Unit* unit_new(Manager *m, size_t size) {
         u->start_ratelimit = (RateLimit) { m->default_start_limit_interval, m->default_start_limit_burst };
         u->auto_start_stop_ratelimit = (RateLimit) { 10 * USEC_PER_SEC, 16 };
 
-        for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++)
-                u->io_accounting_last[i] = UINT64_MAX;
-
         return u;
 }
 
@@ -757,23 +757,7 @@ Unit* unit_free(Unit *u) {
         if (u->in_stop_when_bound_queue)
                 LIST_REMOVE(stop_when_bound_queue, u->manager->stop_when_bound_queue, u);
 
-        safe_close(u->ip_accounting_ingress_map_fd);
-        safe_close(u->ip_accounting_egress_map_fd);
-
-        safe_close(u->ipv4_allow_map_fd);
-        safe_close(u->ipv6_allow_map_fd);
-        safe_close(u->ipv4_deny_map_fd);
-        safe_close(u->ipv6_deny_map_fd);
-
-        bpf_program_unref(u->ip_bpf_ingress);
-        bpf_program_unref(u->ip_bpf_ingress_installed);
-        bpf_program_unref(u->ip_bpf_egress);
-        bpf_program_unref(u->ip_bpf_egress_installed);
-
-        set_free(u->ip_bpf_custom_ingress);
-        set_free(u->ip_bpf_custom_egress);
-        set_free(u->ip_bpf_custom_ingress_installed);
-        set_free(u->ip_bpf_custom_egress_installed);
+        bpf_firewall_close(u);
 
         hashmap_free(u->bpf_foreign_by_key);
 
diff --git a/src/core/unit.h b/src/core/unit.h
index 8818392731..52feb3693b 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -308,14 +308,15 @@ typedef struct Unit {
         /* IP BPF Firewalling/accounting */
         int ip_accounting_ingress_map_fd;
         int ip_accounting_egress_map_fd;
+        uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
 
         int ipv4_allow_map_fd;
         int ipv6_allow_map_fd;
         int ipv4_deny_map_fd;
         int ipv6_deny_map_fd;
-
         BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
         BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
+
         Set *ip_bpf_custom_ingress;
         Set *ip_bpf_custom_ingress_installed;
         Set *ip_bpf_custom_egress;
@@ -334,8 +335,6 @@ typedef struct Unit {
         struct bpf_link *ipv6_socket_bind_link;
 #endif
 
-        uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
-
         /* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new
          * ones which might have appeared. */
         sd_event_source *rewatch_pids_event_source;
diff --git a/src/shared/bpf-dlopen.c b/src/shared/bpf-dlopen.c
index 64120f17c5..0556148458 100644
--- a/src/shared/bpf-dlopen.c
+++ b/src/shared/bpf-dlopen.c
@@ -37,7 +37,7 @@ int dlopen_bpf(void) {
 
         r = dlsym_many_and_warn(
                         dl,
-                        LOG_ERR,
+                        LOG_DEBUG,
                         DLSYM_ARG(bpf_link__destroy),
                         DLSYM_ARG(bpf_link__fd),
                         DLSYM_ARG(bpf_map__fd),
@@ -60,7 +60,6 @@ int dlopen_bpf(void) {
         /* Note that we never release the reference here, because there's no real reason to, after all this
          * was traditionally a regular shared library dependency which lives forever too. */
         bpf_dl = TAKE_PTR(dl);
-
         return 1;
 }
 
diff --git a/src/shared/bpf-link.c b/src/shared/bpf-link.c
index 405874374c..720ed40395 100644
--- a/src/shared/bpf-link.c
+++ b/src/shared/bpf-link.c
@@ -4,17 +4,13 @@
 #include "bpf-link.h"
 #include "serialize.h"
 
-bool can_link_bpf_program(struct bpf_program *prog) {
+bool bpf_can_link_program(struct bpf_program *prog) {
         _cleanup_(bpf_link_freep) struct bpf_link *link = NULL;
-        int r;
 
         assert(prog);
 
-        r = dlopen_bpf();
-        if (r < 0) {
-                log_debug_errno(r, "Could not load libbpf: %m");
+        if (dlopen_bpf() < 0)
                 return false;
-        }
 
         /* Pass invalid cgroup fd intentionally. */
         link = sym_bpf_program__attach_cgroup(prog, /*cgroup_fd=*/-1);
@@ -23,9 +19,7 @@ bool can_link_bpf_program(struct bpf_program *prog) {
         return sym_libbpf_get_error(link) == -EBADF;
 }
 
-int serialize_bpf_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link) {
-        int fd;
-
+int bpf_serialize_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link) {
         assert(key);
 
         if (!link)
@@ -34,11 +28,11 @@ int serialize_bpf_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *li
         if (sym_libbpf_get_error(link) != 0)
                 return -EINVAL;
 
-        fd = sym_bpf_link__fd(link);
-        return serialize_fd(f, fds, key, fd);
+        return serialize_fd(f, fds, key, sym_bpf_link__fd(link));
 }
 
 struct bpf_link *bpf_link_free(struct bpf_link *link) {
+
         /* Avoid a useless dlopen() if link == NULL */
         if (!link)
                 return NULL;
diff --git a/src/shared/bpf-link.h b/src/shared/bpf-link.h
index 095465b07c..bb6ac60ac9 100644
--- a/src/shared/bpf-link.h
+++ b/src/shared/bpf-link.h
@@ -8,9 +8,9 @@
 #include "fdset.h"
 #include "macro.h"
 
-bool can_link_bpf_program(struct bpf_program *prog);
+bool bpf_can_link_program(struct bpf_program *prog);
 
-int serialize_bpf_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link);
+int bpf_serialize_link(FILE *f, FDSet *fds, const char *key, struct bpf_link *link);
 
 struct bpf_link *bpf_link_free(struct bpf_link *p);
 DEFINE_TRIVIAL_CLEANUP_FUNC(struct bpf_link *, bpf_link_free);
diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c
index ec8437d583..0f865a7168 100644
--- a/src/shared/bpf-program.c
+++ b/src/shared/bpf-program.c
@@ -7,10 +7,12 @@
 
 #include "alloc-util.h"
 #include "bpf-program.h"
+#include "escape.h"
 #include "fd-util.h"
 #include "memory-util.h"
 #include "missing_syscall.h"
 #include "path-util.h"
+#include "serialize.h"
 #include "string-table.h"
 
 static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = {
@@ -36,6 +38,8 @@ static const char *const bpf_cgroup_attach_type_table[__MAX_BPF_ATTACH_TYPE] = {
 
 DEFINE_STRING_TABLE_LOOKUP(bpf_cgroup_attach_type, int);
 
+DEFINE_HASH_OPS_WITH_KEY_DESTRUCTOR(bpf_program_hash_ops, void, trivial_hash_func, trivial_compare_func, bpf_program_unref);
+
  /* struct bpf_prog_info info must be initialized since its value is both input and output
   * for BPF_OBJ_GET_INFO_BY_FD syscall. */
 static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, uint32_t info_len) {
@@ -59,13 +63,15 @@ static int bpf_program_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, u
 int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
         _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
 
-        p = new0(BPFProgram, 1);
+        p = new(BPFProgram, 1);
         if (!p)
                 return -ENOMEM;
 
-        p->n_ref = 1;
-        p->prog_type = prog_type;
-        p->kernel_fd = -1;
+        *p = (BPFProgram) {
+                .n_ref = 1,
+                .prog_type = prog_type,
+                .kernel_fd = -1,
+        };
 
         *ret = TAKE_PTR(p);
 
@@ -358,3 +364,139 @@ int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id) {
 
         return 0;
 };
+
+/* Records the cgroup attachment of p for serialization. The kernel fd is put into 'fds' through
+ * fdset_put_dup(), and the fd number that call returns is written to 'f' under 'key', followed by the
+ * attach type name and the C-escaped cgroup path. On success p forgets its attachment (attached_path
+ * is reset to NULL). Returns 0 on success or when p is NULL or unattached, and a negative errno-style
+ * value on failure. */
+int bpf_program_serialize_attachment(FILE *f, FDSet *fds, const char *key, BPFProgram *p) {
+        _cleanup_free_ char *path = NULL;
+        int dup_fd, r;
+
+        /* Nothing to record without a program or without an attachment. */
+        if (!p)
+                return 0;
+        if (!p->attached_path)
+                return 0;
+
+        assert(p->kernel_fd >= 0);
+
+        path = cescape(p->attached_path);
+        if (!path)
+                return -ENOMEM;
+
+        dup_fd = fdset_put_dup(fds, p->kernel_fd);
+        if (dup_fd < 0)
+                return log_error_errno(dup_fd, "Failed to add BPF kernel fd to serialize: %m");
+
+        r = serialize_item_format(f, key, "%i %s %s",
+                                  dup_fd,
+                                  bpf_cgroup_attach_type_to_string(p->attached_type),
+                                  path);
+        if (r < 0)
+                return r;
+
+        /* From here on the attachment belongs to the serialized state rather than to this object. That
+         * distinction matters because BPF cgroup attachments are not dropped on close(); they have to
+         * be detached explicitly, and bpf_program_free() does exactly that whenever attached_path is
+         * non-NULL. Since the whole point of serializing is to keep the program attached until it is
+         * deserialized again, freeing this object must not detach it. Hence drop the path now, so that
+         * the destructor has nothing to detach. Note this is only reached once the item was written
+         * successfully. */
+
+        p->attached_path = mfree(p->attached_path);
+        return 0;
+}
+
+/* Calls bpf_program_serialize_attachment() with 'key' for each member of 'set'; stops at first failure. */
+int bpf_program_serialize_attachment_set(FILE *f, FDSet *fds, const char *key, Set *set) {
+        BPFProgram *prog;
+
+        SET_FOREACH(prog, set) {
+                int k = bpf_program_serialize_attachment(f, fds, key, prog);
+                if (k < 0)
+                        return k;
+        }
+
+        return 0;
+}
+
+/* Parses one item written by bpf_program_serialize_attachment(): "<fd> <attach type> <escaped path>".
+ * The fd is taken out of 'fds' and wrapped in a new BPFProgram that replaces *bpfp (a program already
+ * stored there is unref'ed). Returns 0 on success, negative errno-style on bad input or failure. */
+int bpf_program_deserialize_attachment(const char *v, FDSet *fds, BPFProgram **bpfp) {
+        _cleanup_free_ char *sfd = NULL, *sat = NULL, *unescaped = NULL;
+        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+        _cleanup_close_ int fd = -1;
+        int ifd, at, r;
+
+        assert(v);
+        assert(bpfp);
+
+        /* Extract first word: the fd number */
+        r = extract_first_word(&v, &sfd, NULL, 0);
+        if (r <= 0)
+                return r < 0 ? r : -EINVAL;
+
+        r = safe_atoi(sfd, &ifd);
+        if (r < 0)
+                return r;
+        if (ifd < 0)
+                return -EBADF;
+
+        /* Extract second word: the attach type */
+        r = extract_first_word(&v, &sat, NULL, 0);
+        if (r <= 0)
+                return r < 0 ? r : -EINVAL;
+
+        at = bpf_cgroup_attach_type_from_string(sat);
+        if (at < 0)
+                return at;
+
+        /* The rest is the path; v is NULL or empty here if the item stops after the attach type */
+        if (!v || !*v)
+                return -EINVAL;
+        r = cunescape(v, 0, &unescaped);
+        if (r < 0)
+                return r;
+
+        fd = fdset_remove(fds, ifd);
+        if (fd < 0)
+                return fd;
+
+        p = new(BPFProgram, 1);
+        if (!p)
+                return -ENOMEM;
+
+        *p = (BPFProgram) {
+                .n_ref = 1,
+                .kernel_fd = TAKE_FD(fd),
+                .prog_type = BPF_PROG_TYPE_UNSPEC, /* the type is not part of the serialized item */
+                .attached_path = TAKE_PTR(unescaped),
+                .attached_type = at,
+        };
+
+        if (*bpfp)
+                bpf_program_unref(*bpfp);
+        *bpfp = TAKE_PTR(p);
+        return 0;
+}
+
+/* Deserializes one attachment item from 'v' and hands the resulting program to set_ensure_consume()
+ * for insertion into *bpfsetp. Returns 0 on success, a negative errno-style value on failure. */
+int bpf_program_deserialize_attachment_set(const char *v, FDSet *fds, Set **bpfsetp) {
+        BPFProgram *prog = NULL;
+        int r;
+
+        assert(v);
+        assert(bpfsetp);
+
+        r = bpf_program_deserialize_attachment(v, fds, &prog);
+        if (r >= 0)
+                /* Our only reference goes to set_ensure_consume(); prog is not touched afterwards. */
+                r = set_ensure_consume(bpfsetp, &bpf_program_hash_ops, prog);
+
+        /* Any non-negative result is reported as plain 0. */
+        return r < 0 ? r : 0;
+}
diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h
index edde86c119..908af1a1b2 100644
--- a/src/shared/bpf-program.h
+++ b/src/shared/bpf-program.h
@@ -3,22 +3,32 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
+#include <stdio.h>
 #include <sys/syscall.h>
 
+#include "fdset.h"
 #include "list.h"
 #include "macro.h"
 
 typedef struct BPFProgram BPFProgram;
 
+/* This encapsulates three different concepts: the loaded BPF program, the BPF code, and the attachment to a
+ * cgroup. Typically our BPF programs go through all three stages: we build the code, we load it, and finally
+ * we attach it, but it might happen that we operate with programs that aren't loaded or aren't attached, or
+ * where we don't have the code. */
 struct BPFProgram {
         unsigned n_ref;
 
+        /* The loaded BPF program, if loaded */
         int kernel_fd;
         uint32_t prog_type;
 
+        /* The code of the BPF program, if known */
         size_t n_instructions;
         struct bpf_insn *instructions;
 
+        /* The cgroup path the program is attached to, if it is attached. If non-NULL bpf_program_unref()
+         * will detach on destruction. */
         char *attached_path;
         int attached_type;
         uint32_t attached_flags;
@@ -35,9 +45,17 @@ int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path);
 
 int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
 int bpf_program_cgroup_detach(BPFProgram *p);
+
 int bpf_program_pin(int prog_fd, const char *bpffs_path);
 int bpf_program_get_id_by_fd(int prog_fd, uint32_t *ret_id);
 
+int bpf_program_serialize_attachment(FILE *f, FDSet *fds, const char *key, BPFProgram *p);
+int bpf_program_serialize_attachment_set(FILE *f, FDSet *fds, const char *key, Set *set);
+int bpf_program_deserialize_attachment(const char *v, FDSet *fds, BPFProgram **bpfp);
+int bpf_program_deserialize_attachment_set(const char *v, FDSet *fds, Set **bpfsetp);
+
+extern const struct hash_ops bpf_program_hash_ops;
+
 int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
 int bpf_map_update_element(int fd, const void *key, void *value);
 int bpf_map_lookup_element(int fd, const void *key, void *value);
diff --git a/src/test/test-socket-bind.c b/src/test/test-socket-bind.c
index 16cfea7779..996aeebbcc 100644
--- a/src/test/test-socket-bind.c
+++ b/src/test/test-socket-bind.c
@@ -1,12 +1,12 @@
 /* SPDX-License-Identifier: LGPL-2.1-or-later */
 
+#include "bpf-socket-bind.h"
 #include "load-fragment.h"
 #include "manager.h"
 #include "process-util.h"
 #include "rlimit-util.h"
 #include "rm-rf.h"
 #include "service.h"
-#include "socket-bind.h"
 #include "strv.h"
 #include "tests.h"
 #include "unit.h"
@@ -122,7 +122,7 @@ int main(int argc, char *argv[]) {
         if (!can_memlock())
                 return log_tests_skipped("Can't use mlock(), skipping.");
 
-        r = socket_bind_supported();
+        r = bpf_socket_bind_supported();
         if (r <= 0)
                 return log_tests_skipped("socket-bind is not supported, skipping.");
author	Lennart Poettering <lennart@poettering.net>	2021-06-09 09:47:23 +0200
committer	GitHub <noreply@github.com>	2021-06-09 09:47:23 +0200
commit	bead169fe0f182a98cc8b55a63b3a12a5049ab45 (patch)
tree	5425eea35c12cc910768ed8907fe89f6cc366ff6 /src
parent	Merge pull request #19852 from yuwata/network-stable-secret (diff)
parent	bpf-firewall: close gap when updating the firewall (diff)
download	systemd-bead169fe0f182a98cc8b55a63b3a12a5049ab45.tar.xz systemd-bead169fe0f182a98cc8b55a63b3a12a5049ab45.zip