diff options
-rw-r--r-- | man/systemd.resource-control.xml | 33 | ||||
-rw-r--r-- | src/analyze/analyze-security.c | 37 | ||||
-rw-r--r-- | src/core/bpf-firewall.c | 106 | ||||
-rw-r--r-- | src/core/bpf-firewall.h | 1 | ||||
-rw-r--r-- | src/core/cgroup.c | 21 | ||||
-rw-r--r-- | src/core/cgroup.h | 3 | ||||
-rw-r--r-- | src/core/dbus-cgroup.c | 76 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.m4 | 2 | ||||
-rw-r--r-- | src/core/load-fragment.c | 61 | ||||
-rw-r--r-- | src/core/load-fragment.h | 1 | ||||
-rw-r--r-- | src/core/unit.c | 12 | ||||
-rw-r--r-- | src/core/unit.h | 5 | ||||
-rw-r--r-- | src/shared/bpf-program.c | 19 | ||||
-rw-r--r-- | src/shared/bpf-program.h | 1 | ||||
-rw-r--r-- | src/shared/bus-unit-util.c | 12 | ||||
-rw-r--r-- | src/test/test-bpf.c | 53 |
16 files changed, 436 insertions, 7 deletions
diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index 95209a8a6a..e7b5dfbce6 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -619,6 +619,39 @@ </varlistentry> <varlistentry> + <term><varname>IPIngressFilterPath=<replaceable>BPF_FS_PROGRAM_PATH</replaceable></varname></term> + <term><varname>IPEgressFilterPath=<replaceable>BPF_FS_PROGRAM_PATH</replaceable></varname></term> + + <listitem> + <para>Add custom network traffic filters implemented as BPF programs, applying to all IP packets + sent and received over <constant>AF_INET</constant> and <constant>AF_INET6</constant> sockets. + Takes an absolute path to a pinned BPF program in the BPF virtual filesystem (<filename>/sys/fs/bpf/</filename>). + </para> + + <para>The filters configured with this option are applied to all sockets created by processes + of this unit (or in the case of socket units, associated with it). The filters are loaded in addition + to filters of any of the parent slice units this unit might be a member of as well as any + <varname>IPAddressAllow=</varname> and <varname>IPAddressDeny=</varname> filters in any of these units. + By default there are no filters specified.</para> + + <para>If these settings are used multiple times in the same unit all the specified programs are attached. If an + empty string is assigned to these settings the program list is reset and all previously specified programs are ignored.</para> + + <para>Note that for socket-activated services, the IP filter programs configured on the socket unit apply to + all sockets associated with it directly, but not to any sockets created by the ultimately activated services + for it. Conversely, the IP filter programs configured for the service are not applied to any sockets passed into + the service via socket activation. 
Thus, it is usually a good idea to replicate the IP filter programs on both + the socket and the service unit; however, it often makes sense to maintain one configuration more open and the other + one more restricted, depending on the use case.</para> + + <para>Note that these settings might not be supported on some systems (for example if eBPF control group + support is not enabled in the underlying kernel or container manager). These settings will fail the service in + that case. If compatibility with such systems is desired it is hence recommended to attach your filter manually + (requires <varname>Delegate=</varname><constant>yes</constant>) instead of using this setting.</para> + </listitem> + </varlistentry> + + <varlistentry> <term><varname>DeviceAllow=</varname></term> <listitem> diff --git a/src/analyze/analyze-security.c b/src/analyze/analyze-security.c index 0962950dd0..3cf6515f5f 100644 --- a/src/analyze/analyze-security.c +++ b/src/analyze/analyze-security.c @@ -45,6 +45,9 @@ struct security_info { bool ip_address_allow_localhost; bool ip_address_allow_other; + bool ip_filters_custom_ingress; + bool ip_filters_custom_egress; + char *keyring_mode; bool lock_personality; bool memory_deny_write_execute; @@ -590,7 +593,10 @@ static int assess_ip_address_allow( assert(ret_badness); assert(ret_description); - if (!info->ip_address_deny_all) { + if (info->ip_filters_custom_ingress || info->ip_filters_custom_egress) { + d = strdup("Service defines custom ingress/egress IP filters with BPF programs"); + b = 0; + } else if (!info->ip_address_deny_all) { d = strdup("Service does not define an IP address whitelist"); b = 10; } else if (info->ip_address_allow_other) { @@ -1824,6 +1830,33 @@ static int property_read_ip_address_allow( return sd_bus_message_exit_container(m); } +static int property_read_ip_filters( + sd_bus *bus, + const char *member, + sd_bus_message *m, + sd_bus_error *error, + void *userdata) { + + struct security_info *info = userdata; + 
_cleanup_(strv_freep) char **l = NULL; + int r; + + assert(bus); + assert(member); + assert(m); + + r = sd_bus_message_read_strv(m, &l); + if (r < 0) + return r; + + if (streq(member, "IPIngressFilterPath")) + info->ip_filters_custom_ingress = !strv_isempty(l); + else if (streq(member, "IPEgressFilterPath")) + info->ip_filters_custom_egress = !strv_isempty(l); + + return 0; +} + static int property_read_device_allow( sd_bus *bus, const char *member, @@ -1873,6 +1906,8 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_ { "FragmentPath", "s", NULL, offsetof(struct security_info, fragment_path) }, { "IPAddressAllow", "a(iayu)", property_read_ip_address_allow, 0 }, { "IPAddressDeny", "a(iayu)", property_read_ip_address_allow, 0 }, + { "IPIngressFilterPath", "as", property_read_ip_filters, 0 }, + { "IPEgressFilterPath", "as", property_read_ip_filters, 0 }, { "Id", "s", NULL, offsetof(struct security_info, id) }, { "KeyringMode", "s", NULL, offsetof(struct security_info, keyring_mode) }, { "LoadState", "s", NULL, offsetof(struct security_info, load_state) }, diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c index 8163db276b..7a8b848fb3 100644 --- a/src/core/bpf-firewall.c +++ b/src/core/bpf-firewall.c @@ -587,6 +587,95 @@ int bpf_firewall_compile(Unit *u) { return 0; } +DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(filter_prog_hash_ops, void, trivial_hash_func, trivial_compare_func, BPFProgram, bpf_program_unref); + +static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set) { + char **bpf_fs_path; + + set_clear(*set); + + STRV_FOREACH(bpf_fs_path, filter_paths) { + _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL; + int r; + + r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &prog); + if (r < 0) + return log_unit_error_errno(u, r, "Can't allocate CGROUP SKB BPF program: %m"); + + r = bpf_program_load_from_bpf_fs(prog, *bpf_fs_path); + if (r < 0) + return log_unit_error_errno(u, r, "Loading of custom BPF program 
%s failed: %m", *bpf_fs_path); + + r = set_ensure_allocated(set, &filter_prog_hash_ops); + if (r < 0) + return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m"); + + r = set_put(*set, prog); + if (r < 0) + return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m"); + TAKE_PTR(prog); + } + + return 0; +} + +int bpf_firewall_load_custom(Unit *u) { + CGroupContext *cc; + int r, supported; + + assert(u); + + cc = unit_get_cgroup_context(u); + if (!cc) + return 0; + + if (!(cc->ip_filters_ingress || cc->ip_filters_egress)) + return 0; + + supported = bpf_firewall_supported(); + if (supported < 0) + return supported; + + if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI) + return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs."); + + r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &u->ip_bpf_custom_ingress); + if (r < 0) + return r; + r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &u->ip_bpf_custom_egress); + if (r < 0) + return r; + + return 0; +} + +static int attach_custom_bpf_progs(Unit *u, const char *path, int attach_type, Set **set, Set **set_installed) { + BPFProgram *prog; + Iterator i; + int r; + + assert(u); + + set_clear(*set_installed); + + SET_FOREACH(prog, *set, i) { + r = bpf_program_cgroup_attach(prog, attach_type, path, BPF_F_ALLOW_MULTI); + if (r < 0) + return log_unit_error_errno(u, r, "Attaching custom %s BPF program to cgroup %s failed: %m", attach_type == BPF_CGROUP_INET_EGRESS ? "egress" : "ingress", path); + /* Remember that these BPF programs are installed now. 
*/ + r = set_ensure_allocated(set_installed, &filter_prog_hash_ops); + if (r < 0) + return log_unit_error_errno(u, r, "Can't allocate BPF program set: %m"); + + r = set_put(*set_installed, prog); + if (r < 0) + return log_unit_error_errno(u, r, "Can't add program to BPF program set: %m"); + bpf_program_ref(prog); + } + + return 0; +} + int bpf_firewall_install(Unit *u) { _cleanup_free_ char *path = NULL; CGroupContext *cc; @@ -614,6 +703,9 @@ int bpf_firewall_install(Unit *u) { log_unit_debug(u, "BPF_F_ALLOW_MULTI is not supported on this manager, not doing BPF firewall on slice units."); return -EOPNOTSUPP; } + if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI && + (!set_isempty(u->ip_bpf_custom_ingress) || !set_isempty(u->ip_bpf_custom_egress))) + return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP), "BPF_F_ALLOW_MULTI not supported on this manager, cannot attach custom BPF programs."); r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path); if (r < 0) @@ -628,7 +720,8 @@ int bpf_firewall_install(Unit *u) { u->ip_bpf_ingress_installed = bpf_program_unref(u->ip_bpf_ingress_installed); if (u->ip_bpf_egress) { - r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags); + r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, + flags | (set_isempty(u->ip_bpf_custom_egress) ? 0 : BPF_F_ALLOW_MULTI)); if (r < 0) return log_unit_error_errno(u, r, "Attaching egress BPF program to cgroup %s failed: %m", path); @@ -637,13 +730,22 @@ int bpf_firewall_install(Unit *u) { } if (u->ip_bpf_ingress) { - r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags); + r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, + flags | (set_isempty(u->ip_bpf_custom_ingress) ? 
0 : BPF_F_ALLOW_MULTI)); if (r < 0) return log_unit_error_errno(u, r, "Attaching ingress BPF program to cgroup %s failed: %m", path); u->ip_bpf_ingress_installed = bpf_program_ref(u->ip_bpf_ingress); } + r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed); + if (r < 0) + return r; + + r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &u->ip_bpf_custom_ingress, &u->ip_bpf_custom_ingress_installed); + if (r < 0) + return r; + return 0; } diff --git a/src/core/bpf-firewall.h b/src/core/bpf-firewall.h index 10cafcc02e..f1460d982d 100644 --- a/src/core/bpf-firewall.h +++ b/src/core/bpf-firewall.h @@ -15,6 +15,7 @@ int bpf_firewall_supported(void); int bpf_firewall_compile(Unit *u); int bpf_firewall_install(Unit *u); +int bpf_firewall_load_custom(Unit *u); int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets); int bpf_firewall_reset_accounting(int map_fd); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 1ed5723892..0428f62481 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -199,6 +199,9 @@ void cgroup_context_done(CGroupContext *c) { c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow); c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny); + + c->ip_filters_ingress = strv_free(c->ip_filters_ingress); + c->ip_filters_egress = strv_free(c->ip_filters_egress); } void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { @@ -210,6 +213,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { CGroupBlockIODeviceWeight *w; CGroupDeviceAllow *a; IPAddressAccessItem *iaai; + char **path; char u[FORMAT_TIMESPAN_MAX]; char v[FORMAT_TIMESPAN_MAX]; @@ -360,6 +364,12 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { (void) in_addr_to_string(iaai->family, &iaai->address, &k); fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen); } + + 
STRV_FOREACH(path, c->ip_filters_ingress) + fprintf(f, "%sIPIngressFilterPath=%s\n", prefix, *path); + + STRV_FOREACH(path, c->ip_filters_egress) + fprintf(f, "%sIPEgressFilterPath=%s\n", prefix, *path); } int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode) { @@ -945,6 +955,7 @@ static void cgroup_apply_firewall(Unit *u) { if (bpf_firewall_compile(u) < 0) return; + (void) bpf_firewall_load_custom(u); (void) bpf_firewall_install(u); } @@ -1353,7 +1364,9 @@ static bool unit_get_needs_bpf_firewall(Unit *u) { if (c->ip_accounting || c->ip_address_allow || - c->ip_address_deny) + c->ip_address_deny || + c->ip_filters_ingress || + c->ip_filters_egress) return true; /* If any parent slice has an IP access list defined, it applies too */ @@ -1919,6 +1932,12 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) { if (set_isempty(pids)) return 0; + /* Load any custom firewall BPF programs here once to test if they are existing and actually loadable. + * Fail here early since later errors in the call chain unit_realize_cgroup to cgroup_context_apply are ignored. 
*/ + r = bpf_firewall_load_custom(u); + if (r < 0) + return r; + r = unit_realize_cgroup(u); if (r < 0) return r; diff --git a/src/core/cgroup.h b/src/core/cgroup.h index fe347ea114..d1537c503e 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -114,6 +114,9 @@ struct CGroupContext { LIST_HEAD(IPAddressAccessItem, ip_address_allow); LIST_HEAD(IPAddressAccessItem, ip_address_deny); + char **ip_filters_ingress; + char **ip_filters_egress; + /* For legacy hierarchies */ uint64_t cpu_shares; uint64_t startup_cpu_shares; diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 9f4fd06dc4..f70e6c87ee 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -362,6 +362,8 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0), SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0), SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0), + SD_BUS_PROPERTY("IPIngressFilterPath", "as", NULL, offsetof(CGroupContext, ip_filters_ingress), 0), + SD_BUS_PROPERTY("IPEgressFilterPath", "as", NULL, offsetof(CGroupContext, ip_filters_egress), 0), SD_BUS_PROPERTY("DisableControllers", "as", property_get_cgroup_mask, offsetof(CGroupContext, disable_controllers), 0), SD_BUS_VTABLE_END }; @@ -462,6 +464,80 @@ static int bus_cgroup_set_transient_property( } return 1; + } else if (STR_IN_SET(name, "IPIngressFilterPath", "IPEgressFilterPath")) { + char ***filters; + size_t n = 0; + + filters = streq(name, "IPIngressFilterPath") ? 
&c->ip_filters_ingress : &c->ip_filters_egress; + r = sd_bus_message_enter_container(message, 'a', "s"); + if (r < 0) + return r; + + for (;;) { + const char *path; + + r = sd_bus_message_read(message, "s", &path); + if (r < 0) + return r; + if (r == 0) + break; + + if (!path_is_normalized(path) || !path_is_absolute(path)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "%s= expects a normalized absolute path.", name); + + if (!UNIT_WRITE_FLAGS_NOOP(flags) && !strv_contains(*filters, path)) { + r = strv_extend(filters, path); + if (r < 0) + return log_oom(); + } + n++; + } + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + _cleanup_free_ char *buf = NULL; + _cleanup_fclose_ FILE *f = NULL; + char **entry; + size_t size = 0; + + if (n == 0) + *filters = strv_free(*filters); + + unit_invalidate_cgroup_bpf(u); + f = open_memstream_unlocked(&buf, &size); + if (!f) + return -ENOMEM; + + fputs(name, f); + fputs("=\n", f); + + STRV_FOREACH(entry, *filters) + fprintf(f, "%s=%s\n", name, *entry); + + r = fflush_and_check(f); + if (r < 0) + return r; + + unit_write_setting(u, flags, name, buf); + + if (*filters) { + r = bpf_firewall_supported(); + if (r < 0) + return r; + if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) { + static bool warned = false; + + log_full(warned ? LOG_DEBUG : LOG_WARNING, + "Transient unit %s configures an IP firewall with BPF, but the local system does not support BPF/cgroup firewalling with multiple filters.\n" + "Starting this unit will fail! 
(This warning is only shown for the first started transient unit using IP firewalling.)", u->id); + warned = true; + } + } + } + + return 1; } return 0; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 5e6fb64093..f7906b374a 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -205,6 +205,8 @@ $1.DisableControllers, config_parse_disable_controllers, 0, $1.IPAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.ip_accounting) $1.IPAddressAllow, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_allow) $1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny) +$1.IPIngressFilterPath, config_parse_ip_filter_bpf_progs, 0, offsetof($1, cgroup_context.ip_filters_ingress) +$1.IPEgressFilterPath, config_parse_ip_filter_bpf_progs, 0, offsetof($1, cgroup_context.ip_filters_egress) $1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0' )m4_dnl Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 274d9d2fef..ba41f8ee1a 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -18,6 +18,7 @@ #include "af-list.h" #include "alloc-util.h" #include "all-units.h" +#include "bpf-firewall.h" #include "bus-error.h" #include "bus-internal.h" #include "bus-util.h" @@ -4456,6 +4457,66 @@ int config_parse_disable_controllers( return 0; } +int config_parse_ip_filter_bpf_progs( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char *resolved = NULL; + Unit *u = userdata; + char ***paths = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(paths); + + if (isempty(rvalue)) { + *paths = strv_free(*paths); + return 0; + } 
+ + r = unit_full_printf(u, rvalue, &resolved); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, r, "Failed to resolve unit specifiers in '%s', ignoring: %m", rvalue); + return 0; + } + + r = path_simplify_and_warn(resolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue); + if (r < 0) + return 0; + + if (strv_contains(*paths, resolved)) + return 0; + + r = strv_extend(paths, resolved); + if (r < 0) + return log_oom(); + + r = bpf_firewall_supported(); + if (r < 0) + return r; + if (r != BPF_FIREWALL_SUPPORTED_WITH_MULTI) { + static bool warned = false; + + log_full(warned ? LOG_DEBUG : LOG_WARNING, + "File %s:%u configures an IP firewall with BPF programs (%s=%s), but the local system does not support BPF/cgroup based firewalling with multiple filters.\n" + "Starting this unit will fail! (This warning is only shown for the first loaded unit using IP firewalling.)", filename, line, lvalue, rvalue); + + warned = true; + } + + return 0; +} + #define FOLLOW_MAX 8 static int open_follow(char **filename, FILE **_f, Set *names, char **_final) { diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index ddcc8d216d..8d5f7010cd 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -110,6 +110,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_disable_controllers); CONFIG_PARSER_PROTOTYPE(config_parse_oom_policy); CONFIG_PARSER_PROTOTYPE(config_parse_numa_policy); CONFIG_PARSER_PROTOTYPE(config_parse_numa_mask); +CONFIG_PARSER_PROTOTYPE(config_parse_ip_filter_bpf_progs); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length); diff --git a/src/core/unit.c b/src/core/unit.c index 4d777b447d..463db73ff1 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -12,6 +12,7 @@ #include "all-units.h" #include "alloc-util.h" +#include "bpf-firewall.h" #include "bus-common-errors.h" #include "bus-util.h" #include "cgroup-util.h" @@ -682,6 +683,11 @@ void unit_free(Unit *u) { 
bpf_program_unref(u->ip_bpf_egress); bpf_program_unref(u->ip_bpf_egress_installed); + set_free(u->ip_bpf_custom_ingress); + set_free(u->ip_bpf_custom_egress); + set_free(u->ip_bpf_custom_ingress_installed); + set_free(u->ip_bpf_custom_egress_installed); + bpf_program_unref(u->bpf_device_control_installed); condition_free_list(u->conditions); @@ -5500,6 +5506,12 @@ int unit_prepare_exec(Unit *u) { assert(u); + /* Load any custom firewall BPF programs here once to test if they are existing and actually loadable. + * Fail here early since later errors in the call chain unit_realize_cgroup to cgroup_context_apply are ignored. */ + r = bpf_firewall_load_custom(u); + if (r < 0) + return r; + /* Prepares everything so that we can fork of a process for this unit */ (void) unit_realize_cgroup(u); diff --git a/src/core/unit.h b/src/core/unit.h index 007c4aea9e..ef495f836b 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -10,6 +10,7 @@ #include "emergency-action.h" #include "install.h" #include "list.h" +#include "set.h" #include "unit-name.h" #include "cgroup.h" @@ -281,6 +282,10 @@ typedef struct Unit { BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed; BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed; + Set *ip_bpf_custom_ingress; + Set *ip_bpf_custom_ingress_installed; + Set *ip_bpf_custom_egress; + Set *ip_bpf_custom_egress_installed; uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX]; diff --git a/src/shared/bpf-program.c b/src/shared/bpf-program.c index 40bc9645be..93f8db3f34 100644 --- a/src/shared/bpf-program.c +++ b/src/shared/bpf-program.c @@ -94,6 +94,25 @@ int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) { return 0; } +int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path) { + union bpf_attr attr; + + assert(p); + + if (p->kernel_fd >= 0) /* don't overwrite an assembled or loaded program */ + return -EBUSY; + + attr = (union bpf_attr) { + .pathname = PTR_TO_UINT64(path), + }; + + p->kernel_fd = 
bpf(BPF_OBJ_GET, &attr, sizeof(attr)); + if (p->kernel_fd < 0) + return -errno; + + return 0; +} + int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) { _cleanup_free_ char *copy = NULL; _cleanup_close_ int fd = -1; diff --git a/src/shared/bpf-program.h b/src/shared/bpf-program.h index c21eb2f72a..a21589eb1f 100644 --- a/src/shared/bpf-program.h +++ b/src/shared/bpf-program.h @@ -31,6 +31,7 @@ BPFProgram *bpf_program_ref(BPFProgram *p); int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count); int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size); +int bpf_program_load_from_bpf_fs(BPFProgram *p, const char *path); int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags); int bpf_program_cgroup_detach(BPFProgram *p); diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index bb30e8f151..2ea25d830a 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -758,6 +758,18 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons return 1; } + if (STR_IN_SET(field, "IPIngressFilterPath", "IPEgressFilterPath")) { + if (isempty(eq)) + r = sd_bus_message_append(m, "(sv)", field, "as", 0); + else + r = sd_bus_message_append(m, "(sv)", field, "as", 1, eq); + + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + return 0; } diff --git a/src/test/test-bpf.c b/src/test/test-bpf.c index 90ab15c549..6a75221542 100644 --- a/src/test/test-bpf.c +++ b/src/test/test-bpf.c @@ -9,6 +9,7 @@ #include "bpf-program.h" #include "load-fragment.h" #include "manager.h" +#include "missing.h" #include "rm-rf.h" #include "service.h" #include "test-helper.h" @@ -42,7 +43,7 @@ static bool can_memlock(void) { int main(int argc, char *argv[]) { struct bpf_insn exit_insn[] = { - BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), /* drop */ BPF_EXIT_INSN() }; @@ -54,6 +55,9 @@ int main(int argc, char 
*argv[]) { char log_buf[65535]; struct rlimit rl; int r; + union bpf_attr attr; + bool test_custom_filter = false; + const char *test_prog = "/sys/fs/bpf/test-dropper"; test_setup_logging(LOG_DEBUG); @@ -88,14 +92,31 @@ int main(int argc, char *argv[]) { return log_tests_skipped("BPF firewalling not supported"); assert_se(r > 0); - if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI) + if (r == BPF_FIREWALL_SUPPORTED_WITH_MULTI) { log_notice("BPF firewalling with BPF_F_ALLOW_MULTI supported. Yay!"); - else + test_custom_filter = true; + } else log_notice("BPF firewalling (though without BPF_F_ALLOW_MULTI) supported. Good."); r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf)); assert(r >= 0); + if (test_custom_filter) { + attr = (union bpf_attr) { + .pathname = PTR_TO_UINT64(test_prog), + .bpf_fd = p->kernel_fd, + .file_flags = 0, + }; + + (void) unlink(test_prog); + + r = bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); + if (r < 0) { + log_warning_errno(errno, "BPF object pinning failed, will not run custom filter test: %m"); + test_custom_filter = false; + } + } + p = bpf_program_unref(p); /* The simple tests succeeded. Now let's try full unit-based use-case. 
*/ @@ -175,5 +196,31 @@ int main(int argc, char *argv[]) { assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED || SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS); + if (test_custom_filter) { + assert_se(u = unit_new(m, sizeof(Service))); + assert_se(unit_add_name(u, "custom-filter.service") == 0); + assert_se(cc = unit_get_cgroup_context(u)); + u->perpetual = true; + + cc->ip_accounting = true; + + assert_se(config_parse_ip_filter_bpf_progs(u->id, "filename", 1, "Service", 1, "IPIngressFilterPath", 0, test_prog, &cc->ip_filters_ingress, u) == 0); + assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "-/bin/ping -c 1 127.0.0.1 -W 5", SERVICE(u)->exec_command, u) == 0); + + SERVICE(u)->type = SERVICE_ONESHOT; + u->load_state = UNIT_LOADED; + + assert_se(unit_start(u) >= 0); + + while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED)) + assert_se(sd_event_run(m->event, UINT64_MAX) >= 0); + + assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code != CLD_EXITED || + SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status != EXIT_SUCCESS); + + (void) unlink(test_prog); + assert_se(SERVICE(u)->state == SERVICE_DEAD); + } + return 0; } |