From 766bcf302aa6a863e4a3e5ee26f2f2742077181d Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 1 Jul 2024 21:58:30 +0200 Subject: extend sysctl functions to shadow values Pass to all the sysctl_* functions a hashmap which can be used to optionally save the value written in the sysctl. --- src/basic/sysctl-util.c | 45 ++++++++++++++++++++++++++++++++++++++----- src/basic/sysctl-util.h | 21 +++++++++++--------- src/network/networkd-ipv6ll.c | 4 ++-- src/network/networkd-ndisc.c | 6 +++--- src/network/networkd-sysctl.c | 34 ++++++++++++++++---------------- src/test/test-sysctl-util.c | 4 ++-- 6 files changed, 76 insertions(+), 38 deletions(-) (limited to 'src') diff --git a/src/basic/sysctl-util.c b/src/basic/sysctl-util.c index b284c9ccd2..dfb99e1896 100644 --- a/src/basic/sysctl-util.c +++ b/src/basic/sysctl-util.c @@ -44,8 +44,39 @@ char* sysctl_normalize(char *s) { return s; } -int sysctl_write(const char *property, const char *value) { +static int shadow_update(Hashmap **shadow, const char *property, const char *value) { + _cleanup_free_ char *k = NULL, *v = NULL, *cur_k = NULL, *cur_v = NULL; + int r; + + assert(property); + assert(value); + + if (!shadow) + return 0; + + k = strdup(property); + if (!k) + return -ENOMEM; + + v = strdup(value); + if (!v) + return -ENOMEM; + + cur_v = hashmap_remove2(*shadow, k, (void**)&cur_k); + + r = hashmap_ensure_put(shadow, &path_hash_ops_free_free, k, v); + if (r < 0) + return r; + + TAKE_PTR(k); + TAKE_PTR(v); + + return 0; +} + +int sysctl_write_full(const char *property, const char *value, Hashmap **shadow) { char *p; + int r; assert(property); assert(value); @@ -58,6 +89,10 @@ int sysctl_write(const char *property, const char *value) { log_debug("Setting '%s' to '%s'", p, value); + r = shadow_update(shadow, p, value); + if (r < 0) + return r; + return write_string_file(p, value, WRITE_STRING_FILE_VERIFY_ON_FAILURE | WRITE_STRING_FILE_DISABLE_BUFFER | WRITE_STRING_FILE_SUPPRESS_REDUNDANT_VIRTUAL); } @@ -76,7 +111,7 
@@ int sysctl_writef(const char *property, const char *format, ...) { return sysctl_write(property, v); } -int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value) { +int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value, Hashmap **shadow) { const char *p; assert(property); @@ -93,10 +128,10 @@ int sysctl_write_ip_property(int af, const char *ifname, const char *property, c } else p = strjoina("net/", af_to_ipv4_ipv6(af), "/", property); - return sysctl_write(p, value); + return sysctl_write_full(p, value, shadow); } -int sysctl_write_ip_neighbor_property(int af, const char *ifname, const char *property, const char *value) { +int sysctl_write_ip_neighbor_property(int af, const char *ifname, const char *property, const char *value, Hashmap **shadow) { const char *p; assert(property); @@ -113,7 +148,7 @@ int sysctl_write_ip_neighbor_property(int af, const char *ifname, const char *pr } else p = strjoina("net/", af_to_ipv4_ipv6(af), "/neigh/default/", property); - return sysctl_write(p, value); + return sysctl_write_full(p, value, shadow); } int sysctl_read(const char *property, char **ret) { diff --git a/src/basic/sysctl-util.h b/src/basic/sysctl-util.h index 2bf5491703..041292f693 100644 --- a/src/basic/sysctl-util.h +++ b/src/basic/sysctl-util.h @@ -10,27 +10,30 @@ char* sysctl_normalize(char *s); int sysctl_read(const char *property, char **value); -int sysctl_write(const char *property, const char *value); +int sysctl_write_full(const char *property, const char *value, Hashmap **shadow); int sysctl_writef(const char *property, const char *format, ...) 
_printf_(2, 3); +static inline int sysctl_write(const char *property, const char *value) { + return sysctl_write_full(property, value, NULL); +} int sysctl_read_ip_property(int af, const char *ifname, const char *property, char **ret); -int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value); -static inline int sysctl_write_ip_property_boolean(int af, const char *ifname, const char *property, bool value) { - return sysctl_write_ip_property(af, ifname, property, one_zero(value)); +int sysctl_write_ip_property(int af, const char *ifname, const char *property, const char *value, Hashmap **shadow); +static inline int sysctl_write_ip_property_boolean(int af, const char *ifname, const char *property, bool value, Hashmap **shadow) { + return sysctl_write_ip_property(af, ifname, property, one_zero(value), shadow); } -int sysctl_write_ip_neighbor_property(int af, const char *ifname, const char *property, const char *value); -static inline int sysctl_write_ip_neighbor_property_uint32(int af, const char *ifname, const char *property, uint32_t value) { +int sysctl_write_ip_neighbor_property(int af, const char *ifname, const char *property, const char *value, Hashmap **shadow); +static inline int sysctl_write_ip_neighbor_property_uint32(int af, const char *ifname, const char *property, uint32_t value, Hashmap **shadow) { char buf[DECIMAL_STR_MAX(uint32_t)]; xsprintf(buf, "%u", value); - return sysctl_write_ip_neighbor_property(af, ifname, property, buf); + return sysctl_write_ip_neighbor_property(af, ifname, property, buf, shadow); } #define DEFINE_SYSCTL_WRITE_IP_PROPERTY(name, type, format) \ - static inline int sysctl_write_ip_property_##name(int af, const char *ifname, const char *property, type value) { \ + static inline int sysctl_write_ip_property_##name(int af, const char *ifname, const char *property, type value, Hashmap **shadow) { \ char buf[DECIMAL_STR_MAX(type)]; \ xsprintf(buf, format, value); \ - return 
sysctl_write_ip_property(af, ifname, property, buf); \ + return sysctl_write_ip_property(af, ifname, property, buf, shadow); \ } DEFINE_SYSCTL_WRITE_IP_PROPERTY(int, int, "%i"); diff --git a/src/network/networkd-ipv6ll.c b/src/network/networkd-ipv6ll.c index cd23cc94aa..66705e6a79 100644 --- a/src/network/networkd-ipv6ll.c +++ b/src/network/networkd-ipv6ll.c @@ -219,7 +219,7 @@ int link_set_ipv6ll_stable_secret(Link *link) { } return sysctl_write_ip_property(AF_INET6, link->ifname, "stable_secret", - IN6_ADDR_TO_STRING(&a)); + IN6_ADDR_TO_STRING(&a), NULL); } int link_set_ipv6ll_addrgen_mode(Link *link, IPv6LinkLocalAddressGenMode mode) { @@ -229,7 +229,7 @@ int link_set_ipv6ll_addrgen_mode(Link *link, IPv6LinkLocalAddressGenMode mode) { if (mode == link->ipv6ll_address_gen_mode) return 0; - return sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "addr_gen_mode", mode); + return sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "addr_gen_mode", mode, NULL); } static const char* const ipv6_link_local_address_gen_mode_table[_IPV6_LINK_LOCAL_ADDRESS_GEN_MODE_MAX] = { diff --git a/src/network/networkd-ndisc.c b/src/network/networkd-ndisc.c index f44f03365c..253ca585aa 100644 --- a/src/network/networkd-ndisc.c +++ b/src/network/networkd-ndisc.c @@ -986,7 +986,7 @@ static int ndisc_router_process_reachable_time(Link *link, sd_ndisc_router *rt) } /* Set the reachable time for Neighbor Solicitations. */ - r = sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "base_reachable_time_ms", (uint32_t) msec); + r = sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "base_reachable_time_ms", (uint32_t) msec, NULL); if (r < 0) log_link_warning_errno(link, r, "Failed to apply neighbor reachable time (%"PRIu64"), ignoring: %m", msec); @@ -1019,7 +1019,7 @@ static int ndisc_router_process_retransmission_time(Link *link, sd_ndisc_router } /* Set the retransmission time for Neighbor Solicitations. 
*/ - r = sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "retrans_time_ms", (uint32_t) msec); + r = sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "retrans_time_ms", (uint32_t) msec, NULL); if (r < 0) log_link_warning_errno(link, r, "Failed to apply neighbor retransmission time (%"PRIu64"), ignoring: %m", msec); @@ -1054,7 +1054,7 @@ static int ndisc_router_process_hop_limit(Link *link, sd_ndisc_router *rt) { if (hop_limit <= 0) return 0; - r = sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "hop_limit", (uint32_t) hop_limit); + r = sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "hop_limit", (uint32_t) hop_limit, NULL); if (r < 0) log_link_warning_errno(link, r, "Failed to apply hop_limit (%u), ignoring: %m", hop_limit); diff --git a/src/network/networkd-sysctl.c b/src/network/networkd-sysctl.c index 2027a29f27..23c2878359 100644 --- a/src/network/networkd-sysctl.c +++ b/src/network/networkd-sysctl.c @@ -30,13 +30,13 @@ static void manager_set_ip_forwarding(Manager *manager, int family) { return; /* keep */ /* First, set the default value. */ - r = sysctl_write_ip_property_boolean(family, "default", "forwarding", t); + r = sysctl_write_ip_property_boolean(family, "default", "forwarding", t, NULL); if (r < 0) log_warning_errno(r, "Failed to %s the default %s forwarding: %m", enable_disable(t), af_to_ipv4_ipv6(family)); /* Then, set the value to all interfaces. 
*/ - r = sysctl_write_ip_property_boolean(family, "all", "forwarding", t); + r = sysctl_write_ip_property_boolean(family, "all", "forwarding", t, NULL); if (r < 0) log_warning_errno(r, "Failed to %s %s forwarding for all interfaces: %m", enable_disable(t), af_to_ipv4_ipv6(family)); @@ -80,7 +80,7 @@ static int link_update_ipv6_sysctl(Link *link) { if (!link_ipv6_enabled(link)) return 0; - return sysctl_write_ip_property_boolean(AF_INET6, link->ifname, "disable_ipv6", false); + return sysctl_write_ip_property_boolean(AF_INET6, link->ifname, "disable_ipv6", false, NULL); } static int link_set_proxy_arp(Link *link) { @@ -92,7 +92,7 @@ static int link_set_proxy_arp(Link *link) { if (link->network->proxy_arp < 0) return 0; - return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "proxy_arp", link->network->proxy_arp > 0); + return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "proxy_arp", link->network->proxy_arp > 0, NULL); } static int link_set_proxy_arp_pvlan(Link *link) { @@ -104,7 +104,7 @@ static int link_set_proxy_arp_pvlan(Link *link) { if (link->network->proxy_arp_pvlan < 0) return 0; - return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "proxy_arp_pvlan", link->network->proxy_arp_pvlan > 0); + return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "proxy_arp_pvlan", link->network->proxy_arp_pvlan > 0, NULL); } int link_get_ip_forwarding(Link *link, int family) { @@ -145,7 +145,7 @@ static int link_set_ip_forwarding_impl(Link *link, int family) { if (t < 0) return 0; /* keep */ - r = sysctl_write_ip_property_boolean(family, link->ifname, "forwarding", t); + r = sysctl_write_ip_property_boolean(family, link->ifname, "forwarding", t, NULL); if (r < 0) return log_link_warning_errno(link, r, "Failed to %s %s forwarding, ignoring: %m", enable_disable(t), af_to_ipv4_ipv6(family)); @@ -221,7 +221,7 @@ static int link_set_ipv4_rp_filter(Link *link) { if (link->network->ipv4_rp_filter < 0) return 0; - return 
sysctl_write_ip_property_int(AF_INET, link->ifname, "rp_filter", link->network->ipv4_rp_filter); + return sysctl_write_ip_property_int(AF_INET, link->ifname, "rp_filter", link->network->ipv4_rp_filter, NULL); } static int link_set_ipv6_privacy_extensions(Link *link) { @@ -241,7 +241,7 @@ static int link_set_ipv6_privacy_extensions(Link *link) { if (val == IPV6_PRIVACY_EXTENSIONS_KERNEL) return 0; - return sysctl_write_ip_property_int(AF_INET6, link->ifname, "use_tempaddr", (int) val); + return sysctl_write_ip_property_int(AF_INET6, link->ifname, "use_tempaddr", (int) val, NULL); } static int link_set_ipv6_accept_ra(Link *link) { @@ -250,7 +250,7 @@ static int link_set_ipv6_accept_ra(Link *link) { if (!link_is_configured_for_family(link, AF_INET6)) return 0; - return sysctl_write_ip_property(AF_INET6, link->ifname, "accept_ra", "0"); + return sysctl_write_ip_property(AF_INET6, link->ifname, "accept_ra", "0", NULL); } static int link_set_ipv6_dad_transmits(Link *link) { @@ -262,7 +262,7 @@ static int link_set_ipv6_dad_transmits(Link *link) { if (link->network->ipv6_dad_transmits < 0) return 0; - return sysctl_write_ip_property_int(AF_INET6, link->ifname, "dad_transmits", link->network->ipv6_dad_transmits); + return sysctl_write_ip_property_int(AF_INET6, link->ifname, "dad_transmits", link->network->ipv6_dad_transmits, NULL); } static int link_set_ipv6_hop_limit(Link *link) { @@ -274,7 +274,7 @@ static int link_set_ipv6_hop_limit(Link *link) { if (link->network->ipv6_hop_limit <= 0) return 0; - return sysctl_write_ip_property_int(AF_INET6, link->ifname, "hop_limit", link->network->ipv6_hop_limit); + return sysctl_write_ip_property_int(AF_INET6, link->ifname, "hop_limit", link->network->ipv6_hop_limit, NULL); } static int link_set_ipv6_retransmission_time(Link *link) { @@ -292,7 +292,7 @@ static int link_set_ipv6_retransmission_time(Link *link) { if (retrans_time_ms <= 0 || retrans_time_ms > UINT32_MAX) return 0; - return 
sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "retrans_time_ms", retrans_time_ms); + return sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "retrans_time_ms", retrans_time_ms, NULL); } static int link_set_ipv6_proxy_ndp(Link *link) { @@ -308,7 +308,7 @@ static int link_set_ipv6_proxy_ndp(Link *link) { else v = !set_isempty(link->network->ipv6_proxy_ndp_addresses); - return sysctl_write_ip_property_boolean(AF_INET6, link->ifname, "proxy_ndp", v); + return sysctl_write_ip_property_boolean(AF_INET6, link->ifname, "proxy_ndp", v, NULL); } int link_set_ipv6_mtu(Link *link, int log_level) { @@ -335,7 +335,7 @@ int link_set_ipv6_mtu(Link *link, int log_level) { mtu = link->mtu; } - return sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "mtu", mtu); + return sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "mtu", mtu, NULL); } static int link_set_ipv4_accept_local(Link *link) { @@ -347,7 +347,7 @@ static int link_set_ipv4_accept_local(Link *link) { if (link->network->ipv4_accept_local < 0) return 0; - return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "accept_local", link->network->ipv4_accept_local > 0); + return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "accept_local", link->network->ipv4_accept_local > 0, NULL); } static int link_set_ipv4_route_localnet(Link *link) { @@ -359,7 +359,7 @@ static int link_set_ipv4_route_localnet(Link *link) { if (link->network->ipv4_route_localnet < 0) return 0; - return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "route_localnet", link->network->ipv4_route_localnet > 0); + return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "route_localnet", link->network->ipv4_route_localnet > 0, NULL); } static int link_set_ipv4_promote_secondaries(Link *link) { @@ -373,7 +373,7 @@ static int link_set_ipv4_promote_secondaries(Link *link) { * otherwise. 
The way systemd-networkd works is that the new IP of a lease is added as a * secondary IP and when the primary one expires it relies on the kernel to promote the * secondary IP. See also https://github.com/systemd/systemd/issues/7163 */ - return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "promote_secondaries", true); + return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "promote_secondaries", true, NULL); } int link_set_sysctl(Link *link) { diff --git a/src/test/test-sysctl-util.c b/src/test/test-sysctl-util.c index e94099605c..83d6c9036c 100644 --- a/src/test/test-sysctl-util.c +++ b/src/test/test-sysctl-util.c @@ -53,14 +53,14 @@ TEST(sysctl_read) { assert_se(sysctl_read_ip_property(AF_INET, "lo", "forwarding", &s)); assert_se(STR_IN_SET(s, "0", "1")); - r = sysctl_write_ip_property(AF_INET, "lo", "forwarding", s); + r = sysctl_write_ip_property(AF_INET, "lo", "forwarding", s, NULL); assert_se(r >= 0 || ERRNO_IS_PRIVILEGE(r) || r == -EROFS); s = mfree(s); assert_se(sysctl_read_ip_property(AF_INET, NULL, "ip_forward", &s)); assert_se(STR_IN_SET(s, "0", "1")); - r = sysctl_write_ip_property(AF_INET, NULL, "ip_forward", s); + r = sysctl_write_ip_property(AF_INET, NULL, "ip_forward", s, NULL); assert_se(r >= 0 || ERRNO_IS_PRIVILEGE(r) || r == -EROFS); s = mfree(s); -- cgit v1.2.3 From 64629617b6abbe3665a7f886f068c9e4f3b366a4 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 1 Jul 2024 21:58:30 +0200 Subject: store the sysctls set by networkd networkd sets several sysctls to set the network configuration. Save their values so we can check if other processes change them. 
--- src/network/networkd-ipv6ll.c | 7 ++++-- src/network/networkd-manager.c | 2 ++ src/network/networkd-manager.h | 1 + src/network/networkd-ndisc.c | 9 +++++--- src/network/networkd-sysctl.c | 48 +++++++++++++++++++++++++++--------------- 5 files changed, 45 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/network/networkd-ipv6ll.c b/src/network/networkd-ipv6ll.c index 66705e6a79..0daf3ad8ab 100644 --- a/src/network/networkd-ipv6ll.c +++ b/src/network/networkd-ipv6ll.c @@ -7,6 +7,7 @@ #include "networkd-address.h" #include "networkd-ipv6ll.h" #include "networkd-link.h" +#include "networkd-manager.h" #include "networkd-network.h" #include "networkd-util.h" #include "socket-util.h" @@ -189,6 +190,7 @@ int link_set_ipv6ll_stable_secret(Link *link) { int r; assert(link); + assert(link->manager); assert(link->network); if (link->network->ipv6ll_address_gen_mode != IPV6_LINK_LOCAL_ADDRESSS_GEN_MODE_STABLE_PRIVACY) @@ -219,17 +221,18 @@ int link_set_ipv6ll_stable_secret(Link *link) { } return sysctl_write_ip_property(AF_INET6, link->ifname, "stable_secret", - IN6_ADDR_TO_STRING(&a), NULL); + IN6_ADDR_TO_STRING(&a), &link->manager->sysctl_shadow); } int link_set_ipv6ll_addrgen_mode(Link *link, IPv6LinkLocalAddressGenMode mode) { assert(link); + assert(link->manager); assert(mode >= 0 && mode < _IPV6_LINK_LOCAL_ADDRESS_GEN_MODE_MAX); if (mode == link->ipv6ll_address_gen_mode) return 0; - return sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "addr_gen_mode", mode, NULL); + return sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "addr_gen_mode", mode, &link->manager->sysctl_shadow); } static const char* const ipv6_link_local_address_gen_mode_table[_IPV6_LINK_LOCAL_ADDRESS_GEN_MODE_MAX] = { diff --git a/src/network/networkd-manager.c b/src/network/networkd-manager.c index 2c2956f465..3fdc73d914 100644 --- a/src/network/networkd-manager.c +++ b/src/network/networkd-manager.c @@ -620,6 +620,8 @@ Manager* manager_free(Manager *m) { 
HASHMAP_FOREACH(link, m->links_by_index) (void) link_stop_engines(link, true); + hashmap_free(m->sysctl_shadow); + m->request_queue = ordered_set_free(m->request_queue); m->remove_request_queue = ordered_set_free(m->remove_request_queue); diff --git a/src/network/networkd-manager.h b/src/network/networkd-manager.h index a70b3e708f..076cf5e3d6 100644 --- a/src/network/networkd-manager.h +++ b/src/network/networkd-manager.h @@ -122,6 +122,7 @@ struct Manager { /* sysctl */ int ip_forwarding[2]; + Hashmap *sysctl_shadow; }; int manager_new(Manager **ret, bool test_mode); diff --git a/src/network/networkd-ndisc.c b/src/network/networkd-ndisc.c index 253ca585aa..81835c06e5 100644 --- a/src/network/networkd-ndisc.c +++ b/src/network/networkd-ndisc.c @@ -965,6 +965,7 @@ static int ndisc_router_process_reachable_time(Link *link, sd_ndisc_router *rt) int r; assert(link); + assert(link->manager); assert(link->network); assert(rt); @@ -986,7 +987,7 @@ static int ndisc_router_process_reachable_time(Link *link, sd_ndisc_router *rt) } /* Set the reachable time for Neighbor Solicitations. */ - r = sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "base_reachable_time_ms", (uint32_t) msec, NULL); + r = sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "base_reachable_time_ms", (uint32_t) msec, &link->manager->sysctl_shadow); if (r < 0) log_link_warning_errno(link, r, "Failed to apply neighbor reachable time (%"PRIu64"), ignoring: %m", msec); @@ -998,6 +999,7 @@ static int ndisc_router_process_retransmission_time(Link *link, sd_ndisc_router int r; assert(link); + assert(link->manager); assert(link->network); assert(rt); @@ -1019,7 +1021,7 @@ static int ndisc_router_process_retransmission_time(Link *link, sd_ndisc_router } /* Set the retransmission time for Neighbor Solicitations. 
*/ - r = sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "retrans_time_ms", (uint32_t) msec, NULL); + r = sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "retrans_time_ms", (uint32_t) msec, &link->manager->sysctl_shadow); if (r < 0) log_link_warning_errno(link, r, "Failed to apply neighbor retransmission time (%"PRIu64"), ignoring: %m", msec); @@ -1031,6 +1033,7 @@ static int ndisc_router_process_hop_limit(Link *link, sd_ndisc_router *rt) { int r; assert(link); + assert(link->manager); assert(link->network); assert(rt); @@ -1054,7 +1057,7 @@ static int ndisc_router_process_hop_limit(Link *link, sd_ndisc_router *rt) { if (hop_limit <= 0) return 0; - r = sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "hop_limit", (uint32_t) hop_limit, NULL); + r = sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "hop_limit", (uint32_t) hop_limit, &link->manager->sysctl_shadow); if (r < 0) log_link_warning_errno(link, r, "Failed to apply hop_limit (%u), ignoring: %m", hop_limit); diff --git a/src/network/networkd-sysctl.c b/src/network/networkd-sysctl.c index 23c2878359..62b0b12680 100644 --- a/src/network/networkd-sysctl.c +++ b/src/network/networkd-sysctl.c @@ -30,13 +30,13 @@ static void manager_set_ip_forwarding(Manager *manager, int family) { return; /* keep */ /* First, set the default value. */ - r = sysctl_write_ip_property_boolean(family, "default", "forwarding", t, NULL); + r = sysctl_write_ip_property_boolean(family, "default", "forwarding", t, &manager->sysctl_shadow); if (r < 0) log_warning_errno(r, "Failed to %s the default %s forwarding: %m", enable_disable(t), af_to_ipv4_ipv6(family)); /* Then, set the value to all interfaces. 
*/ - r = sysctl_write_ip_property_boolean(family, "all", "forwarding", t, NULL); + r = sysctl_write_ip_property_boolean(family, "all", "forwarding", t, &manager->sysctl_shadow); if (r < 0) log_warning_errno(r, "Failed to %s %s forwarding for all interfaces: %m", enable_disable(t), af_to_ipv4_ipv6(family)); @@ -73,6 +73,7 @@ static bool link_is_configured_for_family(Link *link, int family) { static int link_update_ipv6_sysctl(Link *link) { assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET6)) return 0; @@ -80,11 +81,12 @@ static int link_update_ipv6_sysctl(Link *link) { if (!link_ipv6_enabled(link)) return 0; - return sysctl_write_ip_property_boolean(AF_INET6, link->ifname, "disable_ipv6", false, NULL); + return sysctl_write_ip_property_boolean(AF_INET6, link->ifname, "disable_ipv6", false, &link->manager->sysctl_shadow); } static int link_set_proxy_arp(Link *link) { assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET)) return 0; @@ -92,11 +94,12 @@ static int link_set_proxy_arp(Link *link) { if (link->network->proxy_arp < 0) return 0; - return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "proxy_arp", link->network->proxy_arp > 0, NULL); + return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "proxy_arp", link->network->proxy_arp > 0, &link->manager->sysctl_shadow); } static int link_set_proxy_arp_pvlan(Link *link) { assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET)) return 0; @@ -104,7 +107,7 @@ static int link_set_proxy_arp_pvlan(Link *link) { if (link->network->proxy_arp_pvlan < 0) return 0; - return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "proxy_arp_pvlan", link->network->proxy_arp_pvlan > 0, NULL); + return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "proxy_arp_pvlan", link->network->proxy_arp_pvlan > 0, &link->manager->sysctl_shadow); } int link_get_ip_forwarding(Link *link, int family) { @@ -136,6 
+139,7 @@ static int link_set_ip_forwarding_impl(Link *link, int family) { int r, t; assert(link); + assert(link->manager); assert(IN_SET(family, AF_INET, AF_INET6)); if (!link_is_configured_for_family(link, family)) @@ -145,7 +149,7 @@ static int link_set_ip_forwarding_impl(Link *link, int family) { if (t < 0) return 0; /* keep */ - r = sysctl_write_ip_property_boolean(family, link->ifname, "forwarding", t, NULL); + r = sysctl_write_ip_property_boolean(family, link->ifname, "forwarding", t, &link->manager->sysctl_shadow); if (r < 0) return log_link_warning_errno(link, r, "Failed to %s %s forwarding, ignoring: %m", enable_disable(t), af_to_ipv4_ipv6(family)); @@ -214,6 +218,7 @@ static int link_set_ip_forwarding(Link *link, int family) { static int link_set_ipv4_rp_filter(Link *link) { assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET)) return 0; @@ -221,7 +226,7 @@ static int link_set_ipv4_rp_filter(Link *link) { if (link->network->ipv4_rp_filter < 0) return 0; - return sysctl_write_ip_property_int(AF_INET, link->ifname, "rp_filter", link->network->ipv4_rp_filter, NULL); + return sysctl_write_ip_property_int(AF_INET, link->ifname, "rp_filter", link->network->ipv4_rp_filter, &link->manager->sysctl_shadow); } static int link_set_ipv6_privacy_extensions(Link *link) { @@ -241,20 +246,22 @@ static int link_set_ipv6_privacy_extensions(Link *link) { if (val == IPV6_PRIVACY_EXTENSIONS_KERNEL) return 0; - return sysctl_write_ip_property_int(AF_INET6, link->ifname, "use_tempaddr", (int) val, NULL); + return sysctl_write_ip_property_int(AF_INET6, link->ifname, "use_tempaddr", (int) val, &link->manager->sysctl_shadow); } static int link_set_ipv6_accept_ra(Link *link) { assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET6)) return 0; - return sysctl_write_ip_property(AF_INET6, link->ifname, "accept_ra", "0", NULL); + return sysctl_write_ip_property(AF_INET6, link->ifname, "accept_ra", "0", 
&link->manager->sysctl_shadow); } static int link_set_ipv6_dad_transmits(Link *link) { assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET6)) return 0; @@ -262,11 +269,12 @@ static int link_set_ipv6_dad_transmits(Link *link) { if (link->network->ipv6_dad_transmits < 0) return 0; - return sysctl_write_ip_property_int(AF_INET6, link->ifname, "dad_transmits", link->network->ipv6_dad_transmits, NULL); + return sysctl_write_ip_property_int(AF_INET6, link->ifname, "dad_transmits", link->network->ipv6_dad_transmits, &link->manager->sysctl_shadow); } static int link_set_ipv6_hop_limit(Link *link) { assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET6)) return 0; @@ -274,13 +282,14 @@ static int link_set_ipv6_hop_limit(Link *link) { if (link->network->ipv6_hop_limit <= 0) return 0; - return sysctl_write_ip_property_int(AF_INET6, link->ifname, "hop_limit", link->network->ipv6_hop_limit, NULL); + return sysctl_write_ip_property_int(AF_INET6, link->ifname, "hop_limit", link->network->ipv6_hop_limit, &link->manager->sysctl_shadow); } static int link_set_ipv6_retransmission_time(Link *link) { usec_t retrans_time_ms; assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET6)) return 0; @@ -292,13 +301,14 @@ static int link_set_ipv6_retransmission_time(Link *link) { if (retrans_time_ms <= 0 || retrans_time_ms > UINT32_MAX) return 0; - return sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "retrans_time_ms", retrans_time_ms, NULL); + return sysctl_write_ip_neighbor_property_uint32(AF_INET6, link->ifname, "retrans_time_ms", retrans_time_ms, &link->manager->sysctl_shadow); } static int link_set_ipv6_proxy_ndp(Link *link) { bool v; assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET6)) return 0; @@ -308,13 +318,14 @@ static int link_set_ipv6_proxy_ndp(Link *link) { else v = !set_isempty(link->network->ipv6_proxy_ndp_addresses); - 
return sysctl_write_ip_property_boolean(AF_INET6, link->ifname, "proxy_ndp", v, NULL); + return sysctl_write_ip_property_boolean(AF_INET6, link->ifname, "proxy_ndp", v, &link->manager->sysctl_shadow); } int link_set_ipv6_mtu(Link *link, int log_level) { uint32_t mtu = 0; assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET6)) return 0; @@ -335,11 +346,12 @@ int link_set_ipv6_mtu(Link *link, int log_level) { mtu = link->mtu; } - return sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "mtu", mtu, NULL); + return sysctl_write_ip_property_uint32(AF_INET6, link->ifname, "mtu", mtu, &link->manager->sysctl_shadow); } static int link_set_ipv4_accept_local(Link *link) { assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET)) return 0; @@ -347,11 +359,12 @@ static int link_set_ipv4_accept_local(Link *link) { if (link->network->ipv4_accept_local < 0) return 0; - return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "accept_local", link->network->ipv4_accept_local > 0, NULL); + return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "accept_local", link->network->ipv4_accept_local > 0, &link->manager->sysctl_shadow); } static int link_set_ipv4_route_localnet(Link *link) { assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET)) return 0; @@ -359,11 +372,12 @@ static int link_set_ipv4_route_localnet(Link *link) { if (link->network->ipv4_route_localnet < 0) return 0; - return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "route_localnet", link->network->ipv4_route_localnet > 0, NULL); + return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "route_localnet", link->network->ipv4_route_localnet > 0, &link->manager->sysctl_shadow); } static int link_set_ipv4_promote_secondaries(Link *link) { assert(link); + assert(link->manager); if (!link_is_configured_for_family(link, AF_INET)) return 0; @@ -373,7 +387,7 @@ static int 
link_set_ipv4_promote_secondaries(Link *link) { * otherwise. The way systemd-networkd works is that the new IP of a lease is added as a * secondary IP and when the primary one expires it relies on the kernel to promote the * secondary IP. See also https://github.com/systemd/systemd/issues/7163 */ - return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "promote_secondaries", true, NULL); + return sysctl_write_ip_property_boolean(AF_INET, link->ifname, "promote_secondaries", true, &link->manager->sysctl_shadow); } int link_set_sysctl(Link *link) { -- cgit v1.2.3 From 6d9ef22acdeac4b429efb75164341233955484af Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 1 Jul 2024 21:58:30 +0200 Subject: emit a warning in networkd if managed sysctls are changed Monitor the sysctls set by networkd for writes; if a sysctl is overwritten with a different value than the one we set, emit a warning. Writes are detected with an eBPF program attached as BPF_CGROUP_SYSCTL which reports sysctl writes only in net/. The eBPF program only reports sysctl writes from a different cgroup than networkd. To do this, it uses the `bpf_current_task_under_cgroup()` helper, which will be allowed in BPF_CGROUP_SYSCTL programs from kernel 6.12[1]. Loading a BPF_CGROUP_SYSCTL program requires the CAP_SYS_ADMIN capability, so drop it just after the program load, whether it loads successfully or not. Writes are logged but permitted; in the future the functionality can be extended to also deny writes to managed sysctls. 
[1] https://lore.kernel.org/bpf/20240819162805.78235-3-technoboy85@gmail.com/ --- catalog/systemd.catalog.in | 9 + src/network/bpf/sysctl_monitor/meson.build | 25 +++ .../bpf/sysctl_monitor/sysctl-monitor-skel.h | 16 ++ .../bpf/sysctl_monitor/sysctl-monitor.bpf.c | 134 +++++++++++++++ .../bpf/sysctl_monitor/sysctl-write-event.h | 46 +++++ src/network/meson.build | 6 + src/network/networkd-link.c | 2 + src/network/networkd-manager.c | 16 ++ src/network/networkd-manager.h | 5 + src/network/networkd-sysctl.c | 191 +++++++++++++++++++++ src/network/networkd-sysctl.h | 10 ++ src/network/networkd.c | 4 +- src/systemd/sd-messages.h | 3 + units/systemd-networkd.service.in | 6 +- 14 files changed, 469 insertions(+), 4 deletions(-) create mode 100644 src/network/bpf/sysctl_monitor/meson.build create mode 100644 src/network/bpf/sysctl_monitor/sysctl-monitor-skel.h create mode 100644 src/network/bpf/sysctl_monitor/sysctl-monitor.bpf.c create mode 100644 src/network/bpf/sysctl_monitor/sysctl-write-event.h (limited to 'src') diff --git a/catalog/systemd.catalog.in b/catalog/systemd.catalog.in index 200c98eabe..0a12b7c3f7 100644 --- a/catalog/systemd.catalog.in +++ b/catalog/systemd.catalog.in @@ -794,3 +794,12 @@ the TPM. Automatic SRK enrollment on TPMs in such scenarios is not supported. In order to unset the PIN/password protection on the owner hierarchy issue a command like the following: 'tpm2_changeauth -c o -p ""'. + +-- 9cf56b8baf9546cf9478783a8de42113 +Subject: A foreign process changed a sysctl we manage +Defined-By: systemd +Support: %SUPPORT_URL% + +A sysctl handle under /proc/sys/net, which is managed by systemd-networkd, has been changed by another process. +The event is raised only if the written value differs from the current one. +The program name, the written value, the previous value, and the value initially set by networkd have been logged. 
diff --git a/src/network/bpf/sysctl_monitor/meson.build b/src/network/bpf/sysctl_monitor/meson.build new file mode 100644 index 0000000000..ac8e81e927 --- /dev/null +++ b/src/network/bpf/sysctl_monitor/meson.build @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later + +if conf.get('HAVE_VMLINUX_H') != 1 + subdir_done() +endif + +sysctl_monitor_bpf_o_unstripped = custom_target( + 'sysctl-monitor.bpf.unstripped.o', + input : 'sysctl-monitor.bpf.c', + output : 'sysctl-monitor.bpf.unstripped.o', + command : bpf_o_unstripped_cmd, + depends : vmlinux_h_dependency) + +sysctl_monitor_bpf_o = custom_target( + 'sysctl-monitor.bpf.o', + input : sysctl_monitor_bpf_o_unstripped, + output : 'sysctl-monitor.bpf.o', + command : bpf_o_cmd) + +sysctl_monitor_skel_h = custom_target( + 'sysctl-monitor.skel.h', + input : sysctl_monitor_bpf_o, + output : 'sysctl-monitor.skel.h', + command : skel_h_cmd, + capture : true) diff --git a/src/network/bpf/sysctl_monitor/sysctl-monitor-skel.h b/src/network/bpf/sysctl_monitor/sysctl-monitor-skel.h new file mode 100644 index 0000000000..d002414521 --- /dev/null +++ b/src/network/bpf/sysctl_monitor/sysctl-monitor-skel.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +/* The SPDX header above is actually correct in claiming this was + * LGPL-2.1-or-later, because it is. Since the kernel doesn't consider that + * compatible with GPL we will claim this to be GPL however, which should be + * fine given that LGPL-2.1-or-later downgrades to GPL if needed. 
+ */ + +#include "bpf-dlopen.h" + +/* libbpf is used via dlopen(), so rename symbols */ +#define bpf_object__destroy_skeleton sym_bpf_object__destroy_skeleton +#define bpf_object__load_skeleton sym_bpf_object__load_skeleton +#define bpf_object__open_skeleton sym_bpf_object__open_skeleton + +#include "bpf/sysctl_monitor/sysctl-monitor.skel.h" diff --git a/src/network/bpf/sysctl_monitor/sysctl-monitor.bpf.c b/src/network/bpf/sysctl_monitor/sysctl-monitor.bpf.c new file mode 100644 index 0000000000..ef154931ce --- /dev/null +++ b/src/network/bpf/sysctl_monitor/sysctl-monitor.bpf.c @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "vmlinux.h" + +#include + +#include "sysctl-write-event.h" + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); + __type(key, u32); + __type(value, u32); + __uint(max_entries, 1); +} cgroup_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 256 * 1024); +} written_sysctls SEC(".maps"); + +static bool my_streq(const char *s1, const char *s2, size_t l) { + for (size_t i = 0; i < l; i++) { + if (s1[i] != s2[i]) + return false; + if (s1[i] == 0) + return true; + } + return true; +} + +struct str { + char *s; + size_t l; +}; + +static long cut_last(u32 i, struct str *str) { + char *s; + + i = str->l - i - 1; + s = str->s + i; + + /* Sanity check for the preverifier */ + if (i >= str->l) + return 1; + + if (*s == 0) + return 0; + + if (*s == '\n' || *s == '\r' || *s == ' ' || *s == '\t') { + *s = 0; + + return 0; + } + + return 1; +} + +/* Cut off trailing whitespace and newlines */ +static void chop(char *s, size_t l) { + struct str str = { s, l }; + + bpf_loop(l, cut_last, &str, 0); +} + +SEC("cgroup/sysctl") +int sysctl_monitor(struct bpf_sysctl *ctx) { + int r; + + /* Ignore events generated by us */ + if (bpf_current_task_under_cgroup(&cgroup_map, 0)) + return 1; + + /* Allow reads */ + if (!ctx->write) + return 1; + + /* Declare the struct without contextually initializing 
it. + * This avoids zero-filling the struct, which would be a waste of + * resources and code size. Since we're sending an event even on failure, + * truncate the strings to zero size, in case we don't populate them. */ + struct sysctl_write_event we; + we.version = 1; + we.errorcode = 0; + we.path[0] = 0; + we.comm[0] = 0; + we.current[0] = 0; + we.newvalue[0] = 0; + + /* Set the simple values first */ + we.pid = bpf_get_current_pid_tgid() >> 32; + we.cgroup_id = bpf_get_current_cgroup_id(); + + /* Only monitor /proc/sys/net/ */ + r = bpf_sysctl_get_name(ctx, we.path, sizeof(we.path), 0); + if (r < 0) { + we.errorcode = r; + goto send_event; + } + + if (bpf_strncmp(we.path, 4, "net/") != 0) + return 1; + + r = bpf_get_current_comm(we.comm, sizeof(we.comm)); + if (r < 0) { + we.errorcode = r; + goto send_event; + } + + r = bpf_sysctl_get_current_value(ctx, we.current, sizeof(we.current)); + if (r < 0) { + we.errorcode = r; + goto send_event; + } + + r = bpf_sysctl_get_new_value(ctx, we.newvalue, sizeof(we.newvalue)); + if (r < 0) { + we.errorcode = r; + goto send_event; + } + + /* Both the kernel and userspace applications add a newline at the end, + * remove it from both strings */ + chop(we.current, sizeof(we.current)); + chop(we.newvalue, sizeof(we.newvalue)); + +send_event: + /* If new value differs or we encountered an error, send the event */ + if (r < 0 || !my_streq(we.current, we.newvalue, sizeof(we.current))) + bpf_ringbuf_output(&written_sysctls, &we, sizeof(we), 0); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/src/network/bpf/sysctl_monitor/sysctl-write-event.h b/src/network/bpf/sysctl_monitor/sysctl-write-event.h new file mode 100644 index 0000000000..77b71fb4f9 --- /dev/null +++ b/src/network/bpf/sysctl_monitor/sysctl-write-event.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#pragma once + +#ifndef TASK_COMM_LEN +#define TASK_COMM_LEN 16 +#endif + +/* It would be nice to size these members to bigger 
values, but the stack + * in BPF programs is limited to 512 bytes, and allocating bigger structures + * leads to this compile time error: + * error: Looks like the BPF stack limit is exceeded. + * Please move large on stack variables into BPF per-cpu array map. + * For non-kernel uses, the stack can be increased using -mllvm -bpf-stack-size. */ +struct sysctl_write_event { + /* Used to track changes in the struct layout */ + int version; + + /* Error code returned to userspace to handle possible failures. */ + int errorcode; + + /* The PID of the process which is writing the sysctl. */ + pid_t pid; + + /* The cgroup id of the process. */ + uint64_t cgroup_id; + + /* The name of the binary. */ + char comm[TASK_COMM_LEN]; + + /* The path of the sysctl, relative to /proc/sys/. + * The longest path observed is 64 bytes: + * net/ipv4/conf/123456789012345/igmpv3_unsolicited_report_interval + * so setting it to 100 gives us a lot of headroom */ + char path[100]; + + /* The value of the sysctl just before the write. + * The longest value observed is net.core.netdev_rss_key which + * contains 155 bytes, so set it to 160 to have some headroom + * even in this corner case. */ + char current[160]; + + /* The new value being written into the sysctl. 
+ * same sizing as 'current' */ + char newvalue[160]; +}; diff --git a/src/network/meson.build b/src/network/meson.build index 54cf694aeb..73c48e06af 100644 --- a/src/network/meson.build +++ b/src/network/meson.build @@ -1,5 +1,7 @@ # SPDX-License-Identifier: LGPL-2.1-or-later +subdir('bpf/sysctl_monitor') + sources = files( 'netdev/bareudp.c', 'netdev/batadv.c', @@ -140,6 +142,10 @@ network_generator_sources = files( networkd_network_gperf_gperf = files('networkd-network-gperf.gperf') networkd_netdev_gperf_gperf = files('netdev/netdev-gperf.gperf') +if conf.get('HAVE_VMLINUX_H') == 1 + sources += sysctl_monitor_skel_h +endif + sources += custom_target( 'networkd-gperf.c', input : 'networkd-gperf.gperf', diff --git a/src/network/networkd-link.c b/src/network/networkd-link.c index 0eeab6e8b0..303007a9de 100644 --- a/src/network/networkd-link.c +++ b/src/network/networkd-link.c @@ -252,6 +252,8 @@ static void link_free_engines(Link *link) { static Link *link_free(Link *link) { assert(link); + (void) sysctl_clear_link_shadows(link); + link_ntp_settings_clear(link); link_dns_settings_clear(link); diff --git a/src/network/networkd-manager.c b/src/network/networkd-manager.c index 3fdc73d914..6063834a20 100644 --- a/src/network/networkd-manager.c +++ b/src/network/networkd-manager.c @@ -16,6 +16,7 @@ #include "bus-log-control-api.h" #include "bus-polkit.h" #include "bus-util.h" +#include "capability-util.h" #include "common-signal.h" #include "conf-parser.h" #include "constants.h" @@ -603,6 +604,7 @@ int manager_new(Manager **ret, bool test_mode) { .duid_product_uuid.type = DUID_TYPE_UUID, .dhcp_server_persist_leases = true, .ip_forwarding = { -1, -1, }, + .cgroup_fd = -EBADF, }; *ret = TAKE_PTR(m); @@ -615,6 +617,8 @@ Manager* manager_free(Manager *m) { if (!m) return NULL; + sysctl_remove_monitor(m); + free(m->state_file); HASHMAP_FOREACH(link, m->links_by_index) @@ -694,6 +698,18 @@ int manager_start(Manager *m) { assert(m); + (void) sysctl_add_monitor(m); + + /* 
Loading BPF programs requires CAP_SYS_ADMIN and CAP_BPF. + * Drop the capabilities here, regardless of whether the load succeeds or not. */ + r = drop_capability(CAP_SYS_ADMIN); + if (r < 0) + log_warning_errno(r, "Failed to drop CAP_SYS_ADMIN: %m, ignoring."); + + r = drop_capability(CAP_BPF); + if (r < 0) + log_warning_errno(r, "Failed to drop CAP_BPF: %m, ignoring."); + manager_set_sysctl(m); r = manager_request_static_address_labels(m); diff --git a/src/network/networkd-manager.h b/src/network/networkd-manager.h index 076cf5e3d6..5a0decced2 100644 --- a/src/network/networkd-manager.h +++ b/src/network/networkd-manager.h @@ -123,6 +123,11 @@ struct Manager { /* sysctl */ int ip_forwarding[2]; Hashmap *sysctl_shadow; + sd_event_source *sysctl_event_source; + struct ring_buffer *sysctl_buffer; + struct sysctl_monitor_bpf *sysctl_skel; + struct bpf_link *sysctl_link; + int cgroup_fd; }; int manager_new(Manager **ret, bool test_mode); diff --git a/src/network/networkd-sysctl.c b/src/network/networkd-sysctl.c index 62b0b12680..b85f0ca568 100644 --- a/src/network/networkd-sysctl.c +++ b/src/network/networkd-sysctl.c @@ -4,7 +4,11 @@ #include #include +#include "sd-messages.h" + #include "af-list.h" +#include "cgroup-util.h" +#include "fd-util.h" #include "missing_network.h" #include "networkd-link.h" #include "networkd-lldp-tx.h" @@ -12,10 +16,197 @@ #include "networkd-ndisc.h" #include "networkd-network.h" #include "networkd-sysctl.h" +#include "path-util.h" #include "socket-util.h" #include "string-table.h" #include "sysctl-util.h" +#if HAVE_VMLINUX_H + +#include "bpf-link.h" + +#include "bpf/sysctl_monitor/sysctl-monitor-skel.h" +#include "bpf/sysctl_monitor/sysctl-write-event.h" + +static struct sysctl_monitor_bpf *sysctl_monitor_bpf_free(struct sysctl_monitor_bpf *obj) { + sysctl_monitor_bpf__destroy(obj); + return NULL; +} + +static struct ring_buffer *rb_free(struct ring_buffer *rb) { + sym_ring_buffer__free(rb); + return NULL; +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(struct 
sysctl_monitor_bpf *, sysctl_monitor_bpf_free); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct ring_buffer *, rb_free); + +static int sysctl_event_handler(void *ctx, void *data, size_t data_sz) { + struct sysctl_write_event *we = ASSERT_PTR(data); + Hashmap **sysctl_shadow = ASSERT_PTR(ctx); + _cleanup_free_ char *path = NULL; + char *value; + + /* Returning a negative value interrupts the ring buffer polling, + * so do it only in case of a fatal error like a version mismatch. */ + if (we->version != 1) + return log_warning_errno(SYNTHETIC_ERRNO(EINVAL), + "Unexpected sysctl event, disabling sysctl monitoring: %d", we->version); + + if (we->errorcode != 0) { + log_warning_errno(we->errorcode, "Sysctl monitor BPF returned error: %m"); + return 0; + } + + path = path_join("/proc/sys", we->path); + if (!path) { + log_oom(); + return 0; + } + + /* If we never managed this handle, ignore it. */ + value = hashmap_get(*sysctl_shadow, path); + if (!value) + return 0; + + if (!strneq(value, we->newvalue, sizeof(we->newvalue))) + log_struct(LOG_WARNING, + "MESSAGE_ID=" SD_MESSAGE_SYSCTL_CHANGED_STR, + "OBJECT_PID=%d", we->pid, + "OBJECT_COMM=%s", we->comm, + "SYSCTL=/proc/sys/%s", we->path, + "OLDVALUE=%s", we->current, + "NEWVALUE=%s", we->newvalue, + "OURVALUE=%s", value, + LOG_MESSAGE("Foreign process '%s[%d]' changed sysctl '/proc/sys/%s' from '%s' to '%s', conflicting with our setting to '%s'", + we->comm, we->pid, we->path, we->current, we->newvalue, value)); + + return 0; +} + +static int on_ringbuf_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) { + struct ring_buffer *rb = ASSERT_PTR(userdata); + int r; + + r = sym_ring_buffer__poll(rb, /* timeout_msec= */ 0); + if (r < 0 && errno != EINTR) + log_error_errno(errno, "Error polling ring buffer: %m"); + + return 0; +} + +int sysctl_add_monitor(Manager *manager) { + _cleanup_(sysctl_monitor_bpf_freep) struct sysctl_monitor_bpf *obj = NULL; + _cleanup_(bpf_link_freep) struct bpf_link *sysctl_link = NULL; + 
_cleanup_(rb_freep) struct ring_buffer *sysctl_buffer = NULL; + _cleanup_close_ int cgroup_fd = -EBADF, rootcg = -EBADF; + _cleanup_free_ char *cgroup = NULL; + int idx = 0, r; + + assert(manager); + + r = dlopen_bpf(); + if (r < 0) { + log_info_errno(r, "sysctl monitor disabled, as BPF support is not available."); + return 0; + } + + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); + if (r < 0) + return log_warning_errno(r, "Failed to get cgroup path, ignoring: %m."); + + rootcg = cg_path_open(SYSTEMD_CGROUP_CONTROLLER, "/"); + if (rootcg < 0) + return log_warning_errno(rootcg, "Failed to open cgroup, ignoring: %m."); + + obj = sysctl_monitor_bpf__open_and_load(); + if (!obj) { + log_info_errno(errno, "Unable to load sysctl monitor BPF program, ignoring: %m."); + return 0; + } + + cgroup_fd = cg_path_open(SYSTEMD_CGROUP_CONTROLLER, cgroup); + if (cgroup_fd < 0) + return log_warning_errno(cgroup_fd, "Failed to open cgroup: %m"); + + if (sym_bpf_map_update_elem(sym_bpf_map__fd(obj->maps.cgroup_map), &idx, &cgroup_fd, BPF_ANY)) + return log_warning_errno(errno, "Failed to update cgroup map: %m"); + + sysctl_link = sym_bpf_program__attach_cgroup(obj->progs.sysctl_monitor, rootcg); + r = bpf_get_error_translated(sysctl_link); + if (r < 0) { + log_info_errno(r, "Unable to attach sysctl monitor BPF program to cgroup, ignoring: %m."); + return 0; + } + + sysctl_buffer = sym_ring_buffer__new( + sym_bpf_map__fd(obj->maps.written_sysctls), + sysctl_event_handler, &manager->sysctl_shadow, NULL); + if (!sysctl_buffer) + return log_warning_errno(errno, "Failed to create ring buffer: %m"); + + r = sd_event_add_io(manager->event, &manager->sysctl_event_source, + sym_ring_buffer__epoll_fd(sysctl_buffer), EPOLLIN, on_ringbuf_io, sysctl_buffer); + if (r < 0) + return log_warning_errno(r, "Failed to watch sysctl event ringbuffer: %m"); + + manager->sysctl_link = TAKE_PTR(sysctl_link); + manager->sysctl_skel = TAKE_PTR(obj); + manager->sysctl_buffer = 
TAKE_PTR(sysctl_buffer); + manager->cgroup_fd = TAKE_FD(cgroup_fd); + + return 0; +} + +void sysctl_remove_monitor(Manager *manager) { + assert(manager); + + manager->sysctl_event_source = sd_event_source_disable_unref(manager->sysctl_event_source); + + if (manager->sysctl_buffer) { + sym_ring_buffer__free(manager->sysctl_buffer); + manager->sysctl_buffer = NULL; + } + + if (manager->sysctl_link) { + sym_bpf_link__destroy(manager->sysctl_link); + manager->sysctl_link = NULL; + } + + if (manager->sysctl_skel) { + sysctl_monitor_bpf__destroy(manager->sysctl_skel); + manager->sysctl_skel = NULL; + } + + manager->cgroup_fd = safe_close(manager->cgroup_fd); +} + +int sysctl_clear_link_shadows(Link *link) { + _cleanup_free_ char *ipv4 = NULL, *ipv6 = NULL; + char *key = NULL, *value = NULL; + + assert(link); + assert(link->manager); + + ipv4 = path_join("/proc/sys/net/ipv4/conf", link->ifname); + if (!ipv4) + return log_oom(); + + ipv6 = path_join("/proc/sys/net/ipv6/conf", link->ifname); + if (!ipv6) + return log_oom(); + + HASHMAP_FOREACH_KEY(value, key, link->manager->sysctl_shadow) + if (path_startswith(key, ipv4) || path_startswith(key, ipv6)) { + assert_se(hashmap_remove_value(link->manager->sysctl_shadow, key, value) == value); + free(key); + free(value); + } + + return 0; +} +#endif + static void manager_set_ip_forwarding(Manager *manager, int family) { int r, t; diff --git a/src/network/networkd-sysctl.h b/src/network/networkd-sysctl.h index d7a9b1f320..446b835555 100644 --- a/src/network/networkd-sysctl.h +++ b/src/network/networkd-sysctl.h @@ -27,6 +27,16 @@ typedef enum IPReversePathFilter { _IP_REVERSE_PATH_FILTER_INVALID = -EINVAL, } IPReversePathFilter; +#if HAVE_VMLINUX_H +int sysctl_add_monitor(Manager *manager); +void sysctl_remove_monitor(Manager *manager); +int sysctl_clear_link_shadows(Link *link); +#else +static inline int sysctl_add_monitor(Manager *manager) { return 0; } +static inline void sysctl_remove_monitor(Manager *manager) { } +static 
inline int sysctl_clear_link_shadows(Link *link) { return 0; } +#endif + void manager_set_sysctl(Manager *manager); int link_get_ip_forwarding(Link *link, int family); diff --git a/src/network/networkd.c b/src/network/networkd.c index 69a28647c8..2798cd8cf8 100644 --- a/src/network/networkd.c +++ b/src/network/networkd.c @@ -62,7 +62,9 @@ static int run(int argc, char *argv[]) { (1ULL << CAP_NET_ADMIN) | (1ULL << CAP_NET_BIND_SERVICE) | (1ULL << CAP_NET_BROADCAST) | - (1ULL << CAP_NET_RAW)); + (1ULL << CAP_NET_RAW) | + (1ULL << CAP_SYS_ADMIN) | + (1ULL << CAP_BPF)); if (r < 0) return log_error_errno(r, "Failed to drop privileges: %m"); } diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h index f4f4e95b7f..441f4e6888 100644 --- a/src/systemd/sd-messages.h +++ b/src/systemd/sd-messages.h @@ -277,6 +277,9 @@ _SD_BEGIN_DECLARATIONS; #define SD_MESSAGE_SRK_ENROLLMENT_NEEDS_AUTHORIZATION SD_ID128_MAKE(ad,70,89,f9,28,ac,4f,7e,a0,0c,07,45,7d,47,ba,8a) #define SD_MESSAGE_SRK_ENROLLMENT_NEEDS_AUTHORIZATION_STR SD_ID128_MAKE_STR(ad,70,89,f9,28,ac,4f,7e,a0,0c,07,45,7d,47,ba,8a) +#define SD_MESSAGE_SYSCTL_CHANGED SD_ID128_MAKE(9c,f5,6b,8b,af,95,46,cf,94,78,78,3a,8d,e4,21,13) +#define SD_MESSAGE_SYSCTL_CHANGED_STR SD_ID128_MAKE_STR(9c,f5,6b,8b,af,95,46,cf,94,78,78,3a,8d,e4,21,13) + _SD_END_DECLARATIONS; #endif diff --git a/units/systemd-networkd.service.in b/units/systemd-networkd.service.in index 6141fdbb6d..cf81c7d841 100644 --- a/units/systemd-networkd.service.in +++ b/units/systemd-networkd.service.in @@ -20,9 +20,9 @@ Conflicts=shutdown.target initrd-switch-root.target Wants=systemd-networkd.socket network.target systemd-networkd-persistent-storage.service [Service] -AmbientCapabilities=CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_NET_RAW +AmbientCapabilities=CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_NET_RAW CAP_BPF CAP_SYS_ADMIN BusName=org.freedesktop.network1 -CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_BIND_SERVICE 
CAP_NET_BROADCAST CAP_NET_RAW +CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_NET_RAW CAP_BPF CAP_SYS_ADMIN DeviceAllow=char-* rw ExecStart=!!{{LIBEXECDIR}}/systemd-networkd FileDescriptorStoreMax=512 @@ -48,7 +48,7 @@ RuntimeDirectory=systemd/netif RuntimeDirectoryPreserve=yes SystemCallArchitectures=native SystemCallErrorNumber=EPERM -SystemCallFilter=@system-service +SystemCallFilter=@system-service bpf Type=notify-reload User=systemd-network {{SERVICE_WATCHDOG}} -- cgit v1.2.3