diff options
Diffstat (limited to 'src')
46 files changed, 1253 insertions, 94 deletions
diff --git a/src/basic/lock-util.c b/src/basic/lock-util.c index aef395d78e..db9905cb48 100644 --- a/src/basic/lock-util.c +++ b/src/basic/lock-util.c @@ -203,9 +203,9 @@ int lock_generic_with_timeout(int fd, LockType type, int operation, usec_t timeo assert(fd >= 0); - /* A version of lock_generic(), but with a time-out. We do this in a child process, since the kernel + /* A version of lock_generic(), but with a timeout. We do this in a child process, since the kernel * APIs natively don't support a timeout. We set a SIGALRM timer that will kill the child after the - * timeout is hit. Returns -ETIMEDOUT if the time-out is hit, and 0 on success. + * timeout is hit. Returns -ETIMEDOUT if the timeout is hit, and 0 on success. * * This only works for BSD and UNPOSIX locks, as only those are fd-bound, and hence can be acquired * from any process that has access to the fd. POSIX locks OTOH are process-bound, and hence if we'd diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c index 16053ff2a9..a80ed32791 100644 --- a/src/basic/namespace-util.c +++ b/src/basic/namespace-util.c @@ -527,19 +527,19 @@ int is_idmapping_supported(const char *path) { return r; userns_fd = userns_acquire(uid_map, gid_map); - if (ERRNO_IS_NEG_NOT_SUPPORTED(userns_fd)) + if (ERRNO_IS_NEG_NOT_SUPPORTED(userns_fd) || ERRNO_IS_NEG_PRIVILEGE(userns_fd)) return false; if (userns_fd < 0) return log_debug_errno(userns_fd, "ID-mapping supported namespace acquire failed for '%s' : %m", path); dir_fd = RET_NERRNO(open(path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW)); - if (ERRNO_IS_NEG_NOT_SUPPORTED(dir_fd) || dir_fd == -EINVAL) + if (ERRNO_IS_NEG_NOT_SUPPORTED(dir_fd)) return false; if (dir_fd < 0) return log_debug_errno(dir_fd, "ID-mapping supported open failed for '%s' : %m", path); mount_fd = RET_NERRNO(open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC)); - if (ERRNO_IS_NEG_NOT_SUPPORTED(mount_fd) || mount_fd == -EINVAL) + if (ERRNO_IS_NEG_NOT_SUPPORTED(mount_fd) || 
ERRNO_IS_NEG_PRIVILEGE(mount_fd) || mount_fd == -EINVAL) return false; if (mount_fd < 0) return log_debug_errno(mount_fd, "ID-mapping supported open_tree failed for '%s' : %m", path); @@ -549,7 +549,7 @@ int is_idmapping_supported(const char *path) { .attr_set = MOUNT_ATTR_IDMAP | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RDONLY | MOUNT_ATTR_NODEV, .userns_fd = userns_fd, }, sizeof(struct mount_attr))); - if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || r == -EINVAL || r == -EPERM) + if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL) return false; if (r < 0) return log_debug_errno(r, "ID-mapping supported setattr failed for '%s' : %m", path); diff --git a/src/basic/process-util.c b/src/basic/process-util.c index a85a1b35f0..75bc65652e 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -1521,11 +1521,12 @@ int safe_fork_full( } } - if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS)) != 0) + if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS|FORK_NEW_PIDNS)) != 0) pid = raw_clone(SIGCHLD| (FLAGS_SET(flags, FORK_NEW_MOUNTNS) ? CLONE_NEWNS : 0) | (FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0) | - (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0)); + (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0) | + (FLAGS_SET(flags, FORK_NEW_PIDNS) ? 
CLONE_NEWPID : 0)); else pid = fork(); if (pid < 0) diff --git a/src/basic/process-util.h b/src/basic/process-util.h index 05b7a69fc6..cb6d47a5bb 100644 --- a/src/basic/process-util.h +++ b/src/basic/process-util.h @@ -166,7 +166,7 @@ int must_be_root(void); pid_t clone_with_nested_stack(int (*fn)(void *), int flags, void *userdata); -/* 💣 Note that FORK_NEW_USERNS, FORK_NEW_MOUNTNS, or FORK_NEW_NETNS should not be called in threaded +/* 💣 Note that FORK_NEW_USERNS, FORK_NEW_MOUNTNS, FORK_NEW_NETNS or FORK_NEW_PIDNS should not be called in threaded * programs, because they cause us to use raw_clone() which does not synchronize the glibc malloc() locks, * and thus will cause deadlocks if the parent uses threads and the child does memory allocations. Hence: if * the parent is threaded these flags may not be used. These flags cannot be used if the parent uses threads @@ -181,18 +181,19 @@ typedef enum ForkFlags { FORK_REOPEN_LOG = 1 << 6, /* Reopen log connection */ FORK_LOG = 1 << 7, /* Log above LOG_DEBUG log level about failures */ FORK_WAIT = 1 << 8, /* Wait until child exited */ - FORK_NEW_MOUNTNS = 1 << 9, /* Run child in its own mount namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ - FORK_MOUNTNS_SLAVE = 1 << 10, /* Make child's mount namespace MS_SLAVE */ - FORK_PRIVATE_TMP = 1 << 11, /* Mount new /tmp/ in the child (combine with FORK_NEW_MOUNTNS!) */ - FORK_RLIMIT_NOFILE_SAFE = 1 << 12, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */ - FORK_STDOUT_TO_STDERR = 1 << 13, /* Make stdout a copy of stderr */ - FORK_FLUSH_STDIO = 1 << 14, /* fflush() stdout (and stderr) before forking */ - FORK_NEW_USERNS = 1 << 15, /* Run child in its own user namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ - FORK_CLOEXEC_OFF = 1 << 16, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */ - FORK_KEEP_NOTIFY_SOCKET = 1 << 17, /* Unless this specified, $NOTIFY_SOCKET will be unset. 
*/ - FORK_DETACH = 1 << 18, /* Double fork if needed to ensure PID1/subreaper is parent */ - FORK_NEW_NETNS = 1 << 19, /* Run child in its own network namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ - FORK_PACK_FDS = 1 << 20, /* Rearrange the passed FDs to be FD 3,4,5,etc. Updates the array in place (combine with FORK_CLOSE_ALL_FDS!) */ + FORK_MOUNTNS_SLAVE = 1 << 9, /* Make child's mount namespace MS_SLAVE */ + FORK_PRIVATE_TMP = 1 << 10, /* Mount new /tmp/ in the child (combine with FORK_NEW_MOUNTNS!) */ + FORK_RLIMIT_NOFILE_SAFE = 1 << 11, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */ + FORK_STDOUT_TO_STDERR = 1 << 12, /* Make stdout a copy of stderr */ + FORK_FLUSH_STDIO = 1 << 13, /* fflush() stdout (and stderr) before forking */ + FORK_CLOEXEC_OFF = 1 << 14, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */ + FORK_KEEP_NOTIFY_SOCKET = 1 << 15, /* Unless this specified, $NOTIFY_SOCKET will be unset. */ + FORK_DETACH = 1 << 16, /* Double fork if needed to ensure PID1/subreaper is parent */ + FORK_PACK_FDS = 1 << 17, /* Rearrange the passed FDs to be FD 3,4,5,etc. Updates the array in place (combine with FORK_CLOSE_ALL_FDS!) */ + FORK_NEW_MOUNTNS = 1 << 18, /* Run child in its own mount namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ + FORK_NEW_USERNS = 1 << 19, /* Run child in its own user namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ + FORK_NEW_NETNS = 1 << 20, /* Run child in its own network namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ + FORK_NEW_PIDNS = 1 << 21, /* Run child in its own PID namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ } ForkFlags; int safe_fork_full( diff --git a/src/boot/efi/chid.c b/src/boot/efi/chid.c new file mode 100644 index 0000000000..50d840aea0 --- /dev/null +++ b/src/boot/efi/chid.c @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +/* + * Based on Nikita Travkin's dtbloader implementation. 
+ * Copyright (c) 2024 Nikita Travkin <nikita@trvn.ru> + * + * https://github.com/TravMurav/dtbloader/blob/main/src/chid.c + */ + +/* + * Based on Linaro dtbloader implementation. + * Copyright (c) 2019, Linaro. All rights reserved. + * + * https://github.com/aarch64-laptops/edk2/blob/dtbloader-app/EmbeddedPkg/Application/ConfigTableLoader/CHID.c + */ + +#include "chid.h" +#include "chid-fundamental.h" +#include "efi.h" +#include "sha1-fundamental.h" +#include "smbios.h" +#include "util.h" + +/** + * smbios_to_hashable_string() - Convert ascii smbios string to stripped char16_t. + */ +static char16_t *smbios_to_hashable_string(const char *str) { + if (!str) + /* User of this function is expected to free the result. */ + return xnew0(char16_t, 1); + + /* + * We need to strip leading and trailing spaces, leading zeroes. + * See fwupd/libfwupdplugin/fu-hwids-smbios.c + */ + while (*str == ' ') + str++; + + while (*str == '0') + str++; + + size_t len = strlen8(str); + + while (len > 0 && str[len - 1] == ' ') + len--; + + return xstrn8_to_16(str, len); +} + +/* This has to be in a struct due to _cleanup_ in populate_board_chids */ +typedef struct SmbiosInfo { + const char16_t *smbios_fields[_CHID_SMBIOS_FIELDS_MAX]; +} SmbiosInfo; + +static void smbios_info_populate(SmbiosInfo *ret_info) { + static RawSmbiosInfo raw = {}; + static bool raw_info_populated = false; + + if (!raw_info_populated) { + smbios_raw_info_populate(&raw); + raw_info_populated = true; + } + + ret_info->smbios_fields[CHID_SMBIOS_MANUFACTURER] = smbios_to_hashable_string(raw.manufacturer); + ret_info->smbios_fields[CHID_SMBIOS_PRODUCT_NAME] = smbios_to_hashable_string(raw.product_name); + ret_info->smbios_fields[CHID_SMBIOS_PRODUCT_SKU] = smbios_to_hashable_string(raw.product_sku); + ret_info->smbios_fields[CHID_SMBIOS_FAMILY] = smbios_to_hashable_string(raw.family); + ret_info->smbios_fields[CHID_SMBIOS_BASEBOARD_PRODUCT] = smbios_to_hashable_string(raw.baseboard_product); + 
ret_info->smbios_fields[CHID_SMBIOS_BASEBOARD_MANUFACTURER] = smbios_to_hashable_string(raw.baseboard_manufacturer); +} + +static void smbios_info_done(SmbiosInfo *info) { + FOREACH_ELEMENT(i, info->smbios_fields) + free((void *) *i); /* i points at the array slot, not at the string: release the string stored in it */ +} + +static EFI_STATUS populate_board_chids(EFI_GUID ret_chids[static CHID_TYPES_MAX]) { + _cleanup_(smbios_info_done) SmbiosInfo info = {}; + + if (!ret_chids) + return EFI_INVALID_PARAMETER; + + smbios_info_populate(&info); + chid_calculate(info.smbios_fields, ret_chids); + + return EFI_SUCCESS; +} + +EFI_STATUS chid_match(const void *hwid_buffer, size_t hwid_length, const Device **ret_device) { + EFI_STATUS status; + + if ((uintptr_t) hwid_buffer % alignof(Device) != 0) + return EFI_INVALID_PARAMETER; + + const Device *devices = ASSERT_PTR(hwid_buffer); + + EFI_GUID chids[CHID_TYPES_MAX] = {}; + static const size_t priority[] = { 3, 6, 8, 10, 4, 5, 7, 9, 11 }; /* From most to least specific. */ + + status = populate_board_chids(chids); + if (EFI_STATUS_IS_ERROR(status)) + return log_error_status(status, "Failed to populate board CHIDs: %m"); + + size_t n_devices = 0; + + /* Count devices and check validity */ + for (; (n_devices + 1) * sizeof(*devices) < hwid_length;) { + if (devices[n_devices].struct_size == 0) + break; + if (devices[n_devices].struct_size != sizeof(*devices)) + return EFI_UNSUPPORTED; + n_devices++; + } + + if (n_devices == 0) + return EFI_NOT_FOUND; + + FOREACH_ELEMENT(i, priority) + FOREACH_ARRAY(dev, devices, n_devices) { + /* Can't take a pointer to a packed struct member, so copy to a local variable */ + EFI_GUID chid = dev->chid; + if (efi_guid_equal(&chids[*i], &chid)) { + *ret_device = dev; + return EFI_SUCCESS; + } + } + + return EFI_NOT_FOUND; +} diff --git a/src/boot/efi/chid.h b/src/boot/efi/chid.h new file mode 100644 index 0000000000..ea6e2d348f --- /dev/null +++ b/src/boot/efi/chid.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +#pragma once + +#include "efi.h" + +#include 
"chid-fundamental.h" + +typedef struct Device { + uint32_t struct_size; /* = sizeof(struct Device), or 0 for EOL */ + uint32_t name_offset; /* nul-terminated string or 0 if not present */ + uint32_t compatible_offset; /* nul-terminated string or 0 if not present */ + EFI_GUID chid; +} _packed_ Device; + +static inline const char* device_get_name(const void *base, const Device *device) { + return device->name_offset == 0 ? NULL : (const char *) ((const uint8_t *) base + device->name_offset); +} + +static inline const char* device_get_compatible(const void *base, const Device *device) { + return device->compatible_offset == 0 ? NULL : (const char *) ((const uint8_t *) base + device->compatible_offset); +} + +EFI_STATUS chid_match(const void *chids_buffer, size_t chids_length, const Device **ret_device); diff --git a/src/boot/efi/devicetree.c b/src/boot/efi/devicetree.c index 61a43cd77d..f3563f296f 100644 --- a/src/boot/efi/devicetree.c +++ b/src/boot/efi/devicetree.c @@ -106,6 +106,129 @@ EFI_STATUS devicetree_install(struct devicetree_state *state, EFI_FILE *root_dir MAKE_GUID_PTR(EFI_DTB_TABLE), PHYSICAL_ADDRESS_TO_POINTER(state->addr)); } +static const char* devicetree_get_compatible(const void *dtb) { + if ((uintptr_t) dtb % alignof(FdtHeader) != 0) + return NULL; + + const FdtHeader *dt_header = ASSERT_PTR(dtb); + + if (be32toh(dt_header->magic) != UINT32_C(0xd00dfeed)) + return NULL; + + uint32_t dt_size = be32toh(dt_header->total_size); + uint32_t struct_off = be32toh(dt_header->off_dt_struct); + uint32_t struct_size = be32toh(dt_header->size_dt_struct); + uint32_t strings_off = be32toh(dt_header->off_dt_strings); + uint32_t strings_size = be32toh(dt_header->size_dt_strings); + uint32_t end; + + if (PTR_TO_SIZE(dtb) > SIZE_MAX - dt_size) + return NULL; + + if (!ADD_SAFE(&end, strings_off, strings_size) || end > dt_size) + return NULL; + const char *strings_block = (const char *) ((const uint8_t *) dt_header + strings_off); + + if (struct_off % sizeof(uint32_t) 
!= 0) + return NULL; + if (struct_size % sizeof(uint32_t) != 0 || + !ADD_SAFE(&end, struct_off, struct_size) || + end > strings_off) + return NULL; + const uint32_t *cursor = (const uint32_t *) ((const uint8_t *) dt_header + struct_off); + + size_t size_words = struct_size / sizeof(uint32_t); + size_t len, name_off, len_words, s; + + for (size_t i = 0; i < size_words; i++) { /* i indexes 32-bit words of the structure block, so bound it by size_words rather than the byte offset in 'end' */ + switch (be32toh(cursor[i])) { + case FDT_BEGIN_NODE: + if (i + 1 >= size_words || cursor[++i] != 0) /* the word after the token must exist before we look at it */ + return NULL; + break; + case FDT_NOP: + break; + case FDT_PROP: + /* At least 3 words should be present: len, name_off, c (nul-terminated string always has non-zero length) */ + if (i + 3 >= size_words) + return NULL; + len = be32toh(cursor[++i]); + name_off = be32toh(cursor[++i]); + len_words = DIV_ROUND_UP(len, sizeof(uint32_t)); + + if (ADD_SAFE(&s, name_off, STRLEN("compatible")) && + s < strings_size && streq8(strings_block + name_off, "compatible")) { + const char *c = (const char *) &cursor[++i]; + if (len == 0 || i + len_words > size_words || c[len - 1] != '\0') + c = NULL; + + return c; + } + i += len_words; + break; + default: + return NULL; + } + } + + return NULL; +} + +bool firmware_devicetree_exists(void) { + return !!find_configuration_table(MAKE_GUID_PTR(EFI_DTB_TABLE)); +} + +/* This function checks if the firmware provided Devicetree + * and a UKI provided Devicetree contain the same first entry + * on their respective "compatible" fields (which usually defines + * the actual device model). More specifically, given the FW/UKI + * "compatible" property pair: + * + * compatible = "string1", "string2"; + * compatible = "string1", "string3"; + * + * the function reports a match, while for + * + * compatible = "string1", "string3"; + * compatible = "string2", "string1"; + * + * it reports a mismatch. 
+ * + * Other entries might refer to SoC and therefore can't be used for matching + */ +EFI_STATUS devicetree_match(const void *uki_dtb, size_t uki_dtb_length) { + const void *fw_dtb = find_configuration_table(MAKE_GUID_PTR(EFI_DTB_TABLE)); + if (!fw_dtb) + return EFI_UNSUPPORTED; + + const char *fw_compat = devicetree_get_compatible(fw_dtb); + if (!fw_compat) + return EFI_UNSUPPORTED; + + return devicetree_match_by_compatible(uki_dtb, uki_dtb_length, fw_compat); +} + +EFI_STATUS devicetree_match_by_compatible(const void *uki_dtb, size_t uki_dtb_length, const char *compat) { + if ((uintptr_t) uki_dtb % alignof(FdtHeader) != 0) + return EFI_INVALID_PARAMETER; + + const FdtHeader *dt_header = ASSERT_PTR(uki_dtb); + + if (uki_dtb_length < sizeof(FdtHeader) || + uki_dtb_length < be32toh(dt_header->total_size)) + return EFI_INVALID_PARAMETER; + + if (!compat) + return EFI_INVALID_PARAMETER; + + const char *dt_compat = devicetree_get_compatible(uki_dtb); + if (!dt_compat) + return EFI_INVALID_PARAMETER; + + /* Only matches the first compatible string from each DT */ + return streq8(dt_compat, compat) ? 
EFI_SUCCESS : EFI_NOT_FOUND; +} + EFI_STATUS devicetree_install_from_memory( struct devicetree_state *state, const void *dtb_buffer, size_t dtb_length) { diff --git a/src/boot/efi/devicetree.h b/src/boot/efi/devicetree.h index 33eaa2256c..5f6720f655 100644 --- a/src/boot/efi/devicetree.h +++ b/src/boot/efi/devicetree.h @@ -9,6 +9,30 @@ struct devicetree_state { void *orig; }; +enum { + FDT_BEGIN_NODE = 1, + FDT_END_NODE = 2, + FDT_PROP = 3, + FDT_NOP = 4, + FDT_END = 9, +}; + +typedef struct FdtHeader { + uint32_t magic; + uint32_t total_size; + uint32_t off_dt_struct; + uint32_t off_dt_strings; + uint32_t off_mem_rsv_map; + uint32_t version; + uint32_t last_comp_version; + uint32_t boot_cpuid_phys; + uint32_t size_dt_strings; + uint32_t size_dt_struct; +} FdtHeader; + +bool firmware_devicetree_exists(void); +EFI_STATUS devicetree_match(const void *uki_dtb, size_t uki_dtb_length); +EFI_STATUS devicetree_match_by_compatible(const void *uki_dtb, size_t uki_dtb_length, const char *compat); EFI_STATUS devicetree_install(struct devicetree_state *state, EFI_FILE *root_dir, char16_t *name); EFI_STATUS devicetree_install_from_memory( struct devicetree_state *state, const void *dtb_buffer, size_t dtb_length); diff --git a/src/boot/efi/meson.build b/src/boot/efi/meson.build index 0109793b7a..29c5455dbd 100644 --- a/src/boot/efi/meson.build +++ b/src/boot/efi/meson.build @@ -254,6 +254,7 @@ endif ############################################################ libefi_sources = files( + 'chid.c', 'console.c', 'device-path-util.c', 'devicetree.c', diff --git a/src/boot/efi/pe.c b/src/boot/efi/pe.c index 26dfcd4291..00739a7c74 100644 --- a/src/boot/efi/pe.c +++ b/src/boot/efi/pe.c @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ +#include "chid.h" +#include "devicetree.h" #include "pe.h" #include "util.h" @@ -162,11 +164,46 @@ static bool pe_section_name_equal(const char *a, const char *b) { return true; } -static void pe_locate_sections( +static bool 
pe_use_this_dtb( + const void *dtb, + size_t dtb_size, + const void *base, + const Device *device, + size_t section_nb) { + + assert(dtb); + + EFI_STATUS err; + + err = devicetree_match(dtb, dtb_size); + if (err == EFI_SUCCESS) + return true; + if (err != EFI_UNSUPPORTED) + return false; + + /* There's nothing to match against if firmware does not provide DTB and there is no .hwids section */ + if (!device || !base) + return false; + + const char *compatible = device_get_compatible(base, device); + if (!compatible) + return false; + + err = devicetree_match_by_compatible(dtb, dtb_size, compatible); + if (err == EFI_SUCCESS) + return true; + if (err == EFI_INVALID_PARAMETER) + log_error_status(err, "Found bad DT blob in PE section %zu", section_nb); + return false; +} + +static void pe_locate_sections_internal( const PeSectionHeader section_table[], size_t n_section_table, const char *const section_names[], size_t validate_base, + const void *device_table, + const Device *device, PeSectionVector sections[]) { assert(section_table || n_section_table == 0); @@ -206,6 +243,20 @@ static void pe_locate_sections( continue; } + /* Special handling for .dtbauto sections compared to plain .dtb */ + if (pe_section_name_equal(section_names[i], ".dtbauto")) { + /* .dtbauto sections require validate_base for matching */ + if (!validate_base) + break; + if (!pe_use_this_dtb( + (const uint8_t *) SIZE_TO_PTR(validate_base) + j->VirtualAddress, + j->VirtualSize, + device_table, + device, + i)) + continue; + } + /* At this time, the sizes and offsets have been validated. 
Store them away */ sections[i] = (PeSectionVector) { .memory_size = j->VirtualSize, @@ -224,6 +275,73 @@ static void pe_locate_sections( } } +static bool looking_for_dbauto(const char *const section_names[]) { + assert(section_names); + + for (size_t i = 0; section_names[i]; i++) + if (pe_section_name_equal(section_names[i], ".dtbauto")) + return true; + return false; +} + +static void pe_locate_sections( + const PeSectionHeader section_table[], + size_t n_section_table, + const char *const section_names[], + size_t validate_base, + PeSectionVector sections[]) { + + if (!looking_for_dbauto(section_names)) + return pe_locate_sections_internal( + section_table, + n_section_table, + section_names, + validate_base, + /* device_base */ NULL, + /* device */ NULL, + sections); + + /* It doesn't make sense not to provide validate_base here */ + assert(validate_base != 0); + + const void *hwids = NULL; + const Device *device = NULL; + + if (!firmware_devicetree_exists()) { + /* Find HWIDs table and search for the current device */ + PeSectionVector hwids_section = {}; + + pe_locate_sections_internal( + section_table, + n_section_table, + (const char *const[]) { ".hwids", NULL }, + validate_base, + /* device_table */ NULL, + /* device */ NULL, + &hwids_section); + + if (hwids_section.memory_offset != 0) { + hwids = (const uint8_t *) SIZE_TO_PTR(validate_base) + hwids_section.memory_offset; + + EFI_STATUS err = chid_match(hwids, hwids_section.memory_size, &device); + if (err != EFI_SUCCESS) { + log_error_status(err, "HWID matching failed, no DT blob will be selected: %m"); + hwids = NULL; + } + } else + log_info("HWIDs section is missing, no DT blob will be selected"); + } + + return pe_locate_sections_internal( + section_table, + n_section_table, + section_names, + validate_base, + hwids, + device, + sections); +} + static uint32_t get_compatibility_entry_address(const DosFileHeader *dos, const PeFileHeader *pe) { /* The kernel may provide alternative PE entry points for 
different PE architectures. This allows * booting a 64-bit kernel on 32-bit EFI that is otherwise running on a 64-bit CPU. The locations of any diff --git a/src/boot/efi/stub.c b/src/boot/efi/stub.c index 9664c95d57..7261e942d3 100644 --- a/src/boot/efi/stub.c +++ b/src/boot/efi/stub.c @@ -614,12 +614,13 @@ static EFI_STATUS load_addons( if (err != EFI_SUCCESS || (!PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_CMDLINE) && !PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTB) && + !PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTBAUTO) && !PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_INITRD) && !PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_UCODE))) { if (err == EFI_SUCCESS) err = EFI_NOT_FOUND; log_error_status(err, - "Unable to locate embedded .cmdline/.dtb/.initrd/.ucode sections in %ls, ignoring: %m", + "Unable to locate embedded .cmdline/.dtb/.dtbauto/.initrd/.ucode sections in %ls, ignoring: %m", items[i]); continue; } @@ -647,7 +648,21 @@ static EFI_STATUS load_addons( *cmdline = xasprintf("%ls%ls%ls", strempty(tmp), isempty(tmp) ? u"" : u" ", extra16); } - if (devicetree_addons && PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTB)) { + // FIXME: do we want to do something else here? 
+ // This should behave exactly as .dtb/.dtbauto in the main UKI + if (devicetree_addons && PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTBAUTO)) { + *devicetree_addons = xrealloc(*devicetree_addons, + *n_devicetree_addons * sizeof(NamedAddon), + (*n_devicetree_addons + 1) * sizeof(NamedAddon)); + + (*devicetree_addons)[(*n_devicetree_addons)++] = (NamedAddon) { + .blob = { + .iov_base = xmemdup((const uint8_t*) loaded_addon->ImageBase + sections[UNIFIED_SECTION_DTBAUTO].memory_offset, sections[UNIFIED_SECTION_DTBAUTO].memory_size), + .iov_len = sections[UNIFIED_SECTION_DTBAUTO].memory_size, + }, + .filename = xstrdup16(items[i]), + }; + } else if (devicetree_addons && PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTB)) { *devicetree_addons = xrealloc(*devicetree_addons, *n_devicetree_addons * sizeof(NamedAddon), (*n_devicetree_addons + 1) * sizeof(NamedAddon)); @@ -968,13 +983,20 @@ static void install_embedded_devicetree( assert(sections); assert(dt_state); - if (!PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTB)) + UnifiedSection section = _UNIFIED_SECTION_MAX; + + /* Use automatically selected DT if available, otherwise go for "normal" one */ + if (PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTBAUTO)) + section = UNIFIED_SECTION_DTBAUTO; + else if (PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTB)) + section = UNIFIED_SECTION_DTB; + else return; err = devicetree_install_from_memory( dt_state, - (const uint8_t*) loaded_image->ImageBase + sections[UNIFIED_SECTION_DTB].memory_offset, - sections[UNIFIED_SECTION_DTB].memory_size); + (const uint8_t*) loaded_image->ImageBase + sections[section].memory_offset, + sections[section].memory_size); if (err != EFI_SUCCESS) log_error_status(err, "Error loading embedded devicetree, ignoring: %m"); } diff --git a/src/boot/efi/util.h b/src/boot/efi/util.h index 054d49ef02..c7634576cf 100644 --- a/src/boot/efi/util.h +++ b/src/boot/efi/util.h @@ -69,6 +69,7 @@ static inline void* xmemdup(const 
void *p, size_t l) { } #define xnew(type, n) ((type *) xmalloc_multiply((n), sizeof(type))) +#define xnew0(type, n) ((type *) xcalloc_multiply((n), sizeof(type))) bool free_and_xstrdup16(char16_t **p, const char16_t *s); diff --git a/src/boot/measure.c b/src/boot/measure.c index 3c409f8bd9..9e6295b9da 100644 --- a/src/boot/measure.c +++ b/src/boot/measure.c @@ -103,6 +103,7 @@ static int help(int argc, char *argv[], void *userdata) { " --sbat=PATH Path to SBAT file %7$s .sbat\n" " --pcrpkey=PATH Path to public key for PCR signatures %7$s .pcrpkey\n" " --profile=PATH Path to profile file %7$s .profile\n" + " --hwids=PATH Path to HWIDs file %7$s .hwids\n" "\nSee the %2$s for details.\n", program_invocation_short_name, link, @@ -146,8 +147,10 @@ static int parse_argv(int argc, char *argv[]) { ARG_SBAT, _ARG_PCRSIG, /* the .pcrsig section is not input for signing, hence not actually an argument here */ ARG_PCRPKEY, + ARG_PROFILE, + ARG_HWIDS, _ARG_SECTION_LAST, - ARG_PROFILE = _ARG_SECTION_LAST, + ARG_DTBAUTO = _ARG_SECTION_LAST, ARG_BANK, ARG_PRIVATE_KEY, ARG_PRIVATE_KEY_SOURCE, @@ -170,10 +173,12 @@ static int parse_argv(int argc, char *argv[]) { { "ucode", required_argument, NULL, ARG_UCODE }, { "splash", required_argument, NULL, ARG_SPLASH }, { "dtb", required_argument, NULL, ARG_DTB }, + { "dtbauto", required_argument, NULL, ARG_DTBAUTO }, { "uname", required_argument, NULL, ARG_UNAME }, { "sbat", required_argument, NULL, ARG_SBAT }, { "pcrpkey", required_argument, NULL, ARG_PCRPKEY }, { "profile", required_argument, NULL, ARG_PROFILE }, + { "hwids", required_argument, NULL, ARG_HWIDS }, { "current", no_argument, NULL, 'c' }, { "bank", required_argument, NULL, ARG_BANK }, { "tpm2-device", required_argument, NULL, ARG_TPM2_DEVICE }, diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index a9a73b599b..e297323f1d 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -63,6 +63,7 @@ static 
BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL); static BUS_DEFINE_PROPERTY_GET_REF(property_get_private_tmp_ex, "s", PrivateTmp, private_tmp_to_string); static BUS_DEFINE_PROPERTY_GET_REF(property_get_private_users_ex, "s", PrivateUsers, private_users_to_string); static BUS_DEFINE_PROPERTY_GET_REF(property_get_protect_control_groups_ex, "s", ProtectControlGroups, protect_control_groups_to_string); +static BUS_DEFINE_PROPERTY_GET_REF(property_get_private_pids, "s", PrivatePIDs, private_pids_to_string); static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI); static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC); static BUS_DEFINE_PROPERTY_GET(property_get_cpu_affinity_from_numa, "b", ExecContext, exec_context_get_cpu_affinity_from_numa); @@ -1194,6 +1195,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("PrivateUsersEx", "s", property_get_private_users_ex, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateMounts", "b", bus_property_get_tristate, offsetof(ExecContext, private_mounts), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateIPC", "b", bus_property_get_bool, offsetof(ExecContext, private_ipc), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("PrivatePIDs", "s", property_get_private_pids, offsetof(ExecContext, private_pids), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectHome", "s", property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectSystem", "s", property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1970,6 +1972,27 @@ int bus_exec_context_set_transient_property( return 1; } + if (streq(name, "PrivatePIDs")) { + const char *s; + PrivatePIDs t; + + r = 
sd_bus_message_read(message, "s", &s); + if (r < 0) + return r; + + t = private_pids_from_string(s); + if (t < 0) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid %s setting: %s", name, s); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + c->private_pids = t; + (void) unit_write_settingf(u, flags, name, "%s=%s", + name, private_pids_to_string(c->private_pids)); + } + + return 1; + } + if (streq(name, "PrivateDevices")) return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error); diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index 4e70c84374..120067a774 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -2175,14 +2175,14 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi fd = open(a, O_WRONLY|O_CLOEXEC); if (fd < 0) { if (errno != ENOENT) { - r = -errno; + r = log_debug_errno(errno, "Failed to open %s: %m", a); goto child_fail; } /* If the file is missing the kernel is too old, let's continue anyway. 
*/ } else { if (write(fd, "deny\n", 5) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to write \"deny\" to %s: %m", a); goto child_fail; } @@ -2193,11 +2193,11 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi a = procfs_file_alloca(ppid, "gid_map"); fd = open(a, O_WRONLY|O_CLOEXEC); if (fd < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to open %s: %m", a); goto child_fail; } if (write(fd, gid_map, strlen(gid_map)) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to write GID map to %s: %m", a); goto child_fail; } fd = safe_close(fd); @@ -2206,11 +2206,11 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi a = procfs_file_alloca(ppid, "uid_map"); fd = open(a, O_WRONLY|O_CLOEXEC); if (fd < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to open %s: %m", a); goto child_fail; } if (write(fd, uid_map, strlen(uid_map)) < 0) { - r = -errno; + r = log_debug_errno(errno, "Failed to write UID map to %s: %m", a); goto child_fail; } @@ -2224,7 +2224,7 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi errno_pipe[1] = safe_close(errno_pipe[1]); if (unshare(CLONE_NEWUSER) < 0) - return -errno; + return log_debug_errno(errno, "Failed to unshare user namespace: %m"); /* Let the child know that the namespace is ready now */ if (write(unshare_ready_fd, &c, sizeof(c)) < 0) @@ -2251,6 +2251,130 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi return 1; } +static int can_mount_proc(const ExecContext *c, ExecParameters *p) { + _cleanup_close_pair_ int errno_pipe[2] = EBADF_PAIR; + _cleanup_(sigkill_waitp) pid_t pid = 0; + ssize_t n; + int r; + + assert(c); + assert(p); + + /* If running via unprivileged user manager and /proc/ is masked (e.g. /proc/kmsg is over-mounted with tmpfs + * like systemd-nspawn does), then mounting /proc/ will fail with EPERM. 
This is due to a kernel restriction + * where unprivileged user namespaces cannot mount a less restrictive instance of /proc. */ + + /* Create a communication channel so that the child can tell the parent a proper error code in case it + * failed. */ + if (pipe2(errno_pipe, O_CLOEXEC) < 0) + return log_exec_debug_errno(c, p, errno, "Failed to create pipe for communicating with child process (sd-proc-check): %m"); + + /* Fork a child process into its own mount and PID namespace. Note safe_fork() already remounts / as SLAVE + * with FORK_MOUNTNS_SLAVE. */ + r = safe_fork("(sd-proc-check)", + FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE|FORK_NEW_PIDNS, &pid); + if (r < 0) + return log_exec_debug_errno(c, p, r, "Failed to fork child process (sd-proc-check): %m"); + if (r == 0) { + errno_pipe[0] = safe_close(errno_pipe[0]); + + /* Try mounting /proc on /dev/shm/. No need to clean up the mount since the mount + * namespace will be cleaned up once the process exits. */ + r = mount_follow_verbose(LOG_DEBUG, "proc", "/dev/shm/", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); + if (r < 0) { + (void) write(errno_pipe[1], &r, sizeof(r)); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); + } + + errno_pipe[1] = safe_close(errno_pipe[1]); + + /* Try to read an error code from the child */ + n = read(errno_pipe[0], &r, sizeof(r)); + if (n < 0) + return log_exec_debug_errno(c, p, errno, "Failed to read errno from pipe with child process (sd-proc-check): %m"); + if (n == sizeof(r)) { /* an error code was sent to us */ + /* This is the expected case where proc cannot be mounted due to permissions. 
*/ + if (ERRNO_IS_NEG_PRIVILEGE(r)) + return 0; + if (r < 0) + return r; + + return -EIO; + } + if (n != 0) /* on success we should have read 0 bytes */ + return -EIO; + + r = wait_for_terminate_and_check("(sd-proc-check)", TAKE_PID(pid), 0 /* flags= */); + if (r < 0) + return log_exec_debug_errno(c, p, r, "Failed to wait for (sd-proc-check) child process to terminate: %m"); + if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */ + return log_exec_debug_errno(c, p, SYNTHETIC_ERRNO(EIO), "Child process (sd-proc-check) exited with unexpected exit status '%d'.", r); + + return 1; +} + +static int setup_private_pids(const ExecContext *c, ExecParameters *p) { + _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; + _cleanup_close_pair_ int errno_pipe[2] = EBADF_PAIR; + ssize_t n; + int r, q; + + assert(c); + assert(p); + assert(p->pidref_transport_fd >= 0); + + /* The first process created after unsharing a pid namespace becomes PID 1 in the pid namespace, so + * we have to fork after unsharing the pid namespace to become PID 1. The parent sends the child + * pidref to the manager and exits while the child process continues with the rest of exec_invoke() + * and finally executes the actual payload. */ + + /* Create a communication channel so that the parent can tell the child a proper error code in case it + * failed to send child pidref to the manager. */ + if (pipe2(errno_pipe, O_CLOEXEC) < 0) + return log_exec_debug_errno(c, p, errno, "Failed to create pipe for communicating with parent process: %m"); + + r = pidref_safe_fork("(sd-pidns-child)", FORK_NEW_PIDNS, &pidref); + if (r < 0) + return log_exec_debug_errno(c, p, r, "Failed to fork child into new pid namespace: %m"); + if (r > 0) { + errno_pipe[0] = safe_close(errno_pipe[0]); + + /* In the parent process, we send the child pidref to the manager and exit. + * If PIDFD is not supported, only the child PID is sent. 
The server then + * uses the child PID to set the new exec main process. */ + q = send_one_fd_iov( + p->pidref_transport_fd, + pidref.fd, + &IOVEC_MAKE(&pidref.pid, sizeof(pidref.pid)), + /*iovlen=*/ 1, + /*flags=*/ 0); + /* Send error code to child process. */ + (void) write(errno_pipe[1], &q, sizeof(q)); + /* Exit here so we go through the destructors in exec_invoke only once - in the child - as + * some destructors have external effects. The main codepaths continue in the child process. */ + _exit(q < 0 ? EXIT_FAILURE : EXIT_SUCCESS); + } + + errno_pipe[1] = safe_close(errno_pipe[1]); + p->pidref_transport_fd = safe_close(p->pidref_transport_fd); + + /* Try to read an error code from the parent. Note a child process cannot wait for the parent so we always + * receive an errno even on success. */ + n = read(errno_pipe[0], &r, sizeof(r)); + if (n < 0) + return log_exec_debug_errno(c, p, errno, "Failed to read errno from pipe with parent process: %m"); + if (n != sizeof(r)) + return log_exec_debug_errno(c, p, SYNTHETIC_ERRNO(EIO), "Failed to read enough bytes from pipe with parent process"); + if (r < 0) + return log_exec_debug_errno(c, p, r, "Failed to send child pidref to manager: %m"); + + /* NOTE! This function returns in the child process only. */ + return r; +} + static int create_many_symlinks(const char *root, const char *source, char **symlinks) { _cleanup_free_ char *src_abs = NULL; int r; @@ -3301,6 +3425,7 @@ static int apply_mount_namespace( .private_dev = needs_sandboxing && context->private_devices, .private_network = needs_sandboxing && exec_needs_network_namespace(context), .private_ipc = needs_sandboxing && exec_needs_ipc_namespace(context), + .private_pids = needs_sandboxing && exec_needs_pid_namespace(context) ? context->private_pids : PRIVATE_PIDS_NO, .private_tmp = needs_sandboxing ?
context->private_tmp : false, .mount_apivfs = needs_sandboxing && exec_context_get_effective_mount_apivfs(context), @@ -3573,7 +3698,7 @@ static int close_remaining_fds( const int *fds, size_t n_fds) { size_t n_dont_close = 0; - int dont_close[n_fds + 16]; + int dont_close[n_fds + 17]; assert(params); @@ -3612,6 +3737,9 @@ static int close_remaining_fds( if (params->handoff_timestamp_fd >= 0) dont_close[n_dont_close++] = params->handoff_timestamp_fd; + if (params->pidref_transport_fd >= 0) + dont_close[n_dont_close++] = params->pidref_transport_fd; + assert(n_dont_close <= ELEMENTSOF(dont_close)); return close_all_fds(dont_close, n_dont_close); @@ -3934,6 +4062,7 @@ static bool exec_context_need_unprivileged_private_users( !strv_isempty(context->extension_directories) || context->protect_system != PROTECT_SYSTEM_NO || context->protect_home != PROTECT_HOME_NO || + exec_needs_pid_namespace(context) || context->protect_kernel_tunables || context->protect_kernel_modules || context->protect_kernel_logs || @@ -4139,6 +4268,7 @@ int exec_invoke( needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */ needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */ bool keep_seccomp_privileges = false; + bool has_cap_sys_admin = false; #if HAVE_SELINUX _cleanup_free_ char *mac_selinux_context_net = NULL; bool use_selinux = false; @@ -4790,6 +4920,9 @@ int exec_invoke( uint64_t capability_ambient_set = context->capability_ambient_set; + /* Check CAP_SYS_ADMIN before we enter user namespace to see if we can mount /proc even though it's masked. */ + has_cap_sys_admin = have_effective_cap(CAP_SYS_ADMIN) > 0; + if (needs_sandboxing) { /* MAC enablement checks need to be done before a new mount ns is created, as they rely on * /sys being present.
The actual MAC context application will happen later, as late as @@ -4924,6 +5057,40 @@ int exec_invoke( } } + /* Unshare a new PID namespace before setting up mounts to ensure /proc/ is mounted with only processes in PID namespace visible. + * Note PrivatePIDs=yes implies MountAPIVFS=yes so we'll always ensure procfs is remounted. */ + if (needs_sandboxing && exec_needs_pid_namespace(context)) { + if (params->pidref_transport_fd < 0) { + *exit_status = EXIT_NAMESPACE; + return log_exec_error_errno(context, params, r, "PidRef socket is not set up: %m"); + } + + /* If we had CAP_SYS_ADMIN prior to joining the user namespace, then we are privileged and don't need + * to check if we can mount /proc/. + * + * We need to check prior to entering the user namespace because if we're running unprivileged or in a + * system without CAP_SYS_ADMIN, then we can have CAP_SYS_ADMIN in the current user namespace but not + * once we unshare a mount namespace. */ + r = has_cap_sys_admin ? 1 : can_mount_proc(context, params); + if (r < 0) { + *exit_status = EXIT_NAMESPACE; + return log_exec_error_errno(context, params, r, "Failed to detect if /proc/ can be remounted: %m"); + } + if (r == 0) { + *exit_status = EXIT_NAMESPACE; + return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(EPERM), + "PrivatePIDs=yes is configured, but /proc/ cannot be re-mounted due to lack of privileges, refusing."); + } + + r = setup_private_pids(context, params); + if (r < 0) { + *exit_status = EXIT_NAMESPACE; + return log_exec_error_errno(context, params, r, "Failed to set up pid namespace: %m"); + } + } + + /* If PrivatePIDs=yes is configured, we're now running as pid 1 in a pid namespace!
*/ + if (needs_mount_namespace) { _cleanup_free_ char *error_path = NULL; diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c index 6fa0b21968..bf6592faed 100644 --- a/src/core/execute-serialize.c +++ b/src/core/execute-serialize.c @@ -1391,6 +1391,10 @@ static int exec_parameters_serialize(const ExecParameters *p, const ExecContext if (r < 0) return r; + r = serialize_fd(f, fds, "exec-parameters-pidref-transport-fd", p->pidref_transport_fd); + if (r < 0) + return r; + if (c && exec_context_restrict_filesystems_set(c)) { r = serialize_fd(f, fds, "exec-parameters-bpf-outer-map-fd", p->bpf_restrict_fs_map_fd); if (r < 0) @@ -1660,6 +1664,14 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) { continue; close_and_replace(p->handoff_timestamp_fd, fd); + } else if ((val = startswith(l, "exec-parameters-pidref-transport-fd="))) { + int fd; + + fd = deserialize_fd(fds, val); + if (fd < 0) + continue; + + close_and_replace(p->pidref_transport_fd, fd); } else if ((val = startswith(l, "exec-parameters-bpf-outer-map-fd="))) { int fd; @@ -1926,6 +1938,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) { if (r < 0) return r; + r = serialize_item(f, "exec-context-private-pids", private_pids_to_string(c->private_pids)); + if (r < 0) + return r; + r = serialize_bool_elide(f, "exec-context-remove-ipc", c->remove_ipc); if (r < 0) return r; @@ -2813,6 +2829,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) { if (r < 0) return r; c->private_ipc = r; + } else if ((val = startswith(l, "exec-context-private-pids="))) { + c->private_pids = private_pids_from_string(val); + if (c->private_pids < 0) + return -EINVAL; } else if ((val = startswith(l, "exec-context-remove-ipc="))) { r = parse_boolean(val); if (r < 0) diff --git a/src/core/execute.c b/src/core/execute.c index 1c41b39a2f..2c5a5db10e 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -254,6 +254,12 @@ bool 
exec_is_cgroup_mount_read_only(const ExecContext *context, const ExecParame return IN_SET(exec_get_protect_control_groups(context, params), PROTECT_CONTROL_GROUPS_YES, PROTECT_CONTROL_GROUPS_STRICT); } +bool exec_needs_pid_namespace(const ExecContext *context) { + assert(context); + + return context->private_pids != PRIVATE_PIDS_NO && ns_type_supported(NAMESPACE_PID); +} + bool exec_needs_mount_namespace( const ExecContext *context, const ExecParameters *params, @@ -306,7 +312,8 @@ bool exec_needs_mount_namespace( exec_needs_cgroup_mount(context, params) || context->protect_proc != PROTECT_PROC_DEFAULT || context->proc_subset != PROC_SUBSET_ALL || - exec_needs_ipc_namespace(context)) + exec_needs_ipc_namespace(context) || + exec_needs_pid_namespace(context)) return true; if (context->root_directory) { @@ -1026,6 +1033,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { "%sProtectControlGroups: %s\n" "%sPrivateNetwork: %s\n" "%sPrivateUsers: %s\n" + "%sPrivatePIDs: %s\n" "%sProtectHome: %s\n" "%sProtectSystem: %s\n" "%sMountAPIVFS: %s\n" @@ -1052,6 +1060,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { prefix, protect_control_groups_to_string(c->protect_control_groups), prefix, yes_no(c->private_network), prefix, private_users_to_string(c->private_users), + prefix, private_pids_to_string(c->private_pids), prefix, protect_home_to_string(c->protect_home), prefix, protect_system_to_string(c->protect_system), prefix, yes_no(exec_context_get_effective_mount_apivfs(c)), diff --git a/src/core/execute.h b/src/core/execute.h index 7274c68d3d..32dabf177f 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -335,6 +335,7 @@ struct ExecContext { ProtectControlGroups protect_control_groups; ProtectSystem protect_system; ProtectHome protect_home; + PrivatePIDs private_pids; bool protect_hostname; bool dynamic_user; @@ -465,6 +466,7 @@ struct ExecParameters { char **files_env; int user_lookup_fd; int 
handoff_timestamp_fd; + int pidref_transport_fd; int bpf_restrict_fs_map_fd; @@ -486,6 +488,7 @@ struct ExecParameters { .bpf_restrict_fs_map_fd = -EBADF, \ .user_lookup_fd = -EBADF, \ .handoff_timestamp_fd = -EBADF, \ + .pidref_transport_fd = -EBADF, \ } #include "unit.h" @@ -623,6 +626,7 @@ ExecDirectoryType exec_resource_type_from_string(const char *s) _pure_; bool exec_needs_mount_namespace(const ExecContext *context, const ExecParameters *params, const ExecRuntime *runtime); bool exec_needs_network_namespace(const ExecContext *context); bool exec_needs_ipc_namespace(const ExecContext *context); +bool exec_needs_pid_namespace(const ExecContext *context); ProtectControlGroups exec_get_protect_control_groups(const ExecContext *context, const ExecParameters *params); bool exec_needs_cgroup_namespace(const ExecContext *context, const ExecParameters *params); diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index f5cbb319d7..d7564b3767 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ b/src/core/load-fragment-gperf.gperf.in @@ -133,6 +133,7 @@ {{type}}.PrivateUsers, config_parse_private_users, 0, offsetof({{type}}, exec_context.private_users) {{type}}.PrivateMounts, config_parse_tristate, 0, offsetof({{type}}, exec_context.private_mounts) {{type}}.PrivateIPC, config_parse_bool, 0, offsetof({{type}}, exec_context.private_ipc) +{{type}}.PrivatePIDs, config_parse_private_pids, 0, offsetof({{type}}, exec_context.private_pids) {{type}}.ProtectSystem, config_parse_protect_system, 0, offsetof({{type}}, exec_context.protect_system) {{type}}.ProtectHome, config_parse_protect_home, 0, offsetof({{type}}, exec_context.protect_home) {{type}}.MountFlags, config_parse_exec_mount_propagation_flag, 0, offsetof({{type}}, exec_context.mount_propagation_flag) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 1d813332b1..f34c930f4e 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -135,6 +135,7 @@ 
DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_proc, protect_proc, ProtectProc); DEFINE_CONFIG_PARSE_ENUM(config_parse_proc_subset, proc_subset, ProcSubset); DEFINE_CONFIG_PARSE_ENUM(config_parse_private_tmp, private_tmp, PrivateTmp); DEFINE_CONFIG_PARSE_ENUM(config_parse_private_users, private_users, PrivateUsers); +DEFINE_CONFIG_PARSE_ENUM(config_parse_private_pids, private_pids, PrivatePIDs); DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_control_groups, protect_control_groups, ProtectControlGroups); DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode); DEFINE_CONFIG_PARSE_ENUM(config_parse_job_mode, job_mode, JobMode); diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 9b95f0c24e..8ac962a94b 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -114,6 +114,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_namespace_path_strv); CONFIG_PARSER_PROTOTYPE(config_parse_temporary_filesystems); CONFIG_PARSER_PROTOTYPE(config_parse_private_tmp); CONFIG_PARSER_PROTOTYPE(config_parse_private_users); +CONFIG_PARSER_PROTOTYPE(config_parse_private_pids); CONFIG_PARSER_PROTOTYPE(config_parse_protect_control_groups); CONFIG_PARSER_PROTOTYPE(config_parse_cpu_quota); CONFIG_PARSER_PROTOTYPE(config_parse_allowed_cpuset); diff --git a/src/core/manager.c b/src/core/manager.c index f58bc547a6..296d7416b1 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -126,6 +126,7 @@ static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int manager_dispatch_handoff_timestamp_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); +static int manager_dispatch_pidref_transport_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata); static int 
manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata); static int manager_dispatch_run_queue(sd_event_source *source, void *userdata); static int manager_dispatch_sigchld(sd_event_source *source, void *userdata); @@ -913,6 +914,7 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags, .signal_fd = -EBADF, .user_lookup_fds = EBADF_PAIR, .handoff_timestamp_fds = EBADF_PAIR, + .pidref_transport_fds = EBADF_PAIR, .private_listen_fd = -EBADF, .dev_autofs_fd = -EBADF, .cgroup_inotify_fd = -EBADF, @@ -1309,6 +1311,55 @@ static int manager_setup_handoff_timestamp_fd(Manager *m) { return 0; } +static int manager_setup_pidref_transport_fd(Manager *m) { + int r; + + assert(m); + + /* Set up the socket pair used for passing parent and child pidrefs back when the executor unshares + * a PID namespace and forks again when using PrivatePIDs=yes. */ + + if (m->pidref_transport_fds[0] < 0) { + m->pidref_event_source = sd_event_source_disable_unref(m->pidref_event_source); + safe_close_pair(m->pidref_transport_fds); + + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->pidref_transport_fds) < 0) + return log_error_errno(errno, "Failed to allocate pidref socket: %m"); + + /* Make sure children never have to block */ + (void) fd_increase_rxbuf(m->pidref_transport_fds[0], MANAGER_SOCKET_RCVBUF_SIZE); + + r = setsockopt_int(m->pidref_transport_fds[0], SOL_SOCKET, SO_PASSCRED, true); + if (r < 0) + return log_error_errno(r, "Failed to enable SO_PASSCRED for pidref socket: %m"); + + r = setsockopt_int(m->pidref_transport_fds[0], SOL_SOCKET, SO_PASSPIDFD, true); + if (ERRNO_IS_NEG_NOT_SUPPORTED(r)) + log_debug("SO_PASSPIDFD is not supported for pidref socket, ignoring."); + else if (r < 0) + log_warning_errno(r, "Failed to enable SO_PASSPIDFD for pidref socket, ignoring: %m"); + + /* Mark the receiving socket as O_NONBLOCK (but leave sending side as-is) */ + r = fd_nonblock(m->pidref_transport_fds[0], true); + if (r < 0) + 
return log_error_errno(r, "Failed to make pidref socket O_NONBLOCK: %m"); + } + + if (!m->pidref_event_source) { + r = sd_event_add_io(m->event, &m->pidref_event_source, m->pidref_transport_fds[0], EPOLLIN, manager_dispatch_pidref_transport_fd, m); + if (r < 0) + return log_error_errno(r, "Failed to allocate pidref event source: %m"); + + r = sd_event_source_set_priority(m->pidref_event_source, EVENT_PRIORITY_PIDREF); + if (r < 0) + return log_error_errno(r, "Failed to set priority of pidref event source: %m"); + + (void) sd_event_source_set_description(m->pidref_event_source, "pidref"); + } + + return 0; +} + static unsigned manager_dispatch_cleanup_queue(Manager *m) { Unit *u; unsigned n = 0; @@ -1724,6 +1775,7 @@ Manager* manager_free(Manager *m) { sd_event_source_unref(m->run_queue_event_source); sd_event_source_unref(m->user_lookup_event_source); sd_event_source_unref(m->handoff_timestamp_event_source); + sd_event_source_unref(m->pidref_event_source); sd_event_source_unref(m->memory_pressure_event_source); safe_close(m->signal_fd); @@ -1731,6 +1783,7 @@ Manager* manager_free(Manager *m) { safe_close(m->cgroups_agent_fd); safe_close_pair(m->user_lookup_fds); safe_close_pair(m->handoff_timestamp_fds); + safe_close_pair(m->pidref_transport_fds); manager_close_ask_password(m); @@ -2077,6 +2130,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds, const char *roo /* This shouldn't fail, except if things are really broken. */ return r; + r = manager_setup_pidref_transport_fd(m); + if (r < 0) + /* This shouldn't fail, except if things are really broken. */ + return r; + /* Connect to the bus if we are good for it */ manager_setup_bus(m); @@ -3747,6 +3805,7 @@ int manager_reload(Manager *m) { (void) manager_setup_cgroups_agent(m); (void) manager_setup_user_lookup_fd(m); (void) manager_setup_handoff_timestamp_fd(m); + (void) manager_setup_pidref_transport_fd(m); /* Third, fire things up! 
*/ manager_coldplug(m); @@ -5002,6 +5061,142 @@ static int manager_dispatch_handoff_timestamp_fd(sd_event_source *source, int fd return 0; } +static int manager_dispatch_pidref_transport_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) { + Manager *m = ASSERT_PTR(userdata); + _cleanup_(pidref_done) PidRef child_pidref = PIDREF_NULL, parent_pidref = PIDREF_NULL; + _cleanup_close_ int child_pidfd = -EBADF, parent_pidfd = -EBADF; + struct ucred *ucred = NULL; + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int)) * 2) control; + pid_t child_pid; + struct msghdr msghdr = { + .msg_iov = &IOVEC_MAKE(&child_pid, sizeof(child_pid)), + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + struct cmsghdr *cmsg; + ssize_t n; + int r; + + assert(source); + + /* Server expects: + * - Parent PID in ucreds enabled via SO_PASSCRED + * - Parent PIDFD in SCM_PIDFD message enabled via SO_PASSPIDFD + * - Child PIDFD in SCM_RIGHTS in message body + * - Child PID in message IOV + * + * SO_PASSPIDFD may not be supported by the kernel so we fall back to using parent PID from ucreds + * and accept some raciness. 
*/ + n = recvmsg_safe(m->pidref_transport_fds[0], &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC); + if (ERRNO_IS_NEG_TRANSIENT(n)) + return 0; /* Spurious wakeup, try again */ + if (n == -ECHRNG) { + log_warning_errno(n, "Got message with truncated control data (unexpected fds sent?), ignoring."); + return 0; + } + if (n == -EXFULL) { + log_warning_errno(n, "Got message with truncated payload data, ignoring."); + return 0; + } + if (n < 0) + return log_error_errno(n, "Failed to receive pidref message: %m"); + + if (n != sizeof(child_pid)) { + log_warning("Got pidref message of unexpected size %zi (expected %zu), ignoring.", n, sizeof(child_pid)); + return 0; + } + + CMSG_FOREACH(cmsg, &msghdr) { + if (cmsg->cmsg_level != SOL_SOCKET) + continue; + + if (cmsg->cmsg_type == SCM_CREDENTIALS && cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) { + assert(!ucred); + ucred = CMSG_TYPED_DATA(cmsg, struct ucred); + } else if (cmsg->cmsg_type == SCM_PIDFD) { + assert(parent_pidfd < 0); + parent_pidfd = *CMSG_TYPED_DATA(cmsg, int); + } else if (cmsg->cmsg_type == SCM_RIGHTS) { + assert(child_pidfd < 0); + child_pidfd = *CMSG_TYPED_DATA(cmsg, int); + } + } + + /* Verify and set parent pidref. */ + if (!ucred || !pid_is_valid(ucred->pid)) { + log_warning("Received pidref message without valid credentials. Ignoring."); + return 0; + } + + /* Need to handle kernels without SO_PASSPIDFD where SCM_PIDFD will not be set. */ + if (parent_pidfd >= 0) + r = pidref_set_pidfd_consume(&parent_pidref, TAKE_FD(parent_pidfd)); + else + r = pidref_set_pid(&parent_pidref, ucred->pid); + if (r < 0) { + if (r == -ESRCH) + log_debug_errno(r, "PidRef child process died before message is processed. 
Ignoring."); + else + log_warning_errno(r, "Failed to pin pidref child process, ignoring message: %m"); + return 0; + } + + if (parent_pidref.pid != ucred->pid) { + assert(parent_pidref.fd >= 0); + log_warning("Got SCM_PIDFD for parent process " PID_FMT " but got SCM_CREDENTIALS for parent process " PID_FMT ". Ignoring.", + parent_pidref.pid, ucred->pid); + return 0; + } + + /* Verify and set child pidref. */ + if (!pid_is_valid(child_pid)) { + log_warning("Received pidref message without valid child PID. Ignoring."); + return 0; + } + + /* Need to handle kernels without PIDFD support. */ + if (child_pidfd >= 0) + r = pidref_set_pidfd_consume(&child_pidref, TAKE_FD(child_pidfd)); + else + r = pidref_set_pid(&child_pidref, child_pid); + if (r < 0) { + if (r == -ESRCH) + log_debug_errno(r, "PidRef child process died before message is processed. Ignoring."); + else + log_warning_errno(r, "Failed to pin pidref child process, ignoring message: %m"); + return 0; + } + + if (child_pidref.pid != child_pid) { + assert(child_pidref.fd >= 0); + log_warning("Got SCM_RIGHTS for child process " PID_FMT " but PID in IOV message is " PID_FMT ". Ignoring.", + child_pidref.pid, child_pid); + return 0; + } + + log_debug("Got pidref event with parent PID " PID_FMT " and child PID " PID_FMT ".", parent_pidref.pid, child_pidref.pid); + + /* Try finding cgroup of parent process. But if parent process exited and we're not using PIDFD, this could return NULL. + * Then fall back to finding cgroup of the child process. 
*/ + Unit *u = manager_get_unit_by_pidref_cgroup(m, &parent_pidref); + if (!u) + u = manager_get_unit_by_pidref_cgroup(m, &child_pidref); + if (!u) { + log_debug("Got pidref for parent process " PID_FMT " and child process " PID_FMT " we are not interested in, ignoring.", parent_pidref.pid, child_pidref.pid); + return 0; + } + + if (!UNIT_VTABLE(u)->notify_pidref) { + log_unit_warning(u, "Received pidref event from unexpected unit type '%s'.", unit_type_to_string(u->type)); + return 0; + } + + UNIT_VTABLE(u)->notify_pidref(u, &parent_pidref, &child_pidref); + + return 0; +} + void manager_ref_console(Manager *m) { assert(m); diff --git a/src/core/manager.h b/src/core/manager.h index c1f7f8c083..e4cada80ff 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -289,6 +289,9 @@ struct Manager { int handoff_timestamp_fds[2]; sd_event_source *handoff_timestamp_event_source; + int pidref_transport_fds[2]; + sd_event_source *pidref_event_source; + RuntimeScope runtime_scope; LookupPaths lookup_paths; @@ -678,12 +681,13 @@ void unit_defaults_done(UnitDefaults *defaults); enum { /* most important … */ - EVENT_PRIORITY_USER_LOOKUP = SD_EVENT_PRIORITY_NORMAL-11, - EVENT_PRIORITY_MOUNT_TABLE = SD_EVENT_PRIORITY_NORMAL-10, - EVENT_PRIORITY_SWAP_TABLE = SD_EVENT_PRIORITY_NORMAL-10, - EVENT_PRIORITY_CGROUP_AGENT = SD_EVENT_PRIORITY_NORMAL-9, /* cgroupv1 */ - EVENT_PRIORITY_CGROUP_INOTIFY = SD_EVENT_PRIORITY_NORMAL-9, /* cgroupv2 */ - EVENT_PRIORITY_CGROUP_OOM = SD_EVENT_PRIORITY_NORMAL-8, + EVENT_PRIORITY_USER_LOOKUP = SD_EVENT_PRIORITY_NORMAL-12, + EVENT_PRIORITY_MOUNT_TABLE = SD_EVENT_PRIORITY_NORMAL-11, + EVENT_PRIORITY_SWAP_TABLE = SD_EVENT_PRIORITY_NORMAL-11, + EVENT_PRIORITY_CGROUP_AGENT = SD_EVENT_PRIORITY_NORMAL-10, /* cgroupv1 */ + EVENT_PRIORITY_CGROUP_INOTIFY = SD_EVENT_PRIORITY_NORMAL-10, /* cgroupv2 */ + EVENT_PRIORITY_CGROUP_OOM = SD_EVENT_PRIORITY_NORMAL-9, + EVENT_PRIORITY_PIDREF = SD_EVENT_PRIORITY_NORMAL-8, EVENT_PRIORITY_HANDOFF_TIMESTAMP = 
SD_EVENT_PRIORITY_NORMAL-7, EVENT_PRIORITY_EXEC_FD = SD_EVENT_PRIORITY_NORMAL-6, EVENT_PRIORITY_NOTIFY = SD_EVENT_PRIORITY_NORMAL-5, diff --git a/src/core/namespace.c b/src/core/namespace.c index 91c905f2fe..57dbbc4fc7 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -2061,7 +2061,8 @@ static bool namespace_parameters_mount_apivfs(const NamespaceParameters *p) { p->protect_control_groups != PROTECT_CONTROL_GROUPS_NO || p->protect_kernel_tunables || p->protect_proc != PROTECT_PROC_DEFAULT || - p->proc_subset != PROC_SUBSET_ALL; + p->proc_subset != PROC_SUBSET_ALL || + p->private_pids != PRIVATE_PIDS_NO; } /* Walk all mount entries and dropping any unused mounts. This affects all @@ -3366,3 +3367,10 @@ static const char* const private_users_table[_PRIVATE_USERS_MAX] = { }; DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(private_users, PrivateUsers, PRIVATE_USERS_SELF); + +static const char* const private_pids_table[_PRIVATE_PIDS_MAX] = { + [PRIVATE_PIDS_NO] = "no", + [PRIVATE_PIDS_YES] = "yes", +}; + +DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(private_pids, PrivatePIDs, PRIVATE_PIDS_YES); diff --git a/src/core/namespace.h b/src/core/namespace.h index 7b6e892cc2..bd48aa31da 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -78,6 +78,13 @@ typedef enum ProtectControlGroups { _PROTECT_CONTROL_GROUPS_INVALID = -EINVAL, } ProtectControlGroups; +typedef enum PrivatePIDs { + PRIVATE_PIDS_NO, + PRIVATE_PIDS_YES, + _PRIVATE_PIDS_MAX, + _PRIVATE_PIDS_INVALID = -EINVAL, +} PrivatePIDs; + struct BindMount { char *source; char *destination; @@ -182,6 +189,7 @@ struct NamespaceParameters { ProtectProc protect_proc; ProcSubset proc_subset; PrivateTmp private_tmp; + PrivatePIDs private_pids; }; int setup_namespace(const NamespaceParameters *p, char **reterr_path); @@ -225,6 +233,9 @@ PrivateUsers private_users_from_string(const char *s) _pure_; const char* protect_control_groups_to_string(ProtectControlGroups i) _const_; ProtectControlGroups 
protect_control_groups_from_string(const char *s) _pure_; +const char* private_pids_to_string(PrivatePIDs i) _const_; +PrivatePIDs private_pids_from_string(const char *s) _pure_; + void bind_mount_free_many(BindMount *b, size_t n); int bind_mount_add(BindMount **b, size_t *n, const BindMount *item); diff --git a/src/core/service.c b/src/core/service.c index 737dc9905a..5b0bb76af2 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -710,6 +710,9 @@ static int service_verify(Service *s) { if (s->type == SERVICE_DBUS && !s->bus_name) return log_unit_error_errno(UNIT(s), SYNTHETIC_ERRNO(ENOEXEC), "Service is of type D-Bus but no D-Bus service name has been specified. Refusing."); + if (s->type == SERVICE_FORKING && exec_needs_pid_namespace(&s->exec_context)) + return log_unit_error_errno(UNIT(s), SYNTHETIC_ERRNO(ENOEXEC), "Service of Type=forking does not support PrivatePIDs=yes. Refusing."); + if (s->usb_function_descriptors && !s->usb_function_strings) log_unit_warning(UNIT(s), "Service has USBFunctionDescriptors= setting, but no USBFunctionStrings=. Ignoring."); @@ -4733,7 +4736,7 @@ static void service_notify_message( monotonic_usec != USEC_INFINITY && monotonic_usec >= s->reload_begin_usec) /* Note, we don't call service_enter_reload_by_notify() here, because we - * don't need reload propagation nor do we want to restart the time-out. */ + * don't need reload propagation nor do we want to restart the timeout. 
*/ service_set_state(s, SERVICE_RELOAD_NOTIFY); if (s->state == SERVICE_RUNNING) @@ -4908,6 +4911,35 @@ static void service_handoff_timestamp( unit_add_to_dbus_queue(u); } +static void service_notify_pidref(Unit *u, PidRef *parent_pidref, PidRef *child_pidref) { + Service *s = ASSERT_PTR(SERVICE(u)); + int r; + + assert(pidref_is_set(parent_pidref)); + assert(pidref_is_set(child_pidref)); + + if (pidref_equal(&s->main_pid, parent_pidref)) { + r = service_set_main_pidref(s, TAKE_PIDREF(*child_pidref), /* start_timestamp = */ NULL); + if (r < 0) + return (void) log_unit_warning_errno(u, r, "Failed to set new main pid: %m"); + + /* Since the child process is PID 1 in a new PID namespace, it must be exclusive to this unit. */ + r = unit_watch_pidref(u, &s->main_pid, /* exclusive= */ true); + if (r < 0) + log_unit_warning_errno(u, r, "Failed to watch new main PID " PID_FMT ": %m", s->main_pid.pid); + } else if (pidref_equal(&s->control_pid, parent_pidref)) { + service_unwatch_control_pid(s); + s->control_pid = TAKE_PIDREF(*child_pidref); + + r = unit_watch_pidref(u, &s->control_pid, /* exclusive= */ true); + if (r < 0) + log_unit_warning_errno(u, r, "Failed to watch new control PID " PID_FMT ": %m", s->control_pid.pid); + } else + return (void) log_unit_debug(u, "Parent process " PID_FMT " does not match main or control processes, ignoring.", parent_pidref->pid); + + unit_add_to_dbus_queue(u); +} + static int service_get_timeout(Unit *u, usec_t *timeout) { Service *s = ASSERT_PTR(SERVICE(u)); uint64_t t; @@ -5638,6 +5670,7 @@ const UnitVTable service_vtable = { .notify_cgroup_oom = service_notify_cgroup_oom_event, .notify_message = service_notify_message, .notify_handoff_timestamp = service_handoff_timestamp, + .notify_pidref = service_notify_pidref, .main_pid = service_main_pid, .control_pid = service_control_pid, diff --git a/src/core/unit.c b/src/core/unit.c index eec08a2fbf..71488a4555 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -4237,6 +4237,9 @@ static 
int unit_verify_contexts(const Unit *u) { exec_needs_mount_namespace(ec, /* params = */ NULL, /* runtime = */ NULL)) return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOEXEC), "WorkingDirectory= may not be below /proc/, /sys/ or /dev/ when using mount namespacing. Refusing."); + if (exec_needs_pid_namespace(ec) && !UNIT_VTABLE(u)->notify_pidref) + return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOEXEC), "PrivatePIDs= setting is only supported for service units. Refusing."); + const KillContext *kc = unit_get_kill_context(u); if (ec->pam_name && kc && !IN_SET(kc->kill_mode, KILL_CONTROL_GROUP, KILL_MIXED)) @@ -5402,6 +5405,8 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) { p->user_lookup_fd = u->manager->user_lookup_fds[1]; p->handoff_timestamp_fd = u->manager->handoff_timestamp_fds[1]; + if (UNIT_VTABLE(u)->notify_pidref) + p->pidref_transport_fd = u->manager->pidref_transport_fds[1]; p->cgroup_id = crt ? crt->cgroup_id : 0; p->invocation_id = u->invocation_id; diff --git a/src/core/unit.h b/src/core/unit.h index 01e1adf961..a8eb366337 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -640,6 +640,9 @@ typedef struct UnitVTable { /* Called whenever we learn a handoff timestamp */ void (*notify_handoff_timestamp)(Unit *u, const struct ucred *ucred, const dual_timestamp *ts); + /* Called whenever we learn about a child process */ + void (*notify_pidref)(Unit *u, PidRef *parent_pidref, PidRef *child_pidref); + /* Called whenever a name this Unit registered for comes or goes away. 
*/ void (*bus_name_owner_change)(Unit *u, const char *new_owner); diff --git a/src/cryptsetup/cryptsetup.c b/src/cryptsetup/cryptsetup.c index 0620b1cffb..a415c3e6d7 100644 --- a/src/cryptsetup/cryptsetup.c +++ b/src/cryptsetup/cryptsetup.c @@ -1294,7 +1294,7 @@ static int run_security_device_monitor( assert(event); assert(monitor); - /* Runs the event loop for the device monitor until either something happens, or the time-out is + /* Runs the event loop for the device monitor until either something happens, or the timeout is * hit. */ for (;;) { diff --git a/src/fundamental/chid-fundamental.c b/src/fundamental/chid-fundamental.c new file mode 100644 index 0000000000..55b04fa2ab --- /dev/null +++ b/src/fundamental/chid-fundamental.c @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +/* + * Based on Nikita Travkin's dtbloader implementation. + * Copyright (c) 2024 Nikita Travkin <nikita@trvn.ru> + * + * https://github.com/TravMurav/dtbloader/blob/main/src/chid.c + */ + +/* + * Based on Linaro dtbloader implementation. + * Copyright (c) 2019, Linaro. All rights reserved. 
+ * + * https://github.com/aarch64-laptops/edk2/blob/dtbloader-app/EmbeddedPkg/Application/ConfigTableLoader/CHID.c + */ + +#if SD_BOOT +# include "efi-string.h" +# include "util.h" +#else +# include <byteswap.h> +# include <string.h> +# include <uchar.h> +# include <utf8.h> +#define strsize16(str) ((char16_strlen(str) + 1) * sizeof(char16_t)) +#endif + +#include "chid-fundamental.h" +#include "macro-fundamental.h" +#include "memory-util-fundamental.h" +#include "sha1-fundamental.h" + +static void get_chid(const char16_t *const smbios_fields[static _CHID_SMBIOS_FIELDS_MAX], uint32_t mask, EFI_GUID *ret_chid) { /* Derives one CHID: SHA-1 over the namespace GUID followed by every SMBIOS field whose bit is set in mask (each selected field other than index 0 is preceded by a UTF-16 '&'); the leading sizeof(EFI_GUID) bytes of the digest are stored in *ret_chid, byte-swapped and stamped with the version/variant bits below. mask must be non-zero. */ + assert(mask != 0); + assert(ret_chid); + const EFI_GUID namespace = { UINT32_C(0x12d8ff70), UINT16_C(0x7f4c), UINT16_C(0x7d4c), {} }; /* Swapped to BE */ + + struct sha1_ctx ctx = {}; + sha1_init_ctx(&ctx); + + sha1_process_bytes(&namespace, sizeof(namespace), &ctx); + + for (unsigned i = 0; i < _CHID_SMBIOS_FIELDS_MAX; i++) + if ((mask >> i) & 1) { + if (i > 0) + sha1_process_bytes(u"&", sizeof(char16_t), &ctx); /* char16_t literal to match the char16_t fields; L"&" is a wchar_t literal, which is wider than 16 bit on platforms with 32-bit wchar_t */ + sha1_process_bytes(smbios_fields[i], strsize16(smbios_fields[i]), &ctx); /* NOTE(review): strsize16() includes the terminating NUL, so the terminator is hashed too; the fwupd HWID scheme appears to hash the joined string without terminators - confirm */ + } + + uint8_t hash[SHA1_DIGEST_SIZE]; + sha1_finish_ctx(&ctx, hash); + + assert_cc(sizeof(hash) >= sizeof(*ret_chid)); + memcpy(ret_chid, hash, sizeof(*ret_chid)); + + /* Convert the resulting CHID back to little-endian: */ + ret_chid->Data1 = bswap_32(ret_chid->Data1); + ret_chid->Data2 = bswap_16(ret_chid->Data2); + ret_chid->Data3 = bswap_16(ret_chid->Data3); + + /* set specific bits according to RFC4122 Section 4.1.3 */ + ret_chid->Data3 = (ret_chid->Data3 & 0x0fff) | (5 << 12); + ret_chid->Data4[0] = (ret_chid->Data4[0] & UINT8_C(0x3f)) | UINT8_C(0x80); +} + +static const uint32_t chid_smbios_table[CHID_TYPES_MAX] = { + [3] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_FAMILY) | + (UINT32_C(1) << CHID_SMBIOS_PRODUCT_NAME) | + (UINT32_C(1) << CHID_SMBIOS_PRODUCT_SKU) | + (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_MANUFACTURER) | + (UINT32_C(1) << 
CHID_SMBIOS_BASEBOARD_PRODUCT), + + [4] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_FAMILY) | + (UINT32_C(1) << CHID_SMBIOS_PRODUCT_NAME) | + (UINT32_C(1) << CHID_SMBIOS_PRODUCT_SKU), + + [5] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_FAMILY) | + (UINT32_C(1) << CHID_SMBIOS_PRODUCT_NAME), + + [6] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_PRODUCT_SKU) | + (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_PRODUCT), + + [7] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_PRODUCT_SKU), + + [8] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_PRODUCT_NAME) | + (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_PRODUCT), + + [9] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_PRODUCT_NAME), + + [10] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_FAMILY) | + (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_PRODUCT), + + [11] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_FAMILY), + + [13] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_MANUFACTURER) | + (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_PRODUCT), +}; + +void chid_calculate(const char16_t *const smbios_fields[static _CHID_SMBIOS_FIELDS_MAX], EFI_GUID ret_chids[static CHID_TYPES_MAX]) { /* Fills all CHID_TYPES_MAX entries of ret_chids: for each CHID type i with a non-zero mask in chid_smbios_table, ret_chids[i] receives the CHID computed from the SMBIOS fields that mask selects; entries whose mask is 0 are zeroed. */ + assert(smbios_fields); + assert(ret_chids); + for (size_t i = 0; i < CHID_TYPES_MAX; i++) /* bounded by the table/output size (CHID_TYPES_MAX), not by the number of SMBIOS fields, so every CHID type is computed and every output slot is written */ + if (chid_smbios_table[i] != 0) + get_chid(smbios_fields, chid_smbios_table[i], &ret_chids[i]); + else + memzero(&ret_chids[i], sizeof(EFI_GUID)); +} diff --git a/src/fundamental/chid-fundamental.h b/src/fundamental/chid-fundamental.h new file mode 100644 index 0000000000..e8c5c1add2 --- /dev/null +++ 
b/src/fundamental/chid-fundamental.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#pragma once + +#include "efi-fundamental.h" +#include "string-util-fundamental.h" + +#define CHID_TYPES_MAX 15 + +typedef enum ChidSmbiosFields { + CHID_SMBIOS_MANUFACTURER, + CHID_SMBIOS_FAMILY, + CHID_SMBIOS_PRODUCT_NAME, + CHID_SMBIOS_PRODUCT_SKU, + CHID_SMBIOS_BASEBOARD_MANUFACTURER, + CHID_SMBIOS_BASEBOARD_PRODUCT, + _CHID_SMBIOS_FIELDS_MAX, +} ChidSmbiosFields; + +/* CHID (also called HWID by fwupd) is described at https://github.com/fwupd/fwupd/blob/main/docs/hwids.md */ +void chid_calculate(const char16_t *const smbios_fields[static _CHID_SMBIOS_FIELDS_MAX], EFI_GUID ret_chids[static CHID_TYPES_MAX]); diff --git a/src/fundamental/meson.build b/src/fundamental/meson.build index b1522a88f8..7b72372e83 100644 --- a/src/fundamental/meson.build +++ b/src/fundamental/meson.build @@ -4,6 +4,7 @@ fundamental_include = include_directories('.') fundamental_sources = files( 'bootspec-fundamental.c', + 'chid-fundamental.c', 'efivars-fundamental.c', 'iovec-util-fundamental.h', 'sha1-fundamental.c', diff --git a/src/fundamental/uki.c b/src/fundamental/uki.c index da5da1cf10..441d466a97 100644 --- a/src/fundamental/uki.c +++ b/src/fundamental/uki.c @@ -21,5 +21,7 @@ const char* const unified_sections[_UNIFIED_SECTION_MAX + 1] = { [UNIFIED_SECTION_PCRSIG] = ".pcrsig", [UNIFIED_SECTION_PCRPKEY] = ".pcrpkey", [UNIFIED_SECTION_PROFILE] = ".profile", + [UNIFIED_SECTION_DTBAUTO] = ".dtbauto", + [UNIFIED_SECTION_HWIDS] = ".hwids", NULL, }; diff --git a/src/fundamental/uki.h b/src/fundamental/uki.h index e7c59100e1..4b6195f9b7 100644 --- a/src/fundamental/uki.h +++ b/src/fundamental/uki.h @@ -18,6 +18,8 @@ typedef enum UnifiedSection { UNIFIED_SECTION_PCRSIG, UNIFIED_SECTION_PCRPKEY, UNIFIED_SECTION_PROFILE, + UNIFIED_SECTION_DTBAUTO, + UNIFIED_SECTION_HWIDS, _UNIFIED_SECTION_MAX, } UnifiedSection; diff --git a/src/libsystemd/sd-event/sd-event.c 
b/src/libsystemd/sd-event/sd-event.c index f19f579b48..7aea7d2581 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -4573,7 +4573,7 @@ static int epoll_wait_usec( /* epoll_pwait2() was added to Linux 5.11 (2021-02-14) and to glibc in 2.35 (2022-02-03). In contrast * to other syscalls we don't bother with our own fallback syscall wrappers on old libcs, since this * is not that obvious to implement given the libc and kernel definitions differ in the last - * argument. Moreover, the only reason to use it is the more accurate time-outs (which is not a + * argument. Moreover, the only reason to use it is the more accurate timeouts (which is not a * biggie), let's hence rely on glibc's definitions, and fallback to epoll_pwait() when that's * missing. */ diff --git a/src/network/networkd-json.c b/src/network/networkd-json.c index 07d52c96c0..fd2b709d9d 100644 --- a/src/network/networkd-json.c +++ b/src/network/networkd-json.c @@ -561,18 +561,16 @@ static int dnr_append_json(Link *link, sd_json_variant **v) { return r; n_dnr = sd_dhcp_lease_get_dnr(link->dhcp_lease, &dnr); - if (n_dnr < 0) - return 0; - - FOREACH_ARRAY(res, dnr, n_dnr) { - r = dnr_append_json_one(link, - res, - NETWORK_CONFIG_SOURCE_DHCP4, - &s, - &array); - if (r < 0) - return r; - } + if (n_dnr > 0) + FOREACH_ARRAY(res, dnr, n_dnr) { + r = dnr_append_json_one(link, + res, + NETWORK_CONFIG_SOURCE_DHCP4, + &s, + &array); + if (r < 0) + return r; + } } if (link->dhcp6_lease && link_get_use_dnr(link, NETWORK_CONFIG_SOURCE_DHCP6)) { @@ -585,18 +583,16 @@ static int dnr_append_json(Link *link, sd_json_variant **v) { return r; n_dnr = sd_dhcp6_lease_get_dnr(link->dhcp6_lease, &dnr); - if (n_dnr < 0) - return 0; - - FOREACH_ARRAY(res, dnr, n_dnr) { - r = dnr_append_json_one(link, - res, - NETWORK_CONFIG_SOURCE_DHCP6, - &s, - &array); - if (r < 0) - return r; - } + if (n_dnr > 0) + FOREACH_ARRAY(res, dnr, n_dnr) { + r = dnr_append_json_one(link, + res, + 
NETWORK_CONFIG_SOURCE_DHCP6, + &s, + &array); + if (r < 0) + return r; + } } if (link_get_use_dnr(link, NETWORK_CONFIG_SOURCE_NDISC)) { diff --git a/src/network/networkd-manager.c b/src/network/networkd-manager.c index f8c0da4b42..47299e3b27 100644 --- a/src/network/networkd-manager.c +++ b/src/network/networkd-manager.c @@ -31,6 +31,7 @@ #include "fs-util.h" #include "initrd-util.h" #include "local-addresses.h" +#include "mount-util.h" #include "netlink-util.h" #include "network-internal.h" #include "networkd-address-label.h" @@ -59,7 +60,6 @@ #include "selinux-util.h" #include "set.h" #include "signal-util.h" -#include "stat-util.h" #include "strv.h" #include "sysctl-util.h" #include "tclass.h" @@ -508,9 +508,11 @@ static int manager_set_keep_configuration(Manager *m) { return 0; } - r = path_is_network_fs("/"); - if (r < 0) - return log_error_errno(r, "Failed to detect if root is network filesystem: %m"); + r = path_is_network_fs_harder("/"); + if (r < 0) { + log_warning_errno(r, "Failed to detect if root is network filesystem, assuming not: %m"); + return 0; + } if (r == 0) { m->keep_configuration = _KEEP_CONFIGURATION_INVALID; return 0; diff --git a/src/network/networkd-ndisc.c b/src/network/networkd-ndisc.c index ee1e09dd69..5ab9c881f2 100644 --- a/src/network/networkd-ndisc.c +++ b/src/network/networkd-ndisc.c @@ -1278,7 +1278,7 @@ static int ndisc_router_process_onlink_prefix(Link *link, sd_ndisc_router *rt) { * * - If the prefix is already present in the host's Prefix List as the result of a previously * received advertisement, reset its invalidation timer to the Valid Lifetime value in the Prefix - * Information option. If the new Lifetime value is zero, time-out the prefix immediately. */ + * Information option. If the new Lifetime value is zero, timeout the prefix immediately. 
*/ if (lifetime_usec == 0) { r = ndisc_remove_route(route, link); if (r < 0) diff --git a/src/network/networkd-state-file.c b/src/network/networkd-state-file.c index 0c9e530128..da917dd897 100644 --- a/src/network/networkd-state-file.c +++ b/src/network/networkd-state-file.c @@ -600,14 +600,16 @@ static void serialize_resolvers( int r; r = sd_dhcp_lease_get_dnr(lease, &resolvers); - if (r < 0) - return (void) log_debug_errno(r, "Failed to get DNR from DHCP lease, ignoring: %m"); + if (r < 0 && r != -ENODATA) + log_warning_errno(r, "Failed to get DNR from DHCP lease, ignoring: %m"); - r = dns_resolvers_to_dot_strv(resolvers, r, &names); - if (r < 0) - return (void) log_warning_errno(r, "Failed to get DoT servers from DHCP DNR, ignoring: %m"); - if (r > 0) - fputstrv(f, names, NULL, space); + if (r > 0) { + r = dns_resolvers_to_dot_strv(resolvers, r, &names); + if (r < 0) + return (void) log_warning_errno(r, "Failed to get DoT servers from DHCP DNR, ignoring: %m"); + if (r > 0) + fputstrv(f, names, NULL, space); + } } if (lease6 && conditional6) { @@ -616,14 +618,16 @@ static void serialize_resolvers( int r; r = sd_dhcp6_lease_get_dnr(lease6, &resolvers); - if (r < 0) - return (void) log_debug_errno(r, "Failed to get DNR from DHCPv6 lease, ignoring: %m"); + if (r < 0 && r != -ENODATA) + log_warning_errno(r, "Failed to get DNR from DHCPv6 lease, ignoring: %m"); - r = dns_resolvers_to_dot_strv(resolvers, r, &names); - if (r < 0) - return (void) log_warning_errno(r, "Failed to get DoT servers from DHCPv6 DNR, ignoring: %m"); - if (r > 0) - fputstrv(f, names, NULL, space); + if (r > 0) { + r = dns_resolvers_to_dot_strv(resolvers, r, &names); + if (r < 0) + return (void) log_warning_errno(r, "Failed to get DoT servers from DHCPv6 DNR, ignoring: %m"); + if (r > 0) + fputstrv(f, names, NULL, space); + } } if (lvalue) diff --git a/src/resolve/resolvectl.c b/src/resolve/resolvectl.c index 0dd2ca5fc7..b34ade1378 100644 --- a/src/resolve/resolvectl.c +++ 
b/src/resolve/resolvectl.c @@ -3010,7 +3010,7 @@ static int verb_monitor(int argc, char *argv[], void *userdata) { r = sd_varlink_set_relative_timeout(vl, USEC_INFINITY); /* We want the monitor to run basically forever */ if (r < 0) - return log_error_errno(r, "Failed to set varlink time-out: %m"); + return log_error_errno(r, "Failed to set varlink timeout: %m"); r = sd_varlink_attach_event(vl, event, SD_EVENT_PRIORITY_NORMAL); if (r < 0) diff --git a/src/resolve/resolved-dns-scope.c b/src/resolve/resolved-dns-scope.c index 734728f905..cd16d2475e 100644 --- a/src/resolve/resolved-dns-scope.c +++ b/src/resolve/resolved-dns-scope.c @@ -1744,7 +1744,7 @@ int dns_type_suitable_for_protocol(uint16_t type, DnsProtocol protocol) { /* Tests whether it makes sense to route queries for the specified DNS RR types to the specified * protocol. For classic DNS pretty much all RR types are suitable, but for LLMNR/mDNS let's * allowlist only a few that make sense. We use this when routing queries so that we can more quickly - * return errors for queries that will almost certainly fail/time-out otherwise. For example, this + * return errors for queries that will almost certainly fail/time out otherwise. For example, this * ensures that SOA, NS, or DS/DNSKEY queries are never routed to mDNS/LLMNR where they simply make * no sense. */ diff --git a/src/resolve/resolved-dns-stream.h b/src/resolve/resolved-dns-stream.h index 912b9bf431..d3de4ebf3d 100644 --- a/src/resolve/resolved-dns-stream.h +++ b/src/resolve/resolved-dns-stream.h @@ -15,7 +15,7 @@ typedef struct DnsStubListenerExtra DnsStubListenerExtra; #include "resolved-dns-packet.h" #include "resolved-dnstls.h" -/* Various timeouts for establishing TCP connections. First the default time-out for that. */ +/* Various timeouts for establishing TCP connections. First the default timeout for that. 
*/ #define DNS_STREAM_DEFAULT_TIMEOUT_USEC (10 * USEC_PER_SEC) /* In the DNS stub, be more friendly for incoming connections, than we are to ourselves for outgoing ones */ diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 90b6f233e2..06bfb90c8f 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -1061,7 +1061,8 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con "LogNamespace", "RootImagePolicy", "MountImagePolicy", - "ExtensionImagePolicy")) + "ExtensionImagePolicy", + "PrivatePIDs")) return bus_append_string(m, field, eq); if (STR_IN_SET(field, "IgnoreSIGPIPE", diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index 3c89a18790..8ef952a035 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -19,6 +19,7 @@ #include "fd-util.h" #include "fileio.h" #include "fs-util.h" +#include "fstab-util.h" #include "glyph-util.h" #include "hashmap.h" #include "initrd-util.h" @@ -1820,3 +1821,70 @@ char* umount_and_unlink_and_free(char *p) { (void) unlink(p); return mfree(p); } + +static int path_get_mount_info( + const char *path, + char **ret_fstype, + char **ret_options) { /* Parses the mount table with libmount, looks up the mount point entry for path and returns newly allocated copies of its filesystem type and mount options. Either output pointer may be NULL to skip that value. Returns 0 on success, -ENOMEM on allocation failure, -EINVAL if no entry is found, or the error from mnt_table_parse_mtab(). */ + + _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL; + _cleanup_free_ char *fstype = NULL, *options = NULL; + struct libmnt_fs *fs; + int r; + + assert(path); + + table = mnt_new_table(); + if (!table) + return -ENOMEM; + + r = mnt_table_parse_mtab(table, /* filename = */ NULL); + if (r < 0) + return r; + + fs = mnt_table_find_mountpoint(table, path, MNT_ITER_FORWARD); + if (!fs) + return -EINVAL; + + if (ret_fstype) { + fstype = strdup(strempty(mnt_fs_get_fstype(fs))); + if (!fstype) + return -ENOMEM; + } + + if (ret_options) { + options = strdup(strempty(mnt_fs_get_options(fs))); + if (!options) + return -ENOMEM; + } + + if (ret_fstype) /* outputs are assigned only after every requested copy succeeded */ + *ret_fstype = TAKE_PTR(fstype); + if (ret_options) + *ret_options = TAKE_PTR(options); + + return 0; +} + +int path_is_network_fs_harder(const char 
*path) { /* Like path_is_network_fs(), but when that does not report a network filesystem the mount table entry for path is consulted as well: returns true if its fstype is a network filesystem or its options contain "_netdev", false otherwise. If the mount info lookup fails, RET_GATHER(ret, r) is returned; a negative result from path_is_network_fs() alone is not returned when the lookup succeeds. */ + _cleanup_free_ char *fstype = NULL, *options = NULL; + int r, ret; + + assert(path); + + ret = path_is_network_fs(path); + if (ret > 0) + return true; + + r = path_get_mount_info(path, &fstype, &options); + if (r < 0) + return RET_GATHER(ret, r); + + if (fstype_is_network(fstype)) + return true; + + if (fstab_test_option(options, "_netdev\0")) + return true; + + return false; +} diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index eb068d5b44..067ed0e4d9 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -180,3 +180,5 @@ unsigned long credentials_fs_mount_flags(bool ro); int mount_credentials_fs(const char *path, size_t size, bool ro); int make_fsmount(int error_log_level, const char *what, const char *type, unsigned long flags, const char *options, int userns_fd); + +int path_is_network_fs_harder(const char *path); diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c index 4f6da39f48..28d171de33 100644 --- a/src/test/test-mount-util.c +++ b/src/test/test-mount-util.c @@ -537,4 +537,11 @@ TEST(bind_mount_submounts) { assert_se(umount_recursive(b, 0) >= 0); } +TEST(path_is_network_fs_harder) { /* NOTE(review): presumably "/" is only required not to fail because the root may legitimately be a network fs, while /dev, /sys and /run must yield zero - confirm against the ASSERT_OK/ASSERT_OK_ZERO definitions */ + ASSERT_OK(path_is_network_fs_harder("/")); + ASSERT_OK_ZERO(path_is_network_fs_harder("/dev")); + ASSERT_OK_ZERO(path_is_network_fs_harder("/sys")); + ASSERT_OK_ZERO(path_is_network_fs_harder("/run")); +} + DEFINE_TEST_MAIN(LOG_DEBUG); |