summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/basic/lock-util.c4
-rw-r--r--src/basic/namespace-util.c8
-rw-r--r--src/basic/process-util.c5
-rw-r--r--src/basic/process-util.h27
-rw-r--r--src/boot/efi/chid.c129
-rw-r--r--src/boot/efi/chid.h23
-rw-r--r--src/boot/efi/devicetree.c123
-rw-r--r--src/boot/efi/devicetree.h24
-rw-r--r--src/boot/efi/meson.build1
-rw-r--r--src/boot/efi/pe.c120
-rw-r--r--src/boot/efi/stub.c32
-rw-r--r--src/boot/efi/util.h1
-rw-r--r--src/boot/measure.c7
-rw-r--r--src/core/dbus-execute.c23
-rw-r--r--src/core/exec-invoke.c183
-rw-r--r--src/core/execute-serialize.c20
-rw-r--r--src/core/execute.c11
-rw-r--r--src/core/execute.h4
-rw-r--r--src/core/load-fragment-gperf.gperf.in1
-rw-r--r--src/core/load-fragment.c1
-rw-r--r--src/core/load-fragment.h1
-rw-r--r--src/core/manager.c195
-rw-r--r--src/core/manager.h16
-rw-r--r--src/core/namespace.c10
-rw-r--r--src/core/namespace.h11
-rw-r--r--src/core/service.c35
-rw-r--r--src/core/unit.c5
-rw-r--r--src/core/unit.h3
-rw-r--r--src/cryptsetup/cryptsetup.c2
-rw-r--r--src/fundamental/chid-fundamental.c120
-rw-r--r--src/fundamental/chid-fundamental.h21
-rw-r--r--src/fundamental/meson.build1
-rw-r--r--src/fundamental/uki.c2
-rw-r--r--src/fundamental/uki.h2
-rw-r--r--src/libsystemd/sd-event/sd-event.c2
-rw-r--r--src/network/networkd-json.c44
-rw-r--r--src/network/networkd-manager.c10
-rw-r--r--src/network/networkd-ndisc.c2
-rw-r--r--src/network/networkd-state-file.c32
-rw-r--r--src/resolve/resolvectl.c2
-rw-r--r--src/resolve/resolved-dns-scope.c2
-rw-r--r--src/resolve/resolved-dns-stream.h2
-rw-r--r--src/shared/bus-unit-util.c3
-rw-r--r--src/shared/mount-util.c68
-rw-r--r--src/shared/mount-util.h2
-rw-r--r--src/test/test-mount-util.c7
46 files changed, 1253 insertions, 94 deletions
diff --git a/src/basic/lock-util.c b/src/basic/lock-util.c
index aef395d78e..db9905cb48 100644
--- a/src/basic/lock-util.c
+++ b/src/basic/lock-util.c
@@ -203,9 +203,9 @@ int lock_generic_with_timeout(int fd, LockType type, int operation, usec_t timeo
assert(fd >= 0);
- /* A version of lock_generic(), but with a time-out. We do this in a child process, since the kernel
+ /* A version of lock_generic(), but with a timeout. We do this in a child process, since the kernel
* APIs natively don't support a timeout. We set a SIGALRM timer that will kill the child after the
- * timeout is hit. Returns -ETIMEDOUT if the time-out is hit, and 0 on success.
+ * timeout is hit. Returns -ETIMEDOUT if the timeout is hit, and 0 on success.
*
* This only works for BSD and UNPOSIX locks, as only those are fd-bound, and hence can be acquired
* from any process that has access to the fd. POSIX locks OTOH are process-bound, and hence if we'd
diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c
index 16053ff2a9..a80ed32791 100644
--- a/src/basic/namespace-util.c
+++ b/src/basic/namespace-util.c
@@ -527,19 +527,19 @@ int is_idmapping_supported(const char *path) {
return r;
userns_fd = userns_acquire(uid_map, gid_map);
- if (ERRNO_IS_NEG_NOT_SUPPORTED(userns_fd))
+ if (ERRNO_IS_NEG_NOT_SUPPORTED(userns_fd) || ERRNO_IS_NEG_PRIVILEGE(userns_fd))
return false;
if (userns_fd < 0)
return log_debug_errno(userns_fd, "ID-mapping supported namespace acquire failed for '%s' : %m", path);
dir_fd = RET_NERRNO(open(path, O_RDONLY | O_CLOEXEC | O_NOFOLLOW));
- if (ERRNO_IS_NEG_NOT_SUPPORTED(dir_fd) || dir_fd == -EINVAL)
+ if (ERRNO_IS_NEG_NOT_SUPPORTED(dir_fd))
return false;
if (dir_fd < 0)
return log_debug_errno(dir_fd, "ID-mapping supported open failed for '%s' : %m", path);
mount_fd = RET_NERRNO(open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC));
- if (ERRNO_IS_NEG_NOT_SUPPORTED(mount_fd) || mount_fd == -EINVAL)
+ if (ERRNO_IS_NEG_NOT_SUPPORTED(mount_fd) || ERRNO_IS_NEG_PRIVILEGE(mount_fd) || mount_fd == -EINVAL)
return false;
if (mount_fd < 0)
return log_debug_errno(mount_fd, "ID-mapping supported open_tree failed for '%s' : %m", path);
@@ -549,7 +549,7 @@ int is_idmapping_supported(const char *path) {
.attr_set = MOUNT_ATTR_IDMAP | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RDONLY | MOUNT_ATTR_NODEV,
.userns_fd = userns_fd,
}, sizeof(struct mount_attr)));
- if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || r == -EINVAL || r == -EPERM)
+ if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r) || r == -EINVAL)
return false;
if (r < 0)
return log_debug_errno(r, "ID-mapping supported setattr failed for '%s' : %m", path);
diff --git a/src/basic/process-util.c b/src/basic/process-util.c
index a85a1b35f0..75bc65652e 100644
--- a/src/basic/process-util.c
+++ b/src/basic/process-util.c
@@ -1521,11 +1521,12 @@ int safe_fork_full(
}
}
- if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS)) != 0)
+ if ((flags & (FORK_NEW_MOUNTNS|FORK_NEW_USERNS|FORK_NEW_NETNS|FORK_NEW_PIDNS)) != 0)
pid = raw_clone(SIGCHLD|
(FLAGS_SET(flags, FORK_NEW_MOUNTNS) ? CLONE_NEWNS : 0) |
(FLAGS_SET(flags, FORK_NEW_USERNS) ? CLONE_NEWUSER : 0) |
- (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0));
+ (FLAGS_SET(flags, FORK_NEW_NETNS) ? CLONE_NEWNET : 0) |
+ (FLAGS_SET(flags, FORK_NEW_PIDNS) ? CLONE_NEWPID : 0));
else
pid = fork();
if (pid < 0)
diff --git a/src/basic/process-util.h b/src/basic/process-util.h
index 05b7a69fc6..cb6d47a5bb 100644
--- a/src/basic/process-util.h
+++ b/src/basic/process-util.h
@@ -166,7 +166,7 @@ int must_be_root(void);
pid_t clone_with_nested_stack(int (*fn)(void *), int flags, void *userdata);
-/* 💣 Note that FORK_NEW_USERNS, FORK_NEW_MOUNTNS, or FORK_NEW_NETNS should not be called in threaded
+/* 💣 Note that FORK_NEW_USERNS, FORK_NEW_MOUNTNS, FORK_NEW_NETNS or FORK_NEW_PIDNS should not be called in threaded
* programs, because they cause us to use raw_clone() which does not synchronize the glibc malloc() locks,
* and thus will cause deadlocks if the parent uses threads and the child does memory allocations. Hence: if
* the parent is threaded these flags may not be used. These flags cannot be used if the parent uses threads
@@ -181,18 +181,19 @@ typedef enum ForkFlags {
FORK_REOPEN_LOG = 1 << 6, /* Reopen log connection */
FORK_LOG = 1 << 7, /* Log above LOG_DEBUG log level about failures */
FORK_WAIT = 1 << 8, /* Wait until child exited */
- FORK_NEW_MOUNTNS = 1 << 9, /* Run child in its own mount namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
- FORK_MOUNTNS_SLAVE = 1 << 10, /* Make child's mount namespace MS_SLAVE */
- FORK_PRIVATE_TMP = 1 << 11, /* Mount new /tmp/ in the child (combine with FORK_NEW_MOUNTNS!) */
- FORK_RLIMIT_NOFILE_SAFE = 1 << 12, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */
- FORK_STDOUT_TO_STDERR = 1 << 13, /* Make stdout a copy of stderr */
- FORK_FLUSH_STDIO = 1 << 14, /* fflush() stdout (and stderr) before forking */
- FORK_NEW_USERNS = 1 << 15, /* Run child in its own user namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
- FORK_CLOEXEC_OFF = 1 << 16, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */
- FORK_KEEP_NOTIFY_SOCKET = 1 << 17, /* Unless this specified, $NOTIFY_SOCKET will be unset. */
- FORK_DETACH = 1 << 18, /* Double fork if needed to ensure PID1/subreaper is parent */
- FORK_NEW_NETNS = 1 << 19, /* Run child in its own network namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
- FORK_PACK_FDS = 1 << 20, /* Rearrange the passed FDs to be FD 3,4,5,etc. Updates the array in place (combine with FORK_CLOSE_ALL_FDS!) */
+ FORK_MOUNTNS_SLAVE = 1 << 9, /* Make child's mount namespace MS_SLAVE */
+ FORK_PRIVATE_TMP = 1 << 10, /* Mount new /tmp/ in the child (combine with FORK_NEW_MOUNTNS!) */
+ FORK_RLIMIT_NOFILE_SAFE = 1 << 11, /* Set RLIMIT_NOFILE soft limit to 1K for select() compat */
+ FORK_STDOUT_TO_STDERR = 1 << 12, /* Make stdout a copy of stderr */
+ FORK_FLUSH_STDIO = 1 << 13, /* fflush() stdout (and stderr) before forking */
+ FORK_CLOEXEC_OFF = 1 << 14, /* In the child: turn off O_CLOEXEC on all fds in except_fds[] */
+ FORK_KEEP_NOTIFY_SOCKET = 1 << 15, /* Unless this specified, $NOTIFY_SOCKET will be unset. */
+ FORK_DETACH = 1 << 16, /* Double fork if needed to ensure PID1/subreaper is parent */
+ FORK_PACK_FDS = 1 << 17, /* Rearrange the passed FDs to be FD 3,4,5,etc. Updates the array in place (combine with FORK_CLOSE_ALL_FDS!) */
+ FORK_NEW_MOUNTNS = 1 << 18, /* Run child in its own mount namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
+ FORK_NEW_USERNS = 1 << 19, /* Run child in its own user namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
+ FORK_NEW_NETNS = 1 << 20, /* Run child in its own network namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
+ FORK_NEW_PIDNS = 1 << 21, /* Run child in its own PID namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */
} ForkFlags;
int safe_fork_full(
diff --git a/src/boot/efi/chid.c b/src/boot/efi/chid.c
new file mode 100644
index 0000000000..50d840aea0
--- /dev/null
+++ b/src/boot/efi/chid.c
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+/*
+ * Based on Nikita Travkin's dtbloader implementation.
+ * Copyright (c) 2024 Nikita Travkin <nikita@trvn.ru>
+ *
+ * https://github.com/TravMurav/dtbloader/blob/main/src/chid.c
+ */
+
+/*
+ * Based on Linaro dtbloader implementation.
+ * Copyright (c) 2019, Linaro. All rights reserved.
+ *
+ * https://github.com/aarch64-laptops/edk2/blob/dtbloader-app/EmbeddedPkg/Application/ConfigTableLoader/CHID.c
+ */
+
+#include "chid.h"
+#include "chid-fundamental.h"
+#include "efi.h"
+#include "sha1-fundamental.h"
+#include "smbios.h"
+#include "util.h"
+
+/* Converts an ASCII SMBIOS string into a newly allocated, stripped char16_t string. The caller owns
+ * the result and is expected to free it. For a NULL input a zero-initialized single-character buffer
+ * is returned. */
+static char16_t *smbios_to_hashable_string(const char *str) {
+        if (!str)
+                return xnew0(char16_t, 1);
+
+        /* Same normalization as fwupd/libfwupdplugin/fu-hwids-smbios.c: first drop leading spaces,
+         * then leading zeroes, and finally trailing spaces. */
+        const char *start = str;
+
+        for (; *start == ' '; start++)
+                ;
+
+        for (; *start == '0'; start++)
+                ;
+
+        size_t n = strlen8(start);
+
+        for (; n > 0 && start[n - 1] == ' '; n--)
+                ;
+
+        return xstrn8_to_16(start, n);
+}
+
+/* This has to be in a struct due to _cleanup_ in populate_board_chids */
+/* Holds one string per CHID_SMBIOS_* field index. The slots are filled by smbios_info_populate() with
+ * strings allocated by smbios_to_hashable_string(). */
+typedef struct SmbiosInfo {
+ const char16_t *smbios_fields[_CHID_SMBIOS_FIELDS_MAX];
+} SmbiosInfo;
+
+/* Fills ret_info with hashable copies of the SMBIOS strings. The raw SMBIOS data is queried only on
+ * the first call and kept in static storage for subsequent calls; the converted strings are allocated
+ * anew on every call. */
+static void smbios_info_populate(SmbiosInfo *ret_info) {
+        static RawSmbiosInfo cached = {};
+        static bool cached_valid = false;
+
+        if (!cached_valid) {
+                smbios_raw_info_populate(&cached);
+                cached_valid = true;
+        }
+
+        const char16_t **fields = ret_info->smbios_fields;
+
+        fields[CHID_SMBIOS_MANUFACTURER] = smbios_to_hashable_string(cached.manufacturer);
+        fields[CHID_SMBIOS_PRODUCT_NAME] = smbios_to_hashable_string(cached.product_name);
+        fields[CHID_SMBIOS_PRODUCT_SKU] = smbios_to_hashable_string(cached.product_sku);
+        fields[CHID_SMBIOS_FAMILY] = smbios_to_hashable_string(cached.family);
+        fields[CHID_SMBIOS_BASEBOARD_PRODUCT] = smbios_to_hashable_string(cached.baseboard_product);
+        fields[CHID_SMBIOS_BASEBOARD_MANUFACTURER] = smbios_to_hashable_string(cached.baseboard_manufacturer);
+}
+
+/* Releases the strings referenced by info->smbios_fields. Used as the _cleanup_ handler for the
+ * SmbiosInfo instance in populate_board_chids(). */
+static void smbios_info_done(SmbiosInfo *info) {
+        /* FOREACH_ELEMENT() iterates with a pointer to each array slot, not with the slot's value.
+         * Hence free the string the slot refers to, never the slot itself (which is part of *info).
+         * The cast only drops the const qualifier of the field type. */
+        FOREACH_ELEMENT(i, info->smbios_fields) {
+                free((char16_t *) *i);
+                *i = NULL;
+        }
+}
+
+/* Collects the SMBIOS fields of the running machine and hands them to chid_calculate() together with
+ * ret_chids (chid_calculate() is declared elsewhere; presumably it fills ret_chids — confirm there).
+ * Returns EFI_INVALID_PARAMETER if ret_chids is NULL, EFI_SUCCESS otherwise. */
+static EFI_STATUS populate_board_chids(EFI_GUID ret_chids[static CHID_TYPES_MAX]) {
+ /* Declared first so that smbios_info_done() runs on every return path below */
+ _cleanup_(smbios_info_done) SmbiosInfo info = {};
+
+ if (!ret_chids)
+ return EFI_INVALID_PARAMETER;
+
+ smbios_info_populate(&info);
+ chid_calculate(info.smbios_fields, ret_chids);
+
+ return EFI_SUCCESS;
+}
+
+/* Searches the array of Device entries at hwid_buffer (hwid_length bytes) for an entry whose CHID
+ * equals one of the CHIDs computed for this machine. CHID types are tried in the order given by the
+ * priority table. On a match a pointer into hwid_buffer is stored in *ret_device.
+ *
+ * Returns EFI_SUCCESS on a match, EFI_NOT_FOUND if the table is empty or nothing matches,
+ * EFI_UNSUPPORTED if an entry has an unexpected struct_size, and EFI_INVALID_PARAMETER if the buffer
+ * is misaligned. */
+EFI_STATUS chid_match(const void *hwid_buffer, size_t hwid_length, const Device **ret_device) {
+        /* From most to least specific. */
+        static const size_t priority[] = { 3, 6, 8, 10, 4, 5, 7, 9, 11 };
+
+        if ((uintptr_t) hwid_buffer % alignof(Device) != 0)
+                return EFI_INVALID_PARAMETER;
+
+        const Device *table = ASSERT_PTR(hwid_buffer);
+        EFI_GUID chids[CHID_TYPES_MAX] = {};
+
+        EFI_STATUS err = populate_board_chids(chids);
+        if (EFI_STATUS_IS_ERROR(err))
+                return log_error_status(err, "Failed to populate board CHIDs: %m");
+
+        /* Count the entries and validate them; a struct_size of 0 marks the end of the list */
+        size_t count = 0;
+        while ((count + 1) * sizeof(*table) < hwid_length) {
+                uint32_t entry_size = table[count].struct_size;
+
+                if (entry_size == 0)
+                        break;
+                if (entry_size != sizeof(*table))
+                        return EFI_UNSUPPORTED;
+
+                count++;
+        }
+
+        if (count == 0)
+                return EFI_NOT_FOUND;
+
+        FOREACH_ELEMENT(type, priority)
+                FOREACH_ARRAY(entry, table, count) {
+                        /* Can't take a pointer to a packed struct member, so copy to a local variable */
+                        EFI_GUID entry_chid = entry->chid;
+
+                        if (!efi_guid_equal(&chids[*type], &entry_chid))
+                                continue;
+
+                        *ret_device = entry;
+                        return EFI_SUCCESS;
+                }
+
+        return EFI_NOT_FOUND;
+}
diff --git a/src/boot/efi/chid.h b/src/boot/efi/chid.h
new file mode 100644
index 0000000000..ea6e2d348f
--- /dev/null
+++ b/src/boot/efi/chid.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+#pragma once
+
+#include "efi.h"
+
+#include "chid-fundamental.h"
+
+/* One entry of the device table matched by chid_match(). The two offsets are added to the "base"
+ * pointer passed to device_get_name()/device_get_compatible() to locate the strings. */
+typedef struct Device {
+ uint32_t struct_size; /* = sizeof(struct Device), or 0 for EOL */
+ uint32_t name_offset; /* nul-terminated string or 0 if not present */
+ uint32_t compatible_offset; /* nul-terminated string or 0 if not present */
+ EFI_GUID chid; /* compared against the machine's CHIDs in chid_match() */
+} _packed_ Device;
+
+/* Returns the name string of the device entry, located name_offset bytes after base, or NULL if the
+ * entry carries no name (offset 0). */
+static inline const char* device_get_name(const void *base, const Device *device) {
+        if (device->name_offset == 0)
+                return NULL;
+
+        return (const char *) ((const uint8_t *) base + device->name_offset);
+}
+
+/* Returns the compatible string of the device entry, located compatible_offset bytes after base, or
+ * NULL if the entry carries none (offset 0). */
+static inline const char* device_get_compatible(const void *base, const Device *device) {
+        if (device->compatible_offset == 0)
+                return NULL;
+
+        return (const char *) ((const uint8_t *) base + device->compatible_offset);
+}
+
+EFI_STATUS chid_match(const void *chids_buffer, size_t chids_length, const Device **ret_device);
diff --git a/src/boot/efi/devicetree.c b/src/boot/efi/devicetree.c
index 61a43cd77d..f3563f296f 100644
--- a/src/boot/efi/devicetree.c
+++ b/src/boot/efi/devicetree.c
@@ -106,6 +106,129 @@ EFI_STATUS devicetree_install(struct devicetree_state *state, EFI_FILE *root_dir
MAKE_GUID_PTR(EFI_DTB_TABLE), PHYSICAL_ADDRESS_TO_POINTER(state->addr));
}
+/* Returns the value of the first "compatible" property that appears before any named (i.e. non-root)
+ * node in the flattened devicetree at dtb. The returned pointer refers into dtb; its last byte is
+ * verified to be NUL. Returns NULL if the blob is misaligned, has a bad magic, has inconsistent block
+ * offsets/sizes, or if no such property is found.
+ *
+ * No buffer length is passed in: all bounds are derived from the sizes recorded in the header. */
+static const char* devicetree_get_compatible(const void *dtb) {
+        if ((uintptr_t) dtb % alignof(FdtHeader) != 0)
+                return NULL;
+
+        const FdtHeader *dt_header = ASSERT_PTR(dtb);
+
+        if (be32toh(dt_header->magic) != UINT32_C(0xd00dfeed))
+                return NULL;
+
+        uint32_t dt_size = be32toh(dt_header->total_size);
+        uint32_t struct_off = be32toh(dt_header->off_dt_struct);
+        uint32_t struct_size = be32toh(dt_header->size_dt_struct);
+        uint32_t strings_off = be32toh(dt_header->off_dt_strings);
+        uint32_t strings_size = be32toh(dt_header->size_dt_strings);
+        uint32_t end;
+
+        /* Refuse blobs whose end address would wrap around */
+        if (PTR_TO_SIZE(dtb) > SIZE_MAX - dt_size)
+                return NULL;
+
+        /* The strings block must lie within the blob */
+        if (!ADD_SAFE(&end, strings_off, strings_size) || end > dt_size)
+                return NULL;
+        const char *strings_block = (const char *) ((const uint8_t *) dt_header + strings_off);
+
+        /* The structure block must be word aligned, word sized and end before the strings block */
+        if (struct_off % sizeof(uint32_t) != 0)
+                return NULL;
+        if (struct_size % sizeof(uint32_t) != 0 ||
+            !ADD_SAFE(&end, struct_off, struct_size) ||
+            end > strings_off)
+                return NULL;
+        const uint32_t *cursor = (const uint32_t *) ((const uint8_t *) dt_header + struct_off);
+
+        size_t size_words = struct_size / sizeof(uint32_t);
+        size_t len, name_off, len_words, s;
+
+        /* Walk the structure block token by token. i counts 32-bit words, hence it is bounded by
+         * size_words (and not by "end", which is a byte offset from the start of the blob). */
+        for (size_t i = 0; i < size_words; i++) {
+                switch (be32toh(cursor[i])) {
+                case FDT_BEGIN_NODE:
+                        /* The token is followed by the node name. Only a node with an empty name (a
+                         * single all-zero word) is accepted; make sure that word is inside the block
+                         * before reading it. */
+                        if (i + 1 >= size_words || cursor[++i] != 0)
+                                return NULL;
+                        break;
+                case FDT_NOP:
+                        break;
+                case FDT_PROP:
+                        /* At least 3 words must follow the token: len, name_off, and the first word
+                         * of the value (a nul-terminated string always has non-zero length) */
+                        if (i + 3 >= size_words)
+                                return NULL;
+                        len = be32toh(cursor[++i]);
+                        name_off = be32toh(cursor[++i]);
+                        len_words = DIV_ROUND_UP(len, sizeof(uint32_t));
+
+                        if (ADD_SAFE(&s, name_off, STRLEN("compatible")) &&
+                            s < strings_size && streq8(strings_block + name_off, "compatible")) {
+                                const char *c = (const char *) &cursor[++i];
+                                if (len == 0 || i + len_words > size_words || c[len - 1] != '\0')
+                                        c = NULL;
+
+                                return c;
+                        }
+
+                        /* Skip the value; the loop increment then moves on to the next token */
+                        i += len_words;
+                        break;
+                default:
+                        /* Any other token (including FDT_END_NODE and FDT_END) ends the search */
+                        return NULL;
+                }
+        }
+
+        return NULL;
+}
+
+/* Reports whether the EFI configuration table contains an EFI_DTB_TABLE entry. */
+bool firmware_devicetree_exists(void) {
+        return find_configuration_table(MAKE_GUID_PTR(EFI_DTB_TABLE)) != NULL;
+}
+
+/* Checks whether the Devicetree provided by the firmware and the Devicetree provided by the UKI carry
+ * the same first entry in their respective "compatible" properties (the first entry usually names the
+ * actual device model). For example, for the FW/UKI pair
+ *
+ * compatible = "string1", "string2";
+ * compatible = "string1", "string3";
+ *
+ * a match is reported, whereas for
+ *
+ * compatible = "string1", "string3";
+ * compatible = "string2", "string1";
+ *
+ * a mismatch is reported.
+ *
+ * The remaining entries might refer to the SoC and therefore can't be used for matching.
+ *
+ * Returns EFI_UNSUPPORTED if the firmware provides no Devicetree or no "compatible" string can be
+ * extracted from it; otherwise the result of devicetree_match_by_compatible().
+ */
+EFI_STATUS devicetree_match(const void *uki_dtb, size_t uki_dtb_length) {
+        const void *fw_dtb = find_configuration_table(MAKE_GUID_PTR(EFI_DTB_TABLE));
+        const char *fw_compat = fw_dtb ? devicetree_get_compatible(fw_dtb) : NULL;
+
+        /* Without a usable firmware Devicetree there is nothing to compare against */
+        if (!fw_compat)
+                return EFI_UNSUPPORTED;
+
+        return devicetree_match_by_compatible(uki_dtb, uki_dtb_length, fw_compat);
+}
+
+/* Compares the first "compatible" string of the Devicetree blob at uki_dtb with compat.
+ *
+ * Returns EFI_SUCCESS if they are equal, EFI_NOT_FOUND if they differ, and EFI_INVALID_PARAMETER if the
+ * blob is misaligned, shorter than its header or its recorded total size, if compat is NULL, or if no
+ * "compatible" string can be extracted from the blob. */
+EFI_STATUS devicetree_match_by_compatible(const void *uki_dtb, size_t uki_dtb_length, const char *compat) {
+        if ((uintptr_t) uki_dtb % alignof(FdtHeader) != 0)
+                return EFI_INVALID_PARAMETER;
+
+        const FdtHeader *header = ASSERT_PTR(uki_dtb);
+
+        /* The header size is checked first, so total_size is only read if the header fits */
+        if (uki_dtb_length < sizeof(FdtHeader) ||
+            uki_dtb_length < be32toh(header->total_size) ||
+            !compat)
+                return EFI_INVALID_PARAMETER;
+
+        const char *uki_compat = devicetree_get_compatible(uki_dtb);
+        if (!uki_compat)
+                return EFI_INVALID_PARAMETER;
+
+        /* Only matches the first compatible string from each DT */
+        if (streq8(uki_compat, compat))
+                return EFI_SUCCESS;
+
+        return EFI_NOT_FOUND;
+}
+
EFI_STATUS devicetree_install_from_memory(
struct devicetree_state *state, const void *dtb_buffer, size_t dtb_length) {
diff --git a/src/boot/efi/devicetree.h b/src/boot/efi/devicetree.h
index 33eaa2256c..5f6720f655 100644
--- a/src/boot/efi/devicetree.h
+++ b/src/boot/efi/devicetree.h
@@ -9,6 +9,30 @@ struct devicetree_state {
void *orig;
};
+/* Token values found in the structure block of a flattened Devicetree, after conversion from
+ * big-endian (see the switch in devicetree_get_compatible()). */
+enum {
+ FDT_BEGIN_NODE = 1,
+ FDT_END_NODE = 2,
+ FDT_PROP = 3,
+ FDT_NOP = 4,
+ FDT_END = 9,
+};
+
+/* Header at the start of a flattened Devicetree blob. The fields are stored big-endian and are read
+ * with be32toh() by the users in devicetree.c. */
+typedef struct FdtHeader {
+ uint32_t magic; /* compared against 0xd00dfeed */
+ uint32_t total_size;
+ uint32_t off_dt_struct; /* offset of the structure block from the start of the blob */
+ uint32_t off_dt_strings; /* offset of the strings block from the start of the blob */
+ uint32_t off_mem_rsv_map;
+ uint32_t version;
+ uint32_t last_comp_version;
+ uint32_t boot_cpuid_phys;
+ uint32_t size_dt_strings;
+ uint32_t size_dt_struct;
+} FdtHeader;
+
+bool firmware_devicetree_exists(void);
+EFI_STATUS devicetree_match(const void *uki_dtb, size_t uki_dtb_length);
+EFI_STATUS devicetree_match_by_compatible(const void *uki_dtb, size_t uki_dtb_length, const char *compat);
EFI_STATUS devicetree_install(struct devicetree_state *state, EFI_FILE *root_dir, char16_t *name);
EFI_STATUS devicetree_install_from_memory(
struct devicetree_state *state, const void *dtb_buffer, size_t dtb_length);
diff --git a/src/boot/efi/meson.build b/src/boot/efi/meson.build
index 0109793b7a..29c5455dbd 100644
--- a/src/boot/efi/meson.build
+++ b/src/boot/efi/meson.build
@@ -254,6 +254,7 @@ endif
############################################################
libefi_sources = files(
+ 'chid.c',
'console.c',
'device-path-util.c',
'devicetree.c',
diff --git a/src/boot/efi/pe.c b/src/boot/efi/pe.c
index 26dfcd4291..00739a7c74 100644
--- a/src/boot/efi/pe.c
+++ b/src/boot/efi/pe.c
@@ -1,5 +1,7 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#include "chid.h"
+#include "devicetree.h"
#include "pe.h"
#include "util.h"
@@ -162,11 +164,46 @@ static bool pe_section_name_equal(const char *a, const char *b) {
return true;
}
-static void pe_locate_sections(
+/* Decides whether the Devicetree blob at dtb (dtb_size bytes, taken from PE section number section_nb)
+ * should be used. The blob is first matched against the firmware-provided Devicetree. Only if that is
+ * not possible (EFI_UNSUPPORTED) is it matched against the compatible string of the given device entry,
+ * whose strings are located relative to base. */
+static bool pe_use_this_dtb(
+                const void *dtb,
+                size_t dtb_size,
+                const void *base,
+                const Device *device,
+                size_t section_nb) {
+
+        assert(dtb);
+
+        EFI_STATUS err = devicetree_match(dtb, dtb_size);
+        if (err != EFI_UNSUPPORTED)
+                return err == EFI_SUCCESS;
+
+        /* There's nothing to match against if firmware does not provide DTB and there is no .hwids section */
+        const char *compatible = device && base ? device_get_compatible(base, device) : NULL;
+        if (!compatible)
+                return false;
+
+        err = devicetree_match_by_compatible(dtb, dtb_size, compatible);
+        if (err == EFI_INVALID_PARAMETER)
+                log_error_status(err, "Found bad DT blob in PE section %zu", section_nb);
+
+        return err == EFI_SUCCESS;
+}
+
+static void pe_locate_sections_internal(
const PeSectionHeader section_table[],
size_t n_section_table,
const char *const section_names[],
size_t validate_base,
+ const void *device_table,
+ const Device *device,
PeSectionVector sections[]) {
assert(section_table || n_section_table == 0);
@@ -206,6 +243,20 @@ static void pe_locate_sections(
continue;
}
+ /* Special handling for .dtbauto sections compared to plain .dtb */
+ if (pe_section_name_equal(section_names[i], ".dtbauto")) {
+ /* .dtbauto sections require validate_base for matching */
+ if (!validate_base)
+ break;
+ if (!pe_use_this_dtb(
+ (const uint8_t *) SIZE_TO_PTR(validate_base) + j->VirtualAddress,
+ j->VirtualSize,
+ device_table,
+ device,
+ i))
+ continue;
+ }
+
/* At this time, the sizes and offsets have been validated. Store them away */
sections[i] = (PeSectionVector) {
.memory_size = j->VirtualSize,
@@ -224,6 +275,73 @@ static void pe_locate_sections(
}
}
+/* Returns true if the NULL-terminated list of section names contains ".dtbauto". */
+static bool looking_for_dbauto(const char *const section_names[]) {
+        assert(section_names);
+
+        for (const char *const *name = section_names; *name; name++)
+                if (pe_section_name_equal(*name, ".dtbauto"))
+                        return true;
+
+        return false;
+}
+
+/* Locates the requested sections, like pe_locate_sections_internal(). If ".dtbauto" is among the
+ * requested names and the firmware provides no Devicetree, the ".hwids" section is located first and
+ * the matching device entry (if any) is passed along for selecting the .dtbauto section. */
+static void pe_locate_sections(
+                const PeSectionHeader section_table[],
+                size_t n_section_table,
+                const char *const section_names[],
+                size_t validate_base,
+                PeSectionVector sections[]) {
+
+        /* Both stay NULL unless .dtbauto is requested and a device entry was matched */
+        const void *hwids = NULL;
+        const Device *device = NULL;
+
+        if (looking_for_dbauto(section_names)) {
+                /* It doesn't make sense not to provide validate_base here */
+                assert(validate_base != 0);
+
+                if (!firmware_devicetree_exists()) {
+                        /* Find HWIDs table and search for the current device */
+                        PeSectionVector hwids_section = {};
+
+                        pe_locate_sections_internal(
+                                        section_table,
+                                        n_section_table,
+                                        (const char *const[]) { ".hwids", NULL },
+                                        validate_base,
+                                        /* device_table */ NULL,
+                                        /* device */ NULL,
+                                        &hwids_section);
+
+                        if (hwids_section.memory_offset == 0)
+                                log_info("HWIDs section is missing, no DT blob will be selected");
+                        else {
+                                hwids = (const uint8_t *) SIZE_TO_PTR(validate_base) + hwids_section.memory_offset;
+
+                                EFI_STATUS err = chid_match(hwids, hwids_section.memory_size, &device);
+                                if (err != EFI_SUCCESS) {
+                                        log_error_status(err, "HWID matching failed, no DT blob will be selected: %m");
+                                        hwids = NULL;
+                                }
+                        }
+                }
+        }
+
+        pe_locate_sections_internal(
+                        section_table,
+                        n_section_table,
+                        section_names,
+                        validate_base,
+                        hwids,
+                        device,
+                        sections);
+}
+
static uint32_t get_compatibility_entry_address(const DosFileHeader *dos, const PeFileHeader *pe) {
/* The kernel may provide alternative PE entry points for different PE architectures. This allows
* booting a 64-bit kernel on 32-bit EFI that is otherwise running on a 64-bit CPU. The locations of any
diff --git a/src/boot/efi/stub.c b/src/boot/efi/stub.c
index 9664c95d57..7261e942d3 100644
--- a/src/boot/efi/stub.c
+++ b/src/boot/efi/stub.c
@@ -614,12 +614,13 @@ static EFI_STATUS load_addons(
if (err != EFI_SUCCESS ||
(!PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_CMDLINE) &&
!PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTB) &&
+ !PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTBAUTO) &&
!PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_INITRD) &&
!PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_UCODE))) {
if (err == EFI_SUCCESS)
err = EFI_NOT_FOUND;
log_error_status(err,
- "Unable to locate embedded .cmdline/.dtb/.initrd/.ucode sections in %ls, ignoring: %m",
+ "Unable to locate embedded .cmdline/.dtb/.dtbauto/.initrd/.ucode sections in %ls, ignoring: %m",
items[i]);
continue;
}
@@ -647,7 +648,21 @@ static EFI_STATUS load_addons(
*cmdline = xasprintf("%ls%ls%ls", strempty(tmp), isempty(tmp) ? u"" : u" ", extra16);
}
- if (devicetree_addons && PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTB)) {
+ // FIXME: do we want to do something else here?
+ // This should behave exactly as .dtb/.dtbauto in the main UKI
+ if (devicetree_addons && PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTBAUTO)) {
+ *devicetree_addons = xrealloc(*devicetree_addons,
+ *n_devicetree_addons * sizeof(NamedAddon),
+ (*n_devicetree_addons + 1) * sizeof(NamedAddon));
+
+ (*devicetree_addons)[(*n_devicetree_addons)++] = (NamedAddon) {
+ .blob = {
+ .iov_base = xmemdup((const uint8_t*) loaded_addon->ImageBase + sections[UNIFIED_SECTION_DTBAUTO].memory_offset, sections[UNIFIED_SECTION_DTBAUTO].memory_size),
+ .iov_len = sections[UNIFIED_SECTION_DTBAUTO].memory_size,
+ },
+ .filename = xstrdup16(items[i]),
+ };
+ } else if (devicetree_addons && PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTB)) {
*devicetree_addons = xrealloc(*devicetree_addons,
*n_devicetree_addons * sizeof(NamedAddon),
(*n_devicetree_addons + 1) * sizeof(NamedAddon));
@@ -968,13 +983,20 @@ static void install_embedded_devicetree(
assert(sections);
assert(dt_state);
- if (!PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTB))
+ UnifiedSection section = _UNIFIED_SECTION_MAX;
+
+ /* Use automatically selected DT if available, otherwise go for "normal" one */
+ if (PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTBAUTO))
+ section = UNIFIED_SECTION_DTBAUTO;
+ else if (PE_SECTION_VECTOR_IS_SET(sections + UNIFIED_SECTION_DTB))
+ section = UNIFIED_SECTION_DTB;
+ else
return;
err = devicetree_install_from_memory(
dt_state,
- (const uint8_t*) loaded_image->ImageBase + sections[UNIFIED_SECTION_DTB].memory_offset,
- sections[UNIFIED_SECTION_DTB].memory_size);
+ (const uint8_t*) loaded_image->ImageBase + sections[section].memory_offset,
+ sections[section].memory_size);
if (err != EFI_SUCCESS)
log_error_status(err, "Error loading embedded devicetree, ignoring: %m");
}
diff --git a/src/boot/efi/util.h b/src/boot/efi/util.h
index 054d49ef02..c7634576cf 100644
--- a/src/boot/efi/util.h
+++ b/src/boot/efi/util.h
@@ -69,6 +69,7 @@ static inline void* xmemdup(const void *p, size_t l) {
}
#define xnew(type, n) ((type *) xmalloc_multiply((n), sizeof(type)))
+#define xnew0(type, n) ((type *) xcalloc_multiply((n), sizeof(type)))
bool free_and_xstrdup16(char16_t **p, const char16_t *s);
diff --git a/src/boot/measure.c b/src/boot/measure.c
index 3c409f8bd9..9e6295b9da 100644
--- a/src/boot/measure.c
+++ b/src/boot/measure.c
@@ -103,6 +103,7 @@ static int help(int argc, char *argv[], void *userdata) {
" --sbat=PATH Path to SBAT file %7$s .sbat\n"
" --pcrpkey=PATH Path to public key for PCR signatures %7$s .pcrpkey\n"
" --profile=PATH Path to profile file %7$s .profile\n"
+ " --hwids=PATH Path to HWIDs file %7$s .hwids\n"
"\nSee the %2$s for details.\n",
program_invocation_short_name,
link,
@@ -146,8 +147,10 @@ static int parse_argv(int argc, char *argv[]) {
ARG_SBAT,
_ARG_PCRSIG, /* the .pcrsig section is not input for signing, hence not actually an argument here */
ARG_PCRPKEY,
+ ARG_PROFILE,
+ ARG_HWIDS,
_ARG_SECTION_LAST,
- ARG_PROFILE = _ARG_SECTION_LAST,
+ ARG_DTBAUTO = _ARG_SECTION_LAST,
ARG_BANK,
ARG_PRIVATE_KEY,
ARG_PRIVATE_KEY_SOURCE,
@@ -170,10 +173,12 @@ static int parse_argv(int argc, char *argv[]) {
{ "ucode", required_argument, NULL, ARG_UCODE },
{ "splash", required_argument, NULL, ARG_SPLASH },
{ "dtb", required_argument, NULL, ARG_DTB },
+ { "dtbauto", required_argument, NULL, ARG_DTBAUTO },
{ "uname", required_argument, NULL, ARG_UNAME },
{ "sbat", required_argument, NULL, ARG_SBAT },
{ "pcrpkey", required_argument, NULL, ARG_PCRPKEY },
{ "profile", required_argument, NULL, ARG_PROFILE },
+ { "hwids", required_argument, NULL, ARG_HWIDS },
{ "current", no_argument, NULL, 'c' },
{ "bank", required_argument, NULL, ARG_BANK },
{ "tpm2-device", required_argument, NULL, ARG_TPM2_DEVICE },
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index a9a73b599b..e297323f1d 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -63,6 +63,7 @@ static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL);
static BUS_DEFINE_PROPERTY_GET_REF(property_get_private_tmp_ex, "s", PrivateTmp, private_tmp_to_string);
static BUS_DEFINE_PROPERTY_GET_REF(property_get_private_users_ex, "s", PrivateUsers, private_users_to_string);
static BUS_DEFINE_PROPERTY_GET_REF(property_get_protect_control_groups_ex, "s", ProtectControlGroups, protect_control_groups_to_string);
+static BUS_DEFINE_PROPERTY_GET_REF(property_get_private_pids, "s", PrivatePIDs, private_pids_to_string);
static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_level, "i", int, LOG_PRI);
static BUS_DEFINE_PROPERTY_GET_REF(property_get_syslog_facility, "i", int, LOG_FAC);
static BUS_DEFINE_PROPERTY_GET(property_get_cpu_affinity_from_numa, "b", ExecContext, exec_context_get_cpu_affinity_from_numa);
@@ -1194,6 +1195,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("PrivateUsersEx", "s", property_get_private_users_ex, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateMounts", "b", bus_property_get_tristate, offsetof(ExecContext, private_mounts), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateIPC", "b", bus_property_get_bool, offsetof(ExecContext, private_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivatePIDs", "s", property_get_private_pids, offsetof(ExecContext, private_pids), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHome", "s", property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectSystem", "s", property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1970,6 +1972,27 @@ int bus_exec_context_set_transient_property(
return 1;
}
+ if (streq(name, "PrivatePIDs")) {
+ const char *s;
+ PrivatePIDs t;
+
+ r = sd_bus_message_read(message, "s", &s);
+ if (r < 0)
+ return r;
+
+ t = private_pids_from_string(s);
+ if (t < 0)
+ return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Invalid %s setting: %s", name, s);
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ c->private_pids = t;
+ (void) unit_write_settingf(u, flags, name, "%s=%s",
+ name, private_pids_to_string(c->private_pids));
+ }
+
+ return 1;
+ }
+
if (streq(name, "PrivateDevices"))
return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);
diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c
index 4e70c84374..120067a774 100644
--- a/src/core/exec-invoke.c
+++ b/src/core/exec-invoke.c
@@ -2175,14 +2175,14 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
fd = open(a, O_WRONLY|O_CLOEXEC);
if (fd < 0) {
if (errno != ENOENT) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to open %s: %m", a);
goto child_fail;
}
/* If the file is missing the kernel is too old, let's continue anyway. */
} else {
if (write(fd, "deny\n", 5) < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to write \"deny\" to %s: %m", a);
goto child_fail;
}
@@ -2193,11 +2193,11 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
a = procfs_file_alloca(ppid, "gid_map");
fd = open(a, O_WRONLY|O_CLOEXEC);
if (fd < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to open %s: %m", a);
goto child_fail;
}
if (write(fd, gid_map, strlen(gid_map)) < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to write GID map to %s: %m", a);
goto child_fail;
}
fd = safe_close(fd);
@@ -2206,11 +2206,11 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
a = procfs_file_alloca(ppid, "uid_map");
fd = open(a, O_WRONLY|O_CLOEXEC);
if (fd < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to open %s: %m", a);
goto child_fail;
}
if (write(fd, uid_map, strlen(uid_map)) < 0) {
- r = -errno;
+ r = log_debug_errno(errno, "Failed to write UID map to %s: %m", a);
goto child_fail;
}
@@ -2224,7 +2224,7 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
errno_pipe[1] = safe_close(errno_pipe[1]);
if (unshare(CLONE_NEWUSER) < 0)
- return -errno;
+ return log_debug_errno(errno, "Failed to unshare user namespace: %m");
/* Let the child know that the namespace is ready now */
if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
@@ -2251,6 +2251,130 @@ static int setup_private_users(PrivateUsers private_users, uid_t ouid, gid_t ogi
return 1;
}
+static int can_mount_proc(const ExecContext *c, ExecParameters *p) {
+ _cleanup_close_pair_ int errno_pipe[2] = EBADF_PAIR;
+ _cleanup_(sigkill_waitp) pid_t pid = 0;
+ ssize_t n;
+ int r;
+
+ assert(c);
+ assert(p);
+
+ /* Probes whether a new procfs instance can be mounted, by trying the mount in a short-lived child that
+ * is forked into its own mount and PID namespace. Returns 1 if the mount succeeded, 0 if the child
+ * reported a privilege error, and a negative errno-style value for every other failure.
+ *
+ * If running via unprivileged user manager and /proc/ is masked (e.g. /proc/kmsg is over-mounted with tmpfs
+ * like systemd-nspawn does), then mounting /proc/ will fail with EPERM. This is due to a kernel restriction
+ * where unprivileged user namespaces cannot mount a less restrictive instance of /proc. */
+
+ /* Create a communication channel so that the child can tell the parent a proper error code in case it
+ * failed. */
+ if (pipe2(errno_pipe, O_CLOEXEC) < 0)
+ return log_exec_debug_errno(c, p, errno, "Failed to create pipe for communicating with child process (sd-proc-check): %m");
+
+ /* Fork a child process into its own mount and PID namespace. Note safe_fork() already remounts / as SLAVE
+ * with FORK_MOUNTNS_SLAVE. */
+ r = safe_fork("(sd-proc-check)",
+ FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE|FORK_NEW_PIDNS, &pid);
+ if (r < 0)
+ return log_exec_debug_errno(c, p, r, "Failed to fork child process (sd-proc-check): %m");
+ if (r == 0) {
+ errno_pipe[0] = safe_close(errno_pipe[0]);
+
+ /* Try mounting /proc on /dev/shm/. No need to clean up the mount since the mount
+ * namespace will be cleaned up once the process exits. */
+ r = mount_follow_verbose(LOG_DEBUG, "proc", "/dev/shm/", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+ if (r < 0) {
+ (void) write(errno_pipe[1], &r, sizeof(r)); /* best effort; the exit status below signals failure as well */
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ errno_pipe[1] = safe_close(errno_pipe[1]); /* parent only reads; keep just the read end open here */
+
+ /* Try to read an error code from the child */
+ n = read(errno_pipe[0], &r, sizeof(r));
+ if (n < 0)
+ return log_exec_debug_errno(c, p, errno, "Failed to read errno from pipe with child process (sd-proc-check): %m");
+ if (n == sizeof(r)) { /* an error code was sent to us */
+ /* This is the expected case where proc cannot be mounted due to permissions. */
+ if (ERRNO_IS_NEG_PRIVILEGE(r))
+ return 0;
+ if (r < 0)
+ return r;
+
+ return -EIO; /* the child only writes when r < 0, so a non-negative code is unexpected */
+ }
+ if (n != 0) /* on success we should have read 0 bytes */
+ return -EIO;
+
+ r = wait_for_terminate_and_check("(sd-proc-check)", TAKE_PID(pid), 0 /* flags= */);
+ if (r < 0)
+ return log_exec_debug_errno(c, p, r, "Failed to wait for (sd-proc-check) child process to terminate: %m");
+ if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
+ return log_exec_debug_errno(c, p, SYNTHETIC_ERRNO(EIO), "Child process (sd-proc-check) exited with unexpected exit status '%d'.", r);
+
+ return 1; /* the child mounted procfs and exited cleanly */
+}
+
+static int setup_private_pids(const ExecContext *c, ExecParameters *p) {
+ _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
+ _cleanup_close_pair_ int errno_pipe[2] = EBADF_PAIR;
+ ssize_t n;
+ int r, q;
+
+ assert(c);
+ assert(p);
+ assert(p->pidref_transport_fd >= 0);
+
+ /* The first process created after unsharing a pid namespace becomes PID 1 in the pid namespace, so
+ * we have to fork after unsharing the pid namespace to become PID 1. The parent sends the child
+ * pidref to the manager and exits while the child process continues with the rest of exec_invoke()
+ * and finally executes the actual payload.
+ *
+ * Return value: a negative errno-style value on failure, otherwise the non-negative status the
+ * parent reported for sending the pidref. Failures of pipe2() or of the fork itself are returned
+ * in the calling process, since no child exists at that point. */
+
+ /* Create a communication channel so that the parent can tell the child a proper error code in case it
+ * failed to send child pidref to the manager. */
+ if (pipe2(errno_pipe, O_CLOEXEC) < 0)
+ return log_exec_debug_errno(c, p, errno, "Failed to create pipe for communicating with parent process: %m");
+
+ r = pidref_safe_fork("(sd-pidns-child)", FORK_NEW_PIDNS, &pidref);
+ if (r < 0)
+ return log_exec_debug_errno(c, p, r, "Failed to fork child into new pid namespace: %m");
+ if (r > 0) {
+ errno_pipe[0] = safe_close(errno_pipe[0]);
+
+ /* In the parent process, we send the child pidref to the manager and exit.
+ * If PIDFD is not supported, only the child PID is sent. The server then
+ * uses the child PID to set the new exec main process. */
+ q = send_one_fd_iov(
+ p->pidref_transport_fd,
+ pidref.fd,
+ &IOVEC_MAKE(&pidref.pid, sizeof(pidref.pid)),
+ /*iovlen=*/ 1,
+ /*flags=*/ 0);
+ /* Send error code to child process. */
+ (void) write(errno_pipe[1], &q, sizeof(q));
+ /* Exit here so we only go through the destructors in exec_invoke only once - in the child - as
+ * some destructors have external effects. The main codepaths continue in the child process. */
+ _exit(q < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
+ }
+
+ errno_pipe[1] = safe_close(errno_pipe[1]);
+ p->pidref_transport_fd = safe_close(p->pidref_transport_fd); /* only the parent sends on this socket; it is not used again in this function */
+
+ /* Try to read an error code from the parent. Note a child process cannot wait for the parent so we always
+ * receive an errno even on success. */
+ n = read(errno_pipe[0], &r, sizeof(r));
+ if (n < 0)
+ return log_exec_debug_errno(c, p, errno, "Failed to read errno from pipe with parent process: %m");
+ if (n != sizeof(r))
+ return log_exec_debug_errno(c, p, SYNTHETIC_ERRNO(EIO), "Failed to read enough bytes from pipe with parent process");
+ if (r < 0)
+ return log_exec_debug_errno(c, p, r, "Failed to send child pidref to manager: %m");
+
+ /* NOTE! From this point on we are running in the child process only; the parent never gets past its
+ * _exit() call above. */
+ return r;
+}
+
static int create_many_symlinks(const char *root, const char *source, char **symlinks) {
_cleanup_free_ char *src_abs = NULL;
int r;
@@ -3301,6 +3425,7 @@ static int apply_mount_namespace(
.private_dev = needs_sandboxing && context->private_devices,
.private_network = needs_sandboxing && exec_needs_network_namespace(context),
.private_ipc = needs_sandboxing && exec_needs_ipc_namespace(context),
+ .private_pids = needs_sandboxing && exec_needs_pid_namespace(context) ? context->private_pids : PRIVATE_PIDS_NO,
.private_tmp = needs_sandboxing ? context->private_tmp : false,
.mount_apivfs = needs_sandboxing && exec_context_get_effective_mount_apivfs(context),
@@ -3573,7 +3698,7 @@ static int close_remaining_fds(
const int *fds, size_t n_fds) {
size_t n_dont_close = 0;
- int dont_close[n_fds + 16];
+ int dont_close[n_fds + 17];
assert(params);
@@ -3612,6 +3737,9 @@ static int close_remaining_fds(
if (params->handoff_timestamp_fd >= 0)
dont_close[n_dont_close++] = params->handoff_timestamp_fd;
+ if (params->pidref_transport_fd >= 0)
+ dont_close[n_dont_close++] = params->pidref_transport_fd;
+
assert(n_dont_close <= ELEMENTSOF(dont_close));
return close_all_fds(dont_close, n_dont_close);
@@ -3934,6 +4062,7 @@ static bool exec_context_need_unprivileged_private_users(
!strv_isempty(context->extension_directories) ||
context->protect_system != PROTECT_SYSTEM_NO ||
context->protect_home != PROTECT_HOME_NO ||
+ exec_needs_pid_namespace(context) ||
context->protect_kernel_tunables ||
context->protect_kernel_modules ||
context->protect_kernel_logs ||
@@ -4139,6 +4268,7 @@ int exec_invoke(
needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
bool keep_seccomp_privileges = false;
+ bool has_cap_sys_admin = false;
#if HAVE_SELINUX
_cleanup_free_ char *mac_selinux_context_net = NULL;
bool use_selinux = false;
@@ -4790,6 +4920,9 @@ int exec_invoke(
uint64_t capability_ambient_set = context->capability_ambient_set;
+ /* Check CAP_SYS_ADMIN before we enter user namespace to see if we can mount /proc even though it's masked. */
+ has_cap_sys_admin = have_effective_cap(CAP_SYS_ADMIN) > 0;
+
if (needs_sandboxing) {
/* MAC enablement checks need to be done before a new mount ns is created, as they rely on
* /sys being present. The actual MAC context application will happen later, as late as
@@ -4924,6 +5057,40 @@ int exec_invoke(
}
}
+ /* Unshare a new PID namespace before setting up mounts to ensure /proc/ is mounted with only processes in PID namespace visible.
+ * Note PrivatePIDs=yes implies MountAPIVFS=yes so we'll always ensure procfs is remounted. */
+ if (needs_sandboxing && exec_needs_pid_namespace(context)) {
+                if (params->pidref_transport_fd < 0) {
+                        /* A missing transport socket is a setup problem, not the result of a failed call, so
+                         * 'r' holds no error code for it here. Use a synthetic errno so that a negative value
+                         * is returned to the caller and %m expands to something meaningful. */
+                        *exit_status = EXIT_NAMESPACE;
+                        return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(ENOTCONN), "PidRef socket is not set up: %m");
+                }
+
+ /* If we had CAP_SYS_ADMIN prior to joining the user namespace, then we are privileged and don't need
+ * to check if we can mount /proc/.
+ *
+ * We need to check prior to entering the user namespace because if we're running unprivileged or in a
+ * system without CAP_SYS_ADMIN, then we can have CAP_SYS_ADMIN in the current user namespace but not
+ * once we unshare a mount namespace. */
+ r = has_cap_sys_admin ? 1 : can_mount_proc(context, params);
+ if (r < 0) {
+ *exit_status = EXIT_NAMESPACE;
+ return log_exec_error_errno(context, params, r, "Failed to detect if /proc/ can be remounted: %m");
+ }
+ if (r == 0) {
+ *exit_status = EXIT_NAMESPACE;
+ return log_exec_error_errno(context, params, SYNTHETIC_ERRNO(EPERM),
+ "PrivatePIDs=yes is configured, but /proc/ cannot be re-mounted due to lack of privileges, refusing.");
+ }
+
+ r = setup_private_pids(context, params);
+ if (r < 0) {
+ *exit_status = EXIT_NAMESPACE;
+ return log_exec_error_errno(context, params, r, "Failed to set up pid namespace: %m");
+ }
+ }
+
+ /* If PrivatePIDs=yes is configured, we're now running as PID 1 in a new PID namespace! */
+
if (needs_mount_namespace) {
_cleanup_free_ char *error_path = NULL;
diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c
index 6fa0b21968..bf6592faed 100644
--- a/src/core/execute-serialize.c
+++ b/src/core/execute-serialize.c
@@ -1391,6 +1391,10 @@ static int exec_parameters_serialize(const ExecParameters *p, const ExecContext
if (r < 0)
return r;
+ r = serialize_fd(f, fds, "exec-parameters-pidref-transport-fd", p->pidref_transport_fd);
+ if (r < 0)
+ return r;
+
if (c && exec_context_restrict_filesystems_set(c)) {
r = serialize_fd(f, fds, "exec-parameters-bpf-outer-map-fd", p->bpf_restrict_fs_map_fd);
if (r < 0)
@@ -1660,6 +1664,14 @@ static int exec_parameters_deserialize(ExecParameters *p, FILE *f, FDSet *fds) {
continue;
close_and_replace(p->handoff_timestamp_fd, fd);
+ } else if ((val = startswith(l, "exec-parameters-pidref-transport-fd="))) {
+ int fd;
+
+ fd = deserialize_fd(fds, val);
+ if (fd < 0)
+ continue;
+
+ close_and_replace(p->pidref_transport_fd, fd);
} else if ((val = startswith(l, "exec-parameters-bpf-outer-map-fd="))) {
int fd;
@@ -1926,6 +1938,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
if (r < 0)
return r;
+ r = serialize_item(f, "exec-context-private-pids", private_pids_to_string(c->private_pids));
+ if (r < 0)
+ return r;
+
r = serialize_bool_elide(f, "exec-context-remove-ipc", c->remove_ipc);
if (r < 0)
return r;
@@ -2813,6 +2829,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
if (r < 0)
return r;
c->private_ipc = r;
+ } else if ((val = startswith(l, "exec-context-private-pids="))) {
+ c->private_pids = private_pids_from_string(val);
+ if (c->private_pids < 0)
+ return -EINVAL;
} else if ((val = startswith(l, "exec-context-remove-ipc="))) {
r = parse_boolean(val);
if (r < 0)
diff --git a/src/core/execute.c b/src/core/execute.c
index 1c41b39a2f..2c5a5db10e 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -254,6 +254,12 @@ bool exec_is_cgroup_mount_read_only(const ExecContext *context, const ExecParame
return IN_SET(exec_get_protect_control_groups(context, params), PROTECT_CONTROL_GROUPS_YES, PROTECT_CONTROL_GROUPS_STRICT);
}
+bool exec_needs_pid_namespace(const ExecContext *context) {
+        assert(context);
+
+        /* No PID namespace is needed when PrivatePIDs= is off; the kernel support check is only
+         * consulted when the setting is actually enabled. */
+        if (context->private_pids == PRIVATE_PIDS_NO)
+                return false;
+
+        return ns_type_supported(NAMESPACE_PID);
+}
+
bool exec_needs_mount_namespace(
const ExecContext *context,
const ExecParameters *params,
@@ -306,7 +312,8 @@ bool exec_needs_mount_namespace(
exec_needs_cgroup_mount(context, params) ||
context->protect_proc != PROTECT_PROC_DEFAULT ||
context->proc_subset != PROC_SUBSET_ALL ||
- exec_needs_ipc_namespace(context))
+ exec_needs_ipc_namespace(context) ||
+ exec_needs_pid_namespace(context))
return true;
if (context->root_directory) {
@@ -1026,6 +1033,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
"%sProtectControlGroups: %s\n"
"%sPrivateNetwork: %s\n"
"%sPrivateUsers: %s\n"
+ "%sPrivatePIDs: %s\n"
"%sProtectHome: %s\n"
"%sProtectSystem: %s\n"
"%sMountAPIVFS: %s\n"
@@ -1052,6 +1060,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
prefix, protect_control_groups_to_string(c->protect_control_groups),
prefix, yes_no(c->private_network),
prefix, private_users_to_string(c->private_users),
+ prefix, private_pids_to_string(c->private_pids),
prefix, protect_home_to_string(c->protect_home),
prefix, protect_system_to_string(c->protect_system),
prefix, yes_no(exec_context_get_effective_mount_apivfs(c)),
diff --git a/src/core/execute.h b/src/core/execute.h
index 7274c68d3d..32dabf177f 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -335,6 +335,7 @@ struct ExecContext {
ProtectControlGroups protect_control_groups;
ProtectSystem protect_system;
ProtectHome protect_home;
+ PrivatePIDs private_pids;
bool protect_hostname;
bool dynamic_user;
@@ -465,6 +466,7 @@ struct ExecParameters {
char **files_env;
int user_lookup_fd;
int handoff_timestamp_fd;
+ int pidref_transport_fd;
int bpf_restrict_fs_map_fd;
@@ -486,6 +488,7 @@ struct ExecParameters {
.bpf_restrict_fs_map_fd = -EBADF, \
.user_lookup_fd = -EBADF, \
.handoff_timestamp_fd = -EBADF, \
+ .pidref_transport_fd = -EBADF, \
}
#include "unit.h"
@@ -623,6 +626,7 @@ ExecDirectoryType exec_resource_type_from_string(const char *s) _pure_;
bool exec_needs_mount_namespace(const ExecContext *context, const ExecParameters *params, const ExecRuntime *runtime);
bool exec_needs_network_namespace(const ExecContext *context);
bool exec_needs_ipc_namespace(const ExecContext *context);
+bool exec_needs_pid_namespace(const ExecContext *context);
ProtectControlGroups exec_get_protect_control_groups(const ExecContext *context, const ExecParameters *params);
bool exec_needs_cgroup_namespace(const ExecContext *context, const ExecParameters *params);
diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in
index f5cbb319d7..d7564b3767 100644
--- a/src/core/load-fragment-gperf.gperf.in
+++ b/src/core/load-fragment-gperf.gperf.in
@@ -133,6 +133,7 @@
{{type}}.PrivateUsers, config_parse_private_users, 0, offsetof({{type}}, exec_context.private_users)
{{type}}.PrivateMounts, config_parse_tristate, 0, offsetof({{type}}, exec_context.private_mounts)
{{type}}.PrivateIPC, config_parse_bool, 0, offsetof({{type}}, exec_context.private_ipc)
+{{type}}.PrivatePIDs, config_parse_private_pids, 0, offsetof({{type}}, exec_context.private_pids)
{{type}}.ProtectSystem, config_parse_protect_system, 0, offsetof({{type}}, exec_context.protect_system)
{{type}}.ProtectHome, config_parse_protect_home, 0, offsetof({{type}}, exec_context.protect_home)
{{type}}.MountFlags, config_parse_exec_mount_propagation_flag, 0, offsetof({{type}}, exec_context.mount_propagation_flag)
diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 1d813332b1..f34c930f4e 100644
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c
@@ -135,6 +135,7 @@ DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_proc, protect_proc, ProtectProc);
DEFINE_CONFIG_PARSE_ENUM(config_parse_proc_subset, proc_subset, ProcSubset);
DEFINE_CONFIG_PARSE_ENUM(config_parse_private_tmp, private_tmp, PrivateTmp);
DEFINE_CONFIG_PARSE_ENUM(config_parse_private_users, private_users, PrivateUsers);
+DEFINE_CONFIG_PARSE_ENUM(config_parse_private_pids, private_pids, PrivatePIDs);
DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_control_groups, protect_control_groups, ProtectControlGroups);
DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode);
DEFINE_CONFIG_PARSE_ENUM(config_parse_job_mode, job_mode, JobMode);
diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h
index 9b95f0c24e..8ac962a94b 100644
--- a/src/core/load-fragment.h
+++ b/src/core/load-fragment.h
@@ -114,6 +114,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_namespace_path_strv);
CONFIG_PARSER_PROTOTYPE(config_parse_temporary_filesystems);
CONFIG_PARSER_PROTOTYPE(config_parse_private_tmp);
CONFIG_PARSER_PROTOTYPE(config_parse_private_users);
+CONFIG_PARSER_PROTOTYPE(config_parse_private_pids);
CONFIG_PARSER_PROTOTYPE(config_parse_protect_control_groups);
CONFIG_PARSER_PROTOTYPE(config_parse_cpu_quota);
CONFIG_PARSER_PROTOTYPE(config_parse_allowed_cpuset);
diff --git a/src/core/manager.c b/src/core/manager.c
index f58bc547a6..296d7416b1 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -126,6 +126,7 @@ static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint
static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_handoff_timestamp_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int manager_dispatch_pidref_transport_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
static int manager_dispatch_sigchld(sd_event_source *source, void *userdata);
@@ -913,6 +914,7 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
.signal_fd = -EBADF,
.user_lookup_fds = EBADF_PAIR,
.handoff_timestamp_fds = EBADF_PAIR,
+ .pidref_transport_fds = EBADF_PAIR,
.private_listen_fd = -EBADF,
.dev_autofs_fd = -EBADF,
.cgroup_inotify_fd = -EBADF,
@@ -1309,6 +1311,55 @@ static int manager_setup_handoff_timestamp_fd(Manager *m) {
return 0;
}
+static int manager_setup_pidref_transport_fd(Manager *m) {
+ int r;
+
+ assert(m);
+
+ /* Set up the socket pair used for passing parent and child pidrefs back when the executor unshares
+ * a PID namespace and forks again when using PrivatePIDs=yes.
+ *
+ * The socket pair is only (re)created when the receiving fd is not open yet, and the event source is
+ * only registered when none exists. Returns 0 on success and a negative errno-style value on failure;
+ * a failure to enable SO_PASSPIDFD is logged and otherwise ignored. */
+
+ if (m->pidref_transport_fds[0] < 0) {
+ m->pidref_event_source = sd_event_source_disable_unref(m->pidref_event_source); /* drop any source still tied to the old fd */
+ safe_close_pair(m->pidref_transport_fds);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->pidref_transport_fds) < 0)
+ return log_error_errno(errno, "Failed to allocate pidref socket: %m");
+
+ /* Make sure children never have to block */
+ (void) fd_increase_rxbuf(m->pidref_transport_fds[0], MANAGER_SOCKET_RCVBUF_SIZE);
+
+ r = setsockopt_int(m->pidref_transport_fds[0], SOL_SOCKET, SO_PASSCRED, true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to enable SO_PASSCRED for pidref socket: %m");
+
+ r = setsockopt_int(m->pidref_transport_fds[0], SOL_SOCKET, SO_PASSPIDFD, true);
+ if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
+ log_debug("SO_PASSPIDFD is not supported for pidref socket, ignoring.");
+ else if (r < 0)
+ log_warning_errno(r, "Failed to enable SO_PASSPIDFD for pidref socket, ignoring: %m");
+
+ /* Mark the receiving socket as O_NONBLOCK (but leave sending side as-is) */
+ r = fd_nonblock(m->pidref_transport_fds[0], true);
+ if (r < 0)
+ return log_error_errno(r, "Failed to make pidref socket O_NONBLOCK: %m");
+ }
+
+ if (!m->pidref_event_source) {
+ r = sd_event_add_io(m->event, &m->pidref_event_source, m->pidref_transport_fds[0], EPOLLIN, manager_dispatch_pidref_transport_fd, m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate pidref event source: %m");
+
+ r = sd_event_source_set_priority(m->pidref_event_source, EVENT_PRIORITY_PIDREF);
+ if (r < 0)
+ return log_error_errno(r, "Failed to set priority of pidref event source: %m");
+
+ (void) sd_event_source_set_description(m->pidref_event_source, "pidref"); /* description is informational only, failure is ignored */
+ }
+
+ return 0;
+}
+
static unsigned manager_dispatch_cleanup_queue(Manager *m) {
Unit *u;
unsigned n = 0;
@@ -1724,6 +1775,7 @@ Manager* manager_free(Manager *m) {
sd_event_source_unref(m->run_queue_event_source);
sd_event_source_unref(m->user_lookup_event_source);
sd_event_source_unref(m->handoff_timestamp_event_source);
+ sd_event_source_unref(m->pidref_event_source);
sd_event_source_unref(m->memory_pressure_event_source);
safe_close(m->signal_fd);
@@ -1731,6 +1783,7 @@ Manager* manager_free(Manager *m) {
safe_close(m->cgroups_agent_fd);
safe_close_pair(m->user_lookup_fds);
safe_close_pair(m->handoff_timestamp_fds);
+ safe_close_pair(m->pidref_transport_fds);
manager_close_ask_password(m);
@@ -2077,6 +2130,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds, const char *roo
/* This shouldn't fail, except if things are really broken. */
return r;
+ r = manager_setup_pidref_transport_fd(m);
+ if (r < 0)
+ /* This shouldn't fail, except if things are really broken. */
+ return r;
+
/* Connect to the bus if we are good for it */
manager_setup_bus(m);
@@ -3747,6 +3805,7 @@ int manager_reload(Manager *m) {
(void) manager_setup_cgroups_agent(m);
(void) manager_setup_user_lookup_fd(m);
(void) manager_setup_handoff_timestamp_fd(m);
+ (void) manager_setup_pidref_transport_fd(m);
/* Third, fire things up! */
manager_coldplug(m);
@@ -5002,6 +5061,142 @@ static int manager_dispatch_handoff_timestamp_fd(sd_event_source *source, int fd
return 0;
}
+static int manager_dispatch_pidref_transport_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+ Manager *m = ASSERT_PTR(userdata);
+ _cleanup_(pidref_done) PidRef child_pidref = PIDREF_NULL, parent_pidref = PIDREF_NULL;
+ _cleanup_close_ int child_pidfd = -EBADF, parent_pidfd = -EBADF;
+ struct ucred *ucred = NULL;
+ CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int)) * 2) control;
+ pid_t child_pid;
+ struct msghdr msghdr = {
+ .msg_iov = &IOVEC_MAKE(&child_pid, sizeof(child_pid)),
+ .msg_iovlen = 1,
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ ssize_t n;
+ int r;
+
+ assert(source);
+
+ /* I/O event handler for the receiving end of the pidref socket pair: reads one message, validates
+ * the parent and child process references it carries, looks up the owning unit and passes both
+ * references to that unit type's notify_pidref() hook. Messages that are malformed or not of
+ * interest are logged and dropped with return value 0; a negative value is returned only when
+ * receiving fails with an error that is not handled explicitly below. The message is read from
+ * m->pidref_transport_fds[0]; the 'fd' and 'revents' arguments are not consulted.
+ *
+ * Server expects:
+ * - Parent PID in ucreds enabled via SO_PASSCRED
+ * - Parent PIDFD in SCM_PIDFD message enabled via SO_PASSPIDFD
+ * - Child PIDFD in SCM_RIGHTS in message body
+ * - Child PID in message IOV
+ *
+ * SO_PASSPIDFD may not be supported by the kernel so we fall back to using parent PID from ucreds
+ * and accept some raciness. */
+ n = recvmsg_safe(m->pidref_transport_fds[0], &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
+ if (ERRNO_IS_NEG_TRANSIENT(n))
+ return 0; /* Spurious wakeup, try again */
+ if (n == -ECHRNG) {
+ log_warning_errno(n, "Got message with truncated control data (unexpected fds sent?), ignoring.");
+ return 0;
+ }
+ if (n == -EXFULL) {
+ log_warning_errno(n, "Got message with truncated payload data, ignoring.");
+ return 0;
+ }
+ if (n < 0)
+ return log_error_errno(n, "Failed to receive pidref message: %m");
+
+ if (n != sizeof(child_pid)) {
+ log_warning("Got pidref message of unexpected size %zi (expected %zu), ignoring.", n, sizeof(child_pid));
+ return 0;
+ }
+
+ CMSG_FOREACH(cmsg, &msghdr) {
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+
+ if (cmsg->cmsg_type == SCM_CREDENTIALS && cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred))) {
+ assert(!ucred);
+ ucred = CMSG_TYPED_DATA(cmsg, struct ucred);
+ } else if (cmsg->cmsg_type == SCM_PIDFD) {
+ assert(parent_pidfd < 0);
+ parent_pidfd = *CMSG_TYPED_DATA(cmsg, int);
+ } else if (cmsg->cmsg_type == SCM_RIGHTS) {
+ assert(child_pidfd < 0);
+ child_pidfd = *CMSG_TYPED_DATA(cmsg, int); /* NOTE(review): only the first fd of the SCM_RIGHTS payload is taken — confirm senders never pass more than one */
+ }
+ }
+
+ /* Verify and set parent pidref. */
+ if (!ucred || !pid_is_valid(ucred->pid)) {
+ log_warning("Received pidref message without valid credentials. Ignoring.");
+ return 0;
+ }
+
+ /* Need to handle kernels without SO_PASSPIDFD where SCM_PIDFD will not be set. */
+ if (parent_pidfd >= 0)
+ r = pidref_set_pidfd_consume(&parent_pidref, TAKE_FD(parent_pidfd));
+ else
+ r = pidref_set_pid(&parent_pidref, ucred->pid);
+ if (r < 0) {
+ /* NOTE(review): this branch pins the *parent* (sender) process, yet both messages below use the same
+ * "child process" wording as the child branch further down — confirm whether that is intended. */
+ if (r == -ESRCH)
+ log_debug_errno(r, "PidRef child process died before message is processed. Ignoring.");
+ else
+ log_warning_errno(r, "Failed to pin pidref child process, ignoring message: %m");
+ return 0;
+ }
+
+ if (parent_pidref.pid != ucred->pid) {
+ assert(parent_pidref.fd >= 0);
+ log_warning("Got SCM_PIDFD for parent process " PID_FMT " but got SCM_CREDENTIALS for parent process " PID_FMT ". Ignoring.",
+ parent_pidref.pid, ucred->pid);
+ return 0;
+ }
+
+ /* Verify and set child pidref. */
+ if (!pid_is_valid(child_pid)) {
+ log_warning("Received pidref message without valid child PID. Ignoring.");
+ return 0;
+ }
+
+ /* Need to handle kernels without PIDFD support. */
+ if (child_pidfd >= 0)
+ r = pidref_set_pidfd_consume(&child_pidref, TAKE_FD(child_pidfd));
+ else
+ r = pidref_set_pid(&child_pidref, child_pid);
+ if (r < 0) {
+ if (r == -ESRCH)
+ log_debug_errno(r, "PidRef child process died before message is processed. Ignoring.");
+ else
+ log_warning_errno(r, "Failed to pin pidref child process, ignoring message: %m");
+ return 0;
+ }
+
+ if (child_pidref.pid != child_pid) {
+ assert(child_pidref.fd >= 0);
+ log_warning("Got SCM_RIGHTS for child process " PID_FMT " but PID in IOV message is " PID_FMT ". Ignoring.",
+ child_pidref.pid, child_pid);
+ return 0;
+ }
+
+ log_debug("Got pidref event with parent PID " PID_FMT " and child PID " PID_FMT ".", parent_pidref.pid, child_pidref.pid);
+
+ /* Try finding cgroup of parent process. But if parent process exited and we're not using PIDFD, this could return NULL.
+ * Then fall back to finding cgroup of the child process. */
+ Unit *u = manager_get_unit_by_pidref_cgroup(m, &parent_pidref);
+ if (!u)
+ u = manager_get_unit_by_pidref_cgroup(m, &child_pidref);
+ if (!u) {
+ log_debug("Got pidref for parent process " PID_FMT " and child process " PID_FMT " we are not interested in, ignoring.", parent_pidref.pid, child_pidref.pid);
+ return 0;
+ }
+
+ if (!UNIT_VTABLE(u)->notify_pidref) {
+ log_unit_warning(u, "Received pidref event from unexpected unit type '%s'.", unit_type_to_string(u->type));
+ return 0;
+ }
+
+ UNIT_VTABLE(u)->notify_pidref(u, &parent_pidref, &child_pidref); /* the hook may take ownership of a pidref; whatever remains is released by the cleanup handlers */
+
+ return 0;
+}
+
void manager_ref_console(Manager *m) {
assert(m);
diff --git a/src/core/manager.h b/src/core/manager.h
index c1f7f8c083..e4cada80ff 100644
--- a/src/core/manager.h
+++ b/src/core/manager.h
@@ -289,6 +289,9 @@ struct Manager {
int handoff_timestamp_fds[2];
sd_event_source *handoff_timestamp_event_source;
+ int pidref_transport_fds[2];
+ sd_event_source *pidref_event_source;
+
RuntimeScope runtime_scope;
LookupPaths lookup_paths;
@@ -678,12 +681,13 @@ void unit_defaults_done(UnitDefaults *defaults);
enum {
/* most important … */
- EVENT_PRIORITY_USER_LOOKUP = SD_EVENT_PRIORITY_NORMAL-11,
- EVENT_PRIORITY_MOUNT_TABLE = SD_EVENT_PRIORITY_NORMAL-10,
- EVENT_PRIORITY_SWAP_TABLE = SD_EVENT_PRIORITY_NORMAL-10,
- EVENT_PRIORITY_CGROUP_AGENT = SD_EVENT_PRIORITY_NORMAL-9, /* cgroupv1 */
- EVENT_PRIORITY_CGROUP_INOTIFY = SD_EVENT_PRIORITY_NORMAL-9, /* cgroupv2 */
- EVENT_PRIORITY_CGROUP_OOM = SD_EVENT_PRIORITY_NORMAL-8,
+ EVENT_PRIORITY_USER_LOOKUP = SD_EVENT_PRIORITY_NORMAL-12,
+ EVENT_PRIORITY_MOUNT_TABLE = SD_EVENT_PRIORITY_NORMAL-11,
+ EVENT_PRIORITY_SWAP_TABLE = SD_EVENT_PRIORITY_NORMAL-11,
+ EVENT_PRIORITY_CGROUP_AGENT = SD_EVENT_PRIORITY_NORMAL-10, /* cgroupv1 */
+ EVENT_PRIORITY_CGROUP_INOTIFY = SD_EVENT_PRIORITY_NORMAL-10, /* cgroupv2 */
+ EVENT_PRIORITY_CGROUP_OOM = SD_EVENT_PRIORITY_NORMAL-9,
+ EVENT_PRIORITY_PIDREF = SD_EVENT_PRIORITY_NORMAL-8,
EVENT_PRIORITY_HANDOFF_TIMESTAMP = SD_EVENT_PRIORITY_NORMAL-7,
EVENT_PRIORITY_EXEC_FD = SD_EVENT_PRIORITY_NORMAL-6,
EVENT_PRIORITY_NOTIFY = SD_EVENT_PRIORITY_NORMAL-5,
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 91c905f2fe..57dbbc4fc7 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -2061,7 +2061,8 @@ static bool namespace_parameters_mount_apivfs(const NamespaceParameters *p) {
p->protect_control_groups != PROTECT_CONTROL_GROUPS_NO ||
p->protect_kernel_tunables ||
p->protect_proc != PROTECT_PROC_DEFAULT ||
- p->proc_subset != PROC_SUBSET_ALL;
+ p->proc_subset != PROC_SUBSET_ALL ||
+ p->private_pids != PRIVATE_PIDS_NO;
}
/* Walk all mount entries and dropping any unused mounts. This affects all
@@ -3366,3 +3367,10 @@ static const char* const private_users_table[_PRIVATE_USERS_MAX] = {
};
DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(private_users, PrivateUsers, PRIVATE_USERS_SELF);
+
+static const char* const private_pids_table[_PRIVATE_PIDS_MAX] = { /* string form of each PrivatePIDs value, indexed by the enum */
+ [PRIVATE_PIDS_NO] = "no",
+ [PRIVATE_PIDS_YES] = "yes",
+};
+ /* Generates private_pids_to_string()/private_pids_from_string(); presumably boolean spellings are accepted too, with "true" mapping to PRIVATE_PIDS_YES — confirm against the macro definition. */
+DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(private_pids, PrivatePIDs, PRIVATE_PIDS_YES);
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 7b6e892cc2..bd48aa31da 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -78,6 +78,13 @@ typedef enum ProtectControlGroups {
_PROTECT_CONTROL_GROUPS_INVALID = -EINVAL,
} ProtectControlGroups;
+typedef enum PrivatePIDs { /* values of the PrivatePIDs= execution setting */
+ PRIVATE_PIDS_NO, /* no PID namespace is requested */
+ PRIVATE_PIDS_YES, /* run the unit's processes in a new PID namespace */
+ _PRIVATE_PIDS_MAX, /* number of valid values; sizes the string table */
+ _PRIVATE_PIDS_INVALID = -EINVAL, /* negative marker for an unparsable value */
+} PrivatePIDs;
+
struct BindMount {
char *source;
char *destination;
@@ -182,6 +189,7 @@ struct NamespaceParameters {
ProtectProc protect_proc;
ProcSubset proc_subset;
PrivateTmp private_tmp;
+ PrivatePIDs private_pids;
};
int setup_namespace(const NamespaceParameters *p, char **reterr_path);
@@ -225,6 +233,9 @@ PrivateUsers private_users_from_string(const char *s) _pure_;
const char* protect_control_groups_to_string(ProtectControlGroups i) _const_;
ProtectControlGroups protect_control_groups_from_string(const char *s) _pure_;
+const char* private_pids_to_string(PrivatePIDs i) _const_;
+PrivatePIDs private_pids_from_string(const char *s) _pure_;
+
void bind_mount_free_many(BindMount *b, size_t n);
int bind_mount_add(BindMount **b, size_t *n, const BindMount *item);
diff --git a/src/core/service.c b/src/core/service.c
index 737dc9905a..5b0bb76af2 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -710,6 +710,9 @@ static int service_verify(Service *s) {
if (s->type == SERVICE_DBUS && !s->bus_name)
return log_unit_error_errno(UNIT(s), SYNTHETIC_ERRNO(ENOEXEC), "Service is of type D-Bus but no D-Bus service name has been specified. Refusing.");
+ if (s->type == SERVICE_FORKING && exec_needs_pid_namespace(&s->exec_context))
+ return log_unit_error_errno(UNIT(s), SYNTHETIC_ERRNO(ENOEXEC), "Service of Type=forking does not support PrivatePIDs=yes. Refusing.");
+
if (s->usb_function_descriptors && !s->usb_function_strings)
log_unit_warning(UNIT(s), "Service has USBFunctionDescriptors= setting, but no USBFunctionStrings=. Ignoring.");
@@ -4733,7 +4736,7 @@ static void service_notify_message(
monotonic_usec != USEC_INFINITY &&
monotonic_usec >= s->reload_begin_usec)
/* Note, we don't call service_enter_reload_by_notify() here, because we
- * don't need reload propagation nor do we want to restart the time-out. */
+ * don't need reload propagation nor do we want to restart the timeout. */
service_set_state(s, SERVICE_RELOAD_NOTIFY);
if (s->state == SERVICE_RUNNING)
@@ -4908,6 +4911,35 @@ static void service_handoff_timestamp(
unit_add_to_dbus_queue(u);
}
+static void service_notify_pidref(Unit *u, PidRef *parent_pidref, PidRef *child_pidref) {
+ Service *s = ASSERT_PTR(SERVICE(u));
+ int r;
+
+ assert(pidref_is_set(parent_pidref));
+ assert(pidref_is_set(child_pidref));
+
+ if (pidref_equal(&s->main_pid, parent_pidref)) {
+ r = service_set_main_pidref(s, TAKE_PIDREF(*child_pidref), /* start_timestamp = */ NULL);
+ if (r < 0)
+ return (void) log_unit_warning_errno(u, r, "Failed to set new main pid: %m");
+
+ /* Since the child process is PID 1 in a new PID namespace, it must be exclusive to this unit. */
+ r = unit_watch_pidref(u, &s->main_pid, /* exclusive= */ true);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to watch new main PID " PID_FMT ": %m", s->main_pid.pid);
+ } else if (pidref_equal(&s->control_pid, parent_pidref)) {
+ service_unwatch_control_pid(s);
+ s->control_pid = TAKE_PIDREF(*child_pidref);
+
+ r = unit_watch_pidref(u, &s->control_pid, /* exclusive= */ true);
+ if (r < 0)
+ log_unit_warning_errno(u, r, "Failed to watch new control PID " PID_FMT ": %m", s->control_pid.pid);
+ } else
+ return (void) log_unit_debug(u, "Parent process " PID_FMT " does not match main or control processes, ignoring.", parent_pidref->pid);
+
+ unit_add_to_dbus_queue(u);
+}
+
static int service_get_timeout(Unit *u, usec_t *timeout) {
Service *s = ASSERT_PTR(SERVICE(u));
uint64_t t;
@@ -5638,6 +5670,7 @@ const UnitVTable service_vtable = {
.notify_cgroup_oom = service_notify_cgroup_oom_event,
.notify_message = service_notify_message,
.notify_handoff_timestamp = service_handoff_timestamp,
+ .notify_pidref = service_notify_pidref,
.main_pid = service_main_pid,
.control_pid = service_control_pid,
diff --git a/src/core/unit.c b/src/core/unit.c
index eec08a2fbf..71488a4555 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -4237,6 +4237,9 @@ static int unit_verify_contexts(const Unit *u) {
exec_needs_mount_namespace(ec, /* params = */ NULL, /* runtime = */ NULL))
return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOEXEC), "WorkingDirectory= may not be below /proc/, /sys/ or /dev/ when using mount namespacing. Refusing.");
+ if (exec_needs_pid_namespace(ec) && !UNIT_VTABLE(u)->notify_pidref)
+ return log_unit_error_errno(u, SYNTHETIC_ERRNO(ENOEXEC), "PrivatePIDs= setting is only supported for service units. Refusing.");
+
const KillContext *kc = unit_get_kill_context(u);
if (ec->pam_name && kc && !IN_SET(kc->kill_mode, KILL_CONTROL_GROUP, KILL_MIXED))
@@ -5402,6 +5405,8 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) {
p->user_lookup_fd = u->manager->user_lookup_fds[1];
p->handoff_timestamp_fd = u->manager->handoff_timestamp_fds[1];
+ if (UNIT_VTABLE(u)->notify_pidref)
+ p->pidref_transport_fd = u->manager->pidref_transport_fds[1];
p->cgroup_id = crt ? crt->cgroup_id : 0;
p->invocation_id = u->invocation_id;
diff --git a/src/core/unit.h b/src/core/unit.h
index 01e1adf961..a8eb366337 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -640,6 +640,9 @@ typedef struct UnitVTable {
/* Called whenever we learn a handoff timestamp */
void (*notify_handoff_timestamp)(Unit *u, const struct ucred *ucred, const dual_timestamp *ts);
+ /* Called whenever we learn about a child process */
+ void (*notify_pidref)(Unit *u, PidRef *parent_pidref, PidRef *child_pidref);
+
/* Called whenever a name this Unit registered for comes or goes away. */
void (*bus_name_owner_change)(Unit *u, const char *new_owner);
diff --git a/src/cryptsetup/cryptsetup.c b/src/cryptsetup/cryptsetup.c
index 0620b1cffb..a415c3e6d7 100644
--- a/src/cryptsetup/cryptsetup.c
+++ b/src/cryptsetup/cryptsetup.c
@@ -1294,7 +1294,7 @@ static int run_security_device_monitor(
assert(event);
assert(monitor);
- /* Runs the event loop for the device monitor until either something happens, or the time-out is
+ /* Runs the event loop for the device monitor until either something happens, or the timeout is
* hit. */
for (;;) {
diff --git a/src/fundamental/chid-fundamental.c b/src/fundamental/chid-fundamental.c
new file mode 100644
index 0000000000..55b04fa2ab
--- /dev/null
+++ b/src/fundamental/chid-fundamental.c
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+/*
+ * Based on Nikita Travkin's dtbloader implementation.
+ * Copyright (c) 2024 Nikita Travkin <nikita@trvn.ru>
+ *
+ * https://github.com/TravMurav/dtbloader/blob/main/src/chid.c
+ */
+
+/*
+ * Based on Linaro dtbloader implementation.
+ * Copyright (c) 2019, Linaro. All rights reserved.
+ *
+ * https://github.com/aarch64-laptops/edk2/blob/dtbloader-app/EmbeddedPkg/Application/ConfigTableLoader/CHID.c
+ */
+
+#if SD_BOOT
+# include "efi-string.h"
+# include "util.h"
+#else
+# include <byteswap.h>
+# include <string.h>
+# include <uchar.h>
+# include <utf8.h>
+#define strsize16(str) ((char16_strlen(str) + 1) * sizeof(char16_t))
+#endif
+
+#include "chid-fundamental.h"
+#include "macro-fundamental.h"
+#include "memory-util-fundamental.h"
+#include "sha1-fundamental.h"
+
+static void get_chid(const char16_t *const smbios_fields[static _CHID_SMBIOS_FIELDS_MAX], uint32_t mask, EFI_GUID *ret_chid) {
+        assert(mask != 0);
+        assert(ret_chid);
+        const EFI_GUID namespace = { UINT32_C(0x12d8ff70), UINT16_C(0x7f4c), UINT16_C(0x7d4c), {} }; /* Swapped to BE */
+
+        struct sha1_ctx ctx = {};
+        sha1_init_ctx(&ctx);
+        sha1_process_bytes(&namespace, sizeof(namespace), &ctx); /* the namespace GUID is hashed first */
+
+        for (unsigned i = 0; i < _CHID_SMBIOS_FIELDS_MAX; i++)
+                if ((mask >> i) & 1) {
+                        if (i > 0)
+                                sha1_process_bytes(L"&", 2, &ctx);
+                        size_t sz = strsize16(smbios_fields[i]); /* size in bytes, including the NUL terminator */
+                        sha1_process_bytes(smbios_fields[i], sz > sizeof(char16_t) ? sz - sizeof(char16_t) : 0, &ctx); /* hash the characters only, never the terminator */
+                }
+
+        uint8_t hash[SHA1_DIGEST_SIZE];
+        sha1_finish_ctx(&ctx, hash);
+
+        assert_cc(sizeof(hash) >= sizeof(*ret_chid));
+        memcpy(ret_chid, hash, sizeof(*ret_chid));
+
+        /* Convert the resulting CHID back to little-endian: */
+        ret_chid->Data1 = bswap_32(ret_chid->Data1);
+        ret_chid->Data2 = bswap_16(ret_chid->Data2);
+        ret_chid->Data3 = bswap_16(ret_chid->Data3);
+
+        /* set specific bits according to RFC4122 Section 4.1.3 */
+        ret_chid->Data3 = (ret_chid->Data3 & 0x0fff) | (5 << 12);
+        ret_chid->Data4[0] = (ret_chid->Data4[0] & UINT8_C(0x3f)) | UINT8_C(0x80);
+}
+
+static const uint32_t chid_smbios_table[CHID_TYPES_MAX] = {
+ [3] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_FAMILY) |
+ (UINT32_C(1) << CHID_SMBIOS_PRODUCT_NAME) |
+ (UINT32_C(1) << CHID_SMBIOS_PRODUCT_SKU) |
+ (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_PRODUCT),
+
+ [4] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_FAMILY) |
+ (UINT32_C(1) << CHID_SMBIOS_PRODUCT_NAME) |
+ (UINT32_C(1) << CHID_SMBIOS_PRODUCT_SKU),
+
+ [5] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_FAMILY) |
+ (UINT32_C(1) << CHID_SMBIOS_PRODUCT_NAME),
+
+ [6] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_PRODUCT_SKU) |
+ (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_PRODUCT),
+
+ [7] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_PRODUCT_SKU),
+
+ [8] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_PRODUCT_NAME) |
+ (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_PRODUCT),
+
+ [9] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_PRODUCT_NAME),
+
+ [10] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_FAMILY) |
+ (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_PRODUCT),
+
+ [11] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_FAMILY),
+
+ [13] = (UINT32_C(1) << CHID_SMBIOS_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_MANUFACTURER) |
+ (UINT32_C(1) << CHID_SMBIOS_BASEBOARD_PRODUCT),
+};
+
+void chid_calculate(const char16_t *const smbios_fields[static _CHID_SMBIOS_FIELDS_MAX], EFI_GUID ret_chids[static CHID_TYPES_MAX]) {
+        assert(smbios_fields);
+        assert(ret_chids);
+        for (size_t i = 0; i < CHID_TYPES_MAX; i++) /* iterate over CHID types (table/output size), not over SMBIOS fields */
+                if (chid_smbios_table[i] != 0)
+                        get_chid(smbios_fields, chid_smbios_table[i], &ret_chids[i]);
+                else
+                        memzero(&ret_chids[i], sizeof(EFI_GUID)); /* CHID types without a table entry yield an all-zero GUID */
+}
diff --git a/src/fundamental/chid-fundamental.h b/src/fundamental/chid-fundamental.h
new file mode 100644
index 0000000000..e8c5c1add2
--- /dev/null
+++ b/src/fundamental/chid-fundamental.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+#pragma once
+
+#include "efi-fundamental.h"
+#include "string-util-fundamental.h"
+
+#define CHID_TYPES_MAX 15
+
+typedef enum ChidSmbiosFields {
+ CHID_SMBIOS_MANUFACTURER,
+ CHID_SMBIOS_FAMILY,
+ CHID_SMBIOS_PRODUCT_NAME,
+ CHID_SMBIOS_PRODUCT_SKU,
+ CHID_SMBIOS_BASEBOARD_MANUFACTURER,
+ CHID_SMBIOS_BASEBOARD_PRODUCT,
+ _CHID_SMBIOS_FIELDS_MAX,
+} ChidSmbiosFields;
+
+/* CHID (also called HWID by fwupd) is described at https://github.com/fwupd/fwupd/blob/main/docs/hwids.md */
+void chid_calculate(const char16_t *const smbios_fields[static _CHID_SMBIOS_FIELDS_MAX], EFI_GUID ret_chids[static CHID_TYPES_MAX]);
diff --git a/src/fundamental/meson.build b/src/fundamental/meson.build
index b1522a88f8..7b72372e83 100644
--- a/src/fundamental/meson.build
+++ b/src/fundamental/meson.build
@@ -4,6 +4,7 @@ fundamental_include = include_directories('.')
fundamental_sources = files(
'bootspec-fundamental.c',
+ 'chid-fundamental.c',
'efivars-fundamental.c',
'iovec-util-fundamental.h',
'sha1-fundamental.c',
diff --git a/src/fundamental/uki.c b/src/fundamental/uki.c
index da5da1cf10..441d466a97 100644
--- a/src/fundamental/uki.c
+++ b/src/fundamental/uki.c
@@ -21,5 +21,7 @@ const char* const unified_sections[_UNIFIED_SECTION_MAX + 1] = {
[UNIFIED_SECTION_PCRSIG] = ".pcrsig",
[UNIFIED_SECTION_PCRPKEY] = ".pcrpkey",
[UNIFIED_SECTION_PROFILE] = ".profile",
+ [UNIFIED_SECTION_DTBAUTO] = ".dtbauto",
+ [UNIFIED_SECTION_HWIDS] = ".hwids",
NULL,
};
diff --git a/src/fundamental/uki.h b/src/fundamental/uki.h
index e7c59100e1..4b6195f9b7 100644
--- a/src/fundamental/uki.h
+++ b/src/fundamental/uki.h
@@ -18,6 +18,8 @@ typedef enum UnifiedSection {
UNIFIED_SECTION_PCRSIG,
UNIFIED_SECTION_PCRPKEY,
UNIFIED_SECTION_PROFILE,
+ UNIFIED_SECTION_DTBAUTO,
+ UNIFIED_SECTION_HWIDS,
_UNIFIED_SECTION_MAX,
} UnifiedSection;
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
index f19f579b48..7aea7d2581 100644
--- a/src/libsystemd/sd-event/sd-event.c
+++ b/src/libsystemd/sd-event/sd-event.c
@@ -4573,7 +4573,7 @@ static int epoll_wait_usec(
/* epoll_pwait2() was added to Linux 5.11 (2021-02-14) and to glibc in 2.35 (2022-02-03). In contrast
* to other syscalls we don't bother with our own fallback syscall wrappers on old libcs, since this
* is not that obvious to implement given the libc and kernel definitions differ in the last
- * argument. Moreover, the only reason to use it is the more accurate time-outs (which is not a
+ * argument. Moreover, the only reason to use it is the more accurate timeouts (which is not a
* biggie), let's hence rely on glibc's definitions, and fallback to epoll_pwait() when that's
* missing. */
diff --git a/src/network/networkd-json.c b/src/network/networkd-json.c
index 07d52c96c0..fd2b709d9d 100644
--- a/src/network/networkd-json.c
+++ b/src/network/networkd-json.c
@@ -561,18 +561,16 @@ static int dnr_append_json(Link *link, sd_json_variant **v) {
return r;
n_dnr = sd_dhcp_lease_get_dnr(link->dhcp_lease, &dnr);
- if (n_dnr < 0)
- return 0;
-
- FOREACH_ARRAY(res, dnr, n_dnr) {
- r = dnr_append_json_one(link,
- res,
- NETWORK_CONFIG_SOURCE_DHCP4,
- &s,
- &array);
- if (r < 0)
- return r;
- }
+ if (n_dnr > 0)
+ FOREACH_ARRAY(res, dnr, n_dnr) {
+ r = dnr_append_json_one(link,
+ res,
+ NETWORK_CONFIG_SOURCE_DHCP4,
+ &s,
+ &array);
+ if (r < 0)
+ return r;
+ }
}
if (link->dhcp6_lease && link_get_use_dnr(link, NETWORK_CONFIG_SOURCE_DHCP6)) {
@@ -585,18 +583,16 @@ static int dnr_append_json(Link *link, sd_json_variant **v) {
return r;
n_dnr = sd_dhcp6_lease_get_dnr(link->dhcp6_lease, &dnr);
- if (n_dnr < 0)
- return 0;
-
- FOREACH_ARRAY(res, dnr, n_dnr) {
- r = dnr_append_json_one(link,
- res,
- NETWORK_CONFIG_SOURCE_DHCP6,
- &s,
- &array);
- if (r < 0)
- return r;
- }
+ if (n_dnr > 0)
+ FOREACH_ARRAY(res, dnr, n_dnr) {
+ r = dnr_append_json_one(link,
+ res,
+ NETWORK_CONFIG_SOURCE_DHCP6,
+ &s,
+ &array);
+ if (r < 0)
+ return r;
+ }
}
if (link_get_use_dnr(link, NETWORK_CONFIG_SOURCE_NDISC)) {
diff --git a/src/network/networkd-manager.c b/src/network/networkd-manager.c
index f8c0da4b42..47299e3b27 100644
--- a/src/network/networkd-manager.c
+++ b/src/network/networkd-manager.c
@@ -31,6 +31,7 @@
#include "fs-util.h"
#include "initrd-util.h"
#include "local-addresses.h"
+#include "mount-util.h"
#include "netlink-util.h"
#include "network-internal.h"
#include "networkd-address-label.h"
@@ -59,7 +60,6 @@
#include "selinux-util.h"
#include "set.h"
#include "signal-util.h"
-#include "stat-util.h"
#include "strv.h"
#include "sysctl-util.h"
#include "tclass.h"
@@ -508,9 +508,11 @@ static int manager_set_keep_configuration(Manager *m) {
return 0;
}
- r = path_is_network_fs("/");
- if (r < 0)
- return log_error_errno(r, "Failed to detect if root is network filesystem: %m");
+ r = path_is_network_fs_harder("/");
+ if (r < 0) {
+ log_warning_errno(r, "Failed to detect if root is network filesystem, assuming not: %m");
+ return 0;
+ }
if (r == 0) {
m->keep_configuration = _KEEP_CONFIGURATION_INVALID;
return 0;
diff --git a/src/network/networkd-ndisc.c b/src/network/networkd-ndisc.c
index ee1e09dd69..5ab9c881f2 100644
--- a/src/network/networkd-ndisc.c
+++ b/src/network/networkd-ndisc.c
@@ -1278,7 +1278,7 @@ static int ndisc_router_process_onlink_prefix(Link *link, sd_ndisc_router *rt) {
*
* - If the prefix is already present in the host's Prefix List as the result of a previously
* received advertisement, reset its invalidation timer to the Valid Lifetime value in the Prefix
- * Information option. If the new Lifetime value is zero, time-out the prefix immediately. */
+ * Information option. If the new Lifetime value is zero, time out the prefix immediately. */
if (lifetime_usec == 0) {
r = ndisc_remove_route(route, link);
if (r < 0)
diff --git a/src/network/networkd-state-file.c b/src/network/networkd-state-file.c
index 0c9e530128..da917dd897 100644
--- a/src/network/networkd-state-file.c
+++ b/src/network/networkd-state-file.c
@@ -600,14 +600,16 @@ static void serialize_resolvers(
int r;
r = sd_dhcp_lease_get_dnr(lease, &resolvers);
- if (r < 0)
- return (void) log_debug_errno(r, "Failed to get DNR from DHCP lease, ignoring: %m");
+ if (r < 0 && r != -ENODATA)
+ log_warning_errno(r, "Failed to get DNR from DHCP lease, ignoring: %m");
- r = dns_resolvers_to_dot_strv(resolvers, r, &names);
- if (r < 0)
- return (void) log_warning_errno(r, "Failed to get DoT servers from DHCP DNR, ignoring: %m");
- if (r > 0)
- fputstrv(f, names, NULL, space);
+ if (r > 0) {
+ r = dns_resolvers_to_dot_strv(resolvers, r, &names);
+ if (r < 0)
+ return (void) log_warning_errno(r, "Failed to get DoT servers from DHCP DNR, ignoring: %m");
+ if (r > 0)
+ fputstrv(f, names, NULL, space);
+ }
}
if (lease6 && conditional6) {
@@ -616,14 +618,16 @@ static void serialize_resolvers(
int r;
r = sd_dhcp6_lease_get_dnr(lease6, &resolvers);
- if (r < 0)
- return (void) log_debug_errno(r, "Failed to get DNR from DHCPv6 lease, ignoring: %m");
+ if (r < 0 && r != -ENODATA)
+ log_warning_errno(r, "Failed to get DNR from DHCPv6 lease, ignoring: %m");
- r = dns_resolvers_to_dot_strv(resolvers, r, &names);
- if (r < 0)
- return (void) log_warning_errno(r, "Failed to get DoT servers from DHCPv6 DNR, ignoring: %m");
- if (r > 0)
- fputstrv(f, names, NULL, space);
+ if (r > 0) {
+ r = dns_resolvers_to_dot_strv(resolvers, r, &names);
+ if (r < 0)
+ return (void) log_warning_errno(r, "Failed to get DoT servers from DHCPv6 DNR, ignoring: %m");
+ if (r > 0)
+ fputstrv(f, names, NULL, space);
+ }
}
if (lvalue)
diff --git a/src/resolve/resolvectl.c b/src/resolve/resolvectl.c
index 0dd2ca5fc7..b34ade1378 100644
--- a/src/resolve/resolvectl.c
+++ b/src/resolve/resolvectl.c
@@ -3010,7 +3010,7 @@ static int verb_monitor(int argc, char *argv[], void *userdata) {
r = sd_varlink_set_relative_timeout(vl, USEC_INFINITY); /* We want the monitor to run basically forever */
if (r < 0)
- return log_error_errno(r, "Failed to set varlink time-out: %m");
+ return log_error_errno(r, "Failed to set varlink timeout: %m");
r = sd_varlink_attach_event(vl, event, SD_EVENT_PRIORITY_NORMAL);
if (r < 0)
diff --git a/src/resolve/resolved-dns-scope.c b/src/resolve/resolved-dns-scope.c
index 734728f905..cd16d2475e 100644
--- a/src/resolve/resolved-dns-scope.c
+++ b/src/resolve/resolved-dns-scope.c
@@ -1744,7 +1744,7 @@ int dns_type_suitable_for_protocol(uint16_t type, DnsProtocol protocol) {
/* Tests whether it makes sense to route queries for the specified DNS RR types to the specified
* protocol. For classic DNS pretty much all RR types are suitable, but for LLMNR/mDNS let's
* allowlist only a few that make sense. We use this when routing queries so that we can more quickly
- * return errors for queries that will almost certainly fail/time-out otherwise. For example, this
+ * return errors for queries that will almost certainly fail/time out otherwise. For example, this
* ensures that SOA, NS, or DS/DNSKEY queries are never routed to mDNS/LLMNR where they simply make
* no sense. */
diff --git a/src/resolve/resolved-dns-stream.h b/src/resolve/resolved-dns-stream.h
index 912b9bf431..d3de4ebf3d 100644
--- a/src/resolve/resolved-dns-stream.h
+++ b/src/resolve/resolved-dns-stream.h
@@ -15,7 +15,7 @@ typedef struct DnsStubListenerExtra DnsStubListenerExtra;
#include "resolved-dns-packet.h"
#include "resolved-dnstls.h"
-/* Various timeouts for establishing TCP connections. First the default time-out for that. */
+/* Various timeouts for establishing TCP connections. First the default timeout for that. */
#define DNS_STREAM_DEFAULT_TIMEOUT_USEC (10 * USEC_PER_SEC)
/* In the DNS stub, be more friendly for incoming connections, than we are to ourselves for outgoing ones */
diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c
index 90b6f233e2..06bfb90c8f 100644
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@@ -1061,7 +1061,8 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
"LogNamespace",
"RootImagePolicy",
"MountImagePolicy",
- "ExtensionImagePolicy"))
+ "ExtensionImagePolicy",
+ "PrivatePIDs"))
return bus_append_string(m, field, eq);
if (STR_IN_SET(field, "IgnoreSIGPIPE",
diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c
index 3c89a18790..8ef952a035 100644
--- a/src/shared/mount-util.c
+++ b/src/shared/mount-util.c
@@ -19,6 +19,7 @@
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
+#include "fstab-util.h"
#include "glyph-util.h"
#include "hashmap.h"
#include "initrd-util.h"
@@ -1820,3 +1821,70 @@ char* umount_and_unlink_and_free(char *p) {
(void) unlink(p);
return mfree(p);
}
+
+static int path_get_mount_info(
+ const char *path,
+ char **ret_fstype,
+ char **ret_options) {
+
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_free_ char *fstype = NULL, *options = NULL;
+ struct libmnt_fs *fs;
+ int r;
+
+ assert(path);
+
+ table = mnt_new_table();
+ if (!table)
+ return -ENOMEM;
+
+ r = mnt_table_parse_mtab(table, /* filename = */ NULL);
+ if (r < 0)
+ return r;
+
+ fs = mnt_table_find_mountpoint(table, path, MNT_ITER_FORWARD);
+ if (!fs)
+ return -EINVAL;
+
+ if (ret_fstype) {
+ fstype = strdup(strempty(mnt_fs_get_fstype(fs)));
+ if (!fstype)
+ return -ENOMEM;
+ }
+
+ if (ret_options) {
+ options = strdup(strempty(mnt_fs_get_options(fs)));
+ if (!options)
+ return -ENOMEM;
+ }
+
+ if (ret_fstype)
+ *ret_fstype = TAKE_PTR(fstype);
+ if (ret_options)
+ *ret_options = TAKE_PTR(options);
+
+ return 0;
+}
+
+int path_is_network_fs_harder(const char *path) {
+ _cleanup_free_ char *fstype = NULL, *options = NULL;
+ int r, ret;
+
+ assert(path);
+
+ ret = path_is_network_fs(path);
+ if (ret > 0)
+ return true;
+
+ r = path_get_mount_info(path, &fstype, &options);
+ if (r < 0)
+ return RET_GATHER(ret, r);
+
+ if (fstype_is_network(fstype))
+ return true;
+
+ if (fstab_test_option(options, "_netdev\0"))
+ return true;
+
+ return false;
+}
diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h
index eb068d5b44..067ed0e4d9 100644
--- a/src/shared/mount-util.h
+++ b/src/shared/mount-util.h
@@ -180,3 +180,5 @@ unsigned long credentials_fs_mount_flags(bool ro);
int mount_credentials_fs(const char *path, size_t size, bool ro);
int make_fsmount(int error_log_level, const char *what, const char *type, unsigned long flags, const char *options, int userns_fd);
+
+int path_is_network_fs_harder(const char *path);
diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c
index 4f6da39f48..28d171de33 100644
--- a/src/test/test-mount-util.c
+++ b/src/test/test-mount-util.c
@@ -537,4 +537,11 @@ TEST(bind_mount_submounts) {
assert_se(umount_recursive(b, 0) >= 0);
}
+TEST(path_is_network_fs_harder) {
+ ASSERT_OK(path_is_network_fs_harder("/"));
+ ASSERT_OK_ZERO(path_is_network_fs_harder("/dev"));
+ ASSERT_OK_ZERO(path_is_network_fs_harder("/sys"));
+ ASSERT_OK_ZERO(path_is_network_fs_harder("/run"));
+}
+
DEFINE_TEST_MAIN(LOG_DEBUG);