From 18ead2b03d9df251ab682539648a556ace239dc7 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 22 Nov 2024 17:11:29 +0100 Subject: namespace-util: add generic namespace_is_init() call --- src/basic/missing_namespace.h | 12 ++++++++++++ src/basic/namespace-util.c | 39 +++++++++++++++++++++++++++++++-------- src/basic/namespace-util.h | 3 +++ src/test/test-namespace.c | 14 ++++++++++++++ 4 files changed, 60 insertions(+), 8 deletions(-) create mode 100644 src/basic/missing_namespace.h (limited to 'src') diff --git a/src/basic/missing_namespace.h b/src/basic/missing_namespace.h new file mode 100644 index 0000000000..318c0143b8 --- /dev/null +++ b/src/basic/missing_namespace.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* Root namespace inode numbers, as per include/linux/proc_ns.h in the kernel source tree, since v3.8: + * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=98f842e675f96ffac96e6c50315790912b2812be */ + +#define PROC_IPC_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFF)) +#define PROC_UTS_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFE)) +#define PROC_USER_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFD)) +#define PROC_PID_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFC)) +#define PROC_CGROUP_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFB)) +#define PROC_TIME_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFA)) diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c index 2c61506149..27f6ec34e4 100644 --- a/src/basic/namespace-util.c +++ b/src/basic/namespace-util.c @@ -12,6 +12,7 @@ #include "fileio.h" #include "missing_fs.h" #include "missing_magic.h" +#include "missing_namespace.h" #include "missing_sched.h" #include "missing_syscall.h" #include "mountpoint-util.h" @@ -23,17 +24,17 @@ #include "user-util.h" const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = { - [NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, }, - [NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, }, - [NAMESPACE_NET] = { 
"net", "ns/net", CLONE_NEWNET, }, + [NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PROC_CGROUP_INIT_INO }, + [NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, PROC_IPC_INIT_INO }, + [NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, 0 }, /* So, the mount namespace flag is called CLONE_NEWNS for historical * reasons. Let's expose it here under a more explanatory name: "mnt". * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */ - [NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, }, - [NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, }, - [NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, }, - [NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, }, - [NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, }, + [NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, 0 }, + [NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, PROC_PID_INIT_INO }, + [NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, PROC_USER_INIT_INO }, + [NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, PROC_UTS_INIT_INO }, + [NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, PROC_TIME_INIT_INO }, { /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ }, }; @@ -479,6 +480,28 @@ int namespace_open_by_type(NamespaceType type) { return fd; } +int namespace_is_init(NamespaceType type) { + int r; + + assert(type >= 0); + assert(type <= _NAMESPACE_TYPE_MAX); + + if (namespace_info[type].root_inode == 0) + return -EBADR; /* Cannot answer this question */ + + const char *p = pid_namespace_path(0, type); + + struct stat st; + r = RET_NERRNO(stat(p, &st)); + if (r == -ENOENT) + /* If the /proc/ns/ API is not around in /proc/ then ns is off in the kernel and we are in the init ns */ + return proc_mounted() == 0 ? 
-ENOSYS : true; + if (r < 0) + return r; + + return st.st_ino == namespace_info[type].root_inode; +} + int is_our_namespace(int fd, NamespaceType request_type) { int clone_flag; diff --git a/src/basic/namespace-util.h b/src/basic/namespace-util.h index 105bab6fdb..e92d407864 100644 --- a/src/basic/namespace-util.h +++ b/src/basic/namespace-util.h @@ -24,6 +24,7 @@ extern const struct namespace_info { const char *proc_name; const char *proc_path; unsigned int clone_flag; + ino_t root_inode; } namespace_info[_NAMESPACE_TYPE_MAX + 1]; int pidref_namespace_open( @@ -74,6 +75,8 @@ int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_r int namespace_open_by_type(NamespaceType type); +int namespace_is_init(NamespaceType type); + int is_our_namespace(int fd, NamespaceType type); int is_idmapping_supported(const char *path); diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index 37bce0ae1d..4d8efb23bd 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -213,6 +213,20 @@ TEST(idmapping_supported) { assert_se(is_idmapping_supported("/etc") >= 0); } +TEST(namespace_is_init) { + int r; + + for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++) { + r = namespace_is_init(t); + if (r == -EBADR) + log_info_errno(r, "In root namespace of type '%s': don't know", namespace_info[t].proc_name); + else { + ASSERT_OK(r); + log_info("In root namespace of type '%s': %s", namespace_info[t].proc_name, yes_no(r)); + } + } +} + static int intro(void) { if (!have_namespaces()) return log_tests_skipped("Don't have namespace support"); -- cgit v1.2.3 From 193bf42ab0721384570bb6913e5f38385a9105e7 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 21 Nov 2024 00:47:39 +0000 Subject: detect-virt: check the inode number of the pid namespace The inode number of the root pid namespace is hardcoded in the kernel to 0xEFFFFFFC since 3.8, so check the inode number of our pid namespace if all else fails. 
If it's not 0xEFFFFFFC then we are in a pid namespace, hence a container environment. Fixes https://github.com/systemd/systemd/issues/35249 [Reworked by Lennart, to make use of namespace_is_init()] --- src/basic/virt.c | 19 +++++++++++++++++++ test/units/TEST-74-AUX-UTILS.detect-virt.sh | 4 ++++ 2 files changed, 23 insertions(+) (limited to 'src') diff --git a/src/basic/virt.c b/src/basic/virt.c index fd0c353791..7792d64f16 100644 --- a/src/basic/virt.c +++ b/src/basic/virt.c @@ -645,6 +645,16 @@ static int running_in_cgroupns(void) { } } +static int running_in_pidns(void) { + int r; + + r = namespace_is_init(NAMESPACE_PID); + if (r < 0) + return log_debug_errno(r, "Failed to test if in root PID namespace, ignoring: %m"); + + return !r; +} + static Virtualization detect_container_files(void) { static const struct { const char *file_path; @@ -790,12 +800,21 @@ check_files: r = running_in_cgroupns(); if (r > 0) { + log_debug("Running in a cgroup namespace, assuming unknown container manager."); v = VIRTUALIZATION_CONTAINER_OTHER; goto finish; } if (r < 0) log_debug_errno(r, "Failed to detect cgroup namespace: %m"); + /* Finally, the root pid namespace has a hardcoded inode number of 0xEFFFFFFC since kernel 3.8, so + * if all else fails we can check the inode number of our pid namespace and compare it. */ + if (running_in_pidns() > 0) { + log_debug("Running in a pid namespace, assuming unknown container manager."); + v = VIRTUALIZATION_CONTAINER_OTHER; + goto finish; + } + /* If none of that worked, give up, assume no container manager. */ v = VIRTUALIZATION_NONE; goto finish; diff --git a/test/units/TEST-74-AUX-UTILS.detect-virt.sh b/test/units/TEST-74-AUX-UTILS.detect-virt.sh index fe1db4d2aa..a1539d9b44 100755 --- a/test/units/TEST-74-AUX-UTILS.detect-virt.sh +++ b/test/units/TEST-74-AUX-UTILS.detect-virt.sh @@ -5,3 +5,7 @@ set -o pipefail SYSTEMD_IN_CHROOT=1 systemd-detect-virt --chroot (! SYSTEMD_IN_CHROOT=0 systemd-detect-virt --chroot) + +if ! 
systemd-detect-virt -c; then + unshare --mount-proc --fork --user --pid systemd-detect-virt --container +fi -- cgit v1.2.3 From a2429f507ca72278bb6d2893b58906ab313d6e0c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 22 Nov 2024 17:37:35 +0100 Subject: virt: make use of ns inode check in running_in_userns() and running_in_cgroupns() too --- src/basic/virt.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src') diff --git a/src/basic/virt.c b/src/basic/virt.c index 7792d64f16..9dcafb9dea 100644 --- a/src/basic/virt.c +++ b/src/basic/virt.c @@ -585,6 +585,14 @@ static int running_in_cgroupns(void) { if (!cg_ns_supported()) return false; + r = namespace_is_init(NAMESPACE_CGROUP); + if (r < 0) + log_debug_errno(r, "Failed to test if in root cgroup namespace, ignoring: %m"); + else if (r > 0) + return false; + + // FIXME: We really should drop the heuristics below. + r = cg_all_unified(); if (r < 0) return r; @@ -882,6 +890,14 @@ int running_in_userns(void) { _cleanup_free_ char *line = NULL; int r; + r = namespace_is_init(NAMESPACE_USER); + if (r < 0) + log_debug_errno(r, "Failed to test if in root user namespace, ignoring: %m"); + else if (r > 0) + return false; + + // FIXME: We really should drop the heuristics below. 
+ r = userns_has_mapping("/proc/self/uid_map"); if (r != 0) return r; -- cgit v1.2.3 From 4b4af14a98b6c695e29014ec14275d2009590170 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 22 Nov 2024 17:43:17 +0100 Subject: test-namespace: tweak log message a bit --- src/test/test-namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index 4d8efb23bd..66a8bdf427 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -229,7 +229,7 @@ TEST(namespace_is_init) { static int intro(void) { if (!have_namespaces()) - return log_tests_skipped("Don't have namespace support"); + return log_tests_skipped("Don't have namespace support or lacking privileges"); return EXIT_SUCCESS; } -- cgit v1.2.3