diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2024-11-25 14:13:36 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-25 14:13:36 +0100 |
commit | d293fade24b34ccc2f5716b0ff5513e9533cf0c4 (patch) | |
tree | a687676488a82c29e770961fd0464d66929dc727 /src | |
parent | test: Dump coredumps from journal in the integration test wrapper (diff) | |
parent | test-namespace: tweak log message a bit (diff) | |
download | systemd-d293fade24b34ccc2f5716b0ff5513e9533cf0c4.tar.xz systemd-d293fade24b34ccc2f5716b0ff5513e9533cf0c4.zip |
Check inode number to see if we are in init namespace (#35306)
This is a more comprehensive fix than #35273. It also adds a test, though
only a minimal one.
Based on Luca's #35273 but generalizes the code a bit.
In v258 we really should get rid of the old heuristics around userns and
cgroupns detection, but given that we are late in the v257 cycle, this change
keeps them in.
Diffstat (limited to 'src')
-rw-r--r-- | src/basic/missing_namespace.h | 12 | ||||
-rw-r--r-- | src/basic/namespace-util.c | 39 | ||||
-rw-r--r-- | src/basic/namespace-util.h | 3 | ||||
-rw-r--r-- | src/basic/virt.c | 35 | ||||
-rw-r--r-- | src/test/test-namespace.c | 16 |
5 files changed, 96 insertions, 9 deletions
diff --git a/src/basic/missing_namespace.h b/src/basic/missing_namespace.h new file mode 100644 index 0000000000..318c0143b8 --- /dev/null +++ b/src/basic/missing_namespace.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +#pragma once + +/* Root namespace inode numbers, as per include/linux/proc_ns.h in the kernel source tree, since v3.8: + * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=98f842e675f96ffac96e6c50315790912b2812be */ + +#define PROC_IPC_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFF)) +#define PROC_UTS_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFE)) +#define PROC_USER_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFD)) +#define PROC_PID_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFC)) +#define PROC_CGROUP_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFB)) +#define PROC_TIME_INIT_INO ((ino_t) UINT32_C(0xEFFFFFFA)) diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c index 5e8908216c..5d02ac6025 100644 --- a/src/basic/namespace-util.c +++ b/src/basic/namespace-util.c @@ -12,6 +12,7 @@ #include "fileio.h" #include "missing_fs.h" #include "missing_magic.h" +#include "missing_namespace.h" #include "missing_sched.h" #include "missing_syscall.h" #include "mountpoint-util.h" @@ -23,17 +24,17 @@ #include "user-util.h" const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = { - [NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, }, - [NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, }, - [NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, }, + [NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PROC_CGROUP_INIT_INO }, + [NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, PROC_IPC_INIT_INO }, + [NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, 0 }, /* So, the mount namespace flag is called CLONE_NEWNS for historical * reasons. Let's expose it here under a more explanatory name: "mnt". * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. 
*/ - [NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, }, - [NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, }, - [NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, }, - [NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, }, - [NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, }, + [NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, 0 }, + [NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, PROC_PID_INIT_INO }, + [NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, PROC_USER_INIT_INO }, + [NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, PROC_UTS_INIT_INO }, + [NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, PROC_TIME_INIT_INO }, { /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ }, }; @@ -479,6 +480,28 @@ int namespace_open_by_type(NamespaceType type) { return fd; } +int namespace_is_init(NamespaceType type) { + int r; + + assert(type >= 0); + assert(type <= _NAMESPACE_TYPE_MAX); + + if (namespace_info[type].root_inode == 0) + return -EBADR; /* Cannot answer this question */ + + const char *p = pid_namespace_path(0, type); + + struct stat st; + r = RET_NERRNO(stat(p, &st)); + if (r == -ENOENT) + /* If the /proc/ns/<type> API is not around in /proc/ then ns is off in the kernel and we are in the init ns */ + return proc_mounted() == 0 ? 
-ENOSYS : true; + if (r < 0) + return r; + + return st.st_ino == namespace_info[type].root_inode; +} + int is_our_namespace(int fd, NamespaceType request_type) { int clone_flag; diff --git a/src/basic/namespace-util.h b/src/basic/namespace-util.h index 105bab6fdb..e92d407864 100644 --- a/src/basic/namespace-util.h +++ b/src/basic/namespace-util.h @@ -24,6 +24,7 @@ extern const struct namespace_info { const char *proc_name; const char *proc_path; unsigned int clone_flag; + ino_t root_inode; } namespace_info[_NAMESPACE_TYPE_MAX + 1]; int pidref_namespace_open( @@ -74,6 +75,8 @@ int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_r int namespace_open_by_type(NamespaceType type); +int namespace_is_init(NamespaceType type); + int is_our_namespace(int fd, NamespaceType type); int is_idmapping_supported(const char *path); diff --git a/src/basic/virt.c b/src/basic/virt.c index fd0c353791..9dcafb9dea 100644 --- a/src/basic/virt.c +++ b/src/basic/virt.c @@ -585,6 +585,14 @@ static int running_in_cgroupns(void) { if (!cg_ns_supported()) return false; + r = namespace_is_init(NAMESPACE_CGROUP); + if (r < 0) + log_debug_errno(r, "Failed to test if in root cgroup namespace, ignoring: %m"); + else if (r > 0) + return false; + + // FIXME: We really should drop the heuristics below. 
+ r = cg_all_unified(); if (r < 0) return r; @@ -645,6 +653,16 @@ static int running_in_cgroupns(void) { } } +static int running_in_pidns(void) { + int r; + + r = namespace_is_init(NAMESPACE_PID); + if (r < 0) + return log_debug_errno(r, "Failed to test if in root PID namespace, ignoring: %m"); + + return !r; +} + static Virtualization detect_container_files(void) { static const struct { const char *file_path; @@ -790,12 +808,21 @@ check_files: r = running_in_cgroupns(); if (r > 0) { + log_debug("Running in a cgroup namespace, assuming unknown container manager."); v = VIRTUALIZATION_CONTAINER_OTHER; goto finish; } if (r < 0) log_debug_errno(r, "Failed to detect cgroup namespace: %m"); + /* Finally, the root pid namespace has an hardcoded inode number of 0xEFFFFFFC since kernel 3.8, so + * if all else fails we can check the inode number of our pid namespace and compare it. */ + if (running_in_pidns() > 0) { + log_debug("Running in a pid namespace, assuming unknown container manager."); + v = VIRTUALIZATION_CONTAINER_OTHER; + goto finish; + } + /* If none of that worked, give up, assume no container manager. */ v = VIRTUALIZATION_NONE; goto finish; @@ -863,6 +890,14 @@ int running_in_userns(void) { _cleanup_free_ char *line = NULL; int r; + r = namespace_is_init(NAMESPACE_USER); + if (r < 0) + log_debug_errno(r, "Failed to test if in root user namespace, ignoring: %m"); + else if (r > 0) + return false; + + // FIXME: We really should drop the heuristics below. 
+ r = userns_has_mapping("/proc/self/uid_map"); if (r != 0) return r; diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index 37bce0ae1d..66a8bdf427 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -213,9 +213,23 @@ TEST(idmapping_supported) { assert_se(is_idmapping_supported("/etc") >= 0); } +TEST(namespace_is_init) { + int r; + + for (NamespaceType t = 0; t < _NAMESPACE_TYPE_MAX; t++) { + r = namespace_is_init(t); + if (r == -EBADR) + log_info_errno(r, "In root namespace of type '%s': don't know", namespace_info[t].proc_name); + else { + ASSERT_OK(r); + log_info("In root namespace of type '%s': %s", namespace_info[t].proc_name, yes_no(r)); + } + } +} + static int intro(void) { if (!have_namespaces()) - return log_tests_skipped("Don't have namespace support"); + return log_tests_skipped("Don't have namespace support or lacking privileges"); return EXIT_SUCCESS; } |