summary refs log tree commit diff stats
path: root/src/core/namespace.c
diff options
context:
space:
mode:
author Yu Watanabe <watanabe.yu+github@gmail.com> 2023-10-11 07:58:38 +0200
committer Yu Watanabe <watanabe.yu+github@gmail.com> 2023-10-26 12:09:46 +0200
commit 4793605db17c417d35dff1b460e6910dba3cc254 (patch)
tree 2585d3c419b38102a3692932287175a94cc46073 /src/core/namespace.c
parent core/namespace: unify logic of mounting /proc and /sys (diff)
download systemd-4793605db17c417d35dff1b460e6910dba3cc254.tar.xz
systemd-4793605db17c417d35dff1b460e6910dba3cc254.zip
core/namespace: check if we have enough privilege to mount sysfs or procfs
If we do not have enough privileges to mount a new instance of sysfs or procfs, units with e.g. PrivateNetwork=yes may fail. Let's first try to mount sysfs or procfs at a temporary mount point to check whether we have enough privileges, and only then unmount the existing instance and move the new one into place. Fixes #29526.
Diffstat (limited to 'src/core/namespace.c')
-rw-r--r-- src/core/namespace.c 41
1 files changed, 27 insertions, 14 deletions
diff --git a/src/core/namespace.c b/src/core/namespace.c
index 9202fbc1b7..38f74346e1 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -1164,33 +1164,35 @@ static int mount_private_apivfs(
const char *fstype,
const char *entry_path,
const char *bind_source,
- const char *opts) {
+ const char *opts,
+ RuntimeScope scope) {
- int r, n;
+ _cleanup_(rmdir_and_freep) char *temporary_mount = NULL;
+ int r;
assert(fstype);
assert(entry_path);
assert(bind_source);
(void) mkdir_p_label(entry_path, 0755);
- n = umount_recursive(entry_path, /* flags = */ 0);
- r = mount_nofollow_verbose(LOG_DEBUG, fstype, entry_path, fstype, MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
+ /* First, check if we have enough privileges to mount a new instance. Note, a new sysfs instance
+ * cannot be mounted on an already existing mount. Let's use a temporary place. */
+ r = create_temporary_mount_point(scope, &temporary_mount);
+ if (r < 0)
+ return r;
+
+ r = mount_nofollow_verbose(LOG_DEBUG, fstype, temporary_mount, fstype, MS_NOSUID|MS_NOEXEC|MS_NODEV, opts);
if (r == -EINVAL && opts)
/* If this failed with EINVAL then this likely means the textual hidepid= stuff for procfs is
* not supported by the kernel, and thus the per-instance hidepid= neither, which means we
* really don't want to use it, since it would affect our host's /proc mount. Hence let's
* gracefully fallback to a classic, unrestricted version. */
- r = mount_nofollow_verbose(LOG_DEBUG, fstype, entry_path, fstype, MS_NOSUID|MS_NOEXEC|MS_NODEV, /* opts = */ NULL);
+ r = mount_nofollow_verbose(LOG_DEBUG, fstype, temporary_mount, fstype, MS_NOSUID|MS_NOEXEC|MS_NODEV, /* opts = */ NULL);
if (ERRNO_IS_NEG_PRIVILEGE(r)) {
/* When we do not have enough privileges to mount a new instance, fall back to use an
* existing mount. */
- if (n > 0)
- /* The mount or some of sub-mounts are umounted in the above. Refuse incomplete tree.
- * Propagate the original error code returned by mount() in the above. */
- return r;
-
r = path_is_mount_point(entry_path, /* root = */ NULL, /* flags = */ 0);
if (r < 0)
return log_debug_errno(r, "Unable to determine whether '%s' is already mounted: %m", entry_path);
@@ -1205,15 +1207,26 @@ static int mount_private_apivfs(
} else if (r < 0)
return r;
+ /* OK. We have a new mount instance. Let's clear an existing mount and its submounts. */
+ r = umount_recursive(entry_path, /* flags = */ 0);
+ if (r < 0)
+ log_debug_errno(r, "Failed to unmount directories below '%s', ignoring: %m", entry_path);
+
+ /* Then, move the new mount instance. */
+ r = mount_nofollow_verbose(LOG_DEBUG, temporary_mount, entry_path, /* fstype = */ NULL, MS_MOVE, /* opts = */ NULL);
+ if (r < 0)
+ return r;
+
/* We mounted a new instance now. Let's bind mount the children over now. This matters for nspawn
* where a bunch of files are overmounted, in particular the boot id. */
(void) bind_mount_submounts(bind_source, entry_path);
return 0;
}
-static int mount_private_sysfs(const MountEntry *m) {
+static int mount_private_sysfs(const MountEntry *m, const NamespaceParameters *p) {
assert(m);
- return mount_private_apivfs("sysfs", mount_entry_path(m), "/sys", /* opts = */ NULL);
+ assert(p);
+ return mount_private_apivfs("sysfs", mount_entry_path(m), "/sys", /* opts = */ NULL, p->runtime_scope);
}
static int mount_procfs(const MountEntry *m, const NamespaceParameters *p) {
@@ -1257,7 +1270,7 @@ static int mount_procfs(const MountEntry *m, const NamespaceParameters *p) {
* one. i.e we don't reuse existing mounts here under any condition, we want a new instance owned by
* our user namespace and with our hidepid= settings applied. Hence, let's get rid of everything
* mounted on /proc/ first. */
- return mount_private_apivfs("proc", mount_entry_path(m), "/proc", opts);
+ return mount_private_apivfs("proc", mount_entry_path(m), "/proc", opts, p->runtime_scope);
}
static int mount_tmpfs(const MountEntry *m) {
@@ -1596,7 +1609,7 @@ static int apply_one_mount(
return mount_bind_dev(m);
case PRIVATE_SYSFS:
- return mount_private_sysfs(m);
+ return mount_private_sysfs(m, p);
case BIND_SYSFS:
return mount_bind_sysfs(m);