summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author Lennart Poettering <lennart@poettering.net> 2023-02-23 09:44:06 +0100
committer GitHub <noreply@github.com> 2023-02-23 09:44:06 +0100
commit f1e1614e10f866daa6e62366ba06fa0b10739174 (patch)
tree 68f2c05c42d1ecd77f22f129ff254812564806c5 /src
parent sd-event: fix error handling (diff)
parent test-execute: add test for PrivateNetwork= with/without mount namespacing (diff)
download systemd-f1e1614e10f866daa6e62366ba06fa0b10739174.tar.xz
systemd-f1e1614e10f866daa6e62366ba06fa0b10739174.zip
Merge pull request #26458 from yuwata/core-network-namespace-remount-sysfs
core: remount sysfs when network and mount namespace are enabled
Diffstat (limited to 'src')
-rw-r--r-- src/core/dbus-execute.c 4
-rw-r--r-- src/core/dbus-util.c 24
-rw-r--r-- src/core/dbus-util.h 1
-rw-r--r-- src/core/execute.c 33
-rw-r--r-- src/core/execute.h 3
-rw-r--r-- src/core/load-fragment-gperf.gperf.in 2
-rw-r--r-- src/core/namespace.c 49
-rw-r--r-- src/core/namespace.h 2
-rw-r--r-- src/core/socket.c 4
-rw-r--r-- src/shared/bus-get-properties.c 16
-rw-r--r-- src/shared/bus-get-properties.h 1
-rw-r--r-- src/shared/mount-util.c 185
-rw-r--r-- src/shared/mount-util.h 8
-rw-r--r-- src/test/test-execute.c 6
-rw-r--r-- src/test/test-mount-util.c 147
15 files changed, 458 insertions, 27 deletions
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c
index b07b5775ff..8c3fa7b286 100644
--- a/src/core/dbus-execute.c
+++ b/src/core/dbus-execute.c
@@ -1274,7 +1274,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST),
- SD_BUS_PROPERTY("PrivateMounts", "b", bus_property_get_bool, offsetof(ExecContext, private_mounts), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("PrivateMounts", "b", bus_property_get_tristate, offsetof(ExecContext, private_mounts), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("PrivateIPC", "b", bus_property_get_bool, offsetof(ExecContext, private_ipc), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHome", "s", property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectSystem", "s", property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1933,7 +1933,7 @@ int bus_exec_context_set_transient_property(
return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error);
if (streq(name, "PrivateMounts"))
- return bus_set_transient_bool(u, name, &c->private_mounts, message, flags, error);
+ return bus_set_transient_tristate(u, name, &c->private_mounts, message, flags, error);
if (streq(name, "PrivateNetwork"))
return bus_set_transient_bool(u, name, &c->private_network, message, flags, error);
diff --git a/src/core/dbus-util.c b/src/core/dbus-util.c
index edfa0eb69a..461f6aafb2 100644
--- a/src/core/dbus-util.c
+++ b/src/core/dbus-util.c
@@ -93,6 +93,30 @@ int bus_set_transient_bool(
return 1;
}
+int bus_set_transient_tristate(
+ Unit *u,
+ const char *name,
+ int *p,
+ sd_bus_message *message,
+ UnitWriteFlags flags,
+ sd_bus_error *error) {
+
+ int v, r;
+
+ assert(p);
+
+ r = sd_bus_message_read(message, "b", &v);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ *p = v;
+ unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(v));
+ }
+
+ return 1;
+}
+
int bus_set_transient_usec_internal(
Unit *u,
const char *name,
diff --git a/src/core/dbus-util.h b/src/core/dbus-util.h
index e12631a0e2..9464b25516 100644
--- a/src/core/dbus-util.h
+++ b/src/core/dbus-util.h
@@ -241,6 +241,7 @@ int bus_set_transient_user_relaxed(Unit *u, const char *name, char **p, sd_bus_m
int bus_set_transient_path(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_string(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_bool(Unit *u, const char *name, bool *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
+int bus_set_transient_tristate(Unit *u, const char *name, int *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
int bus_set_transient_usec_internal(Unit *u, const char *name, usec_t *p, bool fix_0, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) {
return bus_set_transient_usec_internal(u, name, p, false, message, flags, error);
diff --git a/src/core/execute.c b/src/core/execute.c
index 9bfeacfb62..3971695fb6 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -2023,6 +2023,18 @@ static int build_pass_environment(const ExecContext *c, char ***ret) {
return 0;
}
+bool exec_needs_network_namespace(const ExecContext *context) {
+ assert(context);
+
+ return context->private_network || context->network_namespace_path;
+}
+
+static bool exec_needs_ipc_namespace(const ExecContext *context) {
+ assert(context);
+
+ return context->private_ipc || context->ipc_namespace_path;
+}
+
bool exec_needs_mount_namespace(
const ExecContext *context,
const ExecParameters *params,
@@ -2062,7 +2074,8 @@ bool exec_needs_mount_namespace(
return true;
if (context->private_devices ||
- context->private_mounts ||
+ context->private_mounts > 0 ||
+ (context->private_mounts < 0 && exec_needs_network_namespace(context)) ||
context->protect_system != PROTECT_SYSTEM_NO ||
context->protect_home != PROTECT_HOME_NO ||
context->protect_kernel_tunables ||
@@ -2071,8 +2084,7 @@ bool exec_needs_mount_namespace(
context->protect_control_groups ||
context->protect_proc != PROTECT_PROC_DEFAULT ||
context->proc_subset != PROC_SUBSET_ALL ||
- context->private_ipc ||
- context->ipc_namespace_path)
+ exec_needs_ipc_namespace(context))
return true;
if (context->root_directory) {
@@ -3591,12 +3603,12 @@ static int apply_mount_namespace(
.protect_kernel_logs = context->protect_kernel_logs,
.protect_hostname = context->protect_hostname,
.mount_apivfs = exec_context_get_effective_mount_apivfs(context),
- .private_mounts = context->private_mounts,
.protect_home = context->protect_home,
.protect_system = context->protect_system,
.protect_proc = context->protect_proc,
.proc_subset = context->proc_subset,
- .private_ipc = context->private_ipc || context->ipc_namespace_path,
+ .private_network = exec_needs_network_namespace(context),
+ .private_ipc = exec_needs_ipc_namespace(context),
/* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. */
.mount_nosuid = context->no_new_privileges && !mac_selinux_use(),
};
@@ -4823,7 +4835,7 @@ static int exec_child(
}
}
- if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
+ if (exec_needs_network_namespace(context) && runtime && runtime->netns_storage_socket[0] >= 0) {
if (ns_type_supported(NAMESPACE_NET)) {
r = setup_shareable_ns(runtime->netns_storage_socket, CLONE_NEWNET);
@@ -4842,7 +4854,7 @@ static int exec_child(
log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
}
- if ((context->private_ipc || context->ipc_namespace_path) && runtime && runtime->ipcns_storage_socket[0] >= 0) {
+ if (exec_needs_ipc_namespace(context) && runtime && runtime->ipcns_storage_socket[0] >= 0) {
if (ns_type_supported(NAMESPACE_IPC)) {
r = setup_shareable_ns(runtime->ipcns_storage_socket, CLONE_NEWIPC);
@@ -5478,6 +5490,7 @@ void exec_context_init(ExecContext *c) {
c->tty_rows = UINT_MAX;
c->tty_cols = UINT_MAX;
numa_policy_reset(&c->numa_policy);
+ c->private_mounts = -1;
}
void exec_context_done(ExecContext *c) {
@@ -6841,7 +6854,7 @@ static int exec_runtime_make(
assert(id);
/* It is not necessary to create ExecRuntime object. */
- if (!c->private_network && !c->private_ipc && !c->private_tmp && !c->network_namespace_path) {
+ if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && !c->private_tmp) {
*ret = NULL;
return 0;
}
@@ -6855,12 +6868,12 @@ static int exec_runtime_make(
return r;
}
- if (c->private_network || c->network_namespace_path) {
+ if (exec_needs_network_namespace(c)) {
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
return -errno;
}
- if (c->private_ipc || c->ipc_namespace_path) {
+ if (exec_needs_ipc_namespace(c)) {
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ipcns_storage_socket) < 0)
return -errno;
}
diff --git a/src/core/execute.h b/src/core/execute.h
index 325f340862..79f98daf30 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -301,11 +301,11 @@ struct ExecContext {
ProtectProc protect_proc; /* hidepid= */
ProcSubset proc_subset; /* subset= */
+ int private_mounts;
bool private_tmp;
bool private_network;
bool private_devices;
bool private_users;
- bool private_mounts;
bool private_ipc;
bool protect_kernel_tunables;
bool protect_kernel_modules;
@@ -531,3 +531,4 @@ const char* exec_resource_type_to_string(ExecDirectoryType i) _const_;
ExecDirectoryType exec_resource_type_from_string(const char *s) _pure_;
bool exec_needs_mount_namespace(const ExecContext *context, const ExecParameters *params, const ExecRuntime *runtime);
+bool exec_needs_network_namespace(const ExecContext *context);
diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in
index 58ace46279..2a8a10819b 100644
--- a/src/core/load-fragment-gperf.gperf.in
+++ b/src/core/load-fragment-gperf.gperf.in
@@ -126,7 +126,7 @@
{{type}}.LogNamespace, config_parse_log_namespace, 0, offsetof({{type}}, exec_context)
{{type}}.PrivateNetwork, config_parse_bool, 0, offsetof({{type}}, exec_context.private_network)
{{type}}.PrivateUsers, config_parse_bool, 0, offsetof({{type}}, exec_context.private_users)
-{{type}}.PrivateMounts, config_parse_bool, 0, offsetof({{type}}, exec_context.private_mounts)
+{{type}}.PrivateMounts, config_parse_tristate, 0, offsetof({{type}}, exec_context.private_mounts)
{{type}}.PrivateIPC, config_parse_bool, 0, offsetof({{type}}, exec_context.private_ipc)
{{type}}.ProtectSystem, config_parse_protect_system, 0, offsetof({{type}}, exec_context.protect_system)
{{type}}.ProtectHome, config_parse_protect_home, 0, offsetof({{type}}, exec_context.protect_home)
diff --git a/src/core/namespace.c b/src/core/namespace.c
index feae4dcbbf..3b0896039b 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -61,7 +61,8 @@ typedef enum MountMode {
PRIVATE_DEV,
BIND_DEV,
EMPTY_DIR,
- SYSFS,
+ PRIVATE_SYSFS,
+ BIND_SYSFS,
PROCFS,
READONLY,
READWRITE,
@@ -103,7 +104,7 @@ typedef struct MountEntry {
static const MountEntry apivfs_table[] = {
{ "/proc", PROCFS, false },
{ "/dev", BIND_DEV, false },
- { "/sys", SYSFS, false },
+ { "/sys", BIND_SYSFS, false },
{ "/run", RUN, false, .options_const = "mode=0755" TMPFS_LIMITS_RUN, .flags = MS_NOSUID|MS_NODEV|MS_STRICTATIME },
};
@@ -233,7 +234,8 @@ static const char * const mount_mode_table[_MOUNT_MODE_MAX] = {
[PRIVATE_DEV] = "private-dev",
[BIND_DEV] = "bind-dev",
[EMPTY_DIR] = "empty",
- [SYSFS] = "sysfs",
+ [PRIVATE_SYSFS] = "private-sysfs",
+ [BIND_SYSFS] = "bind-sysfs",
[PROCFS] = "procfs",
[READONLY] = "read-only",
[READWRITE] = "read-write",
@@ -288,7 +290,7 @@ static bool mount_entry_read_only(const MountEntry *p) {
static bool mount_entry_noexec(const MountEntry *p) {
assert(p);
- return p->noexec || IN_SET(p->mode, NOEXEC, INACCESSIBLE, SYSFS, PROCFS);
+ return p->noexec || IN_SET(p->mode, NOEXEC, INACCESSIBLE, PRIVATE_SYSFS, BIND_SYSFS, PROCFS);
}
static bool mount_entry_exec(const MountEntry *p) {
@@ -1053,7 +1055,30 @@ static int mount_bind_dev(const MountEntry *m) {
return 1;
}
-static int mount_sysfs(const MountEntry *m) {
+static int mount_private_sysfs(const MountEntry *m) {
+ const char *p = mount_entry_path(ASSERT_PTR(m));
+ int r;
+
+ (void) mkdir_p_label(p, 0755);
+
+ r = remount_sysfs(p);
+ if (r < 0 && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
+ /* Running with an unprivileged user (PrivateUsers=yes), or the kernel seems old. Falling
+ * back to bind mount the host's version so that we get all child mounts of it, too. */
+
+ log_debug_errno(r, "Failed to remount sysfs on %s, falling back to bind mount: %m", p);
+
+ (void) umount_recursive(p, 0);
+
+ r = mount_nofollow_verbose(LOG_DEBUG, "/sys", p, NULL, MS_BIND|MS_REC, NULL);
+ }
+ if (r < 0)
+ return log_debug_errno(r, "Failed to remount sysfs on %s: %m", p);
+
+ return 1;
+}
+
+static int mount_bind_sysfs(const MountEntry *m) {
int r;
assert(m);
@@ -1483,8 +1508,11 @@ static int apply_one_mount(
case BIND_DEV:
return mount_bind_dev(m);
- case SYSFS:
- return mount_sysfs(m);
+ case PRIVATE_SYSFS:
+ return mount_private_sysfs(m);
+
+ case BIND_SYSFS:
+ return mount_bind_sysfs(m);
case PROCFS:
return mount_procfs(m, ns_info);
@@ -1720,6 +1748,7 @@ static size_t namespace_calculate_mounts(
!!log_namespace +
setup_propagate + /* /run/systemd/incoming */
!!notify_socket +
+ ns_info->private_network + /* /sys */
ns_info->private_ipc; /* /dev/mqueue */
}
@@ -2326,6 +2355,12 @@ int setup_namespace(
};
}
+ if (ns_info->private_network)
+ *(m++) = (MountEntry) {
+ .path_const = "/sys",
+ .mode = PRIVATE_SYSFS,
+ };
+
if (ns_info->private_ipc)
*(m++) = (MountEntry) {
.path_const = "/dev/mqueue",
diff --git a/src/core/namespace.h b/src/core/namespace.h
index 2ba5970159..74f78784b6 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -55,13 +55,13 @@ typedef enum ProcSubset {
struct NamespaceInfo {
bool ignore_protect_paths;
bool private_dev;
- bool private_mounts;
bool protect_control_groups;
bool protect_kernel_tunables;
bool protect_kernel_modules;
bool protect_kernel_logs;
bool mount_apivfs;
bool protect_hostname;
+ bool private_network;
bool private_ipc;
bool mount_nosuid;
ProtectHome protect_home;
diff --git a/src/core/socket.c b/src/core/socket.c
index 8241ba050b..3dd726d52a 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -1494,7 +1494,7 @@ static int fork_needed(const SocketAddress *address, const ExecContext *context)
return true;
}
- return context->private_network || context->network_namespace_path;
+ return exec_needs_network_namespace(context);
}
static int socket_address_listen_in_cgroup(
@@ -1557,7 +1557,7 @@ static int socket_address_listen_in_cgroup(
pair[0] = safe_close(pair[0]);
- if ((s->exec_context.private_network || s->exec_context.network_namespace_path) &&
+ if (exec_needs_network_namespace(&s->exec_context) &&
s->exec_runtime &&
s->exec_runtime->netns_storage_socket[0] >= 0) {
diff --git a/src/shared/bus-get-properties.c b/src/shared/bus-get-properties.c
index 8b4f66b22e..3d0887e6df 100644
--- a/src/shared/bus-get-properties.c
+++ b/src/shared/bus-get-properties.c
@@ -38,6 +38,22 @@ int bus_property_set_bool(
return 0;
}
+int bus_property_get_tristate(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ /* Defaults to false. */
+
+ int b = (*(int*) userdata) > 0;
+
+ return sd_bus_message_append_basic(reply, 'b', &b);
+}
+
int bus_property_get_id128(
sd_bus *bus,
const char *path,
diff --git a/src/shared/bus-get-properties.h b/src/shared/bus-get-properties.h
index d048913877..44cd584bdc 100644
--- a/src/shared/bus-get-properties.h
+++ b/src/shared/bus-get-properties.h
@@ -7,6 +7,7 @@
int bus_property_get_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
int bus_property_set_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error);
+int bus_property_get_tristate(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
int bus_property_get_id128(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error);
#define bus_property_get_usec ((sd_bus_property_get_t) NULL)
diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c
index 8aad531a4d..e583261f45 100644
--- a/src/shared/mount-util.c
+++ b/src/shared/mount-util.c
@@ -34,6 +34,7 @@
#include "path-util.h"
#include "process-util.h"
#include "set.h"
+#include "sort-util.h"
#include "stat-util.h"
#include "stdio-util.h"
#include "string-table.h"
@@ -1167,6 +1168,190 @@ int remount_idmap(
return 0;
}
+typedef struct SubMount {
+ char *path;
+ int mount_fd;
+} SubMount;
+
+static void sub_mount_clear(SubMount *s) {
+ assert(s);
+
+ s->path = mfree(s->path);
+ s->mount_fd = safe_close(s->mount_fd);
+}
+
+static void sub_mount_array_free(SubMount *s, size_t n) {
+ assert(s || n == 0);
+
+ for (size_t i = 0; i < n; i++)
+ sub_mount_clear(s + i);
+
+ free(s);
+}
+
+static int sub_mount_compare(const SubMount *a, const SubMount *b) {
+ assert(a);
+ assert(b);
+ assert(a->path);
+ assert(b->path);
+
+ return path_compare(a->path, b->path);
+}
+
+static void sub_mount_drop(SubMount *s, size_t n) {
+ assert(s || n == 0);
+
+ for (size_t m = 0, i = 1; i < n; i++) {
+ if (path_startswith(s[i].path, s[m].path))
+ sub_mount_clear(s + i);
+ else
+ m = i;
+ }
+}
+
+static int get_sub_mounts(const char *prefix, SubMount **ret_mounts, size_t *ret_n_mounts) {
+ _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL;
+ _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL;
+ SubMount *mounts = NULL;
+ size_t n = 0;
+ int r;
+
+ CLEANUP_ARRAY(mounts, n, sub_mount_array_free);
+
+ assert(prefix);
+ assert(ret_mounts);
+ assert(ret_n_mounts);
+
+ r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m");
+
+ for (;;) {
+ _cleanup_close_ int mount_fd = -EBADF;
+ _cleanup_free_ char *p = NULL;
+ struct libmnt_fs *fs;
+ const char *path;
+ int id1, id2;
+
+ r = mnt_table_next_fs(table, iter, &fs);
+ if (r == 1)
+ break; /* EOF */
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m");
+
+ path = mnt_fs_get_target(fs);
+ if (!path)
+ continue;
+
+ if (isempty(path_startswith(path, prefix)))
+ continue;
+
+ id1 = mnt_fs_get_id(fs);
+ r = path_get_mnt_id(path, &id2);
+ if (r < 0) {
+ log_debug_errno(r, "Failed to get mount ID of '%s', ignoring: %m", path);
+ continue;
+ }
+ if (id1 != id2) {
+ /* The path may be hidden by another over-mount or already remounted. */
+ log_debug("The mount IDs of '%s' obtained by libmount and path_get_mnt_id() are different (%i vs %i), ignoring.",
+ path, id1, id2);
+ continue;
+ }
+
+ mount_fd = open_tree(AT_FDCWD, path, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_RECURSIVE);
+ if (mount_fd < 0) {
+ if (errno == ENOENT) /* The path may be hidden by another over-mount or already unmounted. */
+ continue;
+
+ return log_debug_errno(errno, "Failed to open tree of mounted filesystem '%s': %m", path);
+ }
+
+ p = strdup(path);
+ if (!p)
+ return log_oom_debug();
+
+ if (!GREEDY_REALLOC(mounts, n + 1))
+ return log_oom_debug();
+
+ mounts[n++] = (SubMount) {
+ .path = TAKE_PTR(p),
+ .mount_fd = TAKE_FD(mount_fd),
+ };
+ }
+
+ typesafe_qsort(mounts, n, sub_mount_compare);
+ sub_mount_drop(mounts, n);
+
+ *ret_mounts = TAKE_PTR(mounts);
+ *ret_n_mounts = n;
+ return 0;
+}
+
+static int move_sub_mounts(SubMount *mounts, size_t n) {
+ assert(mounts || n == 0);
+
+ for (size_t i = 0; i < n; i++) {
+ if (!mounts[i].path || mounts[i].mount_fd < 0)
+ continue;
+
+ (void) mkdir_p_label(mounts[i].path, 0755);
+
+ if (move_mount(mounts[i].mount_fd, "", AT_FDCWD, mounts[i].path, MOVE_MOUNT_F_EMPTY_PATH) < 0)
+ return log_debug_errno(errno, "Failed to move mount_fd to '%s': %m", mounts[i].path);
+ }
+
+ return 0;
+}
+
+int remount_and_move_sub_mounts(
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options) {
+
+ SubMount *mounts = NULL; /* avoid false maybe-uninitialized warning */
+ size_t n = 0; /* avoid false maybe-uninitialized warning */
+ int r;
+
+ CLEANUP_ARRAY(mounts, n, sub_mount_array_free);
+
+ assert(where);
+
+ /* This is useful when creating a new network namespace. Unlike procfs, we need to remount sysfs,
+ * otherwise properties of the network interfaces in the main network namespace are still accessible
+ * through the old sysfs, e.g. /sys/class/net/eth0. All sub-mounts previously mounted on the sysfs
+ * are moved onto the new sysfs mount. */
+
+ r = path_is_mount_point(where, NULL, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to determine if '%s' is a mountpoint: %m", where);
+ if (r == 0)
+ /* Shortcut. Simply mount the requested filesystem. */
+ return mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options);
+
+ /* Get the list of sub-mounts and duplicate them. */
+ r = get_sub_mounts(where, &mounts, &n);
+ if (r < 0)
+ return r;
+
+ /* Then, unmount the mount and its sub-mounts. */
+ (void) umount_recursive(where, 0);
+
+ /* Remount the target filesystem. */
+ r = mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options);
+ if (r < 0)
+ return r;
+
+ /* Finally, move all the sub-mounts onto the new target mount point. */
+ return move_sub_mounts(mounts, n);
+}
+
+int remount_sysfs(const char *where) {
+ return remount_and_move_sub_mounts("sysfs", where, "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+}
+
int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode) {
assert(st);
assert(dest);
diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h
index 7554bf828e..84ea4b6392 100644
--- a/src/shared/mount-util.h
+++ b/src/shared/mount-util.h
@@ -105,6 +105,14 @@ typedef enum RemountIdmapping {
int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping);
+int remount_and_move_sub_mounts(
+ const char *what,
+ const char *where,
+ const char *type,
+ unsigned long flags,
+ const char *options);
+int remount_sysfs(const char *where);
+
/* Creates a mount point (not parents) based on the source path or stat - ie, a file or a directory */
int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode);
int make_mount_point_inode_from_path(const char *source, const char *dest, mode_t mode);
diff --git a/src/test/test-execute.c b/src/test/test-execute.c
index 7363ea95db..7df3be4a7c 100644
--- a/src/test/test-execute.c
+++ b/src/test/test-execute.c
@@ -1052,7 +1052,7 @@ static void test_exec_ambientcapabilities(Manager *m) {
}
static void test_exec_privatenetwork(Manager *m) {
- int r;
+ int r, status;
r = find_executable("ip", NULL);
if (r < 0) {
@@ -1060,7 +1060,9 @@ static void test_exec_privatenetwork(Manager *m) {
return;
}
- test(m, "exec-privatenetwork-yes.service", can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_NETWORK : EXIT_FAILURE, CLD_EXITED);
+ status = can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_NETWORK : EXIT_FAILURE;
+ test(m, "exec-privatenetwork-yes-privatemounts-no.service", status, CLD_EXITED);
+ test(m, "exec-privatenetwork-yes-privatemounts-yes.service", status, CLD_EXITED);
}
static void test_exec_oomscoreadjust(Manager *m) {
diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c
index fddf70584f..405cdf557a 100644
--- a/src/test/test-mount-util.c
+++ b/src/test/test-mount-util.c
@@ -8,6 +8,7 @@
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
+#include "missing_magic.h"
#include "missing_mount.h"
#include "mkdir.h"
#include "mount-util.h"
@@ -16,11 +17,142 @@
#include "path-util.h"
#include "process-util.h"
#include "rm-rf.h"
+#include "stat-util.h"
#include "string-util.h"
#include "strv.h"
#include "tests.h"
#include "tmpfile-util.h"
+TEST(remount_and_move_sub_mounts) {
+ int r;
+
+ if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0)
+ return (void) log_tests_skipped("not running privileged");
+
+ r = safe_fork("(remount-and-move-sub-mounts)",
+ FORK_RESET_SIGNALS |
+ FORK_CLOSE_ALL_FDS |
+ FORK_DEATHSIG |
+ FORK_WAIT |
+ FORK_REOPEN_LOG |
+ FORK_LOG |
+ FORK_NEW_MOUNTNS |
+ FORK_MOUNTNS_SLAVE,
+ NULL);
+ assert_se(r >= 0);
+ if (r == 0) {
+ _cleanup_free_ char *d = NULL, *fn = NULL;
+
+ assert_se(mkdtemp_malloc(NULL, &d) >= 0);
+
+ assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);
+
+ assert_se(fn = path_join(d, "memo"));
+ assert_se(write_string_file(fn, d, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
+ assert_se(access(fn, F_OK) >= 0);
+
+ /* Create fs tree */
+ FOREACH_STRING(p, "sub1", "sub1/hoge", "sub1/foo", "sub2", "sub2/aaa", "sub2/bbb") {
+ _cleanup_free_ char *where = NULL, *filename = NULL;
+
+ assert_se(where = path_join(d, p));
+ assert_se(mkdir_p(where, 0755) >= 0);
+ assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);
+
+ assert_se(filename = path_join(where, "memo"));
+ assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
+ assert_se(access(filename, F_OK) >= 0);
+ }
+
+ /* Hide sub1. */
+ FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo") {
+ _cleanup_free_ char *where = NULL, *filename = NULL;
+
+ assert_se(where = path_join(d, p));
+ assert_se(mkdir_p(where, 0755) >= 0);
+ assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0);
+
+ assert_se(filename = path_join(where, "memo"));
+ assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0);
+ assert_se(access(filename, F_OK) >= 0);
+ }
+
+ /* Remount the main fs. */
+ r = remount_and_move_sub_mounts("tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL);
+ if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) {
+ log_tests_skipped_errno(r, "The kernel seems too old: %m");
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Check the file in the main fs does not exist. */
+ assert_se(access(fn, F_OK) < 0 && errno == ENOENT);
+
+ /* Check the files in sub-mounts are kept. */
+ FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo", "sub2", "sub2/aaa", "sub2/bbb") {
+ _cleanup_free_ char *where = NULL, *filename = NULL, *content = NULL;
+
+ assert_se(where = path_join(d, p));
+ assert_se(filename = path_join(where, "memo"));
+ assert_se(read_full_file(filename, &content, NULL) >= 0);
+ assert_se(streq(content, where));
+ }
+
+ /* Check that the sub-mounts previously hidden by the over-mount on sub1 are dropped. */
+ FOREACH_STRING(p, "sub1/hoge", "sub1/foo") {
+ _cleanup_free_ char *where = NULL;
+
+ assert_se(where = path_join(d, p));
+ assert_se(access(where, F_OK) < 0 && errno == ENOENT);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+}
+
+TEST(remount_sysfs) {
+ int r;
+
+ if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0)
+ return (void) log_tests_skipped("not running privileged");
+
+ if (path_is_fs_type("/sys", SYSFS_MAGIC) <= 0)
+ return (void) log_tests_skipped("sysfs is not mounted on /sys");
+
+ if (access("/sys/class/net/dummy-test-mnt", F_OK) < 0)
+ return (void) log_tests_skipped_errno(errno, "The network interface dummy-test-mnt does not exit");
+
+ r = safe_fork("(remount-sysfs)",
+ FORK_RESET_SIGNALS |
+ FORK_CLOSE_ALL_FDS |
+ FORK_DEATHSIG |
+ FORK_WAIT |
+ FORK_REOPEN_LOG |
+ FORK_LOG |
+ FORK_NEW_MOUNTNS |
+ FORK_MOUNTNS_SLAVE,
+ NULL);
+ assert_se(r >= 0);
+ if (r == 0) {
+ assert_se(unshare(CLONE_NEWNET) >= 0);
+
+ /* Even unshare()ed, the interfaces in the main namespace can be accessed through sysfs. */
+ assert_se(access("/sys/class/net/lo", F_OK) >= 0);
+ assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) >= 0);
+
+ r = remount_sysfs("/sys");
+ if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) {
+ log_tests_skipped_errno(r, "The kernel seems too old: %m");
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* After remounting sysfs, the interfaces in the main namespace cannot be accessed. */
+ assert_se(access("/sys/class/net/lo", F_OK) >= 0);
+ assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) < 0 && errno == ENOENT);
+
+ _exit(EXIT_SUCCESS);
+ }
+}
+
TEST(mount_option_mangle) {
char *opts = NULL;
unsigned long f;
@@ -256,4 +388,17 @@ TEST(make_mount_point_inode) {
assert_se(!(S_IXOTH & st.st_mode));
}
-DEFINE_TEST_MAIN(LOG_DEBUG);
+static int intro(void) {
+ /* Create a dummy network interface for testing remount_sysfs(). */
+ (void) system("ip link add dummy-test-mnt type dummy");
+
+ return 0;
+}
+
+static int outro(void) {
+ (void) system("ip link del dummy-test-mnt");
+
+ return 0;
+}
+
+DEFINE_TEST_MAIN_FULL(LOG_DEBUG, intro, outro);