diff options
author | Lennart Poettering <lennart@poettering.net> | 2023-02-23 09:44:06 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-02-23 09:44:06 +0100 |
commit | f1e1614e10f866daa6e62366ba06fa0b10739174 (patch) | |
tree | 68f2c05c42d1ecd77f22f129ff254812564806c5 /src | |
parent | sd-event: fix error handling (diff) | |
parent | test-execute: add test for PrivateNetwork= with/without mount namespacing (diff) | |
download | systemd-f1e1614e10f866daa6e62366ba06fa0b10739174.tar.xz systemd-f1e1614e10f866daa6e62366ba06fa0b10739174.zip |
Merge pull request #26458 from yuwata/core-network-namespace-remount-sysfs
core: remount sysfs when network and mount namespace are enabled
Diffstat (limited to 'src')
-rw-r--r-- | src/core/dbus-execute.c | 4 | ||||
-rw-r--r-- | src/core/dbus-util.c | 24 | ||||
-rw-r--r-- | src/core/dbus-util.h | 1 | ||||
-rw-r--r-- | src/core/execute.c | 33 | ||||
-rw-r--r-- | src/core/execute.h | 3 | ||||
-rw-r--r-- | src/core/load-fragment-gperf.gperf.in | 2 | ||||
-rw-r--r-- | src/core/namespace.c | 49 | ||||
-rw-r--r-- | src/core/namespace.h | 2 | ||||
-rw-r--r-- | src/core/socket.c | 4 | ||||
-rw-r--r-- | src/shared/bus-get-properties.c | 16 | ||||
-rw-r--r-- | src/shared/bus-get-properties.h | 1 | ||||
-rw-r--r-- | src/shared/mount-util.c | 185 | ||||
-rw-r--r-- | src/shared/mount-util.h | 8 | ||||
-rw-r--r-- | src/test/test-execute.c | 6 | ||||
-rw-r--r-- | src/test/test-mount-util.c | 147 |
15 files changed, 458 insertions, 27 deletions
diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index b07b5775ff..8c3fa7b286 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -1274,7 +1274,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("PrivateMounts", "b", bus_property_get_bool, offsetof(ExecContext, private_mounts), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("PrivateMounts", "b", bus_property_get_tristate, offsetof(ExecContext, private_mounts), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateIPC", "b", bus_property_get_bool, offsetof(ExecContext, private_ipc), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectHome", "s", property_get_protect_home, offsetof(ExecContext, protect_home), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectSystem", "s", property_get_protect_system, offsetof(ExecContext, protect_system), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1933,7 +1933,7 @@ int bus_exec_context_set_transient_property( return bus_set_transient_bool(u, name, &c->private_devices, message, flags, error); if (streq(name, "PrivateMounts")) - return bus_set_transient_bool(u, name, &c->private_mounts, message, flags, error); + return bus_set_transient_tristate(u, name, &c->private_mounts, message, flags, error); if (streq(name, "PrivateNetwork")) return bus_set_transient_bool(u, name, &c->private_network, message, flags, error); diff --git a/src/core/dbus-util.c b/src/core/dbus-util.c index edfa0eb69a..461f6aafb2 100644 --- a/src/core/dbus-util.c +++ b/src/core/dbus-util.c @@ -93,6 +93,30 @@ int bus_set_transient_bool( return 1; } +int 
bus_set_transient_tristate( + Unit *u, + const char *name, + int *p, + sd_bus_message *message, + UnitWriteFlags flags, + sd_bus_error *error) { + + int v, r; + + assert(p); + + r = sd_bus_message_read(message, "b", &v); + if (r < 0) + return r; + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + *p = v; + unit_write_settingf(u, flags, name, "%s=%s", name, yes_no(v)); + } + + return 1; +} + int bus_set_transient_usec_internal( Unit *u, const char *name, diff --git a/src/core/dbus-util.h b/src/core/dbus-util.h index e12631a0e2..9464b25516 100644 --- a/src/core/dbus-util.h +++ b/src/core/dbus-util.h @@ -241,6 +241,7 @@ int bus_set_transient_user_relaxed(Unit *u, const char *name, char **p, sd_bus_m int bus_set_transient_path(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); int bus_set_transient_string(Unit *u, const char *name, char **p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); int bus_set_transient_bool(Unit *u, const char *name, bool *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); +int bus_set_transient_tristate(Unit *u, const char *name, int *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); int bus_set_transient_usec_internal(Unit *u, const char *name, usec_t *p, bool fix_0, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); static inline int bus_set_transient_usec(Unit *u, const char *name, usec_t *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error) { return bus_set_transient_usec_internal(u, name, p, false, message, flags, error); diff --git a/src/core/execute.c b/src/core/execute.c index 9bfeacfb62..3971695fb6 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -2023,6 +2023,18 @@ static int build_pass_environment(const ExecContext *c, char ***ret) { return 0; } +bool exec_needs_network_namespace(const ExecContext *context) { + assert(context); + + return context->private_network || 
context->network_namespace_path; +} + +static bool exec_needs_ipc_namespace(const ExecContext *context) { + assert(context); + + return context->private_ipc || context->ipc_namespace_path; +} + bool exec_needs_mount_namespace( const ExecContext *context, const ExecParameters *params, @@ -2062,7 +2074,8 @@ bool exec_needs_mount_namespace( return true; if (context->private_devices || - context->private_mounts || + context->private_mounts > 0 || + (context->private_mounts < 0 && exec_needs_network_namespace(context)) || context->protect_system != PROTECT_SYSTEM_NO || context->protect_home != PROTECT_HOME_NO || context->protect_kernel_tunables || @@ -2071,8 +2084,7 @@ bool exec_needs_mount_namespace( context->protect_control_groups || context->protect_proc != PROTECT_PROC_DEFAULT || context->proc_subset != PROC_SUBSET_ALL || - context->private_ipc || - context->ipc_namespace_path) + exec_needs_ipc_namespace(context)) return true; if (context->root_directory) { @@ -3591,12 +3603,12 @@ static int apply_mount_namespace( .protect_kernel_logs = context->protect_kernel_logs, .protect_hostname = context->protect_hostname, .mount_apivfs = exec_context_get_effective_mount_apivfs(context), - .private_mounts = context->private_mounts, .protect_home = context->protect_home, .protect_system = context->protect_system, .protect_proc = context->protect_proc, .proc_subset = context->proc_subset, - .private_ipc = context->private_ipc || context->ipc_namespace_path, + .private_network = exec_needs_network_namespace(context), + .private_ipc = exec_needs_ipc_namespace(context), /* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. 
*/ .mount_nosuid = context->no_new_privileges && !mac_selinux_use(), }; @@ -4823,7 +4835,7 @@ static int exec_child( } } - if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) { + if (exec_needs_network_namespace(context) && runtime && runtime->netns_storage_socket[0] >= 0) { if (ns_type_supported(NAMESPACE_NET)) { r = setup_shareable_ns(runtime->netns_storage_socket, CLONE_NEWNET); @@ -4842,7 +4854,7 @@ static int exec_child( log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring."); } - if ((context->private_ipc || context->ipc_namespace_path) && runtime && runtime->ipcns_storage_socket[0] >= 0) { + if (exec_needs_ipc_namespace(context) && runtime && runtime->ipcns_storage_socket[0] >= 0) { if (ns_type_supported(NAMESPACE_IPC)) { r = setup_shareable_ns(runtime->ipcns_storage_socket, CLONE_NEWIPC); @@ -5478,6 +5490,7 @@ void exec_context_init(ExecContext *c) { c->tty_rows = UINT_MAX; c->tty_cols = UINT_MAX; numa_policy_reset(&c->numa_policy); + c->private_mounts = -1; } void exec_context_done(ExecContext *c) { @@ -6841,7 +6854,7 @@ static int exec_runtime_make( assert(id); /* It is not necessary to create ExecRuntime object. 
*/ - if (!c->private_network && !c->private_ipc && !c->private_tmp && !c->network_namespace_path) { + if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && !c->private_tmp) { *ret = NULL; return 0; } @@ -6855,12 +6868,12 @@ static int exec_runtime_make( return r; } - if (c->private_network || c->network_namespace_path) { + if (exec_needs_network_namespace(c)) { if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0) return -errno; } - if (c->private_ipc || c->ipc_namespace_path) { + if (exec_needs_ipc_namespace(c)) { if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ipcns_storage_socket) < 0) return -errno; } diff --git a/src/core/execute.h b/src/core/execute.h index 325f340862..79f98daf30 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -301,11 +301,11 @@ struct ExecContext { ProtectProc protect_proc; /* hidepid= */ ProcSubset proc_subset; /* subset= */ + int private_mounts; bool private_tmp; bool private_network; bool private_devices; bool private_users; - bool private_mounts; bool private_ipc; bool protect_kernel_tunables; bool protect_kernel_modules; @@ -531,3 +531,4 @@ const char* exec_resource_type_to_string(ExecDirectoryType i) _const_; ExecDirectoryType exec_resource_type_from_string(const char *s) _pure_; bool exec_needs_mount_namespace(const ExecContext *context, const ExecParameters *params, const ExecRuntime *runtime); +bool exec_needs_network_namespace(const ExecContext *context); diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index 58ace46279..2a8a10819b 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ b/src/core/load-fragment-gperf.gperf.in @@ -126,7 +126,7 @@ {{type}}.LogNamespace, config_parse_log_namespace, 0, offsetof({{type}}, exec_context) {{type}}.PrivateNetwork, config_parse_bool, 0, offsetof({{type}}, exec_context.private_network) {{type}}.PrivateUsers, config_parse_bool, 0, offsetof({{type}}, exec_context.private_users) 
-{{type}}.PrivateMounts, config_parse_bool, 0, offsetof({{type}}, exec_context.private_mounts) +{{type}}.PrivateMounts, config_parse_tristate, 0, offsetof({{type}}, exec_context.private_mounts) {{type}}.PrivateIPC, config_parse_bool, 0, offsetof({{type}}, exec_context.private_ipc) {{type}}.ProtectSystem, config_parse_protect_system, 0, offsetof({{type}}, exec_context.protect_system) {{type}}.ProtectHome, config_parse_protect_home, 0, offsetof({{type}}, exec_context.protect_home) diff --git a/src/core/namespace.c b/src/core/namespace.c index feae4dcbbf..3b0896039b 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -61,7 +61,8 @@ typedef enum MountMode { PRIVATE_DEV, BIND_DEV, EMPTY_DIR, - SYSFS, + PRIVATE_SYSFS, + BIND_SYSFS, PROCFS, READONLY, READWRITE, @@ -103,7 +104,7 @@ typedef struct MountEntry { static const MountEntry apivfs_table[] = { { "/proc", PROCFS, false }, { "/dev", BIND_DEV, false }, - { "/sys", SYSFS, false }, + { "/sys", BIND_SYSFS, false }, { "/run", RUN, false, .options_const = "mode=0755" TMPFS_LIMITS_RUN, .flags = MS_NOSUID|MS_NODEV|MS_STRICTATIME }, }; @@ -233,7 +234,8 @@ static const char * const mount_mode_table[_MOUNT_MODE_MAX] = { [PRIVATE_DEV] = "private-dev", [BIND_DEV] = "bind-dev", [EMPTY_DIR] = "empty", - [SYSFS] = "sysfs", + [PRIVATE_SYSFS] = "private-sysfs", + [BIND_SYSFS] = "bind-sysfs", [PROCFS] = "procfs", [READONLY] = "read-only", [READWRITE] = "read-write", @@ -288,7 +290,7 @@ static bool mount_entry_read_only(const MountEntry *p) { static bool mount_entry_noexec(const MountEntry *p) { assert(p); - return p->noexec || IN_SET(p->mode, NOEXEC, INACCESSIBLE, SYSFS, PROCFS); + return p->noexec || IN_SET(p->mode, NOEXEC, INACCESSIBLE, PRIVATE_SYSFS, BIND_SYSFS, PROCFS); } static bool mount_entry_exec(const MountEntry *p) { @@ -1053,7 +1055,30 @@ static int mount_bind_dev(const MountEntry *m) { return 1; } -static int mount_sysfs(const MountEntry *m) { +static int mount_private_sysfs(const MountEntry *m) { + const char 
*p = mount_entry_path(ASSERT_PTR(m)); + int r; + + (void) mkdir_p_label(p, 0755); + + r = remount_sysfs(p); + if (r < 0 && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) { + /* Running with an unprivileged user (PrivateUsers=yes), or the kernel seems old. Falling + * back to bind mount the host's version so that we get all child mounts of it, too. */ + + log_debug_errno(r, "Failed to remount sysfs on %s, falling back to bind mount: %m", p); + + (void) umount_recursive(p, 0); + + r = mount_nofollow_verbose(LOG_DEBUG, "/sys", p, NULL, MS_BIND|MS_REC, NULL); + } + if (r < 0) + return log_debug_errno(r, "Failed to remount sysfs on %s: %m", p); + + return 1; +} + +static int mount_bind_sysfs(const MountEntry *m) { int r; assert(m); @@ -1483,8 +1508,11 @@ static int apply_one_mount( case BIND_DEV: return mount_bind_dev(m); - case SYSFS: - return mount_sysfs(m); + case PRIVATE_SYSFS: + return mount_private_sysfs(m); + + case BIND_SYSFS: + return mount_bind_sysfs(m); case PROCFS: return mount_procfs(m, ns_info); @@ -1720,6 +1748,7 @@ static size_t namespace_calculate_mounts( !!log_namespace + setup_propagate + /* /run/systemd/incoming */ !!notify_socket + + ns_info->private_network + /* /sys */ ns_info->private_ipc; /* /dev/mqueue */ } @@ -2326,6 +2355,12 @@ int setup_namespace( }; } + if (ns_info->private_network) + *(m++) = (MountEntry) { + .path_const = "/sys", + .mode = PRIVATE_SYSFS, + }; + if (ns_info->private_ipc) *(m++) = (MountEntry) { .path_const = "/dev/mqueue", diff --git a/src/core/namespace.h b/src/core/namespace.h index 2ba5970159..74f78784b6 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -55,13 +55,13 @@ typedef enum ProcSubset { struct NamespaceInfo { bool ignore_protect_paths; bool private_dev; - bool private_mounts; bool protect_control_groups; bool protect_kernel_tunables; bool protect_kernel_modules; bool protect_kernel_logs; bool mount_apivfs; bool protect_hostname; + bool private_network; bool private_ipc; bool mount_nosuid; 
ProtectHome protect_home; diff --git a/src/core/socket.c b/src/core/socket.c index 8241ba050b..3dd726d52a 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1494,7 +1494,7 @@ static int fork_needed(const SocketAddress *address, const ExecContext *context) return true; } - return context->private_network || context->network_namespace_path; + return exec_needs_network_namespace(context); } static int socket_address_listen_in_cgroup( @@ -1557,7 +1557,7 @@ static int socket_address_listen_in_cgroup( pair[0] = safe_close(pair[0]); - if ((s->exec_context.private_network || s->exec_context.network_namespace_path) && + if (exec_needs_network_namespace(&s->exec_context) && s->exec_runtime && s->exec_runtime->netns_storage_socket[0] >= 0) { diff --git a/src/shared/bus-get-properties.c b/src/shared/bus-get-properties.c index 8b4f66b22e..3d0887e6df 100644 --- a/src/shared/bus-get-properties.c +++ b/src/shared/bus-get-properties.c @@ -38,6 +38,22 @@ int bus_property_set_bool( return 0; } +int bus_property_get_tristate( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + /* Defaults to false. 
*/ + + int b = (*(int*) userdata) > 0; + + return sd_bus_message_append_basic(reply, 'b', &b); +} + int bus_property_get_id128( sd_bus *bus, const char *path, diff --git a/src/shared/bus-get-properties.h b/src/shared/bus-get-properties.h index d048913877..44cd584bdc 100644 --- a/src/shared/bus-get-properties.h +++ b/src/shared/bus-get-properties.h @@ -7,6 +7,7 @@ int bus_property_get_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); int bus_property_set_bool(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *value, void *userdata, sd_bus_error *error); +int bus_property_get_tristate(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); int bus_property_get_id128(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *error); #define bus_property_get_usec ((sd_bus_property_get_t) NULL) diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index 8aad531a4d..e583261f45 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -34,6 +34,7 @@ #include "path-util.h" #include "process-util.h" #include "set.h" +#include "sort-util.h" #include "stat-util.h" #include "stdio-util.h" #include "string-table.h" @@ -1167,6 +1168,190 @@ int remount_idmap( return 0; } +typedef struct SubMount { + char *path; + int mount_fd; +} SubMount; + +static void sub_mount_clear(SubMount *s) { + assert(s); + + s->path = mfree(s->path); + s->mount_fd = safe_close(s->mount_fd); +} + +static void sub_mount_array_free(SubMount *s, size_t n) { + assert(s || n == 0); + + for (size_t i = 0; i < n; i++) + sub_mount_clear(s + i); + + free(s); +} + +static int sub_mount_compare(const SubMount *a, const SubMount *b) { + assert(a); + assert(b); + assert(a->path); + assert(b->path); + + return 
path_compare(a->path, b->path); +} + +static void sub_mount_drop(SubMount *s, size_t n) { + assert(s || n == 0); + + for (size_t m = 0, i = 1; i < n; i++) { + if (path_startswith(s[i].path, s[m].path)) + sub_mount_clear(s + i); + else + m = i; + } +} + +static int get_sub_mounts(const char *prefix, SubMount **ret_mounts, size_t *ret_n_mounts) { + _cleanup_(mnt_free_tablep) struct libmnt_table *table = NULL; + _cleanup_(mnt_free_iterp) struct libmnt_iter *iter = NULL; + SubMount *mounts = NULL; + size_t n = 0; + int r; + + CLEANUP_ARRAY(mounts, n, sub_mount_array_free); + + assert(prefix); + assert(ret_mounts); + assert(ret_n_mounts); + + r = libmount_parse("/proc/self/mountinfo", NULL, &table, &iter); + if (r < 0) + return log_debug_errno(r, "Failed to parse /proc/self/mountinfo: %m"); + + for (;;) { + _cleanup_close_ int mount_fd = -EBADF; + _cleanup_free_ char *p = NULL; + struct libmnt_fs *fs; + const char *path; + int id1, id2; + + r = mnt_table_next_fs(table, iter, &fs); + if (r == 1) + break; /* EOF */ + if (r < 0) + return log_debug_errno(r, "Failed to get next entry from /proc/self/mountinfo: %m"); + + path = mnt_fs_get_target(fs); + if (!path) + continue; + + if (isempty(path_startswith(path, prefix))) + continue; + + id1 = mnt_fs_get_id(fs); + r = path_get_mnt_id(path, &id2); + if (r < 0) { + log_debug_errno(r, "Failed to get mount ID of '%s', ignoring: %m", path); + continue; + } + if (id1 != id2) { + /* The path may be hidden by another over-mount or already remounted. */ + log_debug("The mount IDs of '%s' obtained by libmount and path_get_mnt_id() are different (%i vs %i), ignoring.", + path, id1, id2); + continue; + } + + mount_fd = open_tree(AT_FDCWD, path, OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_RECURSIVE); + if (mount_fd < 0) { + if (errno == ENOENT) /* The path may be hidden by another over-mount or already unmounted. 
*/ + continue; + + return log_debug_errno(errno, "Failed to open tree of mounted filesystem '%s': %m", path); + } + + p = strdup(path); + if (!p) + return log_oom_debug(); + + if (!GREEDY_REALLOC(mounts, n + 1)) + return log_oom_debug(); + + mounts[n++] = (SubMount) { + .path = TAKE_PTR(p), + .mount_fd = TAKE_FD(mount_fd), + }; + } + + typesafe_qsort(mounts, n, sub_mount_compare); + sub_mount_drop(mounts, n); + + *ret_mounts = TAKE_PTR(mounts); + *ret_n_mounts = n; + return 0; +} + +static int move_sub_mounts(SubMount *mounts, size_t n) { + assert(mounts || n == 0); + + for (size_t i = 0; i < n; i++) { + if (!mounts[i].path || mounts[i].mount_fd < 0) + continue; + + (void) mkdir_p_label(mounts[i].path, 0755); + + if (move_mount(mounts[i].mount_fd, "", AT_FDCWD, mounts[i].path, MOVE_MOUNT_F_EMPTY_PATH) < 0) + return log_debug_errno(errno, "Failed to move mount_fd to '%s': %m", mounts[i].path); + } + + return 0; +} + +int remount_and_move_sub_mounts( + const char *what, + const char *where, + const char *type, + unsigned long flags, + const char *options) { + + SubMount *mounts = NULL; /* avoid false maybe-uninitialized warning */ + size_t n = 0; /* avoid false maybe-uninitialized warning */ + int r; + + CLEANUP_ARRAY(mounts, n, sub_mount_array_free); + + assert(where); + + /* This is useful when creating a new network namespace. Unlike procfs, we need to remount sysfs, + * otherwise properties of the network interfaces in the main network namespace are still accessible + * through the old sysfs, e.g. /sys/class/net/eth0. All sub-mounts previously mounted on the sysfs + * are moved onto the new sysfs mount. */ + + r = path_is_mount_point(where, NULL, 0); + if (r < 0) + return log_debug_errno(r, "Failed to determine if '%s' is a mountpoint: %m", where); + if (r == 0) + /* Shortcut. Simply mount the requested filesystem. */ + return mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options); + + /* Get the list of sub-mounts and duplicate them. 
*/ + r = get_sub_mounts(where, &mounts, &n); + if (r < 0) + return r; + + /* Then, unmount the mount and its sub-mounts. */ + (void) umount_recursive(where, 0); + + /* Remount the target filesystem. */ + r = mount_nofollow_verbose(LOG_DEBUG, what, where, type, flags, options); + if (r < 0) + return r; + + /* Finally, move all the sub-mounts onto the new target mount point. */ + return move_sub_mounts(mounts, n); +} + +int remount_sysfs(const char *where) { + return remount_and_move_sub_mounts("sysfs", where, "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); +} + int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode) { assert(st); assert(dest); diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 7554bf828e..84ea4b6392 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -105,6 +105,14 @@ typedef enum RemountIdmapping { int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range, uid_t owner, RemountIdmapping idmapping); +int remount_and_move_sub_mounts( + const char *what, + const char *where, + const char *type, + unsigned long flags, + const char *options); +int remount_sysfs(const char *where); + /* Creates a mount point (not parents) based on the source path or stat - ie, a file or a directory */ int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode); int make_mount_point_inode_from_path(const char *source, const char *dest, mode_t mode); diff --git a/src/test/test-execute.c b/src/test/test-execute.c index 7363ea95db..7df3be4a7c 100644 --- a/src/test/test-execute.c +++ b/src/test/test-execute.c @@ -1052,7 +1052,7 @@ static void test_exec_ambientcapabilities(Manager *m) { } static void test_exec_privatenetwork(Manager *m) { - int r; + int r, status; r = find_executable("ip", NULL); if (r < 0) { @@ -1060,7 +1060,9 @@ static void test_exec_privatenetwork(Manager *m) { return; } - test(m, "exec-privatenetwork-yes.service", can_unshare ? 
0 : MANAGER_IS_SYSTEM(m) ? EXIT_NETWORK : EXIT_FAILURE, CLD_EXITED); + status = can_unshare ? 0 : MANAGER_IS_SYSTEM(m) ? EXIT_NETWORK : EXIT_FAILURE; + test(m, "exec-privatenetwork-yes-privatemounts-no.service", status, CLD_EXITED); + test(m, "exec-privatenetwork-yes-privatemounts-yes.service", status, CLD_EXITED); } static void test_exec_oomscoreadjust(Manager *m) { diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c index fddf70584f..405cdf557a 100644 --- a/src/test/test-mount-util.c +++ b/src/test/test-mount-util.c @@ -8,6 +8,7 @@ #include "fd-util.h" #include "fileio.h" #include "fs-util.h" +#include "missing_magic.h" #include "missing_mount.h" #include "mkdir.h" #include "mount-util.h" @@ -16,11 +17,142 @@ #include "path-util.h" #include "process-util.h" #include "rm-rf.h" +#include "stat-util.h" #include "string-util.h" #include "strv.h" #include "tests.h" #include "tmpfile-util.h" +TEST(remount_and_move_sub_mounts) { + int r; + + if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0) + return (void) log_tests_skipped("not running privileged"); + + r = safe_fork("(remount-and-move-sub-mounts)", + FORK_RESET_SIGNALS | + FORK_CLOSE_ALL_FDS | + FORK_DEATHSIG | + FORK_WAIT | + FORK_REOPEN_LOG | + FORK_LOG | + FORK_NEW_MOUNTNS | + FORK_MOUNTNS_SLAVE, + NULL); + assert_se(r >= 0); + if (r == 0) { + _cleanup_free_ char *d = NULL, *fn = NULL; + + assert_se(mkdtemp_malloc(NULL, &d) >= 0); + + assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0); + + assert_se(fn = path_join(d, "memo")); + assert_se(write_string_file(fn, d, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0); + assert_se(access(fn, F_OK) >= 0); + + /* Create fs tree */ + FOREACH_STRING(p, "sub1", "sub1/hoge", "sub1/foo", "sub2", "sub2/aaa", "sub2/bbb") { + _cleanup_free_ char *where = NULL, *filename = NULL; + + assert_se(where = path_join(d, p)); + assert_se(mkdir_p(where, 0755) >= 0); + 
assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0); + + assert_se(filename = path_join(where, "memo")); + assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0); + assert_se(access(filename, F_OK) >= 0); + } + + /* Hide sub1. */ + FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo") { + _cleanup_free_ char *where = NULL, *filename = NULL; + + assert_se(where = path_join(d, p)); + assert_se(mkdir_p(where, 0755) >= 0); + assert_se(mount_nofollow_verbose(LOG_DEBUG, "tmpfs", where, "tmpfs", MS_NOSUID|MS_NODEV, NULL) >= 0); + + assert_se(filename = path_join(where, "memo")); + assert_se(write_string_file(filename, where, WRITE_STRING_FILE_CREATE|WRITE_STRING_FILE_AVOID_NEWLINE) >= 0); + assert_se(access(filename, F_OK) >= 0); + } + + /* Remount the main fs. */ + r = remount_and_move_sub_mounts("tmpfs", d, "tmpfs", MS_NOSUID|MS_NODEV, NULL); + if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) { + log_tests_skipped_errno(r, "The kernel seems too old: %m"); + _exit(EXIT_SUCCESS); + } + + /* Check the file in the main fs does not exist. */ + assert_se(access(fn, F_OK) < 0 && errno == ENOENT); + + /* Check the files in sub-mounts are kept. */ + FOREACH_STRING(p, "sub1", "sub1/hogehoge", "sub1/foofoo", "sub2", "sub2/aaa", "sub2/bbb") { + _cleanup_free_ char *where = NULL, *filename = NULL, *content = NULL; + + assert_se(where = path_join(d, p)); + assert_se(filename = path_join(where, "memo")); + assert_se(read_full_file(filename, &content, NULL) >= 0); + assert_se(streq(content, where)); + } + + /* umount sub1, and check if the previously hidden sub-mounts are dropped. 
*/ + FOREACH_STRING(p, "sub1/hoge", "sub1/foo") { + _cleanup_free_ char *where = NULL; + + assert_se(where = path_join(d, p)); + assert_se(access(where, F_OK) < 0 && errno == ENOENT); + } + + _exit(EXIT_SUCCESS); + } +} + +TEST(remount_sysfs) { + int r; + + if (geteuid() != 0 || have_effective_cap(CAP_SYS_ADMIN) <= 0) + return (void) log_tests_skipped("not running privileged"); + + if (path_is_fs_type("/sys", SYSFS_MAGIC) <= 0) + return (void) log_tests_skipped("sysfs is not mounted on /sys"); + + if (access("/sys/class/net/dummy-test-mnt", F_OK) < 0) + return (void) log_tests_skipped_errno(errno, "The network interface dummy-test-mnt does not exit"); + + r = safe_fork("(remount-sysfs)", + FORK_RESET_SIGNALS | + FORK_CLOSE_ALL_FDS | + FORK_DEATHSIG | + FORK_WAIT | + FORK_REOPEN_LOG | + FORK_LOG | + FORK_NEW_MOUNTNS | + FORK_MOUNTNS_SLAVE, + NULL); + assert_se(r >= 0); + if (r == 0) { + assert_se(unshare(CLONE_NEWNET) >= 0); + + /* Even unshare()ed, the interfaces in the main namespace can be accessed through sysfs. */ + assert_se(access("/sys/class/net/lo", F_OK) >= 0); + assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) >= 0); + + r = remount_sysfs("/sys"); + if (r == -EINVAL || (r < 0 && ERRNO_IS_NOT_SUPPORTED(r))) { + log_tests_skipped_errno(r, "The kernel seems too old: %m"); + _exit(EXIT_SUCCESS); + } + + /* After remounting sysfs, the interfaces in the main namespace cannot be accessed. */ + assert_se(access("/sys/class/net/lo", F_OK) >= 0); + assert_se(access("/sys/class/net/dummy-test-mnt", F_OK) < 0 && errno == ENOENT); + + _exit(EXIT_SUCCESS); + } +} + TEST(mount_option_mangle) { char *opts = NULL; unsigned long f; @@ -256,4 +388,17 @@ TEST(make_mount_point_inode) { assert_se(!(S_IXOTH & st.st_mode)); } -DEFINE_TEST_MAIN(LOG_DEBUG); +static int intro(void) { + /* Create a dummy network interface for testing remount_sysfs(). 
*/ + (void) system("ip link add dummy-test-mnt type dummy"); + + return 0; +} + +static int outro(void) { + (void) system("ip link del dummy-test-mnt"); + + return 0; +} + +DEFINE_TEST_MAIN_FULL(LOG_DEBUG, intro, outro); |