diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/nspawn/nspawn-register.c | 9 | ||||
-rw-r--r-- | src/nspawn/nspawn-register.h | 4 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 100 |
3 files changed, 109 insertions, 4 deletions
diff --git a/src/nspawn/nspawn-register.c b/src/nspawn/nspawn-register.c index 855172c09c..52f7384468 100644 --- a/src/nspawn/nspawn-register.c +++ b/src/nspawn/nspawn-register.c @@ -15,6 +15,7 @@ static int append_machine_properties( sd_bus_message *m, + bool enable_fuse, CustomMount *mounts, unsigned n_mounts, int kill_signal, @@ -40,6 +41,12 @@ static int append_machine_properties( "char-pts", "rw"); if (r < 0) return bus_log_create_error(r); + if (enable_fuse) { + r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 1, + "/dev/fuse", "rw"); + if (r < 0) + return bus_log_create_error(r); + } for (j = 0; j < n_mounts; j++) { CustomMount *cm = mounts + j; @@ -200,6 +207,7 @@ int register_machine( r = append_machine_properties( m, + FLAGS_SET(flags, REGISTER_MACHINE_ENABLE_FUSE), mounts, n_mounts, kill_signal, @@ -320,6 +328,7 @@ int allocate_scope( r = append_machine_properties( m, + FLAGS_SET(flags, ALLOCATE_SCOPE_ENABLE_FUSE), mounts, n_mounts, kill_signal, diff --git a/src/nspawn/nspawn-register.h b/src/nspawn/nspawn-register.h index 0effb40aa0..5e187e33bb 100644 --- a/src/nspawn/nspawn-register.h +++ b/src/nspawn/nspawn-register.h @@ -9,7 +9,8 @@ #include "nspawn-settings.h" typedef enum RegisterMachineFlags { - REGISTER_MACHINE_KEEP_UNIT = 1 << 0, + REGISTER_MACHINE_KEEP_UNIT = 1 << 0, + REGISTER_MACHINE_ENABLE_FUSE = 1 << 1, } RegisterMachineFlags; int register_machine( @@ -31,6 +32,7 @@ int unregister_machine(sd_bus *bus, const char *machine_name); typedef enum AllocateScopeFlags { ALLOCATE_SCOPE_ALLOW_PIDFD = 1 << 0, + ALLOCATE_SCOPE_ENABLE_FUSE = 1 << 1, } AllocateScopeFlags; int allocate_scope( diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 8a26333364..f8bcf26b58 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -2,6 +2,7 @@ #include <errno.h> #include <getopt.h> +#include <linux/fuse.h> #include <linux/loop.h> #if HAVE_SELINUX #include <selinux/selinux.h> @@ -2147,7 +2148,85 @@ static int setup_boot_id(void) { return mount_nofollow_verbose(LOG_ERR, NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); } -static int copy_devnodes(const char *dest) { +static int get_fuse_version(uint32_t *ret_major, uint32_t *ret_minor) { + /* Must be called with mount privileges, either via arg_privileged or by being uid=0 in new + * CLONE_NEWUSER/CLONE_NEWNS namespaces. This is true when called from outer_child(). */ + ssize_t n; + _cleanup_close_ int fuse_fd = -EBADF, mnt_fd = -EBADF; + _cleanup_free_ char *opts = NULL; + union { + char unstructured[FUSE_MIN_READ_BUFFER]; + struct { + struct fuse_in_header header; + /* Don't use <linux/fuse.h>:`struct fuse_init_in` because a newer fuse.h might give + * us a bigger struct than what an older kernel actually gives us, and that would + * break our .header.len check. */ + struct { + uint32_t major; + uint32_t minor; + } body; + } structured; + } request; + + assert(ret_major); + assert(ret_minor); + + /* Get a FUSE handle. */ + fuse_fd = open("/dev/fuse", O_CLOEXEC|O_RDWR); + if (fuse_fd < 0) + return log_debug_errno(errno, "Failed to open /dev/fuse: %m"); + if (asprintf(&opts, "fd=%i,rootmode=40000,user_id=0,group_id=0", fuse_fd) < 0) + return log_oom_debug(); + mnt_fd = make_fsmount(LOG_DEBUG, "nspawn-fuse", "fuse.nspawn", 0, opts, -EBADF); + if (mnt_fd < 0) + return mnt_fd; + + /* Read a request from the FUSE handle. */ + n = read(fuse_fd, &request.unstructured, sizeof request); + if (n < 0) + return log_debug_errno(errno, "Failed to read /dev/fuse: %m"); + if ((size_t) n < sizeof request.structured.header || + (size_t) n < request.structured.header.len) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Failed to read /dev/fuse: Short read"); + + /* Assume that the request is a FUSE_INIT request, and return the version information from it. */ + if (request.structured.header.opcode != FUSE_INIT) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Initial request from /dev/fuse should have opcode=%i (FUSE_INIT), but has opcode=%"PRIu32, + FUSE_INIT, request.structured.header.opcode); + if (request.structured.header.len < sizeof request.structured) + return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Initial FUSE_INIT request from /dev/fuse is too short"); + *ret_major = request.structured.body.major; + *ret_minor = request.structured.body.minor; + return 0; +} + +static bool should_enable_fuse(void) { + uint32_t fuse_major, fuse_minor; + int r; + + r = get_fuse_version(&fuse_major, &fuse_minor); + if (r < 0) { + if (ERRNO_IS_NEG_DEVICE_ABSENT(r)) + log_debug_errno(r, "Disabling FUSE: FUSE appears to be disabled on the host: %m"); + else if (r == -ENOSYS) + log_debug_errno(r, "Disabling FUSE: Kernel does not support the fsopen() family of syscalls: %m"); + else + log_warning_errno(r, "Disabling FUSE: Failed to determine FUSE version: %m"); + return false; + } + + /* FUSE is only userns-safe in FUSE version 7.27 and later. + * https://github.com/torvalds/linux/commit/da315f6e03988a7127680bbc26e1028991b899b8 */ + if (fuse_major < 7 || (fuse_major == 7 && fuse_minor < 27)) { + log_debug("Disabling FUSE: FUSE version %" PRIu32 ".%" PRIu32 " is too old to support user namespaces", + fuse_major, fuse_minor); + return false; + } + + return true; +} + +static int copy_devnodes(const char *dest, bool enable_fuse) { _cleanup_strv_free_ char **devnodes = NULL; int r = 0; @@ -2159,6 +2238,7 @@ static int copy_devnodes(const char *dest) { "random", "urandom", "tty", + STRV_IFNOTNULL(enable_fuse ? "fuse" : NULL), "net/tun"); if (!devnodes) return log_oom(); @@ -3807,7 +3887,7 @@ static int outer_child( _cleanup_(bind_user_context_freep) BindUserContext *bind_user_context = NULL; _cleanup_strv_free_ char **os_release_pairs = NULL; _cleanup_close_ int fd = -EBADF, mntns_fd = -EBADF; - bool idmap = false; + bool idmap = false, enable_fuse; const char *p; pid_t pid; ssize_t l; @@ -4090,7 +4170,12 @@ static int outer_child( if (r < 0) return r; - r = copy_devnodes(directory); + enable_fuse = should_enable_fuse(); + l = send(fd_outer_socket, &enable_fuse, sizeof enable_fuse, 0); + if (l < 0) + return log_error_errno(errno, "Failed to send whether to enable FUSE: %m"); + + r = copy_devnodes(directory, enable_fuse); if (r < 0) return r; @@ -5048,6 +5133,7 @@ static int run_container( ssize_t l; sigset_t mask_chld; _cleanup_close_ int child_netns_fd = -EBADF; + bool enable_fuse; assert_se(sigemptyset(&mask_chld) == 0); assert_se(sigaddset(&mask_chld, SIGCHLD) == 0); @@ -5234,6 +5320,12 @@ static int run_container( l, l == 0 ? " The child is most likely dead." : ""); } + l = recv(fd_outer_socket_pair[0], &enable_fuse, sizeof enable_fuse, 0); + if (l < 0) + return log_error_errno(errno, "Failed to read whether to enable FUSE: %m"); + if (l != sizeof enable_fuse) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading whether to enable FUSE."); + /* Wait for the outer child. */ r = wait_for_terminate_and_check("(sd-namespace)", *pid, WAIT_LOG_ABNORMAL); if (r < 0) @@ -5386,6 +5478,7 @@ static int run_container( if (arg_register) { RegisterMachineFlags flags = 0; SET_FLAG(flags, REGISTER_MACHINE_KEEP_UNIT, arg_keep_unit); + SET_FLAG(flags, REGISTER_MACHINE_ENABLE_FUSE, enable_fuse); r = register_machine( bus, arg_machine, @@ -5406,6 +5499,7 @@ static int run_container( } else if (!arg_keep_unit) { AllocateScopeFlags flags = ALLOCATE_SCOPE_ALLOW_PIDFD; + SET_FLAG(flags, ALLOCATE_SCOPE_ENABLE_FUSE, enable_fuse); r = allocate_scope( bus, arg_machine, |