diff options
author | Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl> | 2019-03-01 16:08:55 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-03-01 16:08:55 +0100 |
commit | cc5fc36aec85b706294aec568b62cb24c05c04a3 (patch) | |
tree | 7dab782e57cf37ff292a05bd666760293b5d5f8d /src | |
parent | Merge pull request #11701 from poettering/discover-bls (diff) | |
parent | man: document new systemd.volatile=overlay kernel command line option (diff) | |
download | systemd-cc5fc36aec85b706294aec568b62cb24c05c04a3.tar.xz systemd-cc5fc36aec85b706294aec568b62cb24c05c04a3.zip |
Merge pull request #11243 from poettering/nspawn-root-overlay
add systemd-nspawn --volatile=overlay support, as well as the same for host systems
Diffstat (limited to 'src')
-rw-r--r-- | src/basic/copy.c | 13 | ||||
-rw-r--r-- | src/basic/copy.h | 3 | ||||
-rw-r--r-- | src/boot/bootctl.c | 2 | ||||
-rw-r--r-- | src/fstab-generator/fstab-generator.c | 5 | ||||
-rw-r--r-- | src/gpt-auto-generator/gpt-auto-generator.c | 26 | ||||
-rw-r--r-- | src/import/export-raw.c | 2 | ||||
-rw-r--r-- | src/import/import-raw.c | 2 | ||||
-rw-r--r-- | src/import/pull-raw.c | 2 | ||||
-rw-r--r-- | src/nspawn/nspawn-mount.c | 115 | ||||
-rw-r--r-- | src/nspawn/nspawn-mount.h | 3 | ||||
-rw-r--r-- | src/nspawn/nspawn.c | 88 | ||||
-rw-r--r-- | src/shared/machine-image.c | 2 | ||||
-rw-r--r-- | src/shared/volatile-util.c | 20 | ||||
-rw-r--r-- | src/shared/volatile-util.h | 1 | ||||
-rw-r--r-- | src/volatile-root/volatile-root.c | 118 |
15 files changed, 293 insertions, 109 deletions
diff --git a/src/basic/copy.c b/src/basic/copy.c index 46e02a3759..2f36c8eb87 100644 --- a/src/basic/copy.c +++ b/src/basic/copy.c @@ -743,7 +743,7 @@ int copy_file_fd_full( r = copy_bytes_full(fdf, fdt, (uint64_t) -1, copy_flags, NULL, NULL, progress_bytes, userdata); - (void) copy_times(fdf, fdt); + (void) copy_times(fdf, fdt, copy_flags); (void) copy_xattr(fdf, fdt); return r; @@ -849,10 +849,9 @@ int copy_file_atomic_full( return 0; } -int copy_times(int fdf, int fdt) { +int copy_times(int fdf, int fdt, CopyFlags flags) { struct timespec ut[2]; struct stat st; - usec_t crtime = 0; assert(fdf >= 0); assert(fdt >= 0); @@ -866,8 +865,12 @@ int copy_times(int fdf, int fdt) { if (futimens(fdt, ut) < 0) return -errno; - if (fd_getcrtime(fdf, &crtime) >= 0) - (void) fd_setcrtime(fdt, crtime); + if (FLAGS_SET(flags, COPY_CRTIME)) { + usec_t crtime; + + if (fd_getcrtime(fdf, &crtime) >= 0) + (void) fd_setcrtime(fdt, crtime); + } return 0; } diff --git a/src/basic/copy.h b/src/basic/copy.h index f677021881..a33546d3ab 100644 --- a/src/basic/copy.h +++ b/src/basic/copy.h @@ -14,6 +14,7 @@ typedef enum CopyFlags { COPY_REPLACE = 1 << 2, /* Replace an existing file if there's one */ COPY_SAME_MOUNT = 1 << 3, /* Don't descend recursively into other file systems, across mount point boundaries */ COPY_MERGE_EMPTY = 1 << 4, /* Merge an existing, empty directory with our new tree to copy */ + COPY_CRTIME = 1 << 5, /* Generate a user.crtime_usec xattr off the source crtime if there is one, on copying */ } CopyFlags; typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata); @@ -57,5 +58,5 @@ static inline int copy_bytes(int fdf, int fdt, uint64_t max_bytes, CopyFlags cop return copy_bytes_full(fdf, fdt, max_bytes, copy_flags, NULL, NULL, NULL, NULL); } -int copy_times(int fdf, int fdt); +int copy_times(int fdf, int fdt, CopyFlags flags); int copy_xattr(int fdf, int fdt); diff --git a/src/boot/bootctl.c b/src/boot/bootctl.c index a529989ea0..1e0d115fe3 100644 --- 
a/src/boot/bootctl.c +++ b/src/boot/bootctl.c @@ -494,7 +494,7 @@ static int copy_file_with_version_check(const char *from, const char *to, bool f return log_error_errno(r, "Failed to copy data from \"%s\" to \"%s\": %m", from, t); } - (void) copy_times(fd_from, fd_to); + (void) copy_times(fd_from, fd_to, 0); if (fsync(fd_to) < 0) { (void) unlink_noerrno(t); diff --git a/src/fstab-generator/fstab-generator.c b/src/fstab-generator/fstab-generator.c index 30a6d356d0..d1bfa775e4 100644 --- a/src/fstab-generator/fstab-generator.c +++ b/src/fstab-generator/fstab-generator.c @@ -722,10 +722,11 @@ static int add_sysroot_usr_mount(void) { } static int add_volatile_root(void) { + /* Let's add in systemd-remount-volatile.service which will remount the root device to tmpfs if this is - * requested, leaving only /usr from the root mount inside. */ + * requested (or as an overlayfs), leaving only /usr from the root mount inside. */ - if (arg_volatile_mode != VOLATILE_YES) + if (!IN_SET(arg_volatile_mode, VOLATILE_YES, VOLATILE_OVERLAY)) return 0; return generator_add_symlink(arg_dest, SPECIAL_INITRD_ROOT_FS_TARGET, "requires", diff --git a/src/gpt-auto-generator/gpt-auto-generator.c b/src/gpt-auto-generator/gpt-auto-generator.c index 2f8ccd025b..0f1e184eea 100644 --- a/src/gpt-auto-generator/gpt-auto-generator.c +++ b/src/gpt-auto-generator/gpt-auto-generator.c @@ -18,6 +18,7 @@ #include "efivars.h" #include "fd-util.h" #include "fileio.h" +#include "fs-util.h" #include "fstab-util.h" #include "generator.h" #include "gpt.h" @@ -533,7 +534,7 @@ static int add_root_rw(DissectedPartition *p) { return 0; } -static int open_parent(dev_t devnum, int *ret) { +static int open_parent_devno(dev_t devnum, int *ret) { _cleanup_(sd_device_unrefp) sd_device *d = NULL; const char *name, *devtype, *node; sd_device *parent; @@ -601,7 +602,7 @@ static int enumerate_partitions(dev_t devnum) { _cleanup_(dissected_image_unrefp) DissectedImage *m = NULL; int r, k; - r = open_parent(devnum, &fd); + r 
= open_parent_devno(devnum, &fd); if (r <= 0) return r; @@ -763,8 +764,25 @@ static int add_mounts(void) { if (r < 0) return log_error_errno(r, "Failed to determine block device of /usr file system: %m"); if (r == 0) { - log_debug("Neither root nor /usr file system are on a (single) block device."); - return 0; + _cleanup_free_ char *p = NULL; + mode_t m; + + /* If the root mount has been replaced by some form of volatile file system (overlayfs), the + * original root block device node is symlinked in /run/systemd/volatile-root. Let's read that + * here. */ + r = readlink_malloc("/run/systemd/volatile-root", &p); + if (r == -ENOENT) { + log_debug("Neither root nor /usr file system are on a (single) block device."); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to read symlink /run/systemd/volatile-root: %m"); + + r = device_path_parse_major_minor(p, &m, &devno); + if (r < 0) + return log_error_errno(r, "Failed to parse major/minor device node: %m"); + if (!S_ISBLK(m)) + return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Volatile root device is of wrong type."); } } diff --git a/src/import/export-raw.c b/src/import/export-raw.c index 6a02b47a17..c1c946cd2b 100644 --- a/src/import/export-raw.c +++ b/src/import/export-raw.c @@ -223,7 +223,7 @@ static int raw_export_process(RawExport *e) { finish: if (r >= 0) { - (void) copy_times(e->input_fd, e->output_fd); + (void) copy_times(e->input_fd, e->output_fd, COPY_CRTIME); (void) copy_xattr(e->input_fd, e->output_fd); } diff --git a/src/import/import-raw.c b/src/import/import-raw.c index 4b1161557d..56f3431a08 100644 --- a/src/import/import-raw.c +++ b/src/import/import-raw.c @@ -215,7 +215,7 @@ static int raw_import_finish(RawImport *i) { return r; if (S_ISREG(i->st.st_mode)) { - (void) copy_times(i->input_fd, i->output_fd); + (void) copy_times(i->input_fd, i->output_fd, COPY_CRTIME); (void) copy_xattr(i->input_fd, i->output_fd); } diff --git a/src/import/pull-raw.c b/src/import/pull-raw.c index 
3a3e015df8..72b9054e49 100644 --- a/src/import/pull-raw.c +++ b/src/import/pull-raw.c @@ -368,7 +368,7 @@ static int raw_pull_make_local_copy(RawPull *i) { return log_error_errno(r, "Failed to make writable copy of image: %m"); } - (void) copy_times(i->raw_job->disk_fd, dfd); + (void) copy_times(i->raw_job->disk_fd, dfd, COPY_CRTIME); (void) copy_xattr(i->raw_job->disk_fd, dfd); dfd = safe_close(dfd); diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index a9af889747..eb0a26ef35 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -212,6 +212,8 @@ int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) if (!path_is_absolute(destination)) return -EINVAL; + if (empty_or_root(destination)) + return -EINVAL; m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND); if (!m) @@ -251,6 +253,8 @@ int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) { if (!path_is_absolute(path)) return -EINVAL; + if (empty_or_root(path)) + return -EINVAL; m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS); if (!m) @@ -310,6 +314,9 @@ int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_onl return -EINVAL; } + if (empty_or_root(destination)) + return -EINVAL; + m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY); if (!m) return -ENOMEM; @@ -849,9 +856,8 @@ int mount_custom( return 0; } -int setup_volatile_state( +static int setup_volatile_state( const char *directory, - VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context) { @@ -861,11 +867,7 @@ int setup_volatile_state( assert(directory); - if (mode != VOLATILE_STATE) - return 0; - - /* --volatile=state means we simply overmount /var - with a tmpfs, and the rest read-only. */ + /* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. 
*/ r = bind_remount_recursive(directory, true, NULL); if (r < 0) @@ -886,9 +888,8 @@ int setup_volatile_state( return mount_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options); } -int setup_volatile( +static int setup_volatile_yes( const char *directory, - VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context) { @@ -900,11 +901,8 @@ int setup_volatile( assert(directory); - if (mode != VOLATILE_YES) - return 0; - - /* --volatile=yes means we mount a tmpfs to the root dir, and - the original /usr to use inside it, and that read-only. */ + /* --volatile=yes means we mount a tmpfs to the root dir, and the original /usr to use inside it, and that + read-only. */ if (!mkdtemp(template)) return log_error_errno(errno, "Failed to create temporary directory: %m"); @@ -912,7 +910,7 @@ int setup_volatile( options = "mode=755"; r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf); if (r < 0) - return log_oom(); + goto fail; if (r > 0) options = buf; @@ -961,6 +959,93 @@ fail: return r; } +static int setup_volatile_overlay( + const char *directory, + bool userns, uid_t uid_shift, uid_t uid_range, + const char *selinux_apifs_context) { + + _cleanup_free_ char *buf = NULL, *escaped_directory = NULL, *escaped_upper = NULL, *escaped_work = NULL; + char template[] = "/tmp/nspawn-volatile-XXXXXX"; + const char *upper, *work, *options; + bool tmpfs_mounted = false; + int r; + + assert(directory); + + /* --volatile=overlay means we mount an overlayfs to the root dir. */ + + if (!mkdtemp(template)) + return log_error_errno(errno, "Failed to create temporary directory: %m"); + + options = "mode=755"; + r = tmpfs_patch_options(options, uid_shift == 0 ? 
UID_INVALID : uid_shift, selinux_apifs_context, &buf); + if (r < 0) + goto finish; + if (r > 0) + options = buf; + + r = mount_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options); + if (r < 0) + goto finish; + + tmpfs_mounted = true; + + upper = strjoina(template, "/upper"); + work = strjoina(template, "/work"); + + if (mkdir(upper, 0755) < 0) { + r = log_error_errno(errno, "Failed to create %s: %m", upper); + goto finish; + } + if (mkdir(work, 0755) < 0) { + r = log_error_errno(errno, "Failed to create %s: %m", work); + goto finish; + } + + /* And now, let's overmount the root dir with an overlayfs that uses the root dir as lower dir. It's kinda nice + * that the kernel allows us to do that without going through some mount point rearrangements. */ + + escaped_directory = shell_escape(directory, ",:"); + escaped_upper = shell_escape(upper, ",:"); + escaped_work = shell_escape(work, ",:"); + if (!escaped_directory || !escaped_upper || !escaped_work) { + r = -ENOMEM; + goto finish; + } + + options = strjoina("lowerdir=", escaped_directory, ",upperdir=", escaped_upper, ",workdir=", escaped_work); + r = mount_verbose(LOG_ERR, "overlay", directory, "overlay", 0, options); + +finish: + if (tmpfs_mounted) + (void) umount_verbose(template); + + (void) rmdir(template); + return r; +} + +int setup_volatile_mode( + const char *directory, + VolatileMode mode, + bool userns, uid_t uid_shift, uid_t uid_range, + const char *selinux_apifs_context) { + + switch (mode) { + + case VOLATILE_YES: + return setup_volatile_yes(directory, userns, uid_shift, uid_range, selinux_apifs_context); + + case VOLATILE_STATE: + return setup_volatile_state(directory, userns, uid_shift, uid_range, selinux_apifs_context); + + case VOLATILE_OVERLAY: + return setup_volatile_overlay(directory, userns, uid_shift, uid_range, selinux_apifs_context); + + default: + return 0; + } +} + /* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. 
*/ int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) { _cleanup_free_ char *root_new = NULL, *root_old = NULL; diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h index 8051a7d9d9..e060ca0e4d 100644 --- a/src/nspawn/nspawn-mount.h +++ b/src/nspawn/nspawn-mount.h @@ -49,8 +49,7 @@ int mount_sysfs(const char *dest, MountSettingsMask mount_settings); int mount_custom(const char *dest, CustomMount *mounts, size_t n, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); -int setup_volatile(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); -int setup_volatile_state(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); +int setup_volatile_mode(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context); int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s); int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index e0c2d711e6..5cb049e5f7 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -1308,6 +1308,9 @@ static int verify_arguments(void) { if (arg_start_mode == START_BOOT && arg_kill_signal <= 0) arg_kill_signal = SIGRTMIN+3; + if (arg_volatile_mode != VOLATILE_NO) /* Make sure all file systems contained in the image are mounted read-only if we are in volatile mode */ + arg_read_only = true; + if (arg_keep_unit && arg_register && cg_pid_get_owner_uid(0, NULL) >= 0) /* Save the user from accidentally registering either user-$SESSION.scope or user@.service. * The latter is not technically a user session, but we don't need to labour the point. 
*/ @@ -1334,6 +1337,12 @@ static int verify_arguments(void) { if (arg_userns_chown && arg_read_only) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--read-only and --private-users-chown may not be combined."); + /* We don't support --private-users-chown together with any of the volatile modes since we couldn't + * change the read-only part of the tree (i.e. /usr) anyway, or because it would trigger a massive + * copy-up (in case of overlay) making the entire exercise pointless. */ + if (arg_userns_chown && arg_volatile_mode != VOLATILE_NO) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "--volatile= and --private-users-chown may not be combined."); + /* If --network-namespace-path is given with any other network-related option, * we need to error out, to avoid conflicts between different network options. */ if (arg_network_namespace_path && @@ -1352,9 +1361,6 @@ static int verify_arguments(void) { if (arg_userns_mode != USER_NAMESPACE_NO && !(arg_mount_settings & MOUNT_APPLY_APIVFS_RO)) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot combine --private-users with read-write mounts."); - if (arg_volatile_mode != VOLATILE_NO && arg_read_only) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot combine --read-only with --volatile. 
Note that --volatile already implies a read-only base hierarchy."); - if (arg_expose_ports && !arg_private_network) return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Cannot use --port= without private networking."); @@ -1420,6 +1426,10 @@ static const char *timezone_from_path(const char *path) { "/usr/share/zoneinfo/"); } +static bool etc_writable(void) { + return !arg_read_only || IN_SET(arg_volatile_mode, VOLATILE_YES, VOLATILE_OVERLAY); +} + static int setup_timezone(const char *dest) { _cleanup_free_ char *p = NULL, *etc = NULL; const char *where, *check; @@ -1431,9 +1441,9 @@ static int setup_timezone(const char *dest) { if (IN_SET(arg_timezone, TIMEZONE_AUTO, TIMEZONE_SYMLINK)) { r = readlink_malloc("/etc/localtime", &p); if (r == -ENOENT && arg_timezone == TIMEZONE_AUTO) - m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? TIMEZONE_OFF : TIMEZONE_DELETE; + m = etc_writable() ? TIMEZONE_DELETE : TIMEZONE_OFF; else if (r == -EINVAL && arg_timezone == TIMEZONE_AUTO) /* regular file? */ - m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? TIMEZONE_BIND : TIMEZONE_COPY; + m = etc_writable() ? TIMEZONE_COPY : TIMEZONE_BIND; else if (r < 0) { log_warning_errno(r, "Failed to read host's /etc/localtime symlink, not updating container timezone: %m"); /* To handle warning, delete /etc/localtime and replace it with a symbolic link to a time zone data @@ -1444,7 +1454,7 @@ static int setup_timezone(const char *dest) { */ return 0; } else if (arg_timezone == TIMEZONE_AUTO) - m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? TIMEZONE_BIND : TIMEZONE_SYMLINK; + m = etc_writable() ? TIMEZONE_SYMLINK : TIMEZONE_BIND; else m = arg_timezone; } else @@ -1606,11 +1616,11 @@ static int setup_resolv_conf(const char *dest) { if (arg_private_network) m = RESOLV_CONF_OFF; else if (have_resolv_conf(STATIC_RESOLV_CONF) > 0 && resolved_listening() > 0) - m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? 
RESOLV_CONF_BIND_STATIC : RESOLV_CONF_COPY_STATIC; + m = etc_writable() ? RESOLV_CONF_COPY_STATIC : RESOLV_CONF_BIND_STATIC; else if (have_resolv_conf("/etc/resolv.conf") > 0) - m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? RESOLV_CONF_BIND_HOST : RESOLV_CONF_COPY_HOST; + m = etc_writable() ? RESOLV_CONF_COPY_HOST : RESOLV_CONF_BIND_HOST; else - m = arg_read_only && arg_volatile_mode != VOLATILE_YES ? RESOLV_CONF_OFF : RESOLV_CONF_DELETE; + m = etc_writable() ? RESOLV_CONF_DELETE : RESOLV_CONF_OFF; } else m = arg_resolv_conf; @@ -2896,6 +2906,30 @@ static int outer_child( "Selected user namespace base " UID_FMT " and range " UID_FMT ".", arg_uid_shift, arg_uid_range); } + if (!dissected_image) { + /* Turn directory into bind mount */ + r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + return r; + } + + r = setup_pivot_root( + directory, + arg_pivot_root_new, + arg_pivot_root_old); + if (r < 0) + return r; + + r = setup_volatile_mode( + directory, + arg_volatile_mode, + arg_userns_mode != USER_NAMESPACE_NO, + arg_uid_shift, + arg_uid_range, + arg_selinux_context); + if (r < 0) + return r; + if (dissected_image) { /* Now we know the uid shift, let's now mount everything else that might be in the image. 
*/ r = dissected_image_mount(dissected_image, directory, arg_uid_shift, @@ -2921,38 +2955,6 @@ static int outer_child( unified_cgroup_hierarchy_socket = safe_close(unified_cgroup_hierarchy_socket); } - /* Turn directory into bind mount */ - r = mount_verbose(LOG_ERR, directory, directory, NULL, MS_BIND|MS_REC, NULL); - if (r < 0) - return r; - - r = setup_pivot_root( - directory, - arg_pivot_root_new, - arg_pivot_root_old); - if (r < 0) - return r; - - r = setup_volatile( - directory, - arg_volatile_mode, - arg_userns_mode != USER_NAMESPACE_NO, - arg_uid_shift, - arg_uid_range, - arg_selinux_context); - if (r < 0) - return r; - - r = setup_volatile_state( - directory, - arg_volatile_mode, - arg_userns_mode != USER_NAMESPACE_NO, - arg_uid_shift, - arg_uid_range, - arg_selinux_context); - if (r < 0) - return r; - /* Mark everything as shared so our mounts get propagated down. This is * required to make new bind mounts available in systemd services * inside the containter that create a new mount namespace. @@ -2971,7 +2973,7 @@ static int outer_child( if (r < 0) return r; - if (arg_read_only) { + if (arg_read_only && arg_volatile_mode == VOLATILE_NO) { r = bind_remount_recursive(directory, true, NULL); if (r < 0) return log_error_errno(r, "Failed to make tree read-only: %m"); @@ -4398,7 +4400,7 @@ int main(int argc, char *argv[]) { goto finish; } - r = copy_file(arg_image, np, O_EXCL, arg_read_only ? 0400 : 0600, FS_NOCOW_FL, COPY_REFLINK); + r = copy_file(arg_image, np, O_EXCL, arg_read_only ? 
0400 : 0600, FS_NOCOW_FL, COPY_REFLINK|COPY_CRTIME); if (r < 0) { r = log_error_errno(r, "Failed to copy image file: %m"); goto finish; diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c index af06ab22e8..3d61221056 100644 --- a/src/shared/machine-image.c +++ b/src/shared/machine-image.c @@ -870,7 +870,7 @@ int image_clone(Image *i, const char *new_name, bool read_only) { case IMAGE_RAW: new_path = strjoina("/var/lib/machines/", new_name, ".raw"); - r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK); + r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK|COPY_CRTIME); break; case IMAGE_BLOCK: diff --git a/src/shared/volatile-util.c b/src/shared/volatile-util.c index 4d75bc0e96..5ca6ab3376 100644 --- a/src/shared/volatile-util.c +++ b/src/shared/volatile-util.c @@ -12,33 +12,35 @@ int query_volatile_mode(VolatileMode *ret) { _cleanup_free_ char *mode = NULL; - VolatileMode m = VOLATILE_NO; int r; r = proc_cmdline_get_key("systemd.volatile", PROC_CMDLINE_VALUE_OPTIONAL, &mode); if (r < 0) return r; - if (r == 0) - goto finish; + if (r == 0) { + *ret = VOLATILE_NO; + return 0; + } if (mode) { + VolatileMode m; + m = volatile_mode_from_string(mode); if (m < 0) return -EINVAL; - } else - m = VOLATILE_YES; - r = 1; + *ret = m; + } else + *ret = VOLATILE_YES; -finish: - *ret = m; - return r; + return 1; } static const char* const volatile_mode_table[_VOLATILE_MODE_MAX] = { [VOLATILE_NO] = "no", [VOLATILE_YES] = "yes", [VOLATILE_STATE] = "state", + [VOLATILE_OVERLAY] = "overlay", }; DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(volatile_mode, VolatileMode, VOLATILE_YES); diff --git a/src/shared/volatile-util.h b/src/shared/volatile-util.h index 8761c44ab8..2d31bb1174 100644 --- a/src/shared/volatile-util.h +++ b/src/shared/volatile-util.h @@ -5,6 +5,7 @@ typedef enum VolatileMode { VOLATILE_NO, VOLATILE_YES, VOLATILE_STATE, + VOLATILE_OVERLAY, _VOLATILE_MODE_MAX, 
_VOLATILE_MODE_INVALID = -1 } VolatileMode; diff --git a/src/volatile-root/volatile-root.c b/src/volatile-root/volatile-root.c index 5da9ce1681..701f5a2832 100644 --- a/src/volatile-root/volatile-root.c +++ b/src/volatile-root/volatile-root.c @@ -3,6 +3,8 @@ #include <sys/mount.h> #include "alloc-util.h" +#include "blockdev-util.h" +#include "escape.h" #include "fs-util.h" #include "main-func.h" #include "mkdir.h" @@ -17,20 +19,7 @@ static int make_volatile(const char *path) { _cleanup_free_ char *old_usr = NULL; int r; - r = path_is_mount_point(path, NULL, AT_SYMLINK_FOLLOW); - if (r < 0) - return log_error_errno(r, "Couldn't determine whether %s is a mount point: %m", path); - if (r == 0) - return log_error_errno(SYNTHETIC_ERRNO(EINVAL), - "%s is not a mount point.", path); - - r = path_is_temporary_fs(path); - if (r < 0) - return log_error_errno(r, "Couldn't determine whether %s is a temporary file system: %m", path); - if (r > 0) { - log_info("%s already is a temporary file system.", path); - return 0; - } + assert(path); r = chase_symlinks("/usr", path, CHASE_PREFIX_ROOT, &old_usr); if (r < 0) @@ -45,7 +34,7 @@ static int make_volatile(const char *path) { goto finish_rmdir; if (mkdir("/run/systemd/volatile-sysroot/usr", 0755) < 0) { - r = -errno; + r = log_error_errno(errno, "Failed to create /usr directory: %m"); goto finish_umount; } @@ -54,8 +43,10 @@ static int make_volatile(const char *path) { goto finish_umount; r = bind_remount_recursive("/run/systemd/volatile-sysroot/usr", true, NULL); - if (r < 0) + if (r < 0) { + log_error_errno(r, "Failed to remount /usr read-only: %m"); goto finish_umount; + } r = umount_recursive(path, 0); if (r < 0) { @@ -64,7 +55,7 @@ static int make_volatile(const char *path) { } if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) - log_warning_errno(errno, "Failed to remount %s MS_SLAVE|MS_REC: %m", path); + log_warning_errno(errno, "Failed to remount %s MS_SLAVE|MS_REC, ignoring: %m", path); r = mount_verbose(LOG_ERR, 
"/run/systemd/volatile-sysroot", path, NULL, MS_MOVE, NULL); @@ -77,9 +68,55 @@ finish_rmdir: return r; } +static int make_overlay(const char *path) { + _cleanup_free_ char *escaped_path = NULL; + bool tmpfs_mounted = false; + const char *options = NULL; + int r; + + assert(path); + + r = mkdir_p("/run/systemd/overlay-sysroot", 0700); + if (r < 0) + return log_error_errno(r, "Couldn't create overlay sysroot directory: %m"); + + r = mount_verbose(LOG_ERR, "tmpfs", "/run/systemd/overlay-sysroot", "tmpfs", MS_STRICTATIME, "mode=755"); + if (r < 0) + goto finish; + + tmpfs_mounted = true; + + if (mkdir("/run/systemd/overlay-sysroot/upper", 0755) < 0) { + r = log_error_errno(errno, "Failed to create /run/systemd/overlay-sysroot/upper: %m"); + goto finish; + } + + if (mkdir("/run/systemd/overlay-sysroot/work", 0755) < 0) { + r = log_error_errno(errno, "Failed to create /run/systemd/overlay-sysroot/work: %m"); + goto finish; + } + + escaped_path = shell_escape(path, ",:"); + if (!escaped_path) { + r = log_oom(); + goto finish; + } + + options = strjoina("lowerdir=", escaped_path, ",upperdir=/run/systemd/overlay-sysroot/upper,workdir=/run/systemd/overlay-sysroot/work"); + r = mount_verbose(LOG_ERR, "overlay", path, "overlay", 0, options); + +finish: + if (tmpfs_mounted) + (void) umount_verbose("/run/systemd/overlay-sysroot"); + + (void) rmdir("/run/systemd/overlay-sysroot"); + return r; +} + static int run(int argc, char *argv[]) { VolatileMode m = _VOLATILE_MODE_INVALID; const char *path; + dev_t devt; int r; log_setup_service(); @@ -94,10 +131,8 @@ static int run(int argc, char *argv[]) { if (r == 0 && argc >= 2) { /* The kernel command line always wins. However if nothing was set there, the argument passed here wins instead. 
*/ m = volatile_mode_from_string(argv[1]); - if (m < 0) { - log_error("Couldn't parse volatile mode: %s", argv[1]); - r = -EINVAL; - } + if (m < 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Couldn't parse volatile mode: %s", argv[1]); } if (argc < 3) @@ -116,10 +151,47 @@ static int run(int argc, char *argv[]) { "Directory cannot be the root directory."); } - if (m != VOLATILE_YES) + if (!IN_SET(m, VOLATILE_YES, VOLATILE_OVERLAY)) return 0; - return make_volatile(path); + r = path_is_mount_point(path, NULL, AT_SYMLINK_FOLLOW); + if (r < 0) + return log_error_errno(r, "Couldn't determine whether %s is a mount point: %m", path); + if (r == 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "%s is not a mount point.", path); + + r = path_is_temporary_fs(path); + if (r < 0) + return log_error_errno(r, "Couldn't determine whether %s is a temporary file system: %m", path); + if (r > 0) { + log_info("%s already is a temporary file system.", path); + return 0; + } + + /* We are about to replace the root directory with something else. Later code might want to know what we + * replaced here, hence let's save that information as a symlink we can later use. (This is particularly + * relevant for the overlayfs case where we'll fully obstruct the view onto the underlying device, hence + * querying the backing device node from the file system directly is no longer possible.) 
*/ + r = get_block_device_harder(path, &devt); + if (r < 0) + return log_error_errno(r, "Failed to determine device major/minor of %s: %m", path); + else if (r > 0) { + _cleanup_free_ char *dn = NULL; + + r = device_path_make_major_minor(S_IFBLK, devt, &dn); + if (r < 0) + return log_error_errno(r, "Failed to format device node path: %m"); + + if (symlink(dn, "/run/systemd/volatile-root") < 0) + log_warning_errno(errno, "Failed to create symlink /run/systemd/volatile-root: %m"); + } + + if (m == VOLATILE_YES) + return make_volatile(path); + else { + assert(m == VOLATILE_OVERLAY); + return make_overlay(path); + } } DEFINE_MAIN_FUNCTION(run); |