diff options
author | Krzesimir Nowak <knowak@microsoft.com> | 2024-02-15 17:46:08 +0100 |
---|---|---|
committer | Krzesimir Nowak <knowak@microsoft.com> | 2024-02-22 19:06:22 +0100 |
commit | 8a8990653ca21a313ad0cd460a52562cf7f267b5 (patch) | |
tree | 17b30145b9f3c963c229afc5f939c9494c22bd5c | |
parent | sysext: Refactor the merge hierarchy code (diff) | |
download | systemd-8a8990653ca21a313ad0cd460a52562cf7f267b5.tar.xz systemd-8a8990653ca21a313ad0cd460a52562cf7f267b5.zip |
sysext: Add minimal support for optional mutability for extensions
systemd-sysext will check if /var/lib/extensions.mutable/${hierarchy} exists
and use it as an overlayfs upperdir for storing writes. This allows having
mutable hierarchy after merging the extension images.
The implementation is following a proposed update to the Extension Images
specification at https://github.com/uapi-group/specifications/pull/78.
-rw-r--r-- | src/sysext/sysext.c | 339 |
1 files changed, 327 insertions, 12 deletions
diff --git a/src/sysext/sysext.c b/src/sysext/sysext.c index 7581b736c3..e381b346b0 100644 --- a/src/sysext/sysext.c +++ b/src/sysext/sysext.c @@ -39,9 +39,12 @@ #include "pager.h" #include "parse-argument.h" #include "parse-util.h" +#include "path-util.h" #include "pretty-print.h" #include "process-util.h" +#include "rm-rf.h" #include "sort-util.h" +#include "string-util.h" #include "terminal-util.h" #include "user-util.h" #include "varlink.h" @@ -252,11 +255,22 @@ static int unmerge_hierarchy( ImageClass image_class, const char *p) { + _cleanup_free_ char *dot_dir = NULL, *work_dir_info_file = NULL; int r; assert(p); + dot_dir = path_join(p, image_class_info[image_class].dot_directory_name); + if (!dot_dir) + return log_oom(); + + work_dir_info_file = path_join(dot_dir, "work_dir"); + if (!work_dir_info_file) + return log_oom(); + for (;;) { + _cleanup_free_ char *escaped_work_dir_in_root = NULL, *work_dir = NULL; + /* We only unmount /usr/ if it is a mount point and really one of ours, in order not to break * systems where /usr/ is a mount point of its own already. */ @@ -266,10 +280,41 @@ static int unmerge_hierarchy( if (r == 0) break; + r = read_one_line_file(work_dir_info_file, &escaped_work_dir_in_root); + if (r < 0) { + if (r != -ENOENT) + return log_error_errno(r, "Failed to read '%s': %m", work_dir_info_file); + } else { + _cleanup_free_ char *work_dir_in_root = NULL; + ssize_t l; + + l = cunescape_length(escaped_work_dir_in_root, r, 0, &work_dir_in_root); + if (l < 0) + return log_error_errno(l, "Failed to unescape work directory path: %m"); + work_dir = path_join(arg_root, work_dir_in_root); + if (!work_dir) + return log_oom(); + } + + r = umount_verbose(LOG_DEBUG, dot_dir, MNT_DETACH|UMOUNT_NOFOLLOW); + if (r < 0) { + /* EINVAL is possibly "not a mount point". Let it slide as it's expected to occur if + * the whole hierarchy was read-only, so the dot directory inside it was not + * bind-mounted as read-only. */ + if (r != -EINVAL) + return log_error_errno(r, "Failed to unmount '%s': %m", dot_dir); + } + r = umount_verbose(LOG_ERR, p, MNT_DETACH|UMOUNT_NOFOLLOW); if (r < 0) return r; + if (work_dir) { + r = rm_rf(work_dir, REMOVE_ROOT | REMOVE_MISSING_OK | REMOVE_PHYSICAL); + if (r < 0) + return log_error_errno(r, "Failed to remove '%s': %m", work_dir); + } + log_info("Unmerged '%s'.", p); } @@ -507,7 +552,9 @@ static int mount_overlayfs( ImageClass image_class, int noexec, const char *where, - char **layers) { + char **layers, + const char *upper_dir, + const char *work_dir) { _cleanup_free_ char *options = NULL; bool separator = false; @@ -515,6 +562,7 @@ static int mount_overlayfs( int r; assert(where); + assert((upper_dir && work_dir) || (!upper_dir && !work_dir)); options = strdup("lowerdir="); if (!options) @@ -532,6 +580,22 @@ static int mount_overlayfs( if (noexec >= 0) SET_FLAG(flags, MS_NOEXEC, noexec); + if (upper_dir && work_dir) { + r = append_overlayfs_path_option(&options, ",", "upperdir", upper_dir); + if (r < 0) + return r; + + flags &= ~MS_RDONLY; + + r = append_overlayfs_path_option(&options, ",", "workdir", work_dir); + if (r < 0) + return r; + /* redirect_dir=on and noatime prevent unnecessary upcopies, metacopy=off prevents broken + * files from partial upcopies after umount. */ + if (!strextend(&options, ",redirect_dir=on,noatime,metacopy=off")) + return log_oom(); + } + /* Now mount the actual overlayfs */ r = mount_nofollow_verbose(LOG_ERR, image_class_info[image_class].short_identifier, where, "overlay", flags, options); if (r < 0) @@ -540,10 +604,104 @@ static int mount_overlayfs( return 0; } +static char *hierarchy_as_single_path_component(const char *hierarchy) { + /* We normally expect hierarchy to be /usr, /opt or /etc, but for debugging purposes the hierarchy + * could very well be like /foo/bar/baz/. So for a given hierarchy we generate a directory name by + * stripping the leading and trailing separators and replacing the rest of separators with dots. This + * makes the generated name to be the same for /foo/bar/baz and for /foo/bar.baz, but, again, + * speciyfing a different hierarchy is a debugging feature, so non-unique mapping should not be an + * issue in general case. */ + const char *stripped = hierarchy; + _cleanup_free_ char *dir_name = NULL; + + assert(hierarchy); + + stripped += strspn(stripped, "/"); + + dir_name = strdup(stripped); + if (!dir_name) + return NULL; + delete_trailing_chars(dir_name, "/"); + string_replace_char(dir_name, '/', '.'); + return TAKE_PTR(dir_name); +} + +static char *determine_mutable_directory_path_for_hierarchy(const char *hierarchy) { + _cleanup_free_ char *dir_name = NULL; + + assert(hierarchy); + dir_name = hierarchy_as_single_path_component(hierarchy); + if (!dir_name) + return NULL; + + return path_join("/var/lib/extensions.mutable", dir_name); +} + +static int paths_on_same_fs(const char *path1, const char *path2) { + struct stat st1, st2; + + assert(path1); + assert(path2); + + if (stat(path1, &st1)) + return log_error_errno(errno, "Failed to stat '%s': %m", path1); + + if (stat(path2, &st2)) + return log_error_errno(errno, "Failed to stat '%s': %m", path2); + + return st1.st_dev == st2.st_dev; +} + +static int work_dir_for_hierarchy( + const char *hierarchy, + const char *resolved_upper_dir, + char **ret_work_dir) { + + _cleanup_free_ char *parent = NULL; + int r; + + assert(hierarchy); + assert(resolved_upper_dir); + assert(ret_work_dir); + + r = path_extract_directory(resolved_upper_dir, &parent); + if (r < 0) + return log_error_errno(r, "Failed to get parent directory of upperdir '%s': %m", resolved_upper_dir); + + /* TODO: paths_in_same_superblock? partition? device? */ + r = paths_on_same_fs(resolved_upper_dir, parent); + if (r < 0) + return r; + if (!r) + return log_error_errno(SYNTHETIC_ERRNO(EXDEV), "Unable to find a suitable workdir location for upperdir '%s' for host hierarchy '%s' - parent directory of the upperdir is in a different filesystem", resolved_upper_dir, hierarchy); + + _cleanup_free_ char *f = NULL, *dir_name = NULL; + + f = hierarchy_as_single_path_component(hierarchy); + if (!f) + return log_oom(); + dir_name = strjoin(".systemd-", f, "-workdir"); + if (!dir_name) + return log_oom(); + + free(f); + f = path_join(parent, dir_name); + if (!f) + return log_oom(); + + *ret_work_dir = TAKE_PTR(f); + return 0; +} + typedef struct OverlayFSPaths { char *hierarchy; char *resolved_hierarchy; + char *resolved_mutable_directory; + /* NULL if merged fs is read-only */ + char *upper_dir; + /* NULL if merged fs is read-only */ + char *work_dir; /* lowest index is top lowerdir, highest index is bottom lowerdir */ char **lower_dirs; } OverlayFSPaths; @@ -554,7 +712,10 @@ static OverlayFSPaths *overlayfs_paths_free(OverlayFSPaths *op) { free(op->hierarchy); free(op->resolved_hierarchy); + free(op->resolved_mutable_directory); + free(op->upper_dir); + free(op->work_dir); strv_free(op->lower_dirs); free(op); @@ -577,6 +738,25 @@ static int resolve_hierarchy(const char *hierarchy, char **ret_resolved_hierarch return 0; } +static int resolve_mutable_directory(const char *hierarchy, char **ret_resolved_mutable_directory) { + _cleanup_free_ char *path = NULL, *resolved_path = NULL; + int r; + + assert(hierarchy); + assert(ret_resolved_mutable_directory); + + path = determine_mutable_directory_path_for_hierarchy(hierarchy); + if (!path) + return log_oom(); + + r = chase(path, arg_root, CHASE_PREFIX_ROOT, &resolved_path, NULL); + if (r < 0 && r != -ENOENT) + return log_error_errno(r, "Failed to resolve mutable directory '%s': %m", path); + + *ret_resolved_mutable_directory = TAKE_PTR(resolved_path); + return 0; +} + static int overlayfs_paths_new(const char *hierarchy, OverlayFSPaths **ret_op) { _cleanup_free_ char *hierarchy_copy = NULL, *resolved_hierarchy = NULL, *resolved_mutable_directory = NULL; int r; @@ -591,6 +771,9 @@ static int overlayfs_paths_new(const char *hierarchy, OverlayFSPaths **ret_op) { r = resolve_hierarchy(hierarchy, &resolved_hierarchy); if (r < 0) return r; + r = resolve_mutable_directory(hierarchy, &resolved_mutable_directory); + if (r < 0) + return r; OverlayFSPaths *op; op = new(OverlayFSPaths, 1); @@ -600,6 +783,7 @@ static int overlayfs_paths_new(const char *hierarchy, OverlayFSPaths **ret_op) { *op = (OverlayFSPaths) { .hierarchy = TAKE_PTR(hierarchy_copy), .resolved_hierarchy = TAKE_PTR(resolved_hierarchy), + .resolved_mutable_directory = TAKE_PTR(resolved_mutable_directory), }; *ret_op = TAKE_PTR(op); @@ -678,6 +862,23 @@ static int hierarchy_as_lower_dir(OverlayFSPaths *op) { return 1; } + if (!op->resolved_mutable_directory) { + log_debug("No mutable directory found, so host hierarchy '%s' will be used as lowerdir", op->resolved_hierarchy); + return 0; + } + + if (path_equal(op->resolved_hierarchy, op->resolved_mutable_directory)) { + log_debug("Host hierarchy '%s' will serve as upperdir.", op->resolved_hierarchy); + return 1; + } + r = inode_same(op->resolved_hierarchy, op->resolved_mutable_directory, 0); + if (r < 0) + return log_error_errno(r, "Failed to check inode equality of hierarchy %s and its mutable directory %s: %m", op->resolved_hierarchy, op->resolved_mutable_directory); + if (r > 0) { + log_debug("Host hierarchy '%s' will serve as upperdir.", op->resolved_hierarchy); + return 1; + } + return 0; } @@ -726,6 +927,50 @@ static int determine_lower_dirs( return 0; } +static int determine_upper_dir(OverlayFSPaths *op) { + int r; + + assert(op); + assert(!op->upper_dir); + + if (!op->resolved_mutable_directory) { + log_debug("No mutable directory found for host hierarchy '%s', there will be no upperdir", op->hierarchy); + return 0; + } + + /* Require upper dir to be on writable filesystem if it's going to be used as an actual overlayfs + * upperdir, instead of a lowerdir as an imported path. */ + r = path_is_read_only_fs(op->resolved_mutable_directory); + if (r < 0) + return log_error_errno(r, "Failed to determine if mutable directory '%s' is on read-only filesystem: %m", op->resolved_mutable_directory); + if (r > 0) + return log_error_errno(SYNTHETIC_ERRNO(EROFS), "Can't use '%s' as an upperdir as it is read-only.", op->resolved_mutable_directory); + + op->upper_dir = strdup(op->resolved_mutable_directory); + if (!op->upper_dir) + return log_oom(); + + return 0; +} + +static int determine_work_dir(OverlayFSPaths *op) { + _cleanup_free_ char *work_dir = NULL; + int r; + + assert(op); + assert(!op->work_dir); + + if (!op->upper_dir) + return 0; + + r = work_dir_for_hierarchy(op->hierarchy, op->upper_dir, &work_dir); + if (r < 0) + return r; + + op->work_dir = TAKE_PTR(work_dir); + return 0; +} + static int mount_overlayfs_with_op( OverlayFSPaths *op, ImageClass image_class, @@ -746,7 +991,13 @@ static int mount_overlayfs_with_op( if (r < 0) return log_error_errno(r, "Failed to make directory '%s': %m", meta_path); - r = mount_overlayfs(image_class, noexec, overlay_path, op->lower_dirs); + if (op->upper_dir && op->work_dir) { + r = mkdir_p(op->work_dir, 0700); + if (r < 0) + return log_error_errno(r, "Failed to make directory '%s': %m", op->work_dir); + } + + r = mount_overlayfs(image_class, noexec, overlay_path, op->lower_dirs, op->upper_dir, op->work_dir); if (r < 0) return r; @@ -807,17 +1058,49 @@ static int write_dev_file(ImageClass image_class, const char *meta_path, const c return 0; } +static int write_work_dir_file(ImageClass image_class, const char *meta_path, const char *work_dir) { + _cleanup_free_ char *escaped_work_dir_in_root = NULL, *f = NULL; + char *work_dir_in_root = NULL; + int r; + + assert(meta_path); + + if (!work_dir) + return 0; + + work_dir_in_root = path_startswith(work_dir, empty_to_root(arg_root)); + if (!work_dir_in_root) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Workdir '%s' must not be outside root '%s'", work_dir, empty_to_root(arg_root)); + + f = path_join(meta_path, image_class_info[image_class].dot_directory_name, "work_dir"); + if (!f) + return log_oom(); + + /* Paths can have newlines for whatever reason, so better escape them to really get a single + * line file. */ + escaped_work_dir_in_root = cescape(work_dir_in_root); + if (!escaped_work_dir_in_root) + return log_oom(); + r = write_string_file(f, escaped_work_dir_in_root, WRITE_STRING_FILE_CREATE); + if (r < 0) + return log_error_errno(r, "Failed to write '%s': %m", f); + + return 0; +} + static int store_info_in_meta( ImageClass image_class, char **extensions, const char *meta_path, - const char *overlay_path) { + const char *overlay_path, + const char *work_dir) { int r; assert(extensions); assert(meta_path); assert(overlay_path); + /* work_dir may be NULL */ r = write_extensions_file(image_class, extensions, meta_path); if (r < 0) @@ -827,6 +1110,10 @@ static int store_info_in_meta( if (r < 0) return r; + r = write_work_dir_file(image_class, meta_path, work_dir); + if (r < 0) + return r; + /* Make sure the top-level dir has an mtime marking the point we established the merge */ if (utimensat(AT_FDCWD, meta_path, NULL, AT_SYMLINK_NOFOLLOW) < 0) return log_error_errno(r, "Failed fix mtime of '%s': %m", meta_path); @@ -834,16 +1121,35 @@ static int store_info_in_meta( return 0; } -static int make_mounts_read_only(ImageClass image_class, const char *overlay_path) { +static int make_mounts_read_only(ImageClass image_class, const char *overlay_path, bool mutable) { int r; assert(overlay_path); - /* The overlayfs superblock is read-only. Let's also mark the bind mount read-only. Extra turbo - * safety 😎 */ - r = bind_remount_recursive(overlay_path, MS_RDONLY, MS_RDONLY, NULL); - if (r < 0) - return log_error_errno(r, "Failed to make bind mount '%s' read-only: %m", overlay_path); + if (mutable) { + /* Bind mount the meta path as read-only on mutable overlays to avoid accidental + * modifications of the contents of meta directory, which could lead to systemd thinking that + * this hierarchy is not our mount. */ + _cleanup_free_ char *f = NULL; + + f = path_join(overlay_path, image_class_info[image_class].dot_directory_name); + if (!f) + return log_oom(); + + r = mount_nofollow_verbose(LOG_ERR, f, f, NULL, MS_BIND, NULL); + if (r < 0) + return r; + + r = bind_remount_one(f, MS_RDONLY, MS_RDONLY); + if (r < 0) + return log_error_errno(r, "Failed to remount '%s' as read-only: %m", f); + } else { + /* The overlayfs superblock is read-only. Let's also mark the bind mount read-only. Extra + * turbo safety 😎 */ + r = bind_remount_recursive(overlay_path, MS_RDONLY, MS_RDONLY, NULL); + if (r < 0) + return log_error_errno(r, "Failed to make bind mount '%s' read-only: %m", overlay_path); + } return 0; } @@ -878,15 +1184,23 @@ static int merge_hierarchy( if (extensions_used == 0) /* No extension with files in this hierarchy? Then don't do anything. */ return 0; + r = determine_upper_dir(op); + if (r < 0) + return r; + + r = determine_work_dir(op); + if (r < 0) + return r; + r = mount_overlayfs_with_op(op, image_class, noexec, overlay_path, meta_path); if (r < 0) return r; - r = store_info_in_meta(image_class, extensions, meta_path, overlay_path); + r = store_info_in_meta(image_class, extensions, meta_path, overlay_path, op->work_dir); if (r < 0) return r; - r = make_mounts_read_only(image_class, overlay_path); + r = make_mounts_read_only(image_class, overlay_path, op->upper_dir && op->work_dir); if (r < 0) return r; @@ -1213,7 +1527,8 @@ static int merge_subprocess( if (r < 0) return log_error_errno(r, "Failed to create hierarchy mount point '%s': %m", resolved); - r = mount_nofollow_verbose(LOG_ERR, p, resolved, NULL, MS_BIND, NULL); + /* Using MS_REC to potentially bring in our read-only bind mount of metadata. */ + r = mount_nofollow_verbose(LOG_ERR, p, resolved, NULL, MS_BIND|MS_REC, NULL); if (r < 0) return r; |