diff options
author | Daan De Meyer <daan.j.demeyer@gmail.com> | 2022-10-10 00:14:17 +0200 |
---|---|---|
committer | Daan De Meyer <daan.j.demeyer@gmail.com> | 2022-11-15 20:23:51 +0100 |
commit | a64769d605b21d183972b585719d99aa2af44977 (patch) | |
tree | 634c4eb7517016c83d4ace5b67cd92aad20f00ec /src/partition/repart.c | |
parent | repart: Change mode of temporary root to 755 (diff) | |
download | systemd-a64769d605b21d183972b585719d99aa2af44977.tar.xz systemd-a64769d605b21d183972b585719d99aa2af44977.zip |
repart: Don't use loop devices when we're not operating on a block device
When repart is not operating on a block device, if we avoid using
any loop devices at all, it becomes possible to run repart without
needing root privileges.
Note that this also depends on the filesystems in use to support
population without needing root privileges (specifically, squashfs,
ext4 or btrfs).
Diffstat (limited to '')
-rw-r--r-- | src/partition/repart.c | 270 |
1 files changed, 210 insertions, 60 deletions
diff --git a/src/partition/repart.c b/src/partition/repart.c index b786678557..5d4cdbe689 100644 --- a/src/partition/repart.c +++ b/src/partition/repart.c @@ -2992,6 +2992,170 @@ static int context_wipe_and_discard(Context *context, bool from_scratch) { return 0; } +typedef struct { + LoopDevice *loop; + int fd; + char *path; + int whole_fd; +} PartitionTarget; + +static int partition_target_fd(PartitionTarget *t) { + assert(t); + assert(t->loop || t->fd >= 0 || t->whole_fd >= 0); + return t->loop ? t->loop->fd : t->fd >= 0 ? t->fd : t->whole_fd; +} + +static const char* partition_target_path(PartitionTarget *t) { + assert(t); + assert(t->loop || t->path); + return t->loop ? t->loop->node : t->path; +} + +static PartitionTarget *partition_target_free(PartitionTarget *t) { + if (!t) + return NULL; + + loop_device_unref(t->loop); + safe_close(t->fd); + unlink_and_free(t->path); + + return mfree(t); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(PartitionTarget*, partition_target_free); + +static int partition_target_prepare( + Context *context, + Partition *p, + uint64_t size, + bool need_path, + PartitionTarget **ret) { + + _cleanup_(partition_target_freep) PartitionTarget *t = NULL; + struct stat st; + int whole_fd; + int r; + + assert(context); + assert(p); + assert(ret); + + assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0); + + if (fstat(whole_fd, &st) < 0) + return -errno; + + /* If we're operating on a block device, we definitely need privileges to access block devices so we + * can just use loop devices as our target. Otherwise, we're operating on a regular file, in that + * case, let's write to regular files and copy those into the final image so we can run without root + * privileges. On filesystems with reflinking support, we can take advantage of this and just reflink + * the result into the image. + */ + + t = new0(PartitionTarget, 1); + if (!t) + return log_oom(); + + if (S_ISBLK(st.st_mode) || (p->format && !mkfs_supports_root_option(p->format))) { + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + + /* Loopback block devices are not only useful to turn regular files into block devices, but + * also to cut out sections of block devices into new block devices. */ + + r = loop_device_make(whole_fd, O_RDWR, p->offset, size, 0, 0, LOCK_EX, &d); + if (r < 0) + return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno); + + *t = (PartitionTarget) { + .loop = TAKE_PTR(d), + .fd = -1, + }; + } else if (need_path) { + _cleanup_(unlink_and_freep) char *temp = NULL; + _cleanup_close_ int fd = -1; + const char *vt; + + r = var_tmp_dir(&vt); + if (r < 0) + return log_error_errno(r, "Could not determine temporary directory: %m"); + + temp = path_join(vt, "repart-XXXXXX"); + if (!temp) + return log_oom(); + + fd = mkostemp_safe(temp); + if (fd < 0) + return log_error_errno(fd, "Failed to create temporary file: %m"); + + if (ftruncate(fd, size) < 0) + return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", + FORMAT_BYTES(size)); + + *t = (PartitionTarget) { + .fd = TAKE_FD(fd), + .path = TAKE_PTR(temp), + }; + } else { + if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1) + return log_error_errno(errno, "Failed to seek to partition offset: %m"); + + *t = (PartitionTarget) { + .fd = -1, + .whole_fd = whole_fd, + }; + } + + *ret = TAKE_PTR(t); + + return 0; +} + +static int partition_target_grow(PartitionTarget *t, uint64_t size) { + int r; + + assert(t); + + if (t->loop) { + r = loop_device_refresh_size(t->loop, UINT64_MAX, size); + if (r < 0) + return log_error_errno(r, "Failed to refresh loopback device size: %m"); + } else if (t->fd >= 0) { + if (ftruncate(t->fd, size) < 0) + return log_error_errno(errno, "Failed to grow '%s' to %s by truncation: %m", + t->path, FORMAT_BYTES(size)); + } + + return 0; +} + +static int partition_target_sync(Context *context, Partition *p, PartitionTarget *t) { + int whole_fd, r; + + assert(context); + assert(p); + assert(t); + + assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0); + + if (t->loop) { + r = loop_device_sync(t->loop); + if (r < 0) + return log_error_errno(r, "Failed to sync loopback device: %m"); + } else if (t->fd >= 0) { + if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1) + return log_error_errno(errno, "Failed to seek to partition offset: %m"); + + r = copy_bytes(t->fd, whole_fd, UINT64_MAX, COPY_REFLINK|COPY_HOLES|COPY_FSYNC); + if (r < 0) + return log_error_errno(r, "Failed to copy bytes to partition: %m"); + } else { + if (fsync(t->whole_fd) < 0) + return log_error_errno(errno, "Failed to sync changes: %m"); + } + + return 0; +} + static int partition_encrypt(Context *context, Partition *p, const char *node) { #if HAVE_LIBCRYPTSETUP && HAVE_CRYPT_SET_DATA_OFFSET && HAVE_CRYPT_REENCRYPT_INIT_BY_PASSPHRASE && HAVE_CRYPT_REENCRYPT struct crypt_params_luks2 luks_params = { @@ -3226,11 +3390,11 @@ static int partition_format_verity_hash( #if HAVE_LIBCRYPTSETUP Partition *dp; - _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + _cleanup_(partition_target_freep) PartitionTarget *t = NULL; _cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL; _cleanup_free_ uint8_t *rh = NULL; size_t rhs; - int whole_fd, r; + int r; assert(context); assert(p); @@ -3251,19 +3415,15 @@ static int partition_format_verity_hash( assert_se(dp = p->siblings[VERITY_DATA]); assert(!dp->dropped); - assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0); - r = dlopen_cryptsetup(); if (r < 0) return log_error_errno(r, "libcryptsetup not found, cannot setup verity: %m"); - r = loop_device_make(whole_fd, O_RDWR, p->offset, p->new_size, 0, 0, LOCK_EX, &d); + r = partition_target_prepare(context, p, p->new_size, /*need_path=*/ true, &t); if (r < 0) - return log_error_errno(r, - "Failed to make loopback device of verity hash partition %" PRIu64 ": %m", - p->partno); + return r; - r = sym_crypt_init(&cd, d->node); + r = sym_crypt_init(&cd, partition_target_path(t)); if (r < 0) return log_error_errno(r, "Failed to allocate libcryptsetup context: %m"); @@ -3281,6 +3441,10 @@ static int partition_format_verity_hash( if (r < 0) return log_error_errno(r, "Failed to setup verity hash data: %m"); + r = partition_target_sync(context, p, t); + if (r < 0) + return r; + r = sym_crypt_get_volume_key_size(cd); if (r < 0) return log_error_errno(r, "Failed to determine verity root hash size: %m"); @@ -3433,15 +3597,14 @@ static int partition_format_verity_sig(Context *context, Partition *p) { } static int context_copy_blocks(Context *context) { - int whole_fd = -1, r; + int r; assert(context); /* Copy in file systems on the block level */ LIST_FOREACH(partitions, p, context->partitions) { - _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; - int target_fd; + _cleanup_(partition_target_freep) PartitionTarget *t = NULL; if (p->copy_blocks_fd < 0) continue; @@ -3459,42 +3622,34 @@ static int context_copy_blocks(Context *context) { assert(p->copy_blocks_size != UINT64_MAX); assert(p->new_size >= p->copy_blocks_size + (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0)); - if (whole_fd < 0) - assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0); - - if (p->encrypt != ENCRYPT_OFF) { - r = loop_device_make(whole_fd, O_RDWR, p->offset, p->new_size, 0, 0, LOCK_EX, &d); - if (r < 0) - return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno); - - target_fd = d->fd; - } else { - if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1) - return log_error_errno(errno, "Failed to seek to partition offset: %m"); - - target_fd = whole_fd; - } + r = partition_target_prepare(context, p, p->new_size, + /*need_path=*/ p->encrypt != ENCRYPT_OFF || p->siblings[VERITY_HASH], + &t); + if (r < 0) + return r; log_info("Copying in '%s' (%s) on block level into future partition %" PRIu64 ".", p->copy_blocks_path, FORMAT_BYTES(p->copy_blocks_size), p->partno); - r = copy_bytes(p->copy_blocks_fd, target_fd, p->copy_blocks_size, COPY_REFLINK); + r = copy_bytes(p->copy_blocks_fd, partition_target_fd(t), p->copy_blocks_size, COPY_REFLINK); if (r < 0) return log_error_errno(r, "Failed to copy in data from '%s': %m", p->copy_blocks_path); if (p->encrypt != ENCRYPT_OFF) { - r = partition_encrypt(context, p, d->node); + r = partition_encrypt(context, p, partition_target_path(t)); if (r < 0) return r; } - if (fsync(target_fd) < 0) - return log_error_errno(errno, "Failed to synchronize copied data blocks: %m"); + r = partition_target_sync(context, p, t); + if (r < 0) + return r; log_info("Copying in of '%s' on block level completed.", p->copy_blocks_path); if (p->siblings[VERITY_HASH]) { - r = partition_format_verity_hash(context, p->siblings[VERITY_HASH], d->node); + r = partition_format_verity_hash(context, p->siblings[VERITY_HASH], + partition_target_path(t)); if (r < 0) return r; } @@ -3764,7 +3919,7 @@ static int make_copy_files_denylist(Context *context, Set **ret) { static int context_mkfs(Context *context) { _cleanup_set_free_ Set *denylist = NULL; - int fd = -1, r; + int r; assert(context); @@ -3775,8 +3930,8 @@ static int context_mkfs(Context *context) { return r; LIST_FOREACH(partitions, p, context->partitions) { - _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; _cleanup_(rm_rf_physical_and_freep) char *root = NULL; + _cleanup_(partition_target_freep) PartitionTarget *t = NULL; if (p->dropped) continue; @@ -3798,27 +3953,20 @@ static int context_mkfs(Context *context) { assert(p->new_size != UINT64_MAX); assert(p->new_size >= (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0)); - if (fd < 0) - assert_se((fd = fdisk_get_devfd(context->fdisk_context)) >= 0); - - /* Loopback block devices are not only useful to turn regular files into block devices, but - * also to cut out sections of block devices into new block devices. If we're doing - * encryption, we make sure we keep free space at the end which is required for cryptsetup's - * offline encryption. */ - - r = loop_device_make(fd, O_RDWR, p->offset, - p->new_size - (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0), - 0, 0, LOCK_EX, &d); + /* If we're doing encryption, we make sure we keep free space at the end which is required + * for cryptsetup's offline encryption. */ + r = partition_target_prepare(context, p, + p->new_size - (p->encrypt != ENCRYPT_OFF ? LUKS2_METADATA_KEEP_FREE : 0), + /*need_path=*/ true, + &t); if (r < 0) - return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno); + return r; log_info("Formatting future partition %" PRIu64 ".", p->partno); - /* Ideally, we populate filesystems using our own code after creating the filesystem to - * ensure consistent handling of chattrs, xattrs and other similar things. However, when - * using read-only filesystems such as squashfs, we can't populate after creating the - * filesystem because it's read-only, so instead we create a temporary root to use as the - * source tree when generating the read-only filesystem. */ + /* We prefer (or are required in the case of read-only filesystems) to populate filesystems + * directly via the corresponding mkfs binary if it supports a --rootdir (or equivalent) + * option. To do that, we need to setup the final directory tree beforehand. */ if (mkfs_supports_root_option(p->format)) { r = partition_populate_directory(p, denylist, &root); @@ -3826,25 +3974,26 @@ static int context_mkfs(Context *context) { return r; } - r = make_filesystem(d->node, p->format, strempty(p->new_label), root, p->fs_uuid, arg_discard); + r = make_filesystem(partition_target_path(t), p->format, strempty(p->new_label), root, + p->fs_uuid, arg_discard); if (r < 0) return r; log_info("Successfully formatted future partition %" PRIu64 ".", p->partno); - /* Now, we can populate all the other filesystems that aren't read-only. */ + /* Now, we can populate all the other filesystems that we couldn't populate earlier. */ if (!mkfs_supports_root_option(p->format)) { - r = partition_populate_filesystem(p, d->node, denylist); + r = partition_populate_filesystem(p, partition_target_path(t), denylist); if (r < 0) return r; } if (p->encrypt != ENCRYPT_OFF) { - r = loop_device_refresh_size(d, UINT64_MAX, p->new_size); + r = partition_target_grow(t, p->new_size); if (r < 0) - return log_error_errno(r, "Failed to refresh loopback device size: %m"); + return r; - r = partition_encrypt(context, p, d->node); + r = partition_encrypt(context, p, partition_target_path(t)); if (r < 0) return log_error_errno(r, "Failed to encrypt device: %m"); } @@ -3853,12 +4002,13 @@ static int context_mkfs(Context *context) { * if we don't sync before detaching a block device the in-flight sectors possibly won't hit * the disk. */ - r = loop_device_sync(d); + r = partition_target_sync(context, p, t); if (r < 0) - return log_error_errno(r, "Failed to sync loopback device: %m"); + return r; if (p->siblings[VERITY_HASH]) { - r = partition_format_verity_hash(context, p->siblings[VERITY_HASH], d->node); + r = partition_format_verity_hash(context, p->siblings[VERITY_HASH], + partition_target_path(t)); if (r < 0) return r; } |