diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-17 21:38:04 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-17 21:38:04 +0200 |
commit | a1b547f0f217cfb06af7eb4ce8488b02d83a0370 (patch) | |
tree | 2935586a4371bfb9c5abec688dc279f99504cd08 /fs/btrfs/ioctl.c | |
parent | Merge tag 'gfs2-v6.10-rc1-fixes' of git://git.kernel.org/pub/scm/linux/kernel... (diff) | |
parent | btrfs: fix extent map use-after-free when adding pages to compressed bio (diff) | |
download | linux-a1b547f0f217cfb06af7eb4ce8488b02d83a0370.tar.xz linux-a1b547f0f217cfb06af7eb4ce8488b02d83a0370.zip |
Merge tag 'for-6.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"The highlights are new logic behind background block group reclaim,
automatic removal of qgroup after removing a subvolume and new
'rescue=' mount options.
The rest is optimizations, cleanups and refactoring.
User visible features:
- dynamic block group reclaim:
- tunable framework to avoid situations where eager data
allocations prevent creating new metadata chunks due to lack of
unallocated space
- reuse sysfs knob bg_reclaim_threshold (otherwise used only in
zoned mode) for a fixed value threshold
- new on/off sysfs knob "dynamic_reclaim" calculating the value
based on heuristics, aiming to keep spare working space for
relocating chunks but not to needlessly relocate partially
utilized block groups or reclaim newly allocated ones
- stats are exported in sysfs per block group type, files
"reclaim_*"
- this may increase IO load at unexpected times but the corner
case of no allocatable block groups is known to be worse
- automatically remove qgroup of deleted subvolumes:
- adjust qgroup removal conditions, make sure all related
subvolume data are already removed, or return EBUSY, also take
into account setting of sysfs drop_subtree_threshold
- also works in squota mode
- mount option updates: new modes of 'rescue=' that allow to mount
images (read-only) that could have been partially converted by user
space tools
- ignoremetacsums - invalid metadata checksums are ignored
- ignoresuperflags - super block flags that track conversion in
progress (like UUID or checksums)
Core:
- size of struct btrfs_inode is now below 1024 (on a release config),
improved memory packing and other secondary effects
- switch tracking of open inodes from rb-tree to xarray, minor
performance improvement
- reduce number of empty transaction commits when there are no dirty
data/metadata
- memory allocation optimizations (reduced numbers, reordering out of
critical sections)
- extent map structure optimizations and refactoring, more sanity
checks
- more subpage in zoned mode preparations or fixes
- general snapshot code cleanups, improvements and documentation
- tree-checker updates: more file extent ram_bytes fixes, continued
- raid-stripe-tree update (not backward compatible):
- remove extent encoding field from the structure, can be inferred
from other information
- requires btrfs-progs 6.9.1 or newer
- cleanups and refactoring
- error message updates
- error handling improvements
- return type and parameter cleanups and improvements"
* tag 'for-6.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (152 commits)
btrfs: fix extent map use-after-free when adding pages to compressed bio
btrfs: fix bitmap leak when loading free space cache on duplicate entry
btrfs: remove the BUG_ON() inside extent_range_clear_dirty_for_io()
btrfs: move extent_range_clear_dirty_for_io() into inode.c
btrfs: enhance compression error messages
btrfs: fix data race when accessing the last_trans field of a root
btrfs: rename the extra_gfp parameter of btrfs_alloc_page_array()
btrfs: remove the extra_gfp parameter from btrfs_alloc_folio_array()
btrfs: introduce new "rescue=ignoresuperflags" mount option
btrfs: introduce new "rescue=ignoremetacsums" mount option
btrfs: output the unrecognized super block flags as hex
btrfs: remove unused Opt enums
btrfs: tree-checker: add extra ram_bytes and disk_num_bytes check
btrfs: fix the ram_bytes assignment for truncated ordered extents
btrfs: make validate_extent_map() catch ram_bytes mismatch
btrfs: ignore incorrect btrfs_file_extent_item::ram_bytes
btrfs: cleanup the bytenr usage inside btrfs_extent_item_to_extent_map()
btrfs: fix typo in error message in btrfs_validate_super()
btrfs: move the direct IO code into its own file
btrfs: pass a btrfs_inode to btrfs_set_prop()
...
Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r-- | fs/btrfs/ioctl.c | 94 |
1 files changed, 69 insertions, 25 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6ad524b894fc..e0a664b8a46a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -375,15 +375,15 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap, return PTR_ERR(trans); if (comp) { - ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp, - strlen(comp), 0); + ret = btrfs_set_prop(trans, BTRFS_I(inode), "btrfs.compression", + comp, strlen(comp), 0); if (ret) { btrfs_abort_transaction(trans, ret); goto out_end_trans; } } else { - ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, - 0, 0); + ret = btrfs_set_prop(trans, BTRFS_I(inode), "btrfs.compression", + NULL, 0, 0); if (ret && ret != -ENODATA) { btrfs_abort_transaction(trans, ret); goto out_end_trans; @@ -552,7 +552,7 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info, return 0; } -int __pure btrfs_is_empty_uuid(u8 *uuid) +int __pure btrfs_is_empty_uuid(const u8 *uuid) { int i; @@ -658,15 +658,10 @@ static noinline int create_subvol(struct mnt_idmap *idmap, ret = PTR_ERR(trans); goto out_release_rsv; } - ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root); - if (ret) - goto out; btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); qgroup_reserved = 0; trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; - /* Tree log can't currently deal with an inode which is a new root. */ - btrfs_set_log_full_commit(trans); ret = btrfs_qgroup_inherit(trans, 0, objectid, btrfs_root_id(root), inherit); if (ret) @@ -719,6 +714,8 @@ static noinline int create_subvol(struct mnt_idmap *idmap, ret = btrfs_insert_root(trans, fs_info->tree_root, &key, root_item); if (ret) { + int ret2; + /* * Since we don't abort the transaction in this case, free the * tree block so that we don't leak space and leave the @@ -729,7 +726,9 @@ static noinline int create_subvol(struct mnt_idmap *idmap, btrfs_tree_lock(leaf); btrfs_clear_buffer_dirty(trans, leaf); btrfs_tree_unlock(leaf); - btrfs_free_tree_block(trans, objectid, leaf, 0, 1); + ret2 = btrfs_free_tree_block(trans, objectid, leaf, 0, 1); + if (ret2 < 0) + btrfs_abort_transaction(trans, ret2); free_extent_buffer(leaf); goto out; } @@ -767,6 +766,8 @@ static noinline int create_subvol(struct mnt_idmap *idmap, goto out; } + btrfs_record_new_subvolume(trans, BTRFS_I(dir)); + d_instantiate_new(dentry, new_inode_args.inode); new_inode_args.inode = NULL; @@ -854,7 +855,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, pending_snapshot->dentry = dentry; pending_snapshot->root = root; pending_snapshot->readonly = readonly; - pending_snapshot->dir = dir; + pending_snapshot->dir = BTRFS_I(dir); pending_snapshot->inherit = inherit; trans = btrfs_start_transaction(root, 0); @@ -1070,7 +1071,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent, atomic_inc(&root->snapshot_force_cow); snapshot_force_cow = true; - btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); + btrfs_wait_ordered_extents(root, U64_MAX, NULL); ret = btrfs_mksubvol(parent, idmap, name, namelen, root, readonly, inherit); @@ -1917,8 +1918,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, struct btrfs_ioctl_ino_lookup_user_args *args) { struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; - struct super_block *sb = inode->i_sb; - struct btrfs_key upper_limit = BTRFS_I(inode)->location; + u64 upper_limit = btrfs_ino(BTRFS_I(inode)); u64 treeid = btrfs_root_id(BTRFS_I(inode)->root); u64 dirid = args->dirid; unsigned long item_off; @@ -1944,7 +1944,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, * If the bottom subvolume does not exist directly under upper_limit, * construct the path in from the bottom up. */ - if (dirid != upper_limit.objectid) { + if (dirid != upper_limit) { ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1]; root = btrfs_get_fs_root(fs_info, treeid, true); @@ -2006,7 +2006,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, * btree and lock the same leaf. */ btrfs_release_path(path); - temp_inode = btrfs_iget(sb, key2.objectid, root); + temp_inode = btrfs_iget(key2.objectid, root); if (IS_ERR(temp_inode)) { ret = PTR_ERR(temp_inode); goto out_put; @@ -2019,7 +2019,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, goto out_put; } - if (key.offset == upper_limit.objectid) + if (key.offset == upper_limit) break; if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) { ret = -EACCES; @@ -2140,7 +2140,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp) inode = file_inode(file); if (args->dirid == BTRFS_FIRST_FREE_OBJECTID && - BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) { + btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { /* * The subvolume does not exist under fd with which this is * called @@ -3807,12 +3807,29 @@ drop_write: return ret; } +/* + * Quick check for ioctl handlers if quotas are enabled. Proper locking must be + * done before any operations. + */ +static bool qgroup_enabled(struct btrfs_fs_info *fs_info) +{ + bool ret = true; + + mutex_lock(&fs_info->qgroup_ioctl_lock); + if (!fs_info->quota_root) + ret = false; + mutex_unlock(&fs_info->qgroup_ioctl_lock); + + return ret; +} + static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) { struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_qgroup_assign_args *sa; + struct btrfs_qgroup_list *prealloc = NULL; struct btrfs_trans_handle *trans; int ret; int err; @@ -3820,6 +3837,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!qgroup_enabled(root->fs_info)) + return -ENOTCONN; + ret = mnt_want_write_file(file); if (ret) return ret; @@ -3830,14 +3850,27 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) goto drop_write; } + if (sa->assign) { + prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); + if (!prealloc) { + ret = -ENOMEM; + goto drop_write; + } + } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; } + /* + * Prealloc ownership is moved to the relation handler, there it's used + * or freed on error. + */ if (sa->assign) { - ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst); + ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst, prealloc); + prealloc = NULL; } else { ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst); } @@ -3847,13 +3880,15 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) err = btrfs_run_qgroups(trans); mutex_unlock(&fs_info->qgroup_ioctl_lock); if (err < 0) - btrfs_handle_fs_error(fs_info, err, - "failed to update qgroup status and info"); + btrfs_warn(fs_info, + "qgroup status update failed after %s relation, marked as inconsistent", + sa->assign ? "adding" : "deleting"); err = btrfs_end_transaction(trans); if (err && !ret) ret = err; out: + kfree(prealloc); kfree(sa); drop_write: mnt_drop_write_file(file); @@ -3872,6 +3907,9 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!qgroup_enabled(root->fs_info)) + return -ENOTCONN; + ret = mnt_want_write_file(file); if (ret) return ret; @@ -3928,6 +3966,9 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!qgroup_enabled(root->fs_info)) + return -ENOTCONN; + ret = mnt_want_write_file(file); if (ret) return ret; @@ -3973,6 +4014,9 @@ static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!qgroup_enabled(fs_info)) + return -ENOTCONN; + ret = mnt_want_write_file(file); if (ret) return ret; @@ -4429,7 +4473,7 @@ out_drop_write: return ret; } -static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat) +static int _btrfs_ioctl_send(struct btrfs_inode *inode, void __user *argp, bool compat) { struct btrfs_ioctl_send_args *arg; int ret; @@ -4751,10 +4795,10 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_set_received_subvol_32(file, argp); #endif case BTRFS_IOC_SEND: - return _btrfs_ioctl_send(inode, argp, false); + return _btrfs_ioctl_send(BTRFS_I(inode), argp, false); #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) case BTRFS_IOC_SEND_32: - return _btrfs_ioctl_send(inode, argp, true); + return _btrfs_ioctl_send(BTRFS_I(inode), argp, true); #endif case BTRFS_IOC_GET_DEV_STATS: return btrfs_ioctl_get_dev_stats(fs_info, argp); |