diff options
author | Boris Burkov <boris@bur.io> | 2023-06-28 19:06:45 +0200 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2023-10-12 16:44:10 +0200 |
commit | af0e2aab3b70b7844232bbce2a619ec70e049df4 (patch) | |
tree | 260cc9837c999aae23b3d3d51439c030f739e346 | |
parent | btrfs: sysfs: add simple_quota incompat feature entry (diff) | |
download | linux-af0e2aab3b70b7844232bbce2a619ec70e049df4.tar.xz linux-af0e2aab3b70b7844232bbce2a619ec70e049df4.zip |
btrfs: qgroup: flush reservations during quota disable
The following sequence:
enable simple quotas
do some writes
reserve space
create ordered_extent
release rsv (store rsv_bytes in OE, mark QGROUP_RESERVED bits)
disable quotas
enable simple quotas
set qgroup rsv to 0 on all subvolumes
ordered_extent finishes
create delayed ref with rsv_bytes from before
run delayed ref
record_simple_quota_delta
free rsv_bytes (0 -> -rsv_delta)
results in us reliably underflowing the subvolume's qgroup rsv counter,
because disabling/re-enabling quotas toggles reservation counters down
to 0, but does not remove other file system state which represents
successful acquisition of qgroup rsv space. Specifically, that state is
the metadata rsv counters on the root object and rsv_bytes on
ordered_extent objects that have released their reservation, as well as
the corresponding QGROUP_RESERVED extent bits.
Normal qgroups get away with this, I believe, because they force more
work to happen on transaction commit, but I am not certain they are totally
safe from the ordered_extent/leaked extent bit variant. Simple quotas
hit this reliably.
The intent of the fix is to make disable take the time to clear that
external-to-qgroups state as well: after flipping off the quota bit on
fs_info, flush delalloc and ordered extents, clearing the extent bits
along the way. This makes it so there are no ordered extents or meta
prealloc hanging around from the first enablement period during the second.
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- | fs/btrfs/qgroup.c | 47 |
1 files changed, 44 insertions, 3 deletions
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1c0efc1757c1..a95c11a163f2 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1286,6 +1286,38 @@ out: return ret; } +/* + * It is possible to have outstanding ordered extents which reserved bytes + * before we disabled. We need to fully flush delalloc, ordered extents, and a + * commit to ensure that we don't leak such reservations, only to have them + * come back if we re-enable. + * + * - enable simple quotas + * - reserve space + * - release it, store rsv_bytes in OE + * - disable quotas + * - enable simple quotas (qgroup rsv are all 0) + * - OE finishes + * - run delayed refs + * - free rsv_bytes, resulting in miscounting or even underflow + */ +static int flush_reservations(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + int ret; + + ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, false); + if (ret) + return ret; + btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); + trans = btrfs_join_transaction(fs_info->tree_root); + if (IS_ERR(trans)) + return PTR_ERR(trans); + btrfs_commit_transaction(trans); + + return ret; +} + int btrfs_quota_disable(struct btrfs_fs_info *fs_info) { struct btrfs_root *quota_root; @@ -1330,6 +1362,10 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); btrfs_qgroup_wait_for_completion(fs_info, false); + ret = flush_reservations(fs_info); + if (ret) + goto out_unlock_cleaner; + /* * 1 For the root item * @@ -1391,7 +1427,8 @@ out: if (ret && trans) btrfs_end_transaction(trans); else if (trans) - ret = btrfs_end_transaction(trans); + ret = btrfs_commit_transaction(trans); +out_unlock_cleaner: mutex_unlock(&fs_info->cleaner_mutex); return ret; @@ -4010,8 +4047,12 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, int trace_op = QGROUP_RELEASE; int ret; - if (btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) - return 0; + if 
(btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) { + extent_changeset_init(&changeset); + return clear_record_extent_bits(&inode->io_tree, start, + start + len - 1, + EXTENT_QGROUP_RESERVED, &changeset); + } /* In release case, we shouldn't have @reserved */ WARN_ON(!free && reserved); |