diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-18 21:44:40 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-18 21:44:40 +0100 |
commit | c1a198d9235b9e7d6942027374e44f78ebdcb455 (patch) | |
tree | e006624aadd5af82fab3c67073eb621b2af50396 /fs/btrfs/super.c | |
parent | Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (diff) | |
parent | Merge branch 'for-chris-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git... (diff) | |
download | linux-c1a198d9235b9e7d6942027374e44f78ebdcb455.tar.xz linux-c1a198d9235b9e7d6942027374e44f78ebdcb455.zip |
Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"This has our usual assortment of fixes and cleanups, but the biggest
change included is Omar Sandoval's free space tree. It's not the
default yet, mounting -o space_cache=v2 enables it and sets a readonly
compat bit. The tree can actually be deleted and regenerated if there
are any problems, but it has held up really well in testing so far.
For very large filesystems (30T+) our existing free space caching code
can end up taking a huge amount of time during commits. The new tree
based code is faster and less work overall to update as the commit
progresses.
Omar worked on this during the summer and we'll hammer on it in
production here at FB over the next few months"
* 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (73 commits)
Btrfs: fix fitrim discarding device area reserved for boot loader's use
Btrfs: Check metadata redundancy on balance
btrfs: statfs: report zero available if metadata are exhausted
btrfs: preallocate path for snapshot creation at ioctl time
btrfs: allocate root item at snapshot ioctl time
btrfs: do an allocation earlier during snapshot creation
btrfs: use smaller type for btrfs_path locks
btrfs: use smaller type for btrfs_path lowest_level
btrfs: use smaller type for btrfs_path reada
btrfs: cleanup, use enum values for btrfs_path reada
btrfs: constify static arrays
btrfs: constify remaining structs with function pointers
btrfs tests: replace whole ops structure for free space tests
btrfs: use list_for_each_entry* in backref.c
btrfs: use list_for_each_entry_safe in free-space-cache.c
btrfs: use list_for_each_entry* in check-integrity.c
Btrfs: use linux/sizes.h to represent constants
btrfs: cleanup, remove stray return statements
btrfs: zero out delayed node upon allocation
btrfs: pass proper enum type to start_transaction()
...
Diffstat (limited to 'fs/btrfs/super.c')
-rw-r--r-- | fs/btrfs/super.c | 84 |
1 files changed, 73 insertions, 11 deletions
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a0434c179ea9..9b9eab6d048e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -295,10 +295,11 @@ enum { Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_compress_type, Opt_compress_force, Opt_compress_force_type, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, - Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, - Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, - Opt_no_space_cache, Opt_recovery, Opt_skip_balance, - Opt_check_integrity, Opt_check_integrity_including_extent_data, + Opt_space_cache, Opt_space_cache_version, Opt_clear_cache, + Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid, + Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery, + Opt_skip_balance, Opt_check_integrity, + Opt_check_integrity_including_extent_data, Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard, Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, @@ -309,7 +310,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_degraded, "degraded"}, {Opt_subvol, "subvol=%s"}, {Opt_subvolid, "subvolid=%s"}, @@ -340,6 +341,7 @@ static match_table_t tokens = { {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, {Opt_space_cache, "space_cache"}, + {Opt_space_cache_version, "space_cache=%s"}, {Opt_clear_cache, "clear_cache"}, {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, {Opt_enospc_debug, "enospc_debug"}, @@ -383,7 +385,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) bool compress_force = false; cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); - if (cache_gen) + if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) + btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE); + else if (cache_gen) btrfs_set_opt(info->mount_opt, SPACE_CACHE); if (!options) @@ -617,15 +621,35 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) "turning off discard"); break; case Opt_space_cache: - btrfs_set_and_info(root, SPACE_CACHE, - "enabling disk space caching"); + case Opt_space_cache_version: + if (token == Opt_space_cache || + strcmp(args[0].from, "v1") == 0) { + btrfs_clear_opt(root->fs_info->mount_opt, + FREE_SPACE_TREE); + btrfs_set_and_info(root, SPACE_CACHE, + "enabling disk space caching"); + } else if (strcmp(args[0].from, "v2") == 0) { + btrfs_clear_opt(root->fs_info->mount_opt, + SPACE_CACHE); + btrfs_set_and_info(root, FREE_SPACE_TREE, + "enabling free space tree"); + } else { + ret = -EINVAL; + goto out; + } break; case Opt_rescan_uuid_tree: btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); break; case Opt_no_space_cache: - btrfs_clear_and_info(root, SPACE_CACHE, - "disabling disk space caching"); + if (btrfs_test_opt(root, SPACE_CACHE)) { + btrfs_clear_and_info(root, SPACE_CACHE, + "disabling disk space caching"); + } + if (btrfs_test_opt(root, FREE_SPACE_TREE)) { + btrfs_clear_and_info(root, FREE_SPACE_TREE, + "disabling free space tree"); + } break; case Opt_inode_cache: btrfs_set_pending_and_info(info, INODE_MAP_CACHE, @@ -754,8 +778,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) } } out: + if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) && + !btrfs_test_opt(root, FREE_SPACE_TREE) && + !btrfs_test_opt(root, CLEAR_CACHE)) { + btrfs_err(root->fs_info, "cannot disable free space tree"); + ret = -EINVAL; + + } if (!ret && btrfs_test_opt(root, SPACE_CACHE)) btrfs_info(root->fs_info, "disk space caching is enabled"); + if (!ret && btrfs_test_opt(root, FREE_SPACE_TREE)) + btrfs_info(root->fs_info, "using free space tree"); kfree(orig); return ret; } @@ -1162,6 +1195,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",noacl"); if (btrfs_test_opt(root, SPACE_CACHE)) seq_puts(seq, ",space_cache"); + else if (btrfs_test_opt(root, FREE_SPACE_TREE)) + seq_puts(seq, ",space_cache=v2"); else seq_puts(seq, ",nospace_cache"); if (btrfs_test_opt(root, RESCAN_UUID_TREE)) @@ -1863,7 +1898,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) * btrfs starts at an offset of at least 1MB when doing chunk * allocation. */ - skip_space = 1024 * 1024; + skip_space = SZ_1M; /* user can set the offset in fs_info->alloc_start. */ if (fs_info->alloc_start && @@ -1954,6 +1989,8 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) * there are other factors that may change the result (like a new metadata * chunk). * + * If metadata is exhausted, f_bavail will be 0. + * * FIXME: not accurate for mixed block groups, total and free/used are ok, * available appears slightly larger. */ @@ -1965,11 +2002,13 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct btrfs_space_info *found; u64 total_used = 0; u64 total_free_data = 0; + u64 total_free_meta = 0; int bits = dentry->d_sb->s_blocksize_bits; __be32 *fsid = (__be32 *)fs_info->fsid; unsigned factor = 1; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; int ret; + u64 thresh = 0; /* * holding chunk_muext to avoid allocating new chunks, holding @@ -1995,6 +2034,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) } } } + if (found->flags & BTRFS_BLOCK_GROUP_METADATA) + total_free_meta += found->disk_total - found->disk_used; total_used += found->disk_used; } @@ -2017,6 +2058,24 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail += div_u64(total_free_data, factor); buf->f_bavail = buf->f_bavail >> bits; + /* + * We calculate the remaining metadata space minus global reserve. If + * this is (supposedly) smaller than zero, there's no space. But this + * does not hold in practice, the exhausted state happens where's still + * some positive delta. So we apply some guesswork and compare the + * delta to a 4M threshold. (Practically observed delta was ~2M.) + * + * We probably cannot calculate the exact threshold value because this + * depends on the internal reservations requested by various + * operations, so some operations that consume a few metadata will + * succeed even if the Avail is zero. But this is better than the other + * way around. + */ + thresh = 4 * 1024 * 1024; + + if (total_free_meta - thresh < block_rsv->size) + buf->f_bavail = 0; + buf->f_type = BTRFS_SUPER_MAGIC; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_namelen = BTRFS_NAME_LEN; @@ -2223,6 +2282,9 @@ static int btrfs_run_sanity_tests(void) if (ret) goto out; ret = btrfs_test_qgroups(); + if (ret) + goto out; + ret = btrfs_test_free_space_tree(); out: btrfs_destroy_test_fs(); return ret; |