From ba2d084055fd3f67af120070f5620173efd867c8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Jan 2016 19:07:04 +0100 Subject: btrfs: sysfs: fix typo in compat_ro attribute definition Signed-off-by: David Sterba --- fs/btrfs/sysfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index 9c09522125a6..72408e2c4ea8 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h @@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \ #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \ BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature) #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \ - BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature) + BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature) #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \ BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature) -- cgit v1.2.3 From 3b5bb73bd88d1d90163c91e7cad50b12725dbb1c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Jan 2016 18:36:46 +0100 Subject: btrfs: sysfs: add free-space-tree bit attribute The incompat bit representing the newly added free space tree feature is missing. Right now it will be listed only among features supported by the module, not per-fs. Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index e0ac85949067..906f7ed6fc80 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF); BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA); BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES); +BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE); static struct attribute *btrfs_supported_feature_attrs[] = { BTRFS_FEAT_ATTR_PTR(mixed_backref), @@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = { BTRFS_FEAT_ATTR_PTR(raid56), BTRFS_FEAT_ATTR_PTR(skinny_metadata), BTRFS_FEAT_ATTR_PTR(no_holes), + BTRFS_FEAT_ATTR_PTR(free_space_tree), NULL }; -- cgit v1.2.3 From 444e75169872f668eb890f19ec1f32dfc632e704 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Jan 2016 18:50:40 +0100 Subject: btrfs: sysfs: introduce helper for syncing bits with sysfs files The files under /sys/fs/UUID/features get out of sync with the actual incompat bits set for the filesystem if they change after mount. We're going to sync them and need a helper to do that. Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 30 ++++++++++++++++++++++++++++++ fs/btrfs/sysfs.h | 3 +++ 2 files changed, 33 insertions(+) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 906f7ed6fc80..6986886243bf 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -782,6 +782,36 @@ failure: return error; } + +/* + * Change per-fs features in /sys/fs/btrfs/UUID/features to match current + * values in superblock. Call after any changes to incompat/compat_ro flags + */ +void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info, + u64 bit, enum btrfs_feature_set set) +{ + struct btrfs_fs_devices *fs_devs; + struct kobject *fsid_kobj; + u64 features; + int ret; + + if (!fs_info) + return; + + features = get_features(fs_info, set); + ASSERT(bit & supported_feature_masks[set]); + + fs_devs = fs_info->fs_devices; + fsid_kobj = &fs_devs->fsid_kobj; + + /* + * FIXME: this is too heavy to update just one value, ideally we'd like + * to use sysfs_update_group but some refactoring is needed first. + */ + sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group); + ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group); +} + static int btrfs_init_debugfs(void) { #ifdef CONFIG_DEBUG_FS diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index 72408e2c4ea8..d7da1a4c2f6c 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h @@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs, struct kobject *parent); int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs); void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs); +void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info, + u64 bit, enum btrfs_feature_set set); + #endif /* _BTRFS_SYSFS_H_ */ -- cgit v1.2.3 From 14e46e04958df740c6c6a94849f176159a333f13 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Jan 2016 18:54:41 +0100 Subject: btrfs: synchronize incompat feature bits with sysfs files The files under /sys/fs/UUID/features get out of sync with the actual incompat bits set for the filesystem if they change after mount (eg. the LZO compression). Synchronize the feature bits with the sysfs files representing them right after we set/clear them. Signed-off-by: David Sterba --- fs/btrfs/free-space-tree.c | 7 +++++++ fs/btrfs/ioctl.c | 4 ++++ fs/btrfs/super.c | 4 ++++ fs/btrfs/volumes.c | 2 ++ 4 files changed, 17 insertions(+) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 393e36bd5845..94e887f5ec4e 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -23,6 +23,7 @@ #include "locking.h" #include "free-space-tree.h" #include "transaction.h" +#include "sysfs.h" static int __add_block_group_free_space(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, @@ -1169,6 +1170,9 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) } btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); + btrfs_sysfs_feature_update(fs_info, + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO); + fs_info->creating_free_space_tree = 0; ret = btrfs_commit_transaction(trans, tree_root); @@ -1237,6 +1241,9 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) return PTR_ERR(trans); btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE); + btrfs_sysfs_feature_update(fs_info, + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO); + fs_info->free_space_root = NULL; ret = clear_free_space_tree(trans, free_space_root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e392dd67f0ba..209dcfa9ab33 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1455,6 +1455,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, if (range->compress_type == BTRFS_COMPRESS_LZO) { btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO); + btrfs_sysfs_feature_update(root->fs_info, + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO, FEAT_INCOMPAT); } ret = defrag_count; @@ -4063,6 +4065,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_free_path(path); btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); + btrfs_sysfs_feature_update(root->fs_info, + BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL, FEAT_INCOMPAT); btrfs_end_transaction(trans, root); out: mnt_drop_write_file(file); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 86f7fdc05633..5a1bab11984d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -58,6 +58,7 @@ #include "dev-replace.h" #include "free-space-cache.h" #include "backref.h" +#include "sysfs.h" #include "tests/btrfs-tests.h" #include "qgroup.h" @@ -477,6 +478,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); btrfs_set_fs_incompat(info, COMPRESS_LZO); + btrfs_sysfs_feature_update(root->fs_info, + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO, + FEAT_INCOMPAT); } else if (strncmp(args[0].from, "no", 2) == 0) { compress_type = "no"; btrfs_clear_opt(info->mount_opt, COMPRESS); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c32abbca9d77..73bcd1322c1d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4468,6 +4468,8 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) return; btrfs_set_fs_incompat(info, RAID56); + btrfs_sysfs_feature_update(info, BTRFS_FEATURE_INCOMPAT_RAID56, + FEAT_INCOMPAT); } #define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \ -- cgit v1.2.3 From 8cce83ba5062a301a09e0920df813bbbdd3e9dbf Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 22 Jan 2016 10:28:24 +0100 Subject: btrfs: tests: switch to GFP_KERNEL There's no reason to do GFP_NOFS in tests, it's not data-heavy and memory allocation failures would affect only developers or testers. Signed-off-by: David Sterba --- fs/btrfs/tests/btrfs-tests.c | 10 +++++----- fs/btrfs/tests/extent-io-tests.c | 12 ++++++------ fs/btrfs/tests/inode-tests.c | 8 ++++---- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index b1d920b30070..0e1e61a7ec23 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void) struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void) { struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), - GFP_NOFS); + GFP_KERNEL); if (!fs_info) return fs_info; fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), - GFP_NOFS); + GFP_KERNEL); if (!fs_info->fs_devices) { kfree(fs_info); return NULL; } fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block), - GFP_NOFS); + GFP_KERNEL); if (!fs_info->super_copy) { kfree(fs_info->fs_devices); kfree(fs_info); @@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length) { struct btrfs_block_group_cache *cache; - cache = kzalloc(sizeof(*cache), GFP_NOFS); + cache = kzalloc(sizeof(*cache), GFP_KERNEL); if (!cache) return NULL; cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), - GFP_NOFS); + GFP_KERNEL); if (!cache->free_space_ctl) { kfree(cache); return NULL; diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index e29fa297e053..669b58201e36 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -94,7 +94,7 @@ static int test_find_delalloc(void) * test. */ for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) { - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); if (!page) { test_msg("Failed to allocate test page\n"); ret = -ENOMEM; @@ -113,7 +113,7 @@ static int test_find_delalloc(void) * |--- delalloc ---| * |--- search ---| */ - set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS); + set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL); start = 0; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -144,7 +144,7 @@ static int test_find_delalloc(void) test_msg("Couldn't find the locked page\n"); goto out_bits; } - set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS); + set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -199,7 +199,7 @@ static int test_find_delalloc(void) * * We are re-using our test_start from above since it works out well. */ - set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS); + set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL); start = test_start; end = 0; found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, @@ -262,7 +262,7 @@ static int test_find_delalloc(void) } ret = 0; out_bits: - clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS); + clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL); out: if (locked_page) page_cache_release(locked_page); @@ -360,7 +360,7 @@ static int test_eb_bitmaps(void) test_msg("Running extent buffer bitmap tests\n"); - bitmap = kmalloc(len, GFP_NOFS); + bitmap = kmalloc(len, GFP_KERNEL); if (!bitmap) { test_msg("Couldn't allocate test bitmap\n"); return -ENOMEM; diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 5de55fdd28bc..e2d3da02deee 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -974,7 +974,7 @@ static int test_extent_accounting(void) (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, - NULL, GFP_NOFS); + NULL, GFP_KERNEL); if (ret) { test_msg("clear_extent_bit returned %d\n", ret); goto out; @@ -1045,7 +1045,7 @@ static int test_extent_accounting(void) BTRFS_MAX_EXTENT_SIZE+8191, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, - NULL, GFP_NOFS); + NULL, GFP_KERNEL); if (ret) { test_msg("clear_extent_bit returned %d\n", ret); goto out; @@ -1079,7 +1079,7 @@ static int test_extent_accounting(void) ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, - NULL, GFP_NOFS); + NULL, GFP_KERNEL); if (ret) { test_msg("clear_extent_bit returned %d\n", ret); goto out; @@ -1096,7 +1096,7 @@ out: clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, - NULL, GFP_NOFS); + NULL, GFP_KERNEL); iput(inode); btrfs_free_dummy_root(root); return ret; -- cgit v1.2.3 From 79b134a22b6fc45f48bcc1c27d6bf05b3ede5e06 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 22 Jan 2016 17:16:18 +0100 Subject: btrfs: tweak free space tree bitmap allocation The requested bitmap size varies, observed numbers were < 4K up to 16K. Using vmalloc unconditionally would be too heavy, we'll try contiguous allocations first and fall back to vmalloc if there's no contig memory. Signed-off-by: David Sterba --- fs/btrfs/free-space-tree.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 393e36bd5845..0f33d58cb321 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize) static unsigned long *alloc_bitmap(u32 bitmap_size) { + void *mem; + + /* + * The allocation size varies, observed numbers were < 4K up to 16K. + * Using vmalloc unconditionally would be too heavy, we'll try + * contiguous allocations first. + */ + if (bitmap_size <= PAGE_SIZE) + return kzalloc(bitmap_size, GFP_NOFS); + + mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOWARN); + if (mem) + return mem; + return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); } @@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, ret = 0; out: - vfree(bitmap); + kvfree(bitmap); if (ret) btrfs_abort_transaction(trans, root, ret); return ret; @@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, ret = 0; out: - vfree(bitmap); + kvfree(bitmap); if (ret) btrfs_abort_transaction(trans, root, ret); return ret; -- cgit v1.2.3 From 6b20e0ad2e39b77b7d4a48cc06c4de39768c7bb8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 25 Jan 2016 16:30:22 +0100 Subject: btrfs: add free space tree to lockdep classes Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c67c129fe99a..ca78bf2ed287 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -176,6 +176,7 @@ static struct btrfs_lockdep_keyset { { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" }, + { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" }, { .id = 0, .name_stem = "tree" }, }; -- cgit v1.2.3 From 3e4c5efbb3ac7c9c4fb5f33b659fa98afe568ab1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 25 Jan 2016 16:47:10 +0100 Subject: btrfs: add free space tree to the cow-only list Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ef6d8fc85853..c5540302402d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid) root_objectid == BTRFS_TREE_LOG_OBJECTID || root_objectid == BTRFS_CSUM_TREE_OBJECTID || root_objectid == BTRFS_UUID_TREE_OBJECTID || - root_objectid == BTRFS_QUOTA_TREE_OBJECTID) + root_objectid == BTRFS_QUOTA_TREE_OBJECTID || + root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) return 1; return 0; } -- cgit v1.2.3 From de0ee0edb21fbab4c7afa3e94573ecfebfb0244e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 21 Jan 2016 10:17:54 +0000 Subject: Btrfs: fix race between fsync and lockless direct IO writes An fsync, using the fast path, can race with a concurrent lockless direct IO write and end up logging a file extent item that points to an extent that wasn't written to yet. This is because the fast fsync path collects ordered extents into a local list and then collects all the new extent maps to log file extent items based on them, while the direct IO write path creates the new extent map before it creates the corresponding ordered extent (and submitting the respective bio(s)). So fix this by making the direct IO write path create ordered extents before the extent maps and make the fast fsync path collect any new ordered extents after it collects the extent maps. Note that making the fsync handler call inode_dio_wait() (after acquiring the inode's i_mutex) would not work and lead to a deadlock when doing AIO, as through AIO we end up in a path where the fsync handler is called (through dio_aio_complete_work() -> dio_complete() -> vfs_fsync_range()) before the inode's dio counter is decremented (inode_dio_wait() waits for this counter to have a value of zero). Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 36 ++++++++++++++++++++++++++++-------- fs/btrfs/tree-log.c | 14 +++++++++++--- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b8bb7591ff9f..e4565456eb01 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7115,21 +7115,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, if (ret) return ERR_PTR(ret); - em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, - ins.offset, ins.offset, ins.offset, 0); - if (IS_ERR(em)) { - btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); - return em; - } - + /* + * Create the ordered extent before the extent map. This is to avoid + * races with the fast fsync path that would lead to it logging file + * extent items that point to disk extents that were not yet written to. + * The fast fsync path collects ordered extents into a local list and + * then collects all the new extent maps, so we must create the ordered + * extent first and make sure the fast fsync path collects any new + * ordered extents after collecting new extent maps as well. + * The fsync path simply can not rely on inode_dio_wait() because it + * causes deadlock with AIO. + */ ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, ins.offset, ins.offset, 0); if (ret) { btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); - free_extent_map(em); return ERR_PTR(ret); } + em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, + ins.offset, ins.offset, ins.offset, 0); + if (IS_ERR(em)) { + struct btrfs_ordered_extent *oe; + + btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); + oe = btrfs_lookup_ordered_extent(inode, start); + ASSERT(oe); + if (WARN_ON(!oe)) + return em; + set_bit(BTRFS_ORDERED_IOERR, &oe->flags); + set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags); + btrfs_remove_ordered_extent(inode, oe); + /* Once for our lookup and once for the ordered extents tree. */ + btrfs_put_ordered_extent(oe); + btrfs_put_ordered_extent(oe); + } return em; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 323e12cc9d2f..978c3a810893 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct inode *inode, struct btrfs_path *path, struct list_head *logged_list, - struct btrfs_log_ctx *ctx) + struct btrfs_log_ctx *ctx, + const u64 start, + const u64 end) { struct extent_map *em, *n; struct list_head extents; @@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, } list_sort(NULL, &extents, extent_cmp); - + /* + * Collect any new ordered extents within the range. This is to + * prevent logging file extent items without waiting for the disk + * location they point to being written. We do this only to deal + * with races against concurrent lockless direct IO writes. + */ + btrfs_get_logged_extents(inode, logged_list, start, end); process: while (!list_empty(&extents)) { em = list_entry(extents.next, struct extent_map, list); @@ -4701,7 +4709,7 @@ log_extents: goto out_unlock; } ret = btrfs_log_changed_extents(trans, root, inode, dst_path, - &logged_list, ctx); + &logged_list, ctx, start, end); if (ret) { err = ret; goto out_unlock; -- cgit v1.2.3 From 0a95b851370b84a4b9d92ee6d1fa0926901d0454 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 22 Jan 2016 09:28:38 +0800 Subject: btrfs: async-thread: Fix a use-after-free error for trace Parameter of trace_btrfs_work_queued() can be freed in its workqueue. So no one use use that pointer after queue_work(). Fix the user-after-free bug by move the trace line before queue_work(). Reported-by: Dave Jones Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 88d9af3d4581..5fb60ea7eee2 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq, list_add_tail(&work->ordered_list, &wq->ordered_list); spin_unlock_irqrestore(&wq->list_lock, flags); } - queue_work(wq->normal_wq, &work->normal_work); trace_btrfs_work_queued(work); + queue_work(wq->normal_wq, &work->normal_work); } void btrfs_queue_work(struct btrfs_workqueue *wq, -- cgit v1.2.3 From 80ad623edd2d0ccb47d85357ee31c97e6c684e82 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 25 Jan 2016 11:02:06 +0100 Subject: Revert "btrfs: clear PF_NOFREEZE in cleaner_kthread()" This reverts commit 696249132158014d594896df3a81390616069c5c. The cleaner thread can block freezing when there's a snapshot cleaning in progress and the other threads get suspended first. From the logs provided by Martin we're waiting for reading extent pages: kernel: PM: Syncing filesystems ... done. kernel: Freezing user space processes ... (elapsed 0.015 seconds) done. kernel: Freezing remaining freezable tasks ... kernel: Freezing of tasks failed after 20.003 seconds (1 tasks refusing to freeze, wq_busy=0): kernel: btrfs-cleaner D ffff88033dd13bc0 0 152 2 0x00000000 kernel: ffff88032ebc2e00 ffff88032e750000 ffff88032e74fa50 7fffffffffffffff kernel: ffffffff814a58df 0000000000000002 ffffea000934d580 ffffffff814a5451 kernel: 7fffffffffffffff ffffffff814a6e8f 0000000000000000 0000000000000020 kernel: Call Trace: kernel: [] ? bit_wait+0x2c/0x2c kernel: [] ? schedule+0x6f/0x7c kernel: [] ? schedule_timeout+0x2f/0xd8 kernel: [] ? timekeeping_get_ns+0xa/0x2e kernel: [] ? ktime_get+0x36/0x44 kernel: [] ? io_schedule_timeout+0x94/0xf2 kernel: [] ? io_schedule_timeout+0x94/0xf2 kernel: [] ? bit_wait_io+0x2c/0x30 kernel: [] ? __wait_on_bit+0x41/0x73 kernel: [] ? wait_on_page_bit+0x6d/0x72 kernel: [] ? autoremove_wake_function+0x2a/0x2a kernel: [] ? read_extent_buffer_pages+0x1bd/0x203 kernel: [] ? free_root_pointers+0x4c/0x4c kernel: [] ? btree_read_extent_buffer_pages.constprop.57+0x5a/0xe9 kernel: [] ? read_tree_block+0x2d/0x45 kernel: [] ? read_block_for_search.isra.34+0x22a/0x26b kernel: [] ? btrfs_set_path_blocking+0x1e/0x4a kernel: [] ? btrfs_search_slot+0x648/0x736 kernel: [] ? btrfs_lookup_extent_info+0xb7/0x2c7 kernel: [] ? walk_down_proc+0x9c/0x1ae kernel: [] ? walk_down_tree+0x40/0xa4 kernel: [] ? btrfs_drop_snapshot+0x2da/0x664 kernel: [] ? finish_task_switch+0x126/0x167 kernel: [] ? btrfs_clean_one_deleted_snapshot+0xa6/0xb0 kernel: [] ? cleaner_kthread+0x13e/0x17b kernel: [] ? btrfs_item_end+0x33/0x33 kernel: [] ? kthread+0x95/0x9d kernel: [] ? kthread_parkme+0x16/0x16 kernel: [] ? ret_from_fork+0x3f/0x70 kernel: [] ? kthread_parkme+0x16/0x16 As this affects a released kernel (4.4) we need a minimal fix for stable kernels. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=108361 Reported-by: Martin Ziegler CC: stable@vger.kernel.org # 4.4 CC: Jiri Kosina Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 26ef14152093..404e894c33d9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1787,7 +1787,6 @@ static int cleaner_kthread(void *arg) int again; struct btrfs_trans_handle *trans; - set_freezable(); do { again = 0; -- cgit v1.2.3 From bf6092066f80840410e3401cd962b23d54a95713 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 27 Jan 2016 14:06:29 +0100 Subject: btrfs: sysfs: check initialization state before updating features If the mount phase is not finished, we can't update the sysfs files. Reported-by: Chris Mason Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 6986886243bf..539e7b5e3f86 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -804,6 +804,9 @@ void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info, fs_devs = fs_info->fs_devices; fsid_kobj = &fs_devs->fsid_kobj; + if (!fsid_kobj->state_initialized) + return; + /* * FIXME: this is too heavy to update just one value, ideally we'd like * to use sysfs_update_group but some refactoring is needed first. -- cgit v1.2.3 From e1c0ebad3f77387c6684f8b7e86a4bbaca7577ac Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 27 Jan 2016 06:38:45 -0800 Subject: btrfs: don't use GFP_HIGHMEM for free-space-tree bitmap kzalloc This was copied incorrectly from the __vmalloc call. Signed-off-by: Chris Mason --- fs/btrfs/free-space-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index dfa8124effb8..81a053d2966b 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -164,7 +164,7 @@ static unsigned long *alloc_bitmap(u32 bitmap_size) if (bitmap_size <= PAGE_SIZE) return kzalloc(bitmap_size, GFP_NOFS); - mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOWARN); + mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN); if (mem) return mem; -- cgit v1.2.3 From e410e34fad913dd568ec28d2a9949694324c14db Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 29 Jan 2016 08:19:37 -0800 Subject: Revert "btrfs: synchronize incompat feature bits with sysfs files" This reverts commit 14e46e04958df740c6c6a94849f176159a333f13. This ends up doing sysfs operations from deep in balance (where we should be GFP_NOFS) and under heavy balance load, we're making races against sysfs internals. Revert it for now while we figure things out. Signed-off-by: Chris Mason --- fs/btrfs/free-space-tree.c | 7 ------- fs/btrfs/ioctl.c | 4 ---- fs/btrfs/super.c | 4 ---- fs/btrfs/volumes.c | 2 -- 4 files changed, 17 deletions(-) diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 81a053d2966b..53dbeaf6ce94 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -23,7 +23,6 @@ #include "locking.h" #include "free-space-tree.h" #include "transaction.h" -#include "sysfs.h" static int __add_block_group_free_space(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, @@ -1184,9 +1183,6 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) } btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); - btrfs_sysfs_feature_update(fs_info, - BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO); - fs_info->creating_free_space_tree = 0; ret = btrfs_commit_transaction(trans, tree_root); @@ -1255,9 +1251,6 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) return PTR_ERR(trans); btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE); - btrfs_sysfs_feature_update(fs_info, - BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE, FEAT_COMPAT_RO); - fs_info->free_space_root = NULL; ret = clear_free_space_tree(trans, free_space_root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1568f57a3376..83c9ad3f2621 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1459,8 +1459,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, if (range->compress_type == BTRFS_COMPRESS_LZO) { btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO); - btrfs_sysfs_feature_update(root->fs_info, - BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO, FEAT_INCOMPAT); } ret = defrag_count; @@ -4069,8 +4067,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_free_path(path); btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); - btrfs_sysfs_feature_update(root->fs_info, - BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL, FEAT_INCOMPAT); btrfs_end_transaction(trans, root); out: mnt_drop_write_file(file); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 49b424103c32..a8e049ae933d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -58,7 +58,6 @@ #include "dev-replace.h" #include "free-space-cache.h" #include "backref.h" -#include "sysfs.h" #include "tests/btrfs-tests.h" #include "qgroup.h" @@ -486,9 +485,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_clear_opt(info->mount_opt, NODATACOW); btrfs_clear_opt(info->mount_opt, NODATASUM); btrfs_set_fs_incompat(info, COMPRESS_LZO); - btrfs_sysfs_feature_update(root->fs_info, - BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO, - FEAT_INCOMPAT); no_compress = 0; } else if (strncmp(args[0].from, "no", 2) == 0) { compress_type = "no"; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5b505f61929e..366b335946fa 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4469,8 +4469,6 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) return; btrfs_set_fs_incompat(info, RAID56); - btrfs_sysfs_feature_update(info, BTRFS_FEATURE_INCOMPAT_RAID56, - FEAT_INCOMPAT); } #define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \ -- cgit v1.2.3