From 86d4a77ba3dc4ace238a0556541a41df2bd71d49 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 25 May 2011 13:03:16 -0400 Subject: Btrfs: cache bitmaps when searching for a cluster If we are looking for a cluster in a particularly sparse or fragmented block group, we will do a lot of looping through the free space tree looking for various things, and if we need to look at bitmaps we will endup doing the whole dance twice. So instead add the bitmap entries to a temporary list so if we have to do the bitmap search we can just look through the list of entries we've found quickly instead of having to loop through the entire tree again. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 54 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index ad144736a5fd..930c07f79b3d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2144,6 +2144,7 @@ again: */ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, + struct list_head *bitmaps, u64 offset, u64 bytes, u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; @@ -2166,6 +2167,8 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, * extent entry. */ while (entry->bitmap) { + if (list_empty(&entry->list)) + list_add_tail(&entry->list, bitmaps); node = rb_next(&entry->offset_index); if (!node) return -ENOSPC; @@ -2185,8 +2188,12 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, return -ENOSPC; entry = rb_entry(node, struct btrfs_free_space, offset_index); - if (entry->bitmap) + if (entry->bitmap) { + if (list_empty(&entry->list)) + list_add_tail(&entry->list, bitmaps); continue; + } + /* * we haven't filled the empty size and the window is * very large. reset and try again @@ -2240,6 +2247,7 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, */ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, + struct list_head *bitmaps, u64 offset, u64 bytes, u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; @@ -2250,10 +2258,39 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, if (ctl->total_bitmaps == 0) return -ENOSPC; + /* + * First check our cached list of bitmaps and see if there is an entry + * here that will work. + */ + list_for_each_entry(entry, bitmaps, list) { + if (entry->bytes < min_bytes) + continue; + ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, + bytes, min_bytes); + if (!ret) + return 0; + } + + /* + * If we do have entries on our list and we are here then we didn't find + * anything, so go ahead and get the next entry after the last entry in + * this list and start the search from there. + */ + if (!list_empty(bitmaps)) { + entry = list_entry(bitmaps->prev, struct btrfs_free_space, + list); + node = rb_next(&entry->offset_index); + if (!node) + return -ENOSPC; + entry = rb_entry(node, struct btrfs_free_space, offset_index); + goto search; + } + entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); if (!entry) return -ENOSPC; +search: node = &entry->offset_index; do { entry = rb_entry(node, struct btrfs_free_space, offset_index); @@ -2284,6 +2321,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, u64 offset, u64 bytes, u64 empty_size) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct list_head bitmaps; + struct btrfs_free_space *entry, *tmp; u64 min_bytes; int ret; @@ -2322,11 +2361,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, goto out; } - ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, - min_bytes); + INIT_LIST_HEAD(&bitmaps); + ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, + bytes, min_bytes); if (ret) - ret = setup_cluster_bitmap(block_group, cluster, offset, - bytes, min_bytes); + ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, + offset, bytes, min_bytes); + + /* Clear our temporary list */ + list_for_each_entry_safe(entry, tmp, &bitmaps, list) + list_del_init(&entry->list); if (!ret) { atomic_inc(&block_group->count); -- cgit v1.2.3 From 3de85bb95cc50d0977cbb7a0c605e894be4c790d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 25 May 2011 13:07:37 -0400 Subject: Btrfs: noinline the cluster searching functions When profiling the find cluster code it's hard to tell where we are spending our time because the bitmap and non-bitmap functions get inlined by the compiler, so make that not happen. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 930c07f79b3d..f56caacfd8ad 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2142,10 +2142,11 @@ again: /* * This searches the block group for just extents to fill the cluster with. */ -static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, - struct btrfs_free_cluster *cluster, - struct list_head *bitmaps, - u64 offset, u64 bytes, u64 min_bytes) +static noinline int +setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, + struct btrfs_free_cluster *cluster, + struct list_head *bitmaps, u64 offset, u64 bytes, + u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *first = NULL; @@ -2245,10 +2246,11 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, * This specifically looks for bitmaps that may work in the cluster, we assume * that we have already failed to find extents that will work. */ -static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, - struct btrfs_free_cluster *cluster, - struct list_head *bitmaps, - u64 offset, u64 bytes, u64 min_bytes) +static noinline int +setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, + struct btrfs_free_cluster *cluster, + struct list_head *bitmaps, u64 offset, u64 bytes, + u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *entry; -- cgit v1.2.3 From f2bb8f5cfb3bce595b2de251ed7638047fc4e530 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 25 May 2011 13:10:16 -0400 Subject: Btrfs: don't commit the transaction if we dont have enough pinned bytes I noticed when running an enospc test that we would get stuck committing the transaction in check_data_space even though we truly didn't have enough space. So check to see if bytes_pinned is bigger than num_bytes, if it's not don't commit the transaction. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5b9b6b6df242..0d0a3fe77bb7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3089,6 +3089,13 @@ alloc: } goto again; } + + /* + * If we have less pinned bytes than we want to allocate then + * don't bother committing the transaction, it won't help us. + */ + if (data_sinfo->bytes_pinned < bytes) + committed = 1; spin_unlock(&data_sinfo->lock); /* commit the current transaction and try again */ -- cgit v1.2.3 From 2cdc342c204dba69ca3b2ec43d8e6ff41ed920b8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 27 May 2011 14:07:49 -0400 Subject: Btrfs: fix bitmap regression In cleaning up the clustering code I accidently introduced a regression by adding bitmap entries to the cluster rb tree. The problem is if we've maxed out the number of bitmaps we can have for the block group we can only add free space to the bitmaps, but since the bitmap is on the cluster we can't find it and we try to create another one. This would result in a panic because the total bitmaps was bigger than the max bitmaps that were allowed. This patch fixes this by checking to see if we have a cluster, and then looking at the cluster rb tree to see if it has a bitmap entry and if it does and that space belongs to that bitmap, go ahead and add it to that bitmap. I could hit this panic every time with an fs_mark test within a couple of minutes. With this patch I no longer hit the panic and fs_mark goes to completion. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 88 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f56caacfd8ad..8258ccf85dbd 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1417,6 +1417,23 @@ again: return 0; } +static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, u64 offset, + u64 bytes) +{ + u64 bytes_to_set = 0; + u64 end; + + end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); + + bytes_to_set = min(end - offset, bytes); + + bitmap_set_bits(ctl, info, offset, bytes_to_set); + + return bytes_to_set; + +} + static bool use_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info) { @@ -1453,12 +1470,18 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, return true; } +static struct btrfs_free_space_op free_space_op = { + .recalc_thresholds = recalculate_thresholds, + .use_bitmap = use_bitmap, +}; + static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info) { struct btrfs_free_space *bitmap_info; + struct btrfs_block_group_cache *block_group = NULL; int added = 0; - u64 bytes, offset, end; + u64 bytes, offset, bytes_added; int ret; bytes = info->bytes; @@ -1467,6 +1490,47 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, if (!ctl->op->use_bitmap(ctl, info)) return 0; + if (ctl->op == &free_space_op) + block_group = ctl->private; + + /* + * Since we link bitmaps right into the cluster we need to see if we + * have a cluster here, and if so and it has our bitmap we need to add + * the free space to that bitmap. + */ + if (block_group && !list_empty(&block_group->cluster_list)) { + struct btrfs_free_cluster *cluster; + struct rb_node *node; + struct btrfs_free_space *entry; + + cluster = list_entry(block_group->cluster_list.next, + struct btrfs_free_cluster, + block_group_list); + spin_lock(&cluster->lock); + node = rb_first(&cluster->root); + if (!node) { + spin_unlock(&cluster->lock); + goto again; + } + + entry = rb_entry(node, struct btrfs_free_space, offset_index); + if (!entry->bitmap) { + spin_unlock(&cluster->lock); + goto again; + } + + if (entry->offset == offset_to_bitmap(ctl, offset)) { + bytes_added = add_bytes_to_bitmap(ctl, entry, + offset, bytes); + bytes -= bytes_added; + offset += bytes_added; + } + spin_unlock(&cluster->lock); + if (!bytes) { + ret = 1; + goto out; + } + } again: bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1, 0); @@ -1475,19 +1539,10 @@ again: goto new_bitmap; } - end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); - - if (offset >= bitmap_info->offset && offset + bytes > end) { - bitmap_set_bits(ctl, bitmap_info, offset, end - offset); - bytes -= end - offset; - offset = end; - added = 0; - } else if (offset >= bitmap_info->offset && offset + bytes <= end) { - bitmap_set_bits(ctl, bitmap_info, offset, bytes); - bytes = 0; - } else { - BUG(); - } + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); + bytes -= bytes_added; + offset += bytes_added; + added = 0; if (!bytes) { ret = 1; @@ -1766,11 +1821,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, "\n", count); } -static struct btrfs_free_space_op free_space_op = { - .recalc_thresholds = recalculate_thresholds, - .use_bitmap = use_bitmap, -}; - void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; -- cgit v1.2.3 From 723bda2083d44edbd6be0f0b09f902120dc07442 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 27 May 2011 16:11:38 -0400 Subject: Btrfs: fix the allocator loop logic I was testing with empty_cluster = 0 to try and reproduce a problem and kept hitting early enospc panics. This was because our loop logic was a little confused. So this is what I did 1) Make the loop variable the ultimate decider on wether we should loop again isntead of checking to see if we had an uncached bg, empty size or empty cluster. 2) Increment loop before checking to see what we are on to make the loop definitions make more sense. 3) If we are on the chunk alloc loop don't set empty_size/empty_cluster to 0 unless we didn't actually allocate a chunk. If we did allocate a chunk we should be able to easily setup a new cluster so clearing empty_size/empty_cluster makes us less efficient. This kept me from hitting panics while trying to reproduce the other problem. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent-tree.c | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0d0a3fe77bb7..b42efc2ded51 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5218,9 +5218,7 @@ loop: * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try * again */ - if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && - (found_uncached_bg || empty_size || empty_cluster || - allowed_chunk_alloc)) { + if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { index = 0; if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { found_uncached_bg = false; @@ -5260,32 +5258,36 @@ loop: goto search; } - if (loop < LOOP_CACHING_WAIT) { - loop++; - goto search; - } + loop++; if (loop == LOOP_ALLOC_CHUNK) { - empty_size = 0; - empty_cluster = 0; - } + if (allowed_chunk_alloc) { + ret = do_chunk_alloc(trans, root, num_bytes + + 2 * 1024 * 1024, data, + CHUNK_ALLOC_LIMITED); + allowed_chunk_alloc = 0; + if (ret == 1) + done_chunk_alloc = 1; + } else if (!done_chunk_alloc && + space_info->force_alloc == + CHUNK_ALLOC_NO_FORCE) { + space_info->force_alloc = CHUNK_ALLOC_LIMITED; + } - if (allowed_chunk_alloc) { - ret = do_chunk_alloc(trans, root, num_bytes + - 2 * 1024 * 1024, data, - CHUNK_ALLOC_LIMITED); - allowed_chunk_alloc = 0; - done_chunk_alloc = 1; - } else if (!done_chunk_alloc && - space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) { - space_info->force_alloc = CHUNK_ALLOC_LIMITED; + /* + * We didn't allocate a chunk, go ahead and drop the + * empty size and loop again. + */ + if (!done_chunk_alloc) + loop = LOOP_NO_EMPTY_SIZE; } - if (loop < LOOP_NO_EMPTY_SIZE) { - loop++; - goto search; + if (loop == LOOP_NO_EMPTY_SIZE) { + empty_size = 0; + empty_cluster = 0; } - ret = -ENOSPC; + + goto search; } else if (!ins->objectid) { ret = -ENOSPC; } else if (ins->objectid) { -- cgit v1.2.3 From f6a398298d34af66ec3a2d82a44a4dbc5277357d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 6 Jun 2011 10:50:35 -0400 Subject: Btrfs: fix duplicate checking logic When merging my code into the integration test the second check for duplicate entries got screwed up. This patch fixes it by dropping ret2 and just using ret for the return value, and checking if we got an error before adding the bitmap to the local list. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 8258ccf85dbd..38f3fd923043 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -250,7 +250,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, pgoff_t index = 0; unsigned long first_page_offset; int num_checksums; - int ret = 0, ret2; + int ret = 0; INIT_LIST_HEAD(&bitmaps); @@ -421,11 +421,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, goto free_cache; } spin_lock(&ctl->tree_lock); - ret2 = link_free_space(ctl, e); + ret = link_free_space(ctl, e); ctl->total_bitmaps++; ctl->op->recalc_thresholds(ctl); spin_unlock(&ctl->tree_lock); - list_add_tail(&e->list, &bitmaps); if (ret) { printk(KERN_ERR "Duplicate entries in " "free space cache, dumping\n"); @@ -434,6 +433,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, page_cache_release(page); goto free_cache; } + list_add_tail(&e->list, &bitmaps); } num_entries--; -- cgit v1.2.3 From 25b8b936ed44814a5ce6fc3b2a21401f33cd56f6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 8 Jun 2011 14:36:54 -0400 Subject: Btrfs: don't map extent buffer if path->skip_locking is set Arne's scrub stuff exposed a problem with mapping the extent buffer in reada_for_search. He searches the commit root with multiple threads and with skip_locking set, so we can race and overwrite node->map_token since node isn't locked. So fix this so that we only map the extent buffer if we don't already have a map_token and skip_locking isn't set. Without this patch scrub would panic almost immediately, with the patch it doesn't panic anymore. Thanks, Reported-by: Arne Jansen Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d84089349c82..2e667868e0d2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1228,6 +1228,7 @@ static void reada_for_search(struct btrfs_root *root, u32 nr; u32 blocksize; u32 nscan = 0; + bool map = true; if (level != 1) return; @@ -1249,8 +1250,11 @@ static void reada_for_search(struct btrfs_root *root, nritems = btrfs_header_nritems(node); nr = slot; + if (node->map_token || path->skip_locking) + map = false; + while (1) { - if (!node->map_token) { + if (map && !node->map_token) { unsigned long offset = btrfs_node_key_ptr_offset(nr); map_private_extent_buffer(node, offset, sizeof(struct btrfs_key_ptr), @@ -1277,7 +1281,7 @@ static void reada_for_search(struct btrfs_root *root, if ((search <= target && target - search <= 65536) || (search > target && search - target <= 65536)) { gen = btrfs_node_ptr_generation(node, nr); - if (node->map_token) { + if (map && node->map_token) { unmap_extent_buffer(node, node->map_token, KM_USER1); node->map_token = NULL; @@ -1289,7 +1293,7 @@ static void reada_for_search(struct btrfs_root *root, if ((nread > 65536 || nscan > 32)) break; } - if (node->map_token) { + if (map && node->map_token) { unmap_extent_buffer(node, node->map_token, KM_USER1); node->map_token = NULL; } -- cgit v1.2.3 From 3473f3c06a36865ae05993041fff35ee928342a7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 9 Jun 2011 10:15:17 -0400 Subject: Btrfs: unlock the trans lock properly In btrfs_wait_for_commit if we came upon a transaction that had committed we just exited, but that's bad since we are holding the trans_lock. So break instead so that the lock is dropped. Thanks, Reported-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dd719662340e..6b2e4786d189 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -349,7 +349,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) list) { if (t->in_commit) { if (t->commit_done) - goto out; + break; cur_trans = t; atomic_inc(&cur_trans->use_count); break; -- cgit v1.2.3 From ad3e34bba4b64ab8e1f5ea1a17768e1a0d9648ea Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 8 Jun 2011 14:45:50 -0400 Subject: Btrfs: don't map extent buffer if path->skip_locking is set Arne's scrub stuff exposed a problem with mapping the extent buffer in reada_for_search. He searches the commit root with multiple threads and with skip_locking set, so we can race and overwrite node->map_token since node isn't locked. So fix this so that we only map the extent buffer if we don't already have a map_token and skip_locking isn't set. Without this patch scrub would panic almost immediately, with the patch it doesn't panic anymore. Thanks, Reported-by: Arne Jansen Signed-off-by: Josef Bacik --- fs/btrfs/ctree.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d84089349c82..2e667868e0d2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1228,6 +1228,7 @@ static void reada_for_search(struct btrfs_root *root, u32 nr; u32 blocksize; u32 nscan = 0; + bool map = true; if (level != 1) return; @@ -1249,8 +1250,11 @@ static void reada_for_search(struct btrfs_root *root, nritems = btrfs_header_nritems(node); nr = slot; + if (node->map_token || path->skip_locking) + map = false; + while (1) { - if (!node->map_token) { + if (map && !node->map_token) { unsigned long offset = btrfs_node_key_ptr_offset(nr); map_private_extent_buffer(node, offset, sizeof(struct btrfs_key_ptr), @@ -1277,7 +1281,7 @@ static void reada_for_search(struct btrfs_root *root, if ((search <= target && target - search <= 65536) || (search > target && search - target <= 65536)) { gen = btrfs_node_ptr_generation(node, nr); - if (node->map_token) { + if (map && node->map_token) { unmap_extent_buffer(node, node->map_token, KM_USER1); node->map_token = NULL; @@ -1289,7 +1293,7 @@ static void reada_for_search(struct btrfs_root *root, if ((nread > 65536 || nscan > 32)) break; } - if (node->map_token) { + if (map && node->map_token) { unmap_extent_buffer(node, node->map_token, KM_USER1); node->map_token = NULL; } -- cgit v1.2.3 From 8c51032f978bac5bec5dae0c5de4f85db97c1cc9 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Fri, 3 Jun 2011 10:09:26 +0200 Subject: btrfs: scrub: errors in tree enumeration due to the semantics of btrfs_search_slot the path can point to an invalid slot when ret > 0. This condition went unnoticed, which in turn could have led to an incomplete scrubbing. Signed-off-by: Arne Jansen --- fs/btrfs/scrub.c | 57 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index df50fd1eca8f..d5a4108cedaf 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -804,18 +804,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) - goto out; - - l = path->nodes[0]; - slot = path->slots[0]; - btrfs_item_key_to_cpu(l, &key, slot); - if (key.objectid != logical) { - ret = btrfs_previous_item(root, path, 0, - BTRFS_EXTENT_ITEM_KEY); - if (ret < 0) - goto out; - } + goto out_noplug; + /* + * we might miss half an extent here, but that doesn't matter, + * as it's only the prefetch + */ while (1) { l = path->nodes[0]; slot = path->slots[0]; @@ -824,7 +818,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, if (ret == 0) continue; if (ret < 0) - goto out; + goto out_noplug; break; } @@ -906,15 +900,20 @@ again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; - - l = path->nodes[0]; - slot = path->slots[0]; - btrfs_item_key_to_cpu(l, &key, slot); - if (key.objectid != logical) { + if (ret > 0) { ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); if (ret < 0) goto out; + if (ret > 0) { + /* there's no smaller item, so stick with the + * larger one */ + btrfs_release_path(path); + ret = btrfs_search_slot(NULL, root, &key, + path, 0, 0); + if (ret < 0) + goto out; + } } while (1) { @@ -989,6 +988,7 @@ next: out: blk_finish_plug(&plug); +out_noplug: btrfs_free_path(path); return ret < 0 ? ret : 0; } @@ -1064,8 +1064,15 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) - goto out; - ret = 0; + break; + if (ret > 0) { + if (path->slots[0] >= + btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + } + } l = path->nodes[0]; slot = path->slots[0]; @@ -1075,7 +1082,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) if (found_key.objectid != sdev->dev->devid) break; - if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) + if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) break; if (found_key.offset >= end) @@ -1104,7 +1111,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) cache = btrfs_lookup_block_group(fs_info, chunk_offset); if (!cache) { ret = -ENOENT; - goto out; + break; } ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, chunk_offset, length); @@ -1116,9 +1123,13 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) btrfs_release_path(path); } -out: btrfs_free_path(path); - return ret; + + /* + * ret can still be 1 from search_slot or next_leaf, + * that's not an error + */ + return ret < 0 ? ret : 0; } static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) -- cgit v1.2.3 From 632dd772fcbde2ba37c0e8983bd38ef4a1eac906 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Fri, 10 Jun 2011 12:07:07 +0200 Subject: btrfs: reinitialize scrub workers Scrub starts the workers each time a scrub starts and stops them after it finished. This patch adds an initialization for the workers before each start, otherwise the workers behave strangely. Signed-off-by: Arne Jansen --- fs/btrfs/disk-io.c | 2 -- fs/btrfs/scrub.c | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a203d363184d..7bbbfebe47e4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1668,8 +1668,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->scrub_pause_wait); init_rwsem(&fs_info->scrub_super_lock); fs_info->scrub_workers_refcnt = 0; - btrfs_init_workers(&fs_info->scrub_workers, "scrub", - fs_info->thread_pool_size, &fs_info->generic_worker); sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index d5a4108cedaf..92cac19388ed 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1166,8 +1166,12 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) struct btrfs_fs_info *fs_info = root->fs_info; mutex_lock(&fs_info->scrub_lock); - if (fs_info->scrub_workers_refcnt == 0) + if (fs_info->scrub_workers_refcnt == 0) { + btrfs_init_workers(&fs_info->scrub_workers, "scrub", + fs_info->thread_pool_size, &fs_info->generic_worker); + fs_info->scrub_workers.idle_thresh = 4; btrfs_start_workers(&fs_info->scrub_workers, 1); + } ++fs_info->scrub_workers_refcnt; mutex_unlock(&fs_info->scrub_lock); -- cgit v1.2.3 From 6eef3125886df260ca0e8758d141308152226f6a Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Fri, 10 Jun 2011 13:04:58 +0200 Subject: btrfs: remove unneeded includes from scrub.c Signed-off-by: Arne Jansen --- fs/btrfs/scrub.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 92cac19388ed..a8d03d5efb5d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -16,13 +16,7 @@ * Boston, MA 021110-1307, USA. */ -#include -#include -#include #include -#include -#include -#include #include "ctree.h" #include "volumes.h" #include "disk-io.h" -- cgit v1.2.3 From 38e87880666091fe9c572a7a2ed2e771d97ca5aa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 10 Jun 2011 16:36:57 -0400 Subject: Btrfs: make sure to recheck for bitmaps in clusters Josef recently changed the free extent cache to look in the block group cluster for any bitmaps before trying to add a new bitmap for the same offset. This avoids BUG_ON()s due covering duplicate ranges. But it didn't go quite far enough. A given free range might span between one or more bitmaps or free space entries. The code has looping to cover this, but it doesn't check for clustered bitmaps every time. This shuffles our gotos to check for a bitmap in the cluster for every new bitmap entry we try to add. Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 38f3fd923043..9f985a429877 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1492,7 +1492,7 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, if (ctl->op == &free_space_op) block_group = ctl->private; - +again: /* * Since we link bitmaps right into the cluster we need to see if we * have a cluster here, and if so and it has our bitmap we need to add @@ -1510,13 +1510,13 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, node = rb_first(&cluster->root); if (!node) { spin_unlock(&cluster->lock); - goto again; + goto no_cluster_bitmap; } entry = rb_entry(node, struct btrfs_free_space, offset_index); if (!entry->bitmap) { spin_unlock(&cluster->lock); - goto again; + goto no_cluster_bitmap; } if (entry->offset == offset_to_bitmap(ctl, offset)) { @@ -1531,7 +1531,8 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, goto out; } } -again: + +no_cluster_bitmap: bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1, 0); if (!bitmap_info) { -- cgit v1.2.3 From 38e880540f983045da7a00fbc50daad238207fc5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 10 Jun 2011 18:43:13 +0000 Subject: Btrfs: clear current->journal_info on async transaction commit Normally current->jouranl_info is cleared by commit_transaction. For an async snap or subvol creation, though, it runs in a work queue. Clear it in btrfs_commit_transaction_async() to avoid leaking a non-NULL journal_info when we return to userspace. When the actual commit runs in the other thread it won't care that it's current->journal_info is already NULL. Signed-off-by: Sage Weil Tested-by: Jim Schutt Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6b2e4786d189..2b3590b9fe98 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1118,8 +1118,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, wait_current_trans_commit_start_and_unblock(root, cur_trans); else wait_current_trans_commit_start(root, cur_trans); - put_transaction(cur_trans); + if (current->journal_info == trans) + current->journal_info = NULL; + + put_transaction(cur_trans); return 0; } -- cgit v1.2.3 From 9eb9104c665aae2401a1723c044669eb10240072 Mon Sep 17 00:00:00 2001 From: richard kennedy Date: Tue, 7 Jun 2011 10:46:32 +0000 Subject: btrfs: remove 64bit alignment padding to allow extent_buffer to fit into one fewer cacheline Reorder extent_buffer to remove 8 bytes of alignment padding on 64 bit builds. This shrinks its size to 128 bytes allowing it to fit into one fewer cache lines and allows more objects per slab in its kmem_cache. slabinfo extent_buffer reports :- before:- Sizes (bytes) Slabs ---------------------------------- Object : 136 Total : 123 SlabObj: 136 Full : 121 SlabSiz: 4096 Partial: 0 Loss : 0 CpuSlab: 2 Align : 8 Objects: 30 after :- Object : 128 Total : 4 SlabObj: 128 Full : 2 SlabSiz: 4096 Partial: 0 Loss : 0 CpuSlab: 2 Align : 8 Objects: 32 Signed-off-by: Richard Kennedy Signed-off-by: Chris Mason --- fs/btrfs/extent_io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4e8445a4757c..a11a92ee2d30 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -126,9 +126,9 @@ struct extent_buffer { unsigned long map_len; struct page *first_page; unsigned long bflags; - atomic_t refs; struct list_head leak_list; struct rcu_head rcu_head; + atomic_t refs; /* the spinlock is used to protect most operations */ spinlock_t lock; -- cgit v1.2.3 From 027ed2f0044e95a97ed34db2d55a9ca95ba84385 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 8 Jun 2011 08:27:56 +0000 Subject: Btrfs: avoid stack bloat in btrfs_ioctl_fs_info() The size of struct btrfs_ioctl_fs_info_args is as big as 1KB, so don't declare the variable on stack. Signed-off-by: Li Zefan Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ac37040e426a..b793d112d1f6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2054,29 +2054,34 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) { - struct btrfs_ioctl_fs_info_args fi_args; + struct btrfs_ioctl_fs_info_args *fi_args; struct btrfs_device *device; struct btrfs_device *next; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; + int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - fi_args.num_devices = fs_devices->num_devices; - fi_args.max_id = 0; - memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid)); + fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); + if (!fi_args) + return -ENOMEM; + + fi_args->num_devices = fs_devices->num_devices; + memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); mutex_lock(&fs_devices->device_list_mutex); list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { - if (device->devid > fi_args.max_id) - fi_args.max_id = device->devid; + if (device->devid > fi_args->max_id) + fi_args->max_id = device->devid; } mutex_unlock(&fs_devices->device_list_mutex); - if (copy_to_user(arg, &fi_args, sizeof(fi_args))) - return -EFAULT; + if (copy_to_user(arg, fi_args, sizeof(*fi_args))) + ret = -EFAULT; - return 0; + kfree(fi_args); + return ret; } static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) -- cgit v1.2.3 From 5be76758f35ec6578e5b9b150aa513ac26bd9c54 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 9 Jun 2011 10:02:51 +0000 Subject: btrfs: fix unlocked access of delalloc_inodes list_splice_init will make delalloc_inodes empty, but without a spinlock around, this may produce corrupted list head, accessed in many placess, The race window is very tight and nobody seems to have hit it so far. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a203d363184d..33b744a5ac03 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2911,9 +2911,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) INIT_LIST_HEAD(&splice); - list_splice_init(&root->fs_info->delalloc_inodes, &splice); - spin_lock(&root->fs_info->delalloc_lock); + list_splice_init(&root->fs_info->delalloc_inodes, &splice); while (!list_empty(&splice)) { btrfs_inode = list_entry(splice.next, struct btrfs_inode, -- cgit v1.2.3 From 08d2f347e877e489ca098c87a6fd2e872fef9767 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 4 May 2011 16:18:50 +0200 Subject: Btrfs: fix extent state leak on failed nodatasum reads When encountering an EIO while reading from a nodatasum extent, we insert an error record into the inode's failure tree. btrfs_readpage_end_io_hook returns early for nodatasum inodes. We'd better clear the failure tree in that case, otherwise the kernel complains about BUG extent_state: Objects remaining on kmem_cache_close() on rmmod. Signed-off-by: Jan Schmidt Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 02ff4a1b968b..113913ae36e0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1986,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, } if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) - return 0; + goto good; if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { -- cgit v1.2.3 From 22b63a2971c5657dfc1bf4514f9410fc90c8b2c2 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 9 Feb 2011 16:05:31 +0200 Subject: Btrfs - use %pU to print fsid Get rid of FIXME comment. Uuids from dmesg are now the same as uuids given by btrfs-progs. Signed-off-by: Ilya Dryomov Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index da541dfca2e3..1efa56e18f9b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -689,12 +689,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, transid = btrfs_super_generation(disk_super); if (disk_super->label[0]) printk(KERN_INFO "device label %s ", disk_super->label); - else { - /* FIXME, make a readl uuid parser */ - printk(KERN_INFO "device fsid %llx-%llx ", - *(unsigned long long *)disk_super->fsid, - *(unsigned long long *)(disk_super->fsid + 8)); - } + else + printk(KERN_INFO "device fsid %pU ", disk_super->fsid); printk(KERN_CONT "devid %llu transid %llu %s\n", (unsigned long long)devid, (unsigned long long)transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); -- cgit v1.2.3 From 30b4caf5d73af5c99cf1b2b46496d8bc35330992 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 8 Jun 2011 03:56:44 +0000 Subject: Btrfs: use join_transaction in btrfs_evict_inode() The WARN_ON() in start_transaction() was triggered while balancing. The cause is btrfs_relocate_chunk() started a transaction and then called iput() on the inode that stores free space cache, and iput() called btrfs_start_transaction() again. Reported-by: Tsutomu Itoh Signed-off-by: Li Zefan Reviewed-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 113913ae36e0..c15636b17874 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3646,7 +3646,7 @@ void btrfs_evict_inode(struct inode *inode) btrfs_i_size_write(inode, 0); while (1) { - trans = btrfs_start_transaction(root, 0); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = root->orphan_block_rsv; -- cgit v1.2.3