summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
author Johannes Berg <johannes.berg@intel.com> 2013-12-16 11:23:45 +0100
committer Johannes Berg <johannes.berg@intel.com> 2013-12-16 11:23:45 +0100
commit c4de673b775e4db48cd2db6277e0c6714332ca0c (patch)
tree 84f9e4728e6ccf257236d2ba063b6e784ec8b65d /fs/btrfs/extent-tree.c
parent mac80211: fix nested sdata lock for IBSS/CSA (diff)
parent ath9k: initialize retry chain flags in tx99 code (diff)
download linux-c4de673b775e4db48cd2db6277e0c6714332ca0c.tar.xz
linux-c4de673b775e4db48cd2db6277e0c6714332ca0c.zip
Merge remote-tracking branch 'wireless-next/master' into mac80211-next
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c 225
1 files changed, 145 insertions, 80 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cfb3cf711b34..45d98d01028f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -25,7 +25,6 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
-#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
@@ -1551,9 +1550,8 @@ again:
if (ret && !insert) {
err = -ENOENT;
goto out;
- } else if (ret) {
+ } else if (WARN_ON(ret)) {
err = -EIO;
- WARN_ON(1);
goto out;
}
@@ -1979,7 +1977,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_extent_item *item;
u64 refs;
int ret;
- int err = 0;
path = btrfs_alloc_path();
if (!path)
@@ -1992,13 +1989,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path, bytenr, num_bytes, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
- if (ret == 0)
- goto out;
-
- if (ret != -EAGAIN) {
- err = ret;
+ if (ret != -EAGAIN)
goto out;
- }
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -2021,7 +2013,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, root, ret);
out:
btrfs_free_path(path);
- return err;
+ return ret;
}
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
@@ -2137,15 +2129,28 @@ again:
}
if (ret > 0) {
if (metadata) {
- btrfs_release_path(path);
- metadata = 0;
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key,
+ path->slots[0]);
+ if (key.objectid == node->bytenr &&
+ key.type == BTRFS_EXTENT_ITEM_KEY &&
+ key.offset == node->num_bytes)
+ ret = 0;
+ }
+ if (ret > 0) {
+ btrfs_release_path(path);
+ metadata = 0;
- key.offset = node->num_bytes;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- goto again;
+ key.objectid = node->bytenr;
+ key.offset = node->num_bytes;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ goto again;
+ }
+ } else {
+ err = -EIO;
+ goto out;
}
- err = -EIO;
- goto out;
}
leaf = path->nodes[0];
@@ -2234,8 +2239,12 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
{
int ret = 0;
- if (trans->aborted)
+ if (trans->aborted) {
+ if (insert_reserved)
+ btrfs_pin_extent(root, node->bytenr,
+ node->num_bytes, 1);
return 0;
+ }
if (btrfs_delayed_ref_is_head(node)) {
struct btrfs_delayed_ref_head *head;
@@ -2411,6 +2420,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
btrfs_free_delayed_extent_op(extent_op);
if (ret) {
+ /*
+ * Need to reset must_insert_reserved if
+ * there was an error so the abort stuff
+ * can cleanup the reserved space
+ * properly.
+ */
+ if (must_insert_reserved)
+ locked_ref->must_insert_reserved = 1;
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
spin_lock(&delayed_refs->lock);
btrfs_delayed_ref_unlock(locked_ref);
@@ -3197,8 +3214,7 @@ again:
if (ret)
goto out_put;
- ret = btrfs_truncate_free_space_cache(root, trans, path,
- inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, inode);
if (ret)
goto out_put;
}
@@ -3318,10 +3334,9 @@ again:
last = cache->key.objectid + cache->key.offset;
err = write_one_cache_group(trans, root, path, cache);
+ btrfs_put_block_group(cache);
if (err) /* File system offline */
goto out;
-
- btrfs_put_block_group(cache);
}
while (1) {
@@ -3605,10 +3620,9 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
/* make sure bytes are sectorsize aligned */
bytes = ALIGN(bytes, root->sectorsize);
- if (root == root->fs_info->tree_root ||
- BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
- alloc_chunk = 0;
+ if (btrfs_is_free_space_inode(inode)) {
committed = 1;
+ ASSERT(current->journal_info);
}
data_sinfo = fs_info->data_sinfo;
@@ -3636,6 +3650,16 @@ again:
spin_unlock(&data_sinfo->lock);
alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
+ /*
+ * It is ugly that we don't call nolock join
+ * transaction for the free space inode case here.
+ * But it is safe because we only do the data space
+ * reservation for the free space cache in the
+ * transaction context, the common join transaction
+ * just increase the counter of the current transaction
+ * handler, doesn't try to acquire the trans_lock of
+ * the fs.
+ */
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -3681,6 +3705,9 @@ commit_trans:
goto again;
}
+ trace_btrfs_space_reservation(root->fs_info,
+ "space_info:enospc",
+ data_sinfo->flags, bytes, 1);
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
@@ -3925,7 +3952,6 @@ static int can_overcommit(struct btrfs_root *root,
u64 space_size;
u64 avail;
u64 used;
- u64 to_add;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly;
@@ -3959,25 +3985,17 @@ static int can_overcommit(struct btrfs_root *root,
BTRFS_BLOCK_GROUP_RAID10))
avail >>= 1;
- to_add = space_info->total_bytes;
-
/*
* If we aren't flushing all things, let us overcommit up to
* 1/2th of the space. If we can flush, don't let us overcommit
* too much, let it overcommit up to 1/8 of the space.
*/
if (flush == BTRFS_RESERVE_FLUSH_ALL)
- to_add >>= 3;
+ avail >>= 3;
else
- to_add >>= 1;
-
- /*
- * Limit the overcommit to the amount of free space we could possibly
- * allocate for chunks.
- */
- to_add = min(avail, to_add);
+ avail >>= 1;
- if (used + bytes < space_info->total_bytes + to_add)
+ if (used + bytes < space_info->total_bytes + avail)
return 1;
return 0;
}
@@ -3998,12 +4016,26 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
* the filesystem is readonly(all dirty pages are written to
* the disk).
*/
- btrfs_start_all_delalloc_inodes(root->fs_info, 0);
+ btrfs_start_delalloc_roots(root->fs_info, 0);
if (!current->journal_info)
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_ordered_roots(root->fs_info, -1);
}
}
+static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
+{
+ u64 bytes;
+ int nr;
+
+ bytes = btrfs_calc_trans_metadata_size(root, 1);
+ nr = (int)div64_u64(to_reclaim, bytes);
+ if (!nr)
+ nr = 1;
+ return nr;
+}
+
+#define EXTENT_SIZE_PER_ITEM (256 * 1024)
+
/*
* shrink metadata reservation for delalloc
*/
@@ -4016,24 +4048,30 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
u64 delalloc_bytes;
u64 max_reclaim;
long time_left;
- unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
- int loops = 0;
+ unsigned long nr_pages;
+ int loops;
+ int items;
enum btrfs_reserve_flush_enum flush;
+ /* Calc the number of the pages we need flush for space reservation */
+ items = calc_reclaim_items_nr(root, to_reclaim);
+ to_reclaim = items * EXTENT_SIZE_PER_ITEM;
+
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
- smp_mb();
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
if (delalloc_bytes == 0) {
if (trans)
return;
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ if (wait_ordered)
+ btrfs_wait_ordered_roots(root->fs_info, items);
return;
}
+ loops = 0;
while (delalloc_bytes && loops < 3) {
max_reclaim = min(delalloc_bytes, to_reclaim);
nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
@@ -4042,9 +4080,19 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
* We need to wait for the async pages to actually start before
* we do anything.
*/
- wait_event(root->fs_info->async_submit_wait,
- !atomic_read(&root->fs_info->async_delalloc_pages));
+ max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
+ if (!max_reclaim)
+ goto skip_async;
+ if (max_reclaim <= nr_pages)
+ max_reclaim = 0;
+ else
+ max_reclaim -= nr_pages;
+
+ wait_event(root->fs_info->async_submit_wait,
+ atomic_read(&root->fs_info->async_delalloc_pages) <=
+ (int)max_reclaim);
+skip_async:
if (!trans)
flush = BTRFS_RESERVE_FLUSH_ALL;
else
@@ -4058,13 +4106,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
loops++;
if (wait_ordered && !trans) {
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_ordered_roots(root->fs_info, items);
} else {
time_left = schedule_timeout_killable(1);
if (time_left)
break;
}
- smp_mb();
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
}
@@ -4149,16 +4196,11 @@ static int flush_space(struct btrfs_root *root,
switch (state) {
case FLUSH_DELAYED_ITEMS_NR:
case FLUSH_DELAYED_ITEMS:
- if (state == FLUSH_DELAYED_ITEMS_NR) {
- u64 bytes = btrfs_calc_trans_metadata_size(root, 1);
-
- nr = (int)div64_u64(num_bytes, bytes);
- if (!nr)
- nr = 1;
- nr *= 2;
- } else {
+ if (state == FLUSH_DELAYED_ITEMS_NR)
+ nr = calc_reclaim_items_nr(root, num_bytes) * 2;
+ else
nr = -1;
- }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -4341,6 +4383,10 @@ out:
!block_rsv_use_bytes(global_rsv, orig_bytes))
ret = 0;
}
+ if (ret == -ENOSPC)
+ trace_btrfs_space_reservation(root->fs_info,
+ "space_info:enospc",
+ space_info->flags, orig_bytes, 1);
if (flushing) {
spin_lock(&space_info->lock);
space_info->flush = 0;
@@ -4465,7 +4511,6 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
- space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@@ -4666,7 +4711,6 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
sinfo->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
sinfo->flags, num_bytes, 0);
- sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@@ -4997,7 +5041,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
if (to_reserve)
- trace_btrfs_space_reservation(root->fs_info,"delalloc",
+ trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_reserve, 1);
block_rsv_add_bytes(block_rsv, to_reserve, 1);
@@ -5275,6 +5319,8 @@ static int pin_down_extent(struct btrfs_root *root,
set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
+ if (reserved)
+ trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
return 0;
}
@@ -5446,7 +5492,6 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
- space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -5730,9 +5775,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
extent_slot = path->slots[0];
}
- } else if (ret == -ENOENT) {
+ } else if (WARN_ON(ret == -ENOENT)) {
btrfs_print_leaf(extent_root, path->nodes[0]);
- WARN_ON(1);
btrfs_err(info,
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
bytenr, parent, root_objectid, owner_objectid,
@@ -5979,6 +6023,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_add_free_space(cache, buf->start, buf->len);
btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
+ trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
}
out:
@@ -6117,10 +6162,13 @@ enum btrfs_loop_type {
/*
* walks the btree of allocated extents and find a hole of a given size.
* The key ins is changed to record the hole:
- * ins->objectid == block start
+ * ins->objectid == start position
* ins->flags = BTRFS_EXTENT_ITEM_KEY
- * ins->offset == number of blocks
+ * ins->offset == the size of the hole.
* Any available blocks before search_start are skipped.
+ *
+ * If there is no suitable free space, we will record the max size of
+ * the free space extent currently.
*/
static noinline int find_free_extent(struct btrfs_root *orig_root,
u64 num_bytes, u64 empty_size,
@@ -6133,6 +6181,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
struct btrfs_block_group_cache *block_group = NULL;
struct btrfs_block_group_cache *used_block_group;
u64 search_start = 0;
+ u64 max_extent_size = 0;
int empty_cluster = 2 * 1024 * 1024;
struct btrfs_space_info *space_info;
int loop = 0;
@@ -6292,7 +6341,10 @@ have_block_group:
btrfs_get_block_group(used_block_group);
offset = btrfs_alloc_from_cluster(used_block_group,
- last_ptr, num_bytes, used_block_group->key.objectid);
+ last_ptr,
+ num_bytes,
+ used_block_group->key.objectid,
+ &max_extent_size);
if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
@@ -6355,8 +6407,10 @@ refill_cluster:
* cluster
*/
offset = btrfs_alloc_from_cluster(block_group,
- last_ptr, num_bytes,
- search_start);
+ last_ptr,
+ num_bytes,
+ search_start,
+ &max_extent_size);
if (offset) {
/* we found one, proceed */
spin_unlock(&last_ptr->refill_lock);
@@ -6391,13 +6445,18 @@ unclustered_alloc:
if (cached &&
block_group->free_space_ctl->free_space <
num_bytes + empty_cluster + empty_size) {
+ if (block_group->free_space_ctl->free_space >
+ max_extent_size)
+ max_extent_size =
+ block_group->free_space_ctl->free_space;
spin_unlock(&block_group->free_space_ctl->tree_lock);
goto loop;
}
spin_unlock(&block_group->free_space_ctl->tree_lock);
offset = btrfs_find_space_for_alloc(block_group, search_start,
- num_bytes, empty_size);
+ num_bytes, empty_size,
+ &max_extent_size);
/*
* If we didn't find a chunk, and we haven't failed on this
* block group before, and this block group is in the middle of
@@ -6515,7 +6574,8 @@ loop:
ret = 0;
}
out:
-
+ if (ret == -ENOSPC)
+ ins->offset = max_extent_size;
return ret;
}
@@ -6573,8 +6633,8 @@ again:
flags);
if (ret == -ENOSPC) {
- if (!final_tried) {
- num_bytes = num_bytes >> 1;
+ if (!final_tried && ins->offset) {
+ num_bytes = min(num_bytes >> 1, ins->offset);
num_bytes = round_down(num_bytes, root->sectorsize);
num_bytes = max(num_bytes, min_alloc_size);
if (num_bytes == min_alloc_size)
@@ -6591,8 +6651,6 @@ again:
}
}
- trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
-
return ret;
}
@@ -6704,6 +6762,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ins->objectid, ins->offset);
BUG();
}
+ trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
return ret;
}
@@ -6728,13 +6787,18 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
size += sizeof(*block_info);
path = btrfs_alloc_path();
- if (!path)
+ if (!path) {
+ btrfs_free_and_pin_reserved_extent(root, ins->objectid,
+ root->leafsize);
return -ENOMEM;
+ }
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
ins, size);
if (ret) {
+ btrfs_free_and_pin_reserved_extent(root, ins->objectid,
+ root->leafsize);
btrfs_free_path(path);
return ret;
}
@@ -6776,6 +6840,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
ins->objectid, ins->offset);
BUG();
}
+
+ trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize);
return ret;
}
@@ -7980,7 +8046,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
spin_lock(&sinfo->lock);
- for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
if (!list_empty(&sinfo->block_groups[i]))
free_bytes += __btrfs_get_ro_block_group_free_space(
&sinfo->block_groups[i]);
@@ -8268,15 +8334,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
release_global_block_rsv(info);
- while(!list_empty(&info->space_info)) {
+ while (!list_empty(&info->space_info)) {
space_info = list_entry(info->space_info.next,
struct btrfs_space_info,
list);
if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
- if (space_info->bytes_pinned > 0 ||
+ if (WARN_ON(space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 ||
- space_info->bytes_may_use > 0) {
- WARN_ON(1);
+ space_info->bytes_may_use > 0)) {
dump_space_info(space_info, 0, 0);
}
}