diff options
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 904 |
1 files changed, 515 insertions, 389 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 42491d728e99..e5b2533b691a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -254,18 +254,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, * empty_size -- a hint that you plan on doing more cow. This is the size in * bytes the allocator should try to find free next to the block it returns. * This is just a hint and may be ignored by the allocator. - * - * prealloc_dest -- if you have already reserved a destination for the cow, - * this uses that block instead of allocating a new one. - * btrfs_alloc_reserved_extent is used to finish the allocation. */ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret, - u64 search_start, u64 empty_size, - u64 prealloc_dest) + u64 search_start, u64 empty_size) { u64 parent_start; struct extent_buffer *cow; @@ -277,7 +272,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (*cow_ret == buf) unlock_orig = 1; - WARN_ON(!btrfs_tree_locked(buf)); + btrfs_assert_tree_locked(buf); if (parent) parent_start = parent->start; @@ -291,26 +286,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); - if (prealloc_dest) { - struct btrfs_key ins; - - ins.objectid = prealloc_dest; - ins.offset = buf->len; - ins.type = BTRFS_EXTENT_ITEM_KEY; - - ret = btrfs_alloc_reserved_extent(trans, root, parent_start, - root->root_key.objectid, - trans->transid, level, &ins); - BUG_ON(ret); - cow = btrfs_init_new_buffer(trans, root, prealloc_dest, - buf->len, level); - } else { - cow = btrfs_alloc_free_block(trans, root, buf->len, - parent_start, - root->root_key.objectid, - trans->transid, level, - search_start, empty_size); - } + cow = btrfs_alloc_free_block(trans, root, buf->len, + parent_start, root->root_key.objectid, + trans->transid, level, + search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -413,7 +392,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, - struct extent_buffer **cow_ret, u64 prealloc_dest) + struct extent_buffer **cow_ret) { u64 search_start; int ret; @@ -436,7 +415,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_header_owner(buf) == root->root_key.objectid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { *cow_ret = buf; - WARN_ON(prealloc_dest); return 0; } @@ -447,8 +425,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_set_lock_blocking(buf); ret = __btrfs_cow_block(trans, root, buf, parent, - parent_slot, cow_ret, search_start, 0, - prealloc_dest); + parent_slot, cow_ret, search_start, 0); return ret; } @@ -617,7 +594,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, err = __btrfs_cow_block(trans, root, cur, parent, i, &cur, search_start, min(16 * blocksize, - (end_slot - i) * blocksize), 0); + (end_slot - i) * blocksize)); if (err) { btrfs_tree_unlock(cur); free_extent_buffer(cur); @@ -937,7 +914,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BUG_ON(!child); btrfs_tree_lock(child); btrfs_set_lock_blocking(child); - ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); + ret = btrfs_cow_block(trans, root, child, mid, 0, &child); BUG_ON(ret); spin_lock(&root->node_lock); @@ -945,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, spin_unlock(&root->node_lock); ret = btrfs_update_extent_ref(trans, root, child->start, + child->len, mid->start, child->start, root->root_key.objectid, trans->transid, level - 1); @@ -971,6 +949,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; + if (trans->transaction->delayed_refs.flushing && + btrfs_header_nritems(mid) > 2) + return 0; + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; @@ -979,7 +961,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_lock(left); btrfs_set_lock_blocking(left); wret = btrfs_cow_block(trans, root, left, - parent, pslot - 1, &left, 0); + parent, pslot - 1, &left); if (wret) { ret = wret; goto enospc; @@ -990,7 +972,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, btrfs_tree_lock(right); btrfs_set_lock_blocking(right); wret = btrfs_cow_block(trans, root, right, - parent, pslot + 1, &right, 0); + parent, pslot + 1, &right); if (wret) { ret = wret; goto enospc; @@ -1171,7 +1153,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, wret = 1; } else { ret = btrfs_cow_block(trans, root, left, parent, - pslot - 1, &left, 0); + pslot - 1, &left); if (ret) wret = 1; else { @@ -1222,7 +1204,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, } else { ret = btrfs_cow_block(trans, root, right, parent, pslot + 1, - &right, 0); + &right); if (ret) wret = 1; else { @@ -1262,9 +1244,9 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, * readahead one full node of leaves, finding things that are close * to the block in 'slot', and triggering ra on them. */ -static noinline void reada_for_search(struct btrfs_root *root, - struct btrfs_path *path, - int level, int slot, u64 objectid) +static void reada_for_search(struct btrfs_root *root, + struct btrfs_path *path, + int level, int slot, u64 objectid) { struct extent_buffer *node; struct btrfs_disk_key disk_key; @@ -1465,6 +1447,117 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) } /* + * helper function for btrfs_search_slot. The goal is to find a block + * in cache without setting the path to blocking. If we find the block + * we return zero and the path is unchanged. + * + * If we can't find the block, we set the path blocking and do some + * reada. -EAGAIN is returned and the search must be repeated. + */ +static int +read_block_for_search(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *p, + struct extent_buffer **eb_ret, int level, int slot, + struct btrfs_key *key) +{ + u64 blocknr; + u64 gen; + u32 blocksize; + struct extent_buffer *b = *eb_ret; + struct extent_buffer *tmp; + + blocknr = btrfs_node_blockptr(b, slot); + gen = btrfs_node_ptr_generation(b, slot); + blocksize = btrfs_level_size(root, level - 1); + + tmp = btrfs_find_tree_block(root, blocknr, blocksize); + if (tmp && btrfs_buffer_uptodate(tmp, gen)) { + *eb_ret = tmp; + return 0; + } + + /* + * reduce lock contention at high levels + * of the btree by dropping locks before + * we read. + */ + btrfs_release_path(NULL, p); + if (tmp) + free_extent_buffer(tmp); + if (p->reada) + reada_for_search(root, p, level, slot, key->objectid); + + tmp = read_tree_block(root, blocknr, blocksize, gen); + if (tmp) + free_extent_buffer(tmp); + return -EAGAIN; +} + +/* + * helper function for btrfs_search_slot. This does all of the checks + * for node-level blocks and does any balancing required based on + * the ins_len. + * + * If no extra work was required, zero is returned. If we had to + * drop the path, -EAGAIN is returned and btrfs_search_slot must + * start over + */ +static int +setup_nodes_for_search(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *p, + struct extent_buffer *b, int level, int ins_len) +{ + int ret; + if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= + BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { + int sret; + + sret = reada_for_balance(root, p, level); + if (sret) + goto again; + + btrfs_set_path_blocking(p); + sret = split_node(trans, root, p, level); + btrfs_clear_path_blocking(p, NULL); + + BUG_ON(sret > 0); + if (sret) { + ret = sret; + goto done; + } + b = p->nodes[level]; + } else if (ins_len < 0 && btrfs_header_nritems(b) < + BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { + int sret; + + sret = reada_for_balance(root, p, level); + if (sret) + goto again; + + btrfs_set_path_blocking(p); + sret = balance_level(trans, root, p, level); + btrfs_clear_path_blocking(p, NULL); + + if (sret) { + ret = sret; + goto done; + } + b = p->nodes[level]; + if (!b) { + btrfs_release_path(NULL, p); + goto again; + } + BUG_ON(btrfs_header_nritems(b) == 1); + } + return 0; + +again: + ret = -EAGAIN; +done: + return ret; +} + +/* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf * level of the path (level 0) @@ -1482,17 +1575,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow) { struct extent_buffer *b; - struct extent_buffer *tmp; int slot; int ret; int level; - int should_reada = p->reada; int lowest_unlock = 1; - int blocksize; u8 lowest_level = 0; - u64 blocknr; - u64 gen; - struct btrfs_key prealloc_block; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); @@ -1501,8 +1588,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root if (ins_len < 0) lowest_unlock = 2; - prealloc_block.objectid = 0; - again: if (p->skip_locking) b = btrfs_root_node(root); @@ -1523,50 +1608,21 @@ again: if (cow) { int wret; - /* is a cow on this block not required */ + /* + * if we don't really need to cow this block + * then we don't want to set the path blocking, + * so we test it here + */ if (btrfs_header_generation(b) == trans->transid && btrfs_header_owner(b) == root->root_key.objectid && !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { goto cow_done; } - - /* ok, we have to cow, is our old prealloc the right - * size? - */ - if (prealloc_block.objectid && - prealloc_block.offset != b->len) { - btrfs_release_path(root, p); - btrfs_free_reserved_extent(root, - prealloc_block.objectid, - prealloc_block.offset); - prealloc_block.objectid = 0; - goto again; - } - - /* - * for higher level blocks, try not to allocate blocks - * with the block and the parent locks held. - */ - if (level > 0 && !prealloc_block.objectid) { - u32 size = b->len; - u64 hint = b->start; - - btrfs_release_path(root, p); - ret = btrfs_reserve_extent(trans, root, - size, size, 0, - hint, (u64)-1, - &prealloc_block, 0); - BUG_ON(ret); - goto again; - } - btrfs_set_path_blocking(p); wret = btrfs_cow_block(trans, root, b, p->nodes[level + 1], - p->slots[level + 1], - &b, prealloc_block.objectid); - prealloc_block.objectid = 0; + p->slots[level + 1], &b); if (wret) { free_extent_buffer(b); ret = wret; @@ -1611,51 +1667,15 @@ cow_done: if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if ((p->search_for_split || ins_len > 0) && - btrfs_header_nritems(b) >= - BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { - int sret; - - sret = reada_for_balance(root, p, level); - if (sret) - goto again; - - btrfs_set_path_blocking(p); - sret = split_node(trans, root, p, level); - btrfs_clear_path_blocking(p, NULL); - - BUG_ON(sret > 0); - if (sret) { - ret = sret; - goto done; - } - b = p->nodes[level]; - slot = p->slots[level]; - } else if (ins_len < 0 && - btrfs_header_nritems(b) < - BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { - int sret; - - sret = reada_for_balance(root, p, level); - if (sret) - goto again; - - btrfs_set_path_blocking(p); - sret = balance_level(trans, root, p, level); - btrfs_clear_path_blocking(p, NULL); + ret = setup_nodes_for_search(trans, root, p, b, level, + ins_len); + if (ret == -EAGAIN) + goto again; + else if (ret) + goto done; + b = p->nodes[level]; + slot = p->slots[level]; - if (sret) { - ret = sret; - goto done; - } - b = p->nodes[level]; - if (!b) { - btrfs_release_path(NULL, p); - goto again; - } - slot = p->slots[level]; - BUG_ON(btrfs_header_nritems(b) == 1); - } unlock_up(p, level, lowest_unlock); /* this is only true while dropping a snapshot */ @@ -1664,44 +1684,11 @@ cow_done: goto done; } - blocknr = btrfs_node_blockptr(b, slot); - gen = btrfs_node_ptr_generation(b, slot); - blocksize = btrfs_level_size(root, level - 1); + ret = read_block_for_search(trans, root, p, + &b, level, slot, key); + if (ret == -EAGAIN) + goto again; - tmp = btrfs_find_tree_block(root, blocknr, blocksize); - if (tmp && btrfs_buffer_uptodate(tmp, gen)) { - b = tmp; - } else { - /* - * reduce lock contention at high levels - * of the btree by dropping locks before - * we read. - */ - if (level > 0) { - btrfs_release_path(NULL, p); - if (tmp) - free_extent_buffer(tmp); - if (should_reada) - reada_for_search(root, p, - level, slot, - key->objectid); - - tmp = read_tree_block(root, blocknr, - blocksize, gen); - if (tmp) - free_extent_buffer(tmp); - goto again; - } else { - btrfs_set_path_blocking(p); - if (tmp) - free_extent_buffer(tmp); - if (should_reada) - reada_for_search(root, p, - level, slot, - key->objectid); - b = read_node_slot(root, b, slot); - } - } if (!p->skip_locking) { int lret; @@ -1742,12 +1729,8 @@ done: * we don't really know what they plan on doing with the path * from here on, so for now just mark it as blocking */ - btrfs_set_path_blocking(p); - if (prealloc_block.objectid) { - btrfs_free_reserved_extent(root, - prealloc_block.objectid, - prealloc_block.offset); - } + if (!p->leave_spinning) + btrfs_set_path_blocking(p); return ret; } @@ -1768,7 +1751,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, int ret; eb = btrfs_lock_root_node(root); - ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); + ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb); BUG_ON(ret); btrfs_set_lock_blocking(eb); @@ -1826,7 +1809,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, } ret = btrfs_cow_block(trans, root, eb, parent, slot, - &eb, 0); + &eb); BUG_ON(ret); if (root->root_key.objectid == @@ -2139,7 +2122,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, spin_unlock(&root->node_lock); ret = btrfs_update_extent_ref(trans, root, lower->start, - lower->start, c->start, + lower->len, lower->start, c->start, root->root_key.objectid, trans->transid, level - 1); BUG_ON(ret); @@ -2174,8 +2157,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(!path->nodes[level]); lower = path->nodes[level]; nritems = btrfs_header_nritems(lower); - if (slot > nritems) - BUG(); + BUG_ON(slot > nritems); if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { @@ -2221,7 +2203,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; - } else { + } else if (!trans->transaction->delayed_refs.flushing) { ret = push_nodes_for_insert(trans, root, path, level); c = path->nodes[level]; if (!ret && btrfs_header_nritems(c) < @@ -2329,66 +2311,27 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root, return ret; } -/* - * push some data in the path leaf to the right, trying to free up at - * least data_size bytes. returns zero if the push worked, nonzero otherwise - * - * returns 1 if the push failed because the other node didn't have enough - * room, 0 if everything worked out and < 0 if there were major errors. - */ -static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size, - int empty) +static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + int data_size, int empty, + struct extent_buffer *right, + int free_space, u32 left_nritems) { struct extent_buffer *left = path->nodes[0]; - struct extent_buffer *right; - struct extent_buffer *upper; + struct extent_buffer *upper = path->nodes[1]; struct btrfs_disk_key disk_key; int slot; u32 i; - int free_space; int push_space = 0; int push_items = 0; struct btrfs_item *item; - u32 left_nritems; u32 nr; u32 right_nritems; u32 data_end; u32 this_item_size; int ret; - slot = path->slots[1]; - if (!path->nodes[1]) - return 1; - - upper = path->nodes[1]; - if (slot >= btrfs_header_nritems(upper) - 1) - return 1; - - WARN_ON(!btrfs_tree_locked(path->nodes[1])); - - right = read_node_slot(root, upper, slot + 1); - btrfs_tree_lock(right); - btrfs_set_lock_blocking(right); - - free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size) - goto out_unlock; - - /* cow and double check */ - ret = btrfs_cow_block(trans, root, right, upper, - slot + 1, &right, 0); - if (ret) - goto out_unlock; - - free_space = btrfs_leaf_free_space(root, right); - if (free_space < data_size) - goto out_unlock; - - left_nritems = btrfs_header_nritems(left); - if (left_nritems == 0) - goto out_unlock; - if (empty) nr = 0; else @@ -2397,6 +2340,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (path->slots[0] >= left_nritems) push_space += data_size; + slot = path->slots[1]; i = left_nritems - 1; while (i >= nr) { item = btrfs_item_nr(left, i); @@ -2528,24 +2472,82 @@ out_unlock: } /* + * push some data in the path leaf to the right, trying to free up at + * least data_size bytes. returns zero if the push worked, nonzero otherwise + * + * returns 1 if the push failed because the other node didn't have enough + * room, 0 if everything worked out and < 0 if there were major errors. + */ +static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size, + int empty) +{ + struct extent_buffer *left = path->nodes[0]; + struct extent_buffer *right; + struct extent_buffer *upper; + int slot; + int free_space; + u32 left_nritems; + int ret; + + if (!path->nodes[1]) + return 1; + + slot = path->slots[1]; + upper = path->nodes[1]; + if (slot >= btrfs_header_nritems(upper) - 1) + return 1; + + btrfs_assert_tree_locked(path->nodes[1]); + + right = read_node_slot(root, upper, slot + 1); + btrfs_tree_lock(right); + btrfs_set_lock_blocking(right); + + free_space = btrfs_leaf_free_space(root, right); + if (free_space < data_size) + goto out_unlock; + + /* cow and double check */ + ret = btrfs_cow_block(trans, root, right, upper, + slot + 1, &right); + if (ret) + goto out_unlock; + + free_space = btrfs_leaf_free_space(root, right); + if (free_space < data_size) + goto out_unlock; + + left_nritems = btrfs_header_nritems(left); + if (left_nritems == 0) + goto out_unlock; + + return __push_leaf_right(trans, root, path, data_size, empty, + right, free_space, left_nritems); +out_unlock: + btrfs_tree_unlock(right); + free_extent_buffer(right); + return 1; +} + +/* * push some data in the path leaf to the left, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise */ -static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size, - int empty) +static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int data_size, + int empty, struct extent_buffer *left, + int free_space, int right_nritems) { struct btrfs_disk_key disk_key; struct extent_buffer *right = path->nodes[0]; - struct extent_buffer *left; int slot; int i; - int free_space; int push_space = 0; int push_items = 0; struct btrfs_item *item; u32 old_left_nritems; - u32 right_nritems; u32 nr; int ret = 0; int wret; @@ -2553,41 +2555,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root u32 old_left_item_size; slot = path->slots[1]; - if (slot == 0) - return 1; - if (!path->nodes[1]) - return 1; - - right_nritems = btrfs_header_nritems(right); - if (right_nritems == 0) - return 1; - - WARN_ON(!btrfs_tree_locked(path->nodes[1])); - - left = read_node_slot(root, path->nodes[1], slot - 1); - btrfs_tree_lock(left); - btrfs_set_lock_blocking(left); - - free_space = btrfs_leaf_free_space(root, left); - if (free_space < data_size) { - ret = 1; - goto out; - } - - /* cow and double check */ - ret = btrfs_cow_block(trans, root, left, - path->nodes[1], slot - 1, &left, 0); - if (ret) { - /* we hit -ENOSPC, but it isn't fatal here */ - ret = 1; - goto out; - } - - free_space = btrfs_leaf_free_space(root, left); - if (free_space < data_size) { - ret = 1; - goto out; - } if (empty) nr = right_nritems; @@ -2755,6 +2722,154 @@ out: } /* + * push some data in the path leaf to the left, trying to free up at + * least data_size bytes. returns zero if the push worked, nonzero otherwise + */ +static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, int data_size, + int empty) +{ + struct extent_buffer *right = path->nodes[0]; + struct extent_buffer *left; + int slot; + int free_space; + u32 right_nritems; + int ret = 0; + + slot = path->slots[1]; + if (slot == 0) + return 1; + if (!path->nodes[1]) + return 1; + + right_nritems = btrfs_header_nritems(right); + if (right_nritems == 0) + return 1; + + btrfs_assert_tree_locked(path->nodes[1]); + + left = read_node_slot(root, path->nodes[1], slot - 1); + btrfs_tree_lock(left); + btrfs_set_lock_blocking(left); + + free_space = btrfs_leaf_free_space(root, left); + if (free_space < data_size) { + ret = 1; + goto out; + } + + /* cow and double check */ + ret = btrfs_cow_block(trans, root, left, + path->nodes[1], slot - 1, &left); + if (ret) { + /* we hit -ENOSPC, but it isn't fatal here */ + ret = 1; + goto out; + } + + free_space = btrfs_leaf_free_space(root, left); + if (free_space < data_size) { + ret = 1; + goto out; + } + + return __push_leaf_left(trans, root, path, data_size, + empty, left, free_space, right_nritems); +out: + btrfs_tree_unlock(left); + free_extent_buffer(left); + return ret; +} + +/* + * split the path's leaf in two, making sure there is at least data_size + * available for the resulting leaf level of the path. + * + * returns 0 if all went well and < 0 on failure. + */ +static noinline int copy_for_split(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *l, + struct extent_buffer *right, + int slot, int mid, int nritems) +{ + int data_copy_size; + int rt_data_off; + int i; + int ret = 0; + int wret; + struct btrfs_disk_key disk_key; + + nritems = nritems - mid; + btrfs_set_header_nritems(right, nritems); + data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); + + copy_extent_buffer(right, l, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(mid), + nritems * sizeof(struct btrfs_item)); + + copy_extent_buffer(right, l, + btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + data_copy_size, btrfs_leaf_data(l) + + leaf_data_end(root, l), data_copy_size); + + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_end_nr(l, mid); + + for (i = 0; i < nritems; i++) { + struct btrfs_item *item = btrfs_item_nr(right, i); + u32 ioff; + + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + + ioff = btrfs_item_offset(right, item); + btrfs_set_item_offset(right, item, ioff + rt_data_off); + } + + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; + } + + btrfs_set_header_nritems(l, mid); + ret = 0; + btrfs_item_key(right, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, right->start, + path->slots[1] + 1, 1); + if (wret) + ret = wret; + + btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(l); + BUG_ON(path->slots[0] != slot); + + ret = btrfs_update_ref(trans, root, l, right, 0, nritems); + BUG_ON(ret); + + if (mid <= slot) { + btrfs_tree_unlock(path->nodes[0]); + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; + path->slots[0] -= mid; + path->slots[1] += 1; + } else { + btrfs_tree_unlock(right); + free_extent_buffer(right); + } + + BUG_ON(path->slots[0] < 0); + + return ret; +} + +/* * split the path's leaf in two, making sure there is at least data_size * available for the resulting leaf level of the path. * @@ -2771,17 +2886,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, int mid; int slot; struct extent_buffer *right; - int data_copy_size; - int rt_data_off; - int i; int ret = 0; int wret; int double_split; int num_doubles = 0; - struct btrfs_disk_key disk_key; /* first try to make some room by pushing left and right */ - if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { + if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY && + !trans->transaction->delayed_refs.flushing) { wret = push_leaf_right(trans, root, path, data_size, 0); if (wret < 0) return wret; @@ -2830,11 +2942,14 @@ again: write_extent_buffer(right, root->fs_info->chunk_tree_uuid, (unsigned long)btrfs_header_chunk_tree_uuid(right), BTRFS_UUID_SIZE); + if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + data_size > BTRFS_LEAF_DATA_SIZE(root)) { if (slot >= nritems) { + struct btrfs_disk_key disk_key; + btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, @@ -2862,6 +2977,8 @@ again: if (leaf_space_used(l, 0, mid) + data_size > BTRFS_LEAF_DATA_SIZE(root)) { if (!extend && data_size && slot == 0) { + struct btrfs_disk_key disk_key; + btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, @@ -2894,76 +3011,16 @@ again: } } } - nritems = nritems - mid; - btrfs_set_header_nritems(right, nritems); - data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); - - copy_extent_buffer(right, l, btrfs_item_nr_offset(0), - btrfs_item_nr_offset(mid), - nritems * sizeof(struct btrfs_item)); - - copy_extent_buffer(right, l, - btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - - data_copy_size, btrfs_leaf_data(l) + - leaf_data_end(root, l), data_copy_size); - - rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_end_nr(l, mid); - - for (i = 0; i < nritems; i++) { - struct btrfs_item *item = btrfs_item_nr(right, i); - u32 ioff; - - if (!right->map_token) { - map_extent_buffer(right, (unsigned long)item, - sizeof(struct btrfs_item), - &right->map_token, &right->kaddr, - &right->map_start, &right->map_len, - KM_USER1); - } - - ioff = btrfs_item_offset(right, item); - btrfs_set_item_offset(right, item, ioff + rt_data_off); - } - - if (right->map_token) { - unmap_extent_buffer(right, right->map_token, KM_USER1); - right->map_token = NULL; - } - btrfs_set_header_nritems(l, mid); - ret = 0; - btrfs_item_key(right, &disk_key, 0); - wret = insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1); - if (wret) - ret = wret; - - btrfs_mark_buffer_dirty(right); - btrfs_mark_buffer_dirty(l); - BUG_ON(path->slots[0] != slot); - - ret = btrfs_update_ref(trans, root, l, right, 0, nritems); + ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); BUG_ON(ret); - if (mid <= slot) { - btrfs_tree_unlock(path->nodes[0]); - free_extent_buffer(path->nodes[0]); - path->nodes[0] = right; - path->slots[0] -= mid; - path->slots[1] += 1; - } else { - btrfs_tree_unlock(right); - free_extent_buffer(right); - } - - BUG_ON(path->slots[0] < 0); - if (double_split) { BUG_ON(num_doubles != 0); num_doubles++; goto again; } + return ret; } @@ -3021,26 +3078,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, return -EAGAIN; } + btrfs_set_path_blocking(path); ret = split_leaf(trans, root, &orig_key, path, sizeof(struct btrfs_item), 1); path->keep_locks = 0; BUG_ON(ret); + btrfs_unlock_up_safe(path, 1); + leaf = path->nodes[0]; + BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); + +split: /* * make sure any changes to the path from split_leaf leave it * in a blocking state */ btrfs_set_path_blocking(path); - leaf = path->nodes[0]; - BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); - -split: item = btrfs_item_nr(leaf, path->slots[0]); orig_offset = btrfs_item_offset(leaf, item); item_size = btrfs_item_size(leaf, item); - buf = kmalloc(item_size, GFP_NOFS); read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, path->slots[0]), item_size); @@ -3445,39 +3503,27 @@ out: } /* - * Given a key and some data, insert items into the tree. - * This does all the path init required, making room in the tree if needed. + * this is a helper for btrfs_insert_empty_items, the main goal here is + * to save stack depth by doing the bulk of the work in a function + * that doesn't call btrfs_search_slot */ -int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *cpu_key, u32 *data_size, - int nr) +static noinline_for_stack int +setup_items_for_insert(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 *data_size, + u32 total_data, u32 total_size, int nr) { - struct extent_buffer *leaf; struct btrfs_item *item; - int ret = 0; - int slot; - int slot_orig; int i; u32 nritems; - u32 total_size = 0; - u32 total_data = 0; unsigned int data_end; struct btrfs_disk_key disk_key; + int ret; + struct extent_buffer *leaf; + int slot; - for (i = 0; i < nr; i++) - total_data += data_size[i]; - - total_size = total_data + (nr * sizeof(struct btrfs_item)); - ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); - if (ret == 0) - return -EEXIST; - if (ret < 0) - goto out; - - slot_orig = path->slots[0]; leaf = path->nodes[0]; + slot = path->slots[0]; nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); @@ -3489,9 +3535,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, BUG(); } - slot = path->slots[0]; - BUG_ON(slot < 0); - if (slot != nritems) { unsigned int old_data = btrfs_item_end_nr(leaf, slot); @@ -3547,21 +3590,60 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, data_end -= data_size[i]; btrfs_set_item_size(leaf, item, data_size[i]); } + btrfs_set_header_nritems(leaf, nritems + nr); - btrfs_mark_buffer_dirty(leaf); ret = 0; if (slot == 0) { + struct btrfs_disk_key disk_key; btrfs_cpu_key_to_disk(&disk_key, cpu_key); ret = fixup_low_keys(trans, root, path, &disk_key, 1); } + btrfs_unlock_up_safe(path, 1); + btrfs_mark_buffer_dirty(leaf); if (btrfs_leaf_free_space(root, leaf) < 0) { btrfs_print_leaf(root, leaf); BUG(); } + return ret; +} + +/* + * Given a key and some data, insert items into the tree. + * This does all the path init required, making room in the tree if needed. + */ +int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 *data_size, + int nr) +{ + struct extent_buffer *leaf; + int ret = 0; + int slot; + int i; + u32 total_size = 0; + u32 total_data = 0; + + for (i = 0; i < nr; i++) + total_data += data_size[i]; + + total_size = total_data + (nr * sizeof(struct btrfs_item)); + ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); + if (ret == 0) + return -EEXIST; + if (ret < 0) + goto out; + + leaf = path->nodes[0]; + slot = path->slots[0]; + BUG_ON(slot < 0); + + ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, + total_data, total_size, nr); + out: - btrfs_unlock_up_safe(path, 1); return ret; } @@ -3749,7 +3831,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, } /* delete the leaf if it is mostly empty */ - if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) { + if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 && + !trans->transaction->delayed_refs.flushing) { /* push_leaf_left fixes the path. * make sure the path still points to our leaf * for possible call to del_ptr below @@ -3757,6 +3840,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, slot = path->slots[1]; extent_buffer_get(leaf); + btrfs_set_path_blocking(path); wret = push_leaf_left(trans, root, path, 1, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; @@ -4042,28 +4126,44 @@ next: int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) { int slot; - int level = 1; + int level; struct extent_buffer *c; - struct extent_buffer *next = NULL; + struct extent_buffer *next; struct btrfs_key key; u32 nritems; int ret; + int old_spinning = path->leave_spinning; + int force_blocking = 0; nritems = btrfs_header_nritems(path->nodes[0]); if (nritems == 0) return 1; - btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); + /* + * we take the blocks in an order that upsets lockdep. Using + * blocking mode is the only way around it. + */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + force_blocking = 1; +#endif + btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); +again: + level = 1; + next = NULL; btrfs_release_path(root, path); + path->keep_locks = 1; + + if (!force_blocking) + path->leave_spinning = 1; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); path->keep_locks = 0; if (ret < 0) return ret; - btrfs_set_path_blocking(path); nritems = btrfs_header_nritems(path->nodes[0]); /* * by releasing the path above we dropped all our locks. A balance @@ -4073,19 +4173,24 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) */ if (nritems > 0 && path->slots[0] < nritems - 1) { path->slots[0]++; + ret = 0; goto done; } while (level < BTRFS_MAX_LEVEL) { - if (!path->nodes[level]) - return 1; + if (!path->nodes[level]) { + ret = 1; + goto done; + } slot = path->slots[level] + 1; c = path->nodes[level]; if (slot >= btrfs_header_nritems(c)) { level++; - if (level == BTRFS_MAX_LEVEL) - return 1; + if (level == BTRFS_MAX_LEVEL) { + ret = 1; + goto done; + } continue; } @@ -4094,16 +4199,22 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) free_extent_buffer(next); } - /* the path was set to blocking above */ - if (level == 1 && (path->locks[1] || path->skip_locking) && - path->reada) - reada_for_search(root, path, level, slot, 0); + next = c; + ret = read_block_for_search(NULL, root, path, &next, level, + slot, &key); + if (ret == -EAGAIN) + goto again; - next = read_node_slot(root, c, slot); if (!path->skip_locking) { - WARN_ON(!btrfs_tree_locked(c)); - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); + ret = btrfs_try_spin_lock(next); + if (!ret) { + btrfs_set_path_blocking(path); + btrfs_tree_lock(next); + if (!force_blocking) + btrfs_clear_path_blocking(path, next); + } + if (force_blocking) + btrfs_set_lock_blocking(next); } break; } @@ -4113,27 +4224,42 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) c = path->nodes[level]; if (path->locks[level]) btrfs_tree_unlock(c); + free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; if (!path->skip_locking) path->locks[level] = 1; + if (!level) break; - btrfs_set_path_blocking(path); - if (level == 1 && path->locks[1] && path->reada) - reada_for_search(root, path, level, slot, 0); - next = read_node_slot(root, next, 0); + ret = read_block_for_search(NULL, root, path, &next, level, + 0, &key); + if (ret == -EAGAIN) + goto again; + if (!path->skip_locking) { - WARN_ON(!btrfs_tree_locked(path->nodes[level])); - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); + btrfs_assert_tree_locked(path->nodes[level]); + ret = btrfs_try_spin_lock(next); + if (!ret) { + btrfs_set_path_blocking(path); + btrfs_tree_lock(next); + if (!force_blocking) + btrfs_clear_path_blocking(path, next); + } + if (force_blocking) + btrfs_set_lock_blocking(next); } } + ret = 0; done: unlock_up(path, 0, 1); - return 0; + path->leave_spinning = old_spinning; + if (!old_spinning) + btrfs_set_path_blocking(path); + + return ret; } /* |