diff options
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 289 |
1 files changed, 144 insertions, 145 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ed504607d8ec..64346721173f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -274,8 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, new_root_objectid); - write_extent_buffer(cow, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); WARN_ON(btrfs_header_generation(buf) > trans->transid); @@ -484,8 +483,27 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) struct rb_node **new; struct rb_node *parent = NULL; struct tree_mod_elem *cur; + int ret = 0; + + BUG_ON(!tm); + + tree_mod_log_write_lock(fs_info); + if (list_empty(&fs_info->tree_mod_seq_list)) { + tree_mod_log_write_unlock(fs_info); + /* + * Ok we no longer care about logging modifications, free up tm + * and return 0. Any callers shouldn't be using tm after + * calling tree_mod_log_insert, but if they do we can just + * change this to return a special error code to let the callers + * do their own thing. + */ + kfree(tm); + return 0; + } - BUG_ON(!tm || !tm->seq); + spin_lock(&fs_info->tree_mod_seq_lock); + tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); + spin_unlock(&fs_info->tree_mod_seq_lock); tm_root = &fs_info->tree_mod_log; new = &tm_root->rb_node; @@ -501,14 +519,17 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) else if (cur->seq > tm->seq) new = &((*new)->rb_right); else { + ret = -EEXIST; kfree(tm); - return -EEXIST; + goto out; } } rb_link_node(&tm->node, parent, new); rb_insert_color(&tm->node, tm_root); - return 0; +out: + tree_mod_log_write_unlock(fs_info); + return ret; } /* @@ -524,57 +545,19 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, return 1; if (eb && btrfs_header_level(eb) == 0) return 1; - - tree_mod_log_write_lock(fs_info); - if (list_empty(&fs_info->tree_mod_seq_list)) { - /* - * someone emptied the list while we were waiting for the lock. - * we must not add to the list when no blocker exists. - */ - tree_mod_log_write_unlock(fs_info); - return 1; - } - return 0; } -/* - * This allocates memory and gets a tree modification sequence number. - * - * Returns <0 on error. - * Returns >0 (the added sequence number) on success. - */ -static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, - struct tree_mod_elem **tm_ret) -{ - struct tree_mod_elem *tm; - - /* - * once we switch from spin locks to something different, we should - * honor the flags parameter here. - */ - tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC); - if (!tm) - return -ENOMEM; - - spin_lock(&fs_info->tree_mod_seq_lock); - tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info); - spin_unlock(&fs_info->tree_mod_seq_lock); - - return tm->seq; -} - static inline int __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, int slot, enum mod_log_op op, gfp_t flags) { - int ret; struct tree_mod_elem *tm; - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - return ret; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = eb->start >> PAGE_CACHE_SHIFT; if (op != MOD_LOG_KEY_ADD) { @@ -589,34 +572,14 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, } static noinline int -tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op, gfp_t flags) +tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int slot, + enum mod_log_op op, gfp_t flags) { - int ret; - if (tree_mod_dont_log(fs_info, eb)) return 0; - ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); - - tree_mod_log_write_unlock(fs_info); - return ret; -} - -static noinline int -tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - int slot, enum mod_log_op op) -{ - return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); -} - -static noinline int -tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot, - enum mod_log_op op) -{ - return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS); + return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); } static noinline int @@ -637,14 +600,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, * buffer, i.e. dst_slot < src_slot. */ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, - MOD_LOG_KEY_REMOVE_WHILE_MOVING); + ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot, + MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS); BUG_ON(ret < 0); } - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - goto out; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = eb->start >> PAGE_CACHE_SHIFT; tm->slot = src_slot; @@ -652,10 +615,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, tm->move.nr_items = nr_items; tm->op = MOD_LOG_MOVE_KEYS; - ret = __tree_mod_log_insert(fs_info, tm); -out: - tree_mod_log_write_unlock(fs_info); - return ret; + return __tree_mod_log_insert(fs_info, tm); } static inline void @@ -670,8 +630,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) nritems = btrfs_header_nritems(eb); for (i = nritems - 1; i >= 0; i--) { - ret = tree_mod_log_insert_key_locked(fs_info, eb, i, - MOD_LOG_KEY_REMOVE_WHILE_FREEING); + ret = __tree_mod_log_insert_key(fs_info, eb, i, + MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS); BUG_ON(ret < 0); } } @@ -683,7 +643,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, int log_removal) { struct tree_mod_elem *tm; - int ret; if (tree_mod_dont_log(fs_info, NULL)) return 0; @@ -691,9 +650,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (log_removal) __tree_mod_log_free_eb(fs_info, old_root); - ret = tree_mod_alloc(fs_info, flags, &tm); - if (ret < 0) - goto out; + tm = kzalloc(sizeof(*tm), flags); + if (!tm) + return -ENOMEM; tm->index = new_root->start >> PAGE_CACHE_SHIFT; tm->old_root.logical = old_root->start; @@ -701,10 +660,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, tm->generation = btrfs_header_generation(old_root); tm->op = MOD_LOG_ROOT_REPLACE; - ret = __tree_mod_log_insert(fs_info, tm); -out: - tree_mod_log_write_unlock(fs_info); - return ret; + return __tree_mod_log_insert(fs_info, tm); } static struct tree_mod_elem * @@ -784,23 +740,20 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, if (tree_mod_dont_log(fs_info, NULL)) return; - if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) { - tree_mod_log_write_unlock(fs_info); + if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) return; - } for (i = 0; i < nr_items; i++) { - ret = tree_mod_log_insert_key_locked(fs_info, src, + ret = __tree_mod_log_insert_key(fs_info, src, i + src_offset, - MOD_LOG_KEY_REMOVE); + MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); - ret = tree_mod_log_insert_key_locked(fs_info, dst, + ret = __tree_mod_log_insert_key(fs_info, dst, i + dst_offset, - MOD_LOG_KEY_ADD); + MOD_LOG_KEY_ADD, + GFP_NOFS); BUG_ON(ret < 0); } - - tree_mod_log_write_unlock(fs_info); } static inline void @@ -819,9 +772,9 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, { int ret; - ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, - MOD_LOG_KEY_REPLACE, - atomic ? GFP_ATOMIC : GFP_NOFS); + ret = __tree_mod_log_insert_key(fs_info, eb, slot, + MOD_LOG_KEY_REPLACE, + atomic ? GFP_ATOMIC : GFP_NOFS); BUG_ON(ret < 0); } @@ -830,10 +783,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { if (tree_mod_dont_log(fs_info, eb)) return; - __tree_mod_log_free_eb(fs_info, eb); - - tree_mod_log_write_unlock(fs_info); } static noinline void @@ -1046,8 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, root->root_key.objectid); - write_extent_buffer(cow, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(cow), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); @@ -1083,7 +1032,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(trans->transid != btrfs_header_generation(parent)); tree_mod_log_insert_key(root->fs_info, parent, parent_slot, - MOD_LOG_KEY_REPLACE); + MOD_LOG_KEY_REPLACE, GFP_NOFS); btrfs_set_node_blockptr(parent, parent_slot, cow->start); btrfs_set_node_ptr_generation(parent, parent_slot, @@ -1116,7 +1065,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, int looped = 0; if (!time_seq) - return 0; + return NULL; /* * the very last operation that's logged for a root is the replacement @@ -1127,7 +1076,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, tm = tree_mod_log_search_oldest(fs_info, root_logical, time_seq); if (!looped && !tm) - return 0; + return NULL; /* * if there are no tree operation for the oldest root, we simply * return it. this should only happen if that (old) root is at @@ -1240,8 +1189,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, * is freed (its refcount is decremented). */ static struct extent_buffer * -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 time_seq) +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct extent_buffer *eb, u64 time_seq) { struct extent_buffer *eb_rewin; struct tree_mod_elem *tm; @@ -1256,11 +1205,18 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!tm) return eb; + btrfs_set_path_blocking(path); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb->start, fs_info->tree_root->nodesize); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock_blocking(eb); + free_extent_buffer(eb); + return NULL; + } btrfs_set_header_bytenr(eb_rewin, eb->start); btrfs_set_header_backref_rev(eb_rewin, btrfs_header_backref_rev(eb)); @@ -1268,10 +1224,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); } else { eb_rewin = btrfs_clone_extent_buffer(eb); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock_blocking(eb); + free_extent_buffer(eb); + return NULL; + } } - btrfs_tree_read_unlock(eb); + btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); extent_buffer_get(eb_rewin); @@ -1335,8 +1296,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) free_extent_buffer(eb_root); eb = alloc_dummy_extent_buffer(logical, root->nodesize); } else { + btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); eb = btrfs_clone_extent_buffer(eb_root); - btrfs_tree_read_unlock(eb_root); + btrfs_tree_read_unlock_blocking(eb_root); free_extent_buffer(eb_root); } @@ -1419,14 +1381,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, if (trans->transaction != root->fs_info->running_transaction) WARN(1, KERN_CRIT "trans %llu running %llu\n", - (unsigned long long)trans->transid, - (unsigned long long) + trans->transid, root->fs_info->running_transaction->transid); if (trans->transid != root->fs_info->generation) WARN(1, KERN_CRIT "trans %llu running %llu\n", - (unsigned long long)trans->transid, - (unsigned long long)root->fs_info->generation); + trans->transid, root->fs_info->generation); if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; @@ -2466,6 +2426,40 @@ done: return ret; } +static void key_search_validate(struct extent_buffer *b, + struct btrfs_key *key, + int level) +{ +#ifdef CONFIG_BTRFS_ASSERT + struct btrfs_disk_key disk_key; + + btrfs_cpu_key_to_disk(&disk_key, key); + + if (level == 0) + ASSERT(!memcmp_extent_buffer(b, &disk_key, + offsetof(struct btrfs_leaf, items[0].key), + sizeof(disk_key))); + else + ASSERT(!memcmp_extent_buffer(b, &disk_key, + offsetof(struct btrfs_node, ptrs[0].key), + sizeof(disk_key))); +#endif +} + +static int key_search(struct extent_buffer *b, struct btrfs_key *key, + int level, int *prev_cmp, int *slot) +{ + if (*prev_cmp != 0) { + *prev_cmp = bin_search(b, key, level, slot); + return *prev_cmp; + } + + key_search_validate(b, key, level); + *slot = 0; + + return 0; +} + /* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf @@ -2494,6 +2488,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int write_lock_level = 0; u8 lowest_level = 0; int min_write_lock_level; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); @@ -2524,6 +2519,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root min_write_lock_level = write_lock_level; again: + prev_cmp = -1; /* * we try very hard to do read locks on the root */ @@ -2624,7 +2620,7 @@ cow_done: if (!cow) btrfs_unlock_up_safe(p, level + 1); - ret = bin_search(b, key, level, &slot); + ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { int dec = 0; @@ -2759,6 +2755,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, int level; int lowest_unlock = 1; u8 lowest_level = 0; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(p->nodes[0] != NULL); @@ -2769,6 +2766,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, } again: + prev_cmp = -1; b = get_old_root(root, time_seq); level = btrfs_header_level(b); p->locks[level] = BTRFS_READ_LOCK; @@ -2786,7 +2784,7 @@ again: */ btrfs_unlock_up_safe(p, level + 1); - ret = bin_search(b, key, level, &slot); + ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { int dec = 0; @@ -2820,7 +2818,11 @@ again: btrfs_clear_path_blocking(p, b, BTRFS_READ_LOCK); } - b = tree_mod_log_rewind(root->fs_info, b, time_seq); + b = tree_mod_log_rewind(root->fs_info, p, b, time_seq); + if (!b) { + ret = -ENOMEM; + goto done; + } p->locks[level] = BTRFS_READ_LOCK; p->nodes[level] = b; } else { @@ -3143,13 +3145,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(c, root->root_key.objectid); - write_extent_buffer(c, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(c), + write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c), BTRFS_FSID_SIZE); write_extent_buffer(c, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(c), - BTRFS_UUID_SIZE); + btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE); btrfs_set_node_key(c, &lower_key, 0); btrfs_set_node_blockptr(c, 0, lower->start); @@ -3208,7 +3208,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, } if (level) { ret = tree_mod_log_insert_key(root->fs_info, lower, slot, - MOD_LOG_KEY_ADD); + MOD_LOG_KEY_ADD, GFP_NOFS); BUG_ON(ret < 0); } btrfs_set_node_key(lower, key, slot); @@ -3284,10 +3284,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(split), - BTRFS_FSID_SIZE); + btrfs_header_fsid(split), BTRFS_FSID_SIZE); write_extent_buffer(split, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(split), + btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); @@ -4040,11 +4039,10 @@ again: btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); write_extent_buffer(right, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(right), - BTRFS_FSID_SIZE); + btrfs_header_fsid(right), BTRFS_FSID_SIZE); write_extent_buffer(right, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(right), + btrfs_header_chunk_tree_uuid(right), BTRFS_UUID_SIZE); if (split == 0) { @@ -4642,7 +4640,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path, (nritems - slot - 1)); } else if (level) { ret = tree_mod_log_insert_key(root->fs_info, parent, slot, - MOD_LOG_KEY_REMOVE); + MOD_LOG_KEY_REMOVE, GFP_NOFS); BUG_ON(ret < 0); } @@ -4814,7 +4812,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * This may release the path, and so you may lose any locks held at the * time you call it. */ -int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) +static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { struct btrfs_key key; struct btrfs_disk_key found_key; @@ -5329,19 +5327,20 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; advance_right = ADVANCE; } else { + enum btrfs_compare_tree_result cmp; + WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); ret = tree_compare_item(left_root, left_path, right_path, tmp_buf); - if (ret) { - WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); - ret = changed_cb(left_root, right_root, - left_path, right_path, - &left_key, - BTRFS_COMPARE_TREE_CHANGED, - ctx); - if (ret < 0) - goto out; - } + if (ret) + cmp = BTRFS_COMPARE_TREE_CHANGED; + else + cmp = BTRFS_COMPARE_TREE_SAME; + ret = changed_cb(left_root, right_root, + left_path, right_path, + &left_key, cmp, ctx); + if (ret < 0) + goto out; advance_left = ADVANCE; advance_right = ADVANCE; } |