Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason: "This has our collection of bug fixes. I missed the last rc because I thought our patches were making NFS crash during my xfs test runs. Turns out it was an NFS client bug fixed by someone else while I tried to bisect it. All of these fixes are small, but some are fairly high impact. The biggest are fixes for our mount -o remount handling, a deadlock due to GFP_KERNEL allocations in readdir, and a RAID10 error handling bug. This was tested against both 3.3 and Linus' master as of this morning." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (26 commits) Btrfs: reduce lock contention during extent insertion Btrfs: avoid deadlocks from GFP_KERNEL allocations during btrfs_real_readdir Btrfs: Fix space checking during fs resize Btrfs: fix block_rsv and space_info lock ordering Btrfs: Prevent root_list corruption Btrfs: fix repair code for RAID10 Btrfs: do not start delalloc inodes during sync Btrfs: fix that check_int_data mount option was ignored Btrfs: don't count CRC or header errors twice while scrubbing Btrfs: fix btrfs_ioctl_dev_info() crash on missing device btrfs: don't return EINTR Btrfs: double unlock bug in error handling Btrfs: always store the mirror we read the eb from fs/btrfs/volumes.c: add missing free_fs_devices btrfs: fix early abort in 'remount' Btrfs: fix max chunk size check in chunk allocator Btrfs: add missing read locks in backref.c Btrfs: don't call free_extent_buffer twice in iterate_irefs Btrfs: Make free_ipath() deal gracefully with NULL pointers Btrfs: avoid possible use-after-free in clear_extent_bit() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-04-28 18:30:07 +0200
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-04-28 18:30:07 +0200
commit: f7b006931751f029620ad2f8310ac7a1484fbdb4 (patch)
tree: 71120f4c4c51752902317fbf853e3b0316c2adb0 /fs
parent: Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff)
parent: Btrfs: reduce lock contention during extent insertion (diff)
download: linux-f7b006931751f029620ad2f8310ac7a1484fbdb4.tar.xz
linux-f7b006931751f029620ad2f8310ac7a1484fbdb4.zip
15 files changed, 148 insertions, 139 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f4e90748940a..bcec06750232 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -22,6 +22,7 @@
 #include "ulist.h"
 #include "transaction.h"
 #include "delayed-ref.h"
+#include "locking.h"
 
 /*
  * this structure records all encountered refs on the way up to the root
@@ -893,18 +894,22 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 	s64 bytes_left = size - 1;
 	struct extent_buffer *eb = eb_in;
 	struct btrfs_key found_key;
+	int leave_spinning = path->leave_spinning;
 
 	if (bytes_left >= 0)
 		dest[bytes_left] = '\0';
 
+	path->leave_spinning = 1;
 	while (1) {
 		len = btrfs_inode_ref_name_len(eb, iref);
 		bytes_left -= len;
 		if (bytes_left >= 0)
 			read_extent_buffer(eb, dest + bytes_left,
 						(unsigned long)(iref + 1), len);
-		if (eb != eb_in)
+		if (eb != eb_in) {
+			btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
+		}
 		ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
 		if (ret > 0)
 			ret = -ENOENT;
@@ -919,8 +924,11 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 		slot = path->slots[0];
 		eb = path->nodes[0];
 		/* make sure we can use eb after releasing the path */
-		if (eb != eb_in)
+		if (eb != eb_in) {
 			atomic_inc(&eb->refs);
+			btrfs_tree_read_lock(eb);
+			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+		}
 		btrfs_release_path(path);
 
 		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
@@ -931,6 +939,7 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 	}
 
 	btrfs_release_path(path);
+	path->leave_spinning = leave_spinning;
 
 	if (ret)
 		return ERR_PTR(ret);
@@ -1247,7 +1256,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
 				struct btrfs_path *path,
 				iterate_irefs_t *iterate, void *ctx)
 {
-	int ret;
+	int ret = 0;
 	int slot;
 	u32 cur;
 	u32 len;
@@ -1259,7 +1268,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
 	struct btrfs_inode_ref *iref;
 	struct btrfs_key found_key;
 
-	while (1) {
+	while (!ret) {
+		path->leave_spinning = 1;
 		ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
 					&found_key);
 		if (ret < 0)
@@ -1275,6 +1285,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
 		eb = path->nodes[0];
 		/* make sure we can use eb after releasing the path */
 		atomic_inc(&eb->refs);
+		btrfs_tree_read_lock(eb);
+		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 		btrfs_release_path(path);
 
 		item = btrfs_item_nr(eb, slot);
@@ -1288,13 +1300,12 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
 				 (unsigned long long)found_key.objectid,
 				 (unsigned long long)fs_root->objectid);
 			ret = iterate(parent, iref, eb, ctx);
-			if (ret) {
-				free_extent_buffer(eb);
+			if (ret)
 				break;
-			}
 			len = sizeof(*iref) + name_len;
 			iref = (struct btrfs_inode_ref *)((char *)iref + len);
 		}
+		btrfs_tree_read_unlock_blocking(eb);
 		free_extent_buffer(eb);
 	}
 
@@ -1414,6 +1425,8 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
 
 void free_ipath(struct inode_fs_paths *ipath)
 {
+	if (!ipath)
+		return;
 	kfree(ipath->fspath);
 	kfree(ipath);
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3f65a812e282..8fd72331d600 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1078,7 +1078,7 @@ struct btrfs_fs_info {
 	 * is required instead of the faster short fsync log commits
 	 */
 	u64 last_trans_log_full_commit;
-	unsigned long mount_opt:21;
+	unsigned long mount_opt;
 	unsigned long compress_type:4;
 	u64 max_inline;
 	u64 alloc_start;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 20196f411206..d0c969beaad4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -383,17 +383,16 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
 			break;
 
-		if (!failed_mirror) {
-			failed = 1;
-			printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror);
-			failed_mirror = eb->failed_mirror;
-		}
-
 		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
 					      eb->start, eb->len);
 		if (num_copies == 1)
 			break;
 
+		if (!failed_mirror) {
+			failed = 1;
+			failed_mirror = eb->read_mirror;
+		}
+
 		mirror_num++;
 		if (mirror_num == failed_mirror)
 			mirror_num++;
@@ -564,7 +563,7 @@ struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
 }
 
 static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
-			       struct extent_state *state)
+			       struct extent_state *state, int mirror)
 {
 	struct extent_io_tree *tree;
 	u64 found_start;
@@ -589,6 +588,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	if (!reads_done)
 		goto err;
 
+	eb->read_mirror = mirror;
 	if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
 		ret = -EIO;
 		goto err;
@@ -652,7 +652,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
 
 	eb = (struct extent_buffer *)page->private;
 	set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
-	eb->failed_mirror = failed_mirror;
+	eb->read_mirror = failed_mirror;
 	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
 		btree_readahead_hook(root, eb, eb->start, -EIO);
 	return -EIO;	/* we fixed nothing */
@@ -2254,9 +2254,9 @@ int open_ctree(struct super_block *sb,
 		goto fail_sb_buffer;
 	}
 
-	if (sectorsize < PAGE_SIZE) {
-		printk(KERN_WARNING "btrfs: Incompatible sector size "
-		       "found on %s\n", sb->s_id);
+	if (sectorsize != PAGE_SIZE) {
+		printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
+		       "found on %s\n", (unsigned long)sectorsize, sb->s_id);
 		goto fail_sb_buffer;
 	}
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2b35f8d14bb9..6fc2e6f5aab8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2301,6 +2301,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 
 				if (ret) {
 					printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
+					spin_lock(&delayed_refs->lock);
 					return ret;
 				}
 
@@ -2331,6 +2332,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 
 		if (ret) {
 			printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
+			spin_lock(&delayed_refs->lock);
 			return ret;
 		}
 
@@ -3769,13 +3771,10 @@ again:
 		 */
 		if (current->journal_info)
 			return -EAGAIN;
-		ret = wait_event_interruptible(space_info->wait,
-					       !space_info->flush);
-		/* Must have been interrupted, return */
-		if (ret) {
-			printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__);
+		ret = wait_event_killable(space_info->wait, !space_info->flush);
+		/* Must have been killed, return */
+		if (ret)
 			return -EINTR;
-		}
 
 		spin_lock(&space_info->lock);
 	}
@@ -4215,8 +4214,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 
 	num_bytes = calc_global_metadata_size(fs_info);
 
-	spin_lock(&block_rsv->lock);
 	spin_lock(&sinfo->lock);
+	spin_lock(&block_rsv->lock);
 
 	block_rsv->size = num_bytes;
 
@@ -4242,8 +4241,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 		block_rsv->full = 1;
 	}
 
-	spin_unlock(&sinfo->lock);
 	spin_unlock(&block_rsv->lock);
+	spin_unlock(&sinfo->lock);
 }
 
 static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cd4b5e400221..198c2ba2fa40 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -402,20 +402,28 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
 	return 0;
 }
 
+static struct extent_state *next_state(struct extent_state *state)
+{
+	struct rb_node *next = rb_next(&state->rb_node);
+	if (next)
+		return rb_entry(next, struct extent_state, rb_node);
+	else
+		return NULL;
+}
+
 /*
  * utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1), or
- * forcibly remove the state from the tree (delete == 1).
+ * it will optionally wake up any one waiting on this state (wake == 1)
  *
  * If no bits are set on the state struct after clearing things, the
  * struct is freed and removed from the tree
  */
-static int clear_state_bit(struct extent_io_tree *tree,
-			    struct extent_state *state,
-			    int *bits, int wake)
+static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
+					    struct extent_state *state,
+					    int *bits, int wake)
 {
+	struct extent_state *next;
 	int bits_to_clear = *bits & ~EXTENT_CTLBITS;
-	int ret = state->state & bits_to_clear;
 
 	if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
 		u64 range = state->end - state->start + 1;
@@ -427,6 +435,7 @@ static int clear_state_bit(struct extent_io_tree *tree,
 	if (wake)
 		wake_up(&state->wq);
 	if (state->state == 0) {
+		next = next_state(state);
 		if (state->tree) {
 			rb_erase(&state->rb_node, &tree->state);
 			state->tree = NULL;
@@ -436,8 +445,9 @@ static int clear_state_bit(struct extent_io_tree *tree,
 		}
 	} else {
 		merge_state(tree, state);
+		next = next_state(state);
 	}
-	return ret;
+	return next;
 }
 
 static struct extent_state *
@@ -476,7 +486,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	struct extent_state *state;
 	struct extent_state *cached;
 	struct extent_state *prealloc = NULL;
-	struct rb_node *next_node;
 	struct rb_node *node;
 	u64 last_end;
 	int err;
@@ -528,14 +537,11 @@ hit_next:
 	WARN_ON(state->end < start);
 	last_end = state->end;
 
-	if (state->end < end && !need_resched())
-		next_node = rb_next(&state->rb_node);
-	else
-		next_node = NULL;
-
 	/* the state doesn't have the wanted bits, go ahead */
-	if (!(state->state & bits))
+	if (!(state->state & bits)) {
+		state = next_state(state);
 		goto next;
+	}
 
 	/*
 	 *     | ---- desired range ---- |
@@ -593,16 +599,13 @@ hit_next:
 		goto out;
 	}
 
-	clear_state_bit(tree, state, &bits, wake);
+	state = clear_state_bit(tree, state, &bits, wake);
 next:
 	if (last_end == (u64)-1)
 		goto out;
 	start = last_end + 1;
-	if (start <= end && next_node) {
-		state = rb_entry(next_node, struct extent_state,
-				 rb_node);
+	if (start <= end && state && !need_resched())
 		goto hit_next;
-	}
 	goto search_again;
 
 out:
@@ -2301,7 +2304,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 	u64 start;
 	u64 end;
 	int whole_page;
-	int failed_mirror;
+	int mirror;
 	int ret;
 
 	if (err)
@@ -2340,20 +2343,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		}
 		spin_unlock(&tree->lock);
 
+		mirror = (int)(unsigned long)bio->bi_bdev;
 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
 			ret = tree->ops->readpage_end_io_hook(page, start, end,
-							      state);
+							      state, mirror);
 			if (ret)
 				uptodate = 0;
 			else
 				clean_io_failure(start, page);
 		}
 
-		if (!uptodate)
-			failed_mirror = (int)(unsigned long)bio->bi_bdev;
-
 		if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
-			ret = tree->ops->readpage_io_failed_hook(page, failed_mirror);
+			ret = tree->ops->readpage_io_failed_hook(page, mirror);
 			if (!ret && !err &&
 			    test_bit(BIO_UPTODATE, &bio->bi_flags))
 				uptodate = 1;
@@ -2368,8 +2369,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 			 * can't handle the error it will return -EIO and we
 			 * remain responsible for that page.
 			 */
-			ret = bio_readpage_error(bio, page, start, end,
-							failed_mirror, NULL);
+			ret = bio_readpage_error(bio, page, start, end, mirror, NULL);
 			if (ret == 0) {
 				uptodate =
 					test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -4462,7 +4462,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
 	}
 
 	clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
-	eb->failed_mirror = 0;
+	eb->read_mirror = 0;
 	atomic_set(&eb->io_pages, num_reads);
 	for (i = start_i; i < num_pages; i++) {
 		page = extent_buffer_page(eb, i);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index faf10eb57f75..b516c3b8dec6 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -79,7 +79,7 @@ struct extent_io_ops {
 					u64 start, u64 end,
 				       struct extent_state *state);
 	int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
-				    struct extent_state *state);
+				    struct extent_state *state, int mirror);
 	int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
 				      struct extent_state *state, int uptodate);
 	void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
@@ -135,7 +135,7 @@ struct extent_buffer {
 	spinlock_t refs_lock;
 	atomic_t refs;
 	atomic_t io_pages;
-	int failed_mirror;
+	int read_mirror;
 	struct list_head leak_list;
 	struct rcu_head rcu_head;
 	pid_t lock_owner;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d83260d7498f..53bf2d764bbc 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -567,6 +567,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 	int extent_type;
 	int recow;
 	int ret;
+	int modify_tree = -1;
 
 	if (drop_cache)
 		btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -575,10 +576,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 	if (!path)
 		return -ENOMEM;
 
+	if (start >= BTRFS_I(inode)->disk_i_size)
+		modify_tree = 0;
+
 	while (1) {
 		recow = 0;
 		ret = btrfs_lookup_file_extent(trans, root, path, ino,
-					       search_start, -1);
+					       search_start, modify_tree);
 		if (ret < 0)
 			break;
 		if (ret > 0 && path->slots[0] > 0 && search_start == start) {
@@ -634,7 +638,8 @@ next_slot:
 		}
 
 		search_start = max(key.offset, start);
-		if (recow) {
+		if (recow || !modify_tree) {
+			modify_tree = -1;
 			btrfs_release_path(path);
 			continue;
 		}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 115bc05e42b0..61b16c641ce0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1947,7 +1947,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
  * extent_io.c will try to find good copies for us.
  */
 static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
-			       struct extent_state *state)
+			       struct extent_state *state, int mirror)
 {
 	size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
 	struct inode *inode = page->mapping->host;
@@ -4069,7 +4069,7 @@ static struct inode *new_simple_dir(struct super_block *s,
 	BTRFS_I(inode)->dummy_inode = 1;
 
 	inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
-	inode->i_op = &simple_dir_inode_operations;
+	inode->i_op = &btrfs_dir_ro_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 	inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -4140,14 +4140,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 static int btrfs_dentry_delete(const struct dentry *dentry)
 {
 	struct btrfs_root *root;
+	struct inode *inode = dentry->d_inode;
 
-	if (!dentry->d_inode && !IS_ROOT(dentry))
-		dentry = dentry->d_parent;
+	if (!inode && !IS_ROOT(dentry))
+		inode = dentry->d_parent->d_inode;
 
-	if (dentry->d_inode) {
-		root = BTRFS_I(dentry->d_inode)->root;
+	if (inode) {
+		root = BTRFS_I(inode)->root;
 		if (btrfs_root_refs(&root->root_item) == 0)
 			return 1;
+
+		if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+			return 1;
 	}
 	return 0;
 }
@@ -4188,7 +4192,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 	struct btrfs_path *path;
 	struct list_head ins_list;
 	struct list_head del_list;
-	struct qstr q;
 	int ret;
 	struct extent_buffer *leaf;
 	int slot;
@@ -4279,7 +4282,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 
 		while (di_cur < di_total) {
 			struct btrfs_key location;
-			struct dentry *tmp;
 
 			if (verify_dir_item(root, leaf, di))
 				break;
@@ -4300,35 +4302,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 			d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
 			btrfs_dir_item_key_to_cpu(leaf, di, &location);
 
-			q.name = name_ptr;
-			q.len = name_len;
-			q.hash = full_name_hash(q.name, q.len);
-			tmp = d_lookup(filp->f_dentry, &q);
-			if (!tmp) {
-				struct btrfs_key *newkey;
-
-				newkey = kzalloc(sizeof(struct btrfs_key),
-						 GFP_NOFS);
-				if (!newkey)
-					goto no_dentry;
-				tmp = d_alloc(filp->f_dentry, &q);
-				if (!tmp) {
-					kfree(newkey);
-					dput(tmp);
-					goto no_dentry;
-				}
-				memcpy(newkey, &location,
-				       sizeof(struct btrfs_key));
-				tmp->d_fsdata = newkey;
-				tmp->d_flags |= DCACHE_NEED_LOOKUP;
-				d_rehash(tmp);
-				dput(tmp);
-			} else {
-				dput(tmp);
-			}
-no_dentry:
+
 			/* is this a reference to our own snapshot? If so
-			 * skip it
+			 * skip it.
+			 *
+			 * In contrast to old kernels, we insert the snapshot's
+			 * dir item and dir index after it has been created, so
+			 * we won't find a reference to our own snapshot. We
+			 * still keep the following code for backward
+			 * compatibility.
 			 */
 			if (location.type == BTRFS_ROOT_ITEM_KEY &&
 			    location.objectid == root->root_key.objectid) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 18cc23d164a8..14f8e1faa46e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2262,7 +2262,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
 	di_args->bytes_used = dev->bytes_used;
 	di_args->total_bytes = dev->total_bytes;
 	memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
-	strncpy(di_args->path, dev->name, sizeof(di_args->path));
+	if (dev->name)
+		strncpy(di_args->path, dev->name, sizeof(di_args->path));
+	else
+		di_args->path[0] = '\0';
 
 out:
 	if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index dc5d33146fdb..ac5d01085884 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
 					  struct btrfs_bio *bbio)
 {
 	int ret;
-	int looped = 0;
 	struct reada_zone *zone;
 	struct btrfs_block_group_cache *cache = NULL;
 	u64 start;
 	u64 end;
 	int i;
 
-again:
 	zone = NULL;
 	spin_lock(&fs_info->reada_lock);
 	ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
@@ -274,9 +272,6 @@ again:
 		spin_unlock(&fs_info->reada_lock);
 	}
 
-	if (looped)
-		return NULL;
-
 	cache = btrfs_lookup_block_group(fs_info, logical);
 	if (!cache)
 		return NULL;
@@ -307,13 +302,15 @@ again:
 	ret = radix_tree_insert(&dev->reada_zones,
 				(unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
 				zone);
-	spin_unlock(&fs_info->reada_lock);
 
-	if (ret) {
+	if (ret == -EEXIST) {
 		kfree(zone);
-		looped = 1;
-		goto again;
+		ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
+					     logical >> PAGE_CACHE_SHIFT, 1);
+		if (ret == 1)
+			kref_get(&zone->refcnt);
 	}
+	spin_unlock(&fs_info->reada_lock);
 
 	return zone;
 }
@@ -323,26 +320,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
 					      struct btrfs_key *top, int level)
 {
 	int ret;
-	int looped = 0;
 	struct reada_extent *re = NULL;
+	struct reada_extent *re_exist = NULL;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	struct btrfs_bio *bbio = NULL;
 	struct btrfs_device *dev;
+	struct btrfs_device *prev_dev;
 	u32 blocksize;
 	u64 length;
 	int nzones = 0;
 	int i;
 	unsigned long index = logical >> PAGE_CACHE_SHIFT;
 
-again:
 	spin_lock(&fs_info->reada_lock);
 	re = radix_tree_lookup(&fs_info->reada_tree, index);
 	if (re)
 		kref_get(&re->refcnt);
 	spin_unlock(&fs_info->reada_lock);
 
-	if (re || looped)
+	if (re)
 		return re;
 
 	re = kzalloc(sizeof(*re), GFP_NOFS);
@@ -398,16 +395,31 @@ again:
 	/* insert extent in reada_tree + all per-device trees, all or nothing */
 	spin_lock(&fs_info->reada_lock);
 	ret = radix_tree_insert(&fs_info->reada_tree, index, re);
+	if (ret == -EEXIST) {
+		re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
+		BUG_ON(!re_exist);
+		kref_get(&re_exist->refcnt);
+		spin_unlock(&fs_info->reada_lock);
+		goto error;
+	}
 	if (ret) {
 		spin_unlock(&fs_info->reada_lock);
-		if (ret != -ENOMEM) {
-			/* someone inserted the extent in the meantime */
-			looped = 1;
-		}
 		goto error;
 	}
+	prev_dev = NULL;
 	for (i = 0; i < nzones; ++i) {
 		dev = bbio->stripes[i].dev;
+		if (dev == prev_dev) {
+			/*
+			 * in case of DUP, just add the first zone. As both
+			 * are on the same device, there's nothing to gain
+			 * from adding both.
+			 * Also, it wouldn't work, as the tree is per device
+			 * and adding would fail with EEXIST
+			 */
+			continue;
+		}
+		prev_dev = dev;
 		ret = radix_tree_insert(&dev->reada_extents, index, re);
 		if (ret) {
 			while (--i >= 0) {
@@ -450,9 +462,7 @@ error:
 	}
 	kfree(bbio);
 	kfree(re);
-	if (looped)
-		goto again;
-	return NULL;
+	return re_exist;
 }
 
 static void reada_kref_dummy(struct kref *kr)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 017281dbb2a7..646ee21bb035 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1279,7 +1279,9 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
 		if (rb_node)
 			backref_tree_panic(rb_node, -EEXIST, node->bytenr);
 	} else {
+		spin_lock(&root->fs_info->trans_lock);
 		list_del_init(&root->root_list);
+		spin_unlock(&root->fs_info->trans_lock);
 		kfree(node);
 	}
 	return 0;
@@ -3811,7 +3813,7 @@ restart:
 
 		ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
 		if (ret < 0) {
-			if (ret != -EAGAIN) {
+			if (ret != -ENOSPC) {
 				err = ret;
 				WARN_ON(1);
 				break;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bc015f77f3ea..4f76fc3f8e89 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1257,12 +1257,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)
 	if (memcmp(csum, on_disk_csum, sdev->csum_size))
 		fail = 1;
 
-	if (fail) {
-		spin_lock(&sdev->stat_lock);
-		++sdev->stat.csum_errors;
-		spin_unlock(&sdev->stat_lock);
-	}
-
 	return fail;
 }
 
@@ -1335,15 +1329,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
 	if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
 		++crc_fail;
 
-	if (crc_fail || fail) {
-		spin_lock(&sdev->stat_lock);
-		if (crc_fail)
-			++sdev->stat.csum_errors;
-		if (fail)
-			++sdev->stat.verify_errors;
-		spin_unlock(&sdev->stat_lock);
-	}
-
 	return fail || crc_fail;
 }
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8d5d380f7bdb..c5f8fca4195f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -815,7 +815,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 		return 0;
 	}
 
-	btrfs_start_delalloc_inodes(root, 0);
 	btrfs_wait_ordered_extents(root, 0, 0);
 
 	trans = btrfs_start_transaction(root, 0);
@@ -1148,13 +1147,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		if (ret)
 			goto restore;
 	} else {
-		if (fs_info->fs_devices->rw_devices == 0)
+		if (fs_info->fs_devices->rw_devices == 0) {
 			ret = -EACCES;
 			goto restore;
+		}
 
-		if (btrfs_super_log_root(fs_info->super_copy) != 0)
+		if (btrfs_super_log_root(fs_info->super_copy) != 0) {
 			ret = -EINVAL;
 			goto restore;
+		}
 
 		ret = btrfs_cleanup_fs_roots(fs_info);
 		if (ret)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 11b77a59db62..36422254ef67 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -73,8 +73,10 @@ loop:
 
 	cur_trans = root->fs_info->running_transaction;
 	if (cur_trans) {
-		if (cur_trans->aborted)
+		if (cur_trans->aborted) {
+			spin_unlock(&root->fs_info->trans_lock);
 			return cur_trans->aborted;
+		}
 		atomic_inc(&cur_trans->use_count);
 		atomic_inc(&cur_trans->num_writers);
 		cur_trans->num_joined++;
@@ -1400,6 +1402,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	ret = commit_fs_roots(trans, root);
 	if (ret) {
 		mutex_unlock(&root->fs_info->tree_log_mutex);
+		mutex_unlock(&root->fs_info->reloc_mutex);
 		goto cleanup_transaction;
 	}
 
@@ -1411,6 +1414,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	ret = commit_cowonly_roots(trans, root);
 	if (ret) {
 		mutex_unlock(&root->fs_info->tree_log_mutex);
+		mutex_unlock(&root->fs_info->reloc_mutex);
 		goto cleanup_transaction;
 	}
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 759d02486d7c..1411b99555a4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3324,12 +3324,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	stripe_size = devices_info[ndevs-1].max_avail;
 	num_stripes = ndevs * dev_stripes;
 
-	if (stripe_size * num_stripes > max_chunk_size * ncopies) {
+	if (stripe_size * ndevs > max_chunk_size * ncopies) {
 		stripe_size = max_chunk_size * ncopies;
-		do_div(stripe_size, num_stripes);
+		do_div(stripe_size, ndevs);
 	}
 
 	do_div(stripe_size, dev_stripes);
+
+	/* align to BTRFS_STRIPE_LEN */
 	do_div(stripe_size, BTRFS_STRIPE_LEN);
 	stripe_size *= BTRFS_STRIPE_LEN;
 
@@ -3805,10 +3807,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;
 		else {
+			int old_stripe_index = stripe_index;
 			stripe_index = find_live_mirror(map, stripe_index,
 					      map->sub_stripes, stripe_index +
 					      current->pid % map->sub_stripes);
-			mirror_num = stripe_index + 1;
+			mirror_num = stripe_index - old_stripe_index + 1;
 		}
 	} else {
 		/*
@@ -4350,8 +4353,10 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
 
 	ret = __btrfs_open_devices(fs_devices, FMODE_READ,
 				   root->fs_info->bdev_holder);
-	if (ret)
+	if (ret) {
+		free_fs_devices(fs_devices);
 		goto out;
+	}
 
 	if (!fs_devices->seeding) {
 		__btrfs_close_devices(fs_devices);
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-04-28 18:30:07 +0200
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-04-28 18:30:07 +0200
commit	f7b006931751f029620ad2f8310ac7a1484fbdb4 (patch)
tree	71120f4c4c51752902317fbf853e3b0316c2adb0 /fs
parent	Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff)
parent	Btrfs: reduce lock contention during extent insertion (diff)
download	linux-f7b006931751f029620ad2f8310ac7a1484fbdb4.tar.xz linux-f7b006931751f029620ad2f8310ac7a1484fbdb4.zip