summaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/acl.c4
-rw-r--r--fs/btrfs/async-thread.c4
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/disk-io.c7
-rw-r--r--fs/btrfs/disk-io.h1
-rw-r--r--fs/btrfs/extent-tree.c31
-rw-r--r--fs/btrfs/extent_io.c125
-rw-r--r--fs/btrfs/extent_io.h126
-rw-r--r--fs/btrfs/file.c26
-rw-r--r--fs/btrfs/free-space-cache.c12
-rw-r--r--fs/btrfs/inode.c162
-rw-r--r--fs/btrfs/ioctl.c16
-rw-r--r--fs/btrfs/scrub.c2
-rw-r--r--fs/btrfs/transaction.c18
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-defrag.c27
-rw-r--r--fs/btrfs/volumes.c27
-rw-r--r--fs/btrfs/volumes.h1
-rw-r--r--fs/btrfs/xattr.c2
19 files changed, 332 insertions, 264 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 9a0124a95851..dbbb8ed53a51 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -48,7 +48,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
size = __btrfs_getxattr(inode, name, "", 0);
if (size > 0) {
- value = kzalloc(size, GFP_NOFS);
+ value = kzalloc(size, GFP_KERNEL);
if (!value)
return ERR_PTR(-ENOMEM);
size = __btrfs_getxattr(inode, name, value, size);
@@ -102,7 +102,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
if (acl) {
size = posix_acl_xattr_size(acl->a_count);
- value = kmalloc(size, GFP_NOFS);
+ value = kmalloc(size, GFP_KERNEL);
if (!value) {
ret = -ENOMEM;
goto out;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 3e36e4adc4a3..88d9af3d4581 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -97,7 +97,7 @@ static struct __btrfs_workqueue *
__btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active,
int thresh)
{
- struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
+ struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
@@ -148,7 +148,7 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
int limit_active,
int thresh)
{
- struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
+ struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index cf87979a153e..d79ba0570c55 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3977,7 +3977,6 @@ void btrfs_extent_item_to_extent_map(struct inode *inode,
/* inode.c */
struct btrfs_delalloc_work {
struct inode *inode;
- int wait;
int delay_iput;
struct completion completion;
struct list_head list;
@@ -3985,7 +3984,7 @@ struct btrfs_delalloc_work {
};
struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
- int wait, int delay_iput);
+ int delay_iput);
void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 52e623f59848..823c1ce87e2e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -363,7 +363,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
}
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
- 0, &cached_state);
+ &cached_state);
if (extent_buffer_uptodate(eb) &&
btrfs_header_generation(eb) == parent_transid) {
ret = 0;
@@ -3940,11 +3940,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
return !ret;
}
-int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
-{
- return set_extent_buffer_uptodate(buf);
-}
-
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
struct btrfs_root *root;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index adeb31830b9c..7c52e29fdb0d 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -116,7 +116,6 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root)
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
-int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, char *result);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8abb344e3dcb..0617cb73669d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3698,11 +3698,21 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
return -ENOMEM;
/*
- * We don't need the lock here since we are protected by the transaction
- * commit. We want to do the cache_save_setup first and then run the
+ * Even though we are in the critical section of the transaction commit,
+ * we can still have concurrent tasks adding elements to this
+ * transaction's list of dirty block groups. These tasks correspond to
+ * endio free space workers started when writeback finishes for a
+ * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
+ * allocate new block groups as a result of COWing nodes of the root
+ * tree when updating the free space inode. The writeback for the space
+ * caches is triggered by an earlier call to
+ * btrfs_start_dirty_block_groups() and iterations of the following
+ * loop.
+ * Also we want to do the cache_save_setup first and then run the
* delayed refs to make sure we have the best chance at doing this all
* in one shot.
*/
+ spin_lock(&cur_trans->dirty_bgs_lock);
while (!list_empty(&cur_trans->dirty_bgs)) {
cache = list_first_entry(&cur_trans->dirty_bgs,
struct btrfs_block_group_cache,
@@ -3714,11 +3724,13 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
* finish and then do it all again
*/
if (!list_empty(&cache->io_list)) {
+ spin_unlock(&cur_trans->dirty_bgs_lock);
list_del_init(&cache->io_list);
btrfs_wait_cache_io(root, trans, cache,
&cache->io_ctl, path,
cache->key.objectid);
btrfs_put_block_group(cache);
+ spin_lock(&cur_trans->dirty_bgs_lock);
}
/*
@@ -3726,6 +3738,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
* on any pending IO
*/
list_del_init(&cache->dirty_list);
+ spin_unlock(&cur_trans->dirty_bgs_lock);
should_put = 1;
cache_save_setup(cache, trans, path);
@@ -3757,7 +3770,9 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
/* if its not on the io list, we need to put the block group */
if (should_put)
btrfs_put_block_group(cache);
+ spin_lock(&cur_trans->dirty_bgs_lock);
}
+ spin_unlock(&cur_trans->dirty_bgs_lock);
while (!list_empty(io)) {
cache = list_first_entry(io, struct btrfs_block_group_cache,
@@ -7865,7 +7880,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
btrfs_set_lock_blocking(buf);
- btrfs_set_buffer_uptodate(buf);
+ set_extent_buffer_uptodate(buf);
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
buf->log_index = root->log_transid % 2;
@@ -10521,11 +10536,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* until transaction commit to do the actual discard.
*/
if (trimming) {
- WARN_ON(!list_empty(&block_group->bg_list));
- spin_lock(&trans->transaction->deleted_bgs_lock);
+ spin_lock(&fs_info->unused_bgs_lock);
+ /*
+ * A concurrent scrub might have added us to the list
+ * fs_info->unused_bgs, so use a list_move operation
+ * to add the block group to the deleted_bgs list.
+ */
list_move(&block_group->bg_list,
&trans->transaction->deleted_bgs);
- spin_unlock(&trans->transaction->deleted_bgs_lock);
+ spin_unlock(&fs_info->unused_bgs_lock);
btrfs_get_block_group(block_group);
}
end_trans:
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2b3f26326565..a2356e261531 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1285,20 +1285,6 @@ search_again:
}
/* wrappers around set/clear extent bit */
-int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
- NULL, mask);
-}
-
-int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, gfp_t mask)
-{
- return set_extent_bit(tree, start, end, bits, NULL,
- NULL, mask);
-}
-
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask,
struct extent_changeset *changeset)
@@ -1323,17 +1309,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
cached, mask, NULL);
}
-int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, gfp_t mask)
-{
- int wake = 0;
-
- if (bits & EXTENT_LOCKED)
- wake = 1;
-
- return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
-}
-
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask,
struct extent_changeset *changeset)
@@ -1348,63 +1323,18 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
changeset);
}
-int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask)
-{
- return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_UPTODATE,
- NULL, cached_state, mask);
-}
-
-int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask)
-{
- return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
- NULL, cached_state, mask);
-}
-
-int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return clear_extent_bit(tree, start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
-}
-
-int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_NEW, NULL,
- NULL, mask);
-}
-
-int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
- cached_state, mask);
-}
-
-int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
- cached_state, mask);
-}
-
/*
* either insert or lock state struct between start and end use mask to tell
* us if waiting is desired.
*/
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, struct extent_state **cached_state)
+ struct extent_state **cached_state)
{
int err;
u64 failed_start;
while (1) {
- err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
+ err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
EXTENT_LOCKED, &failed_start,
cached_state, GFP_NOFS, NULL);
if (err == -EEXIST) {
@@ -1417,11 +1347,6 @@ int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
return err;
}
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
- return lock_extent_bits(tree, start, end, 0, NULL);
-}
-
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
int err;
@@ -1438,20 +1363,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
return 1;
}
-int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached, gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
- mask);
-}
-
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
- GFP_NOFS);
-}
-
-int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
+void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
{
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
@@ -1464,10 +1376,9 @@ int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
page_cache_release(page);
index++;
}
- return 0;
}
-int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
+void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
{
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
@@ -1481,13 +1392,12 @@ int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
page_cache_release(page);
index++;
}
- return 0;
}
/*
* helper function to set both pages and extents in the tree writeback
*/
-static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
@@ -1500,7 +1410,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
page_cache_release(page);
index++;
}
- return 0;
}
/* find the first state struct with 'bits' set after 'start', and
@@ -1800,7 +1709,7 @@ again:
BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */
/* step three, lock the state bits for the whole range */
- lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
+ lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
/* then test to make sure it is all still delalloc */
ret = test_range_bit(tree, delalloc_start, delalloc_end,
@@ -1820,7 +1729,7 @@ out_failed:
return found;
}
-int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned clear_bits,
unsigned long page_ops)
@@ -1835,7 +1744,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
if (page_ops == 0)
- return 0;
+ return;
if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
mapping_set_error(inode->i_mapping, -EIO);
@@ -1869,7 +1778,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
index += ret;
cond_resched();
}
- return 0;
}
/*
@@ -2516,7 +2424,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
/* lots and lots of room for performance fixes in the end_bio funcs */
-int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
+void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
{
int uptodate = (err == 0);
struct extent_io_tree *tree;
@@ -2537,7 +2445,6 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
ret = ret < 0 ? ret : -EIO;
mapping_set_error(page->mapping, ret);
}
- return 0;
}
/*
@@ -2579,9 +2486,7 @@ static void end_bio_extent_writepage(struct bio *bio)
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
- if (end_extent_writepage(page, bio->bi_error, start, end))
- continue;
-
+ end_extent_writepage(page, bio->bi_error, start, end);
end_page_writeback(page);
}
@@ -4326,7 +4231,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
if (start > end)
return 0;
- lock_extent_bits(tree, start, end, 0, &cached_state);
+ lock_extent_bits(tree, start, end, &cached_state);
wait_on_page_writeback(page);
clear_extent_bit(tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -4536,7 +4441,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
last_for_get_extent = isize;
}
- lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state);
em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -5235,7 +5140,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
return was_dirty;
}
-int clear_extent_buffer_uptodate(struct extent_buffer *eb)
+void clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
unsigned long i;
struct page *page;
@@ -5248,10 +5153,9 @@ int clear_extent_buffer_uptodate(struct extent_buffer *eb)
if (page)
ClearPageUptodate(page);
}
- return 0;
}
-int set_extent_buffer_uptodate(struct extent_buffer *eb)
+void set_extent_buffer_uptodate(struct extent_buffer *eb)
{
unsigned long i;
struct page *page;
@@ -5263,7 +5167,6 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb)
page = eb->pages[i];
SetPageUptodate(page);
}
- return 0;
}
int extent_buffer_uptodate(struct extent_buffer *eb)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 350c8b0a8582..0377413bd4b9 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -199,12 +199,14 @@ int try_release_extent_mapping(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
int try_release_extent_buffer(struct page *page);
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, struct extent_state **cached);
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
-int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached, gfp_t mask);
+ struct extent_state **cached);
+
+static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ return lock_extent_bits(tree, start, end, NULL);
+}
+
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
@@ -221,39 +223,105 @@ void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int filled,
struct extent_state *cached_state);
-int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, gfp_t mask);
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask,
struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached, gfp_t mask);
-int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, gfp_t mask);
+
+static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
+ GFP_NOFS);
+}
+
+static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
+ mask);
+}
+
+static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
+ u64 end, unsigned bits, gfp_t mask)
+{
+ int wake = 0;
+
+ if (bits & EXTENT_LOCKED)
+ wake = 1;
+
+ return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
+}
+
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, gfp_t mask,
struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
-int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask);
-int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask);
-int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask);
-int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask);
+
+static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
+ u64 end, unsigned bits, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, bits, NULL, NULL, mask);
+}
+
+static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
+ cached_state, mask);
+}
+
+static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
+ NULL, mask);
+}
+
+static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask)
+{
+ return clear_extent_bit(tree, start, end,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
+}
+
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, unsigned clear_bits,
struct extent_state **cached_state, gfp_t mask);
-int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask);
-int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached_state, gfp_t mask);
+
+static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end,
+ EXTENT_DELALLOC | EXTENT_UPTODATE,
+ NULL, cached_state, mask);
+}
+
+static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end,
+ EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
+ NULL, cached_state, mask);
+}
+
+static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL, mask);
+}
+
+static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
+ cached_state, mask);
+}
+
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits,
struct extent_state **cached_state);
@@ -338,17 +406,17 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len);
void clear_extent_buffer_dirty(struct extent_buffer *eb);
int set_extent_buffer_dirty(struct extent_buffer *eb);
-int set_extent_buffer_uptodate(struct extent_buffer *eb);
-int clear_extent_buffer_uptodate(struct extent_buffer *eb);
+void set_extent_buffer_uptodate(struct extent_buffer *eb);
+void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(struct extent_buffer *eb);
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long min_len, char **map,
unsigned long *map_start,
unsigned long *map_len);
-int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
-int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
-int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
+void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
+void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned bits_to_clear,
unsigned long page_ops);
@@ -365,7 +433,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
int mirror_num);
int clean_io_failure(struct inode *inode, u64 start, struct page *page,
unsigned int pg_offset);
-int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
+void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 72e73461c064..364e0f1f61f6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1291,7 +1291,8 @@ out:
* on error we return an unlocked page and the error value
* on success we return a locked page and 0
*/
-static int prepare_uptodate_page(struct page *page, u64 pos,
+static int prepare_uptodate_page(struct inode *inode,
+ struct page *page, u64 pos,
bool force_uptodate)
{
int ret = 0;
@@ -1306,6 +1307,10 @@ static int prepare_uptodate_page(struct page *page, u64 pos,
unlock_page(page);
return -EIO;
}
+ if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
+ return -EAGAIN;
+ }
}
return 0;
}
@@ -1324,6 +1329,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
int faili;
for (i = 0; i < num_pages; i++) {
+again:
pages[i] = find_or_create_page(inode->i_mapping, index + i,
mask | __GFP_WRITE);
if (!pages[i]) {
@@ -1333,13 +1339,17 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
}
if (i == 0)
- err = prepare_uptodate_page(pages[i], pos,
+ err = prepare_uptodate_page(inode, pages[i], pos,
force_uptodate);
- if (i == num_pages - 1)
- err = prepare_uptodate_page(pages[i],
+ if (!err && i == num_pages - 1)
+ err = prepare_uptodate_page(inode, pages[i],
pos + write_bytes, false);
if (err) {
page_cache_release(pages[i]);
+ if (err == -EAGAIN) {
+ err = 0;
+ goto again;
+ }
faili = i - 1;
goto fail;
}
@@ -1384,7 +1394,7 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
- start_pos, last_pos, 0, cached_state);
+ start_pos, last_pos, cached_state);
ordered = btrfs_lookup_ordered_range(inode, start_pos,
last_pos - start_pos + 1);
if (ordered &&
@@ -2388,7 +2398,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
truncate_pagecache_range(inode, lockstart, lockend);
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- 0, &cached_state);
+ &cached_state);
ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
/*
@@ -2695,7 +2705,7 @@ static long btrfs_fallocate(struct file *file, int mode,
* transaction
*/
lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
- locked_end, 0, &cached_state);
+ locked_end, &cached_state);
ordered = btrfs_lookup_first_ordered_extent(inode,
alloc_end - 1);
if (ordered &&
@@ -2842,7 +2852,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
lockend--;
len = lockend - lockstart + 1;
- lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
while (start < inode->i_size) {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 85a1f8621b51..7c5992d0fd31 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -891,7 +891,7 @@ out:
spin_unlock(&block_group->lock);
ret = 0;
- btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now",
+ btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuilding it now",
block_group->key.objectid);
}
@@ -1261,7 +1261,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
goto out;
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
- 0, &cached_state);
+ &cached_state);
io_ctl_set_generation(io_ctl, trans->transid);
@@ -2972,7 +2972,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
u64 cont1_bytes, u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry;
+ struct btrfs_free_space *entry = NULL;
int ret = -ENOSPC;
u64 bitmap_offset = offset_to_bitmap(ctl, offset);
@@ -2983,8 +2983,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
* The bitmap that covers offset won't be in the list unless offset
* is just its start offset.
*/
- entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
- if (entry->offset != bitmap_offset) {
+ if (!list_empty(bitmaps))
+ entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
+
+ if (!entry || entry->offset != bitmap_offset) {
entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
if (entry && list_empty(&entry->list))
list_add(&entry->list, bitmaps);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a70c5790f8f5..bdb0008712c8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -66,6 +66,13 @@ struct btrfs_iget_args {
struct btrfs_root *root;
};
+struct btrfs_dio_data {
+ u64 outstanding_extents;
+ u64 reserve;
+ u64 unsubmitted_oe_range_start;
+ u64 unsubmitted_oe_range_end;
+};
+
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_dir_ro_inode_operations;
@@ -1989,7 +1996,7 @@ again:
page_start = page_offset(page);
page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
- lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
/* already ordered? We're done */
@@ -2482,7 +2489,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
lock_start = backref->file_pos;
lock_end = backref->file_pos + backref->num_bytes - 1;
lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
- 0, &cached);
+ &cached);
ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
if (ordered) {
@@ -2874,7 +2881,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
lock_extent_bits(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
- 0, &cached_state);
+ &cached_state);
ret = test_range_bit(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
@@ -4668,7 +4675,7 @@ again:
}
wait_on_page_writeback(page);
- lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
+ lock_extent_bits(io_tree, page_start, page_end, &cached_state);
set_page_extent_mapped(page);
ordered = btrfs_lookup_ordered_extent(inode, page_start);
@@ -4799,7 +4806,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
while (1) {
struct btrfs_ordered_extent *ordered;
- lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
+ lock_extent_bits(io_tree, hole_start, block_end - 1,
&cached_state);
ordered = btrfs_lookup_ordered_range(inode, hole_start,
block_end - hole_start);
@@ -5111,7 +5118,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
end = state->end;
spin_unlock(&io_tree->lock);
- lock_extent_bits(io_tree, start, end, 0, &cached_state);
+ lock_extent_bits(io_tree, start, end, &cached_state);
/*
* If still has DELALLOC flag, the extent didn't reach disk,
@@ -7380,7 +7387,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
while (1) {
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- 0, cached_state);
+ cached_state);
/*
* We're concerned with the entire range that we're going to be
* doing DIO to, so we need to make sure theres no ordered
@@ -7408,25 +7415,21 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
} else {
- /* Screw you mmap */
- ret = btrfs_fdatawrite_range(inode, lockstart, lockend);
- if (ret)
- break;
- ret = filemap_fdatawait_range(inode->i_mapping,
- lockstart,
- lockend);
- if (ret)
- break;
-
/*
- * If we found a page that couldn't be invalidated just
- * fall back to buffered.
+ * We could trigger writeback for this range (and wait
+ * for it to complete) and then invalidate the pages for
+ * this range (through invalidate_inode_pages2_range()),
+ * but that can lead us to a deadlock with a concurrent
+ * call to readpages() (a buffered read or a defrag call
+ * triggered a readahead) on a page lock due to an
+ * ordered dio extent we created before but did not have
+ * yet a corresponding bio submitted (whence it can not
+ * complete), which makes readpages() wait for that
+ * ordered extent to complete while holding a lock on
+ * that page.
*/
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- lockstart >> PAGE_CACHE_SHIFT,
- lockend >> PAGE_CACHE_SHIFT);
- if (ret)
- break;
+ ret = -ENOTBLK;
+ break;
}
cond_resched();
@@ -7482,11 +7485,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
return em;
}
-struct btrfs_dio_data {
- u64 outstanding_extents;
- u64 reserve;
-};
-
static void adjust_dio_outstanding_extents(struct inode *inode,
struct btrfs_dio_data *dio_data,
const u64 len)
@@ -7670,6 +7668,7 @@ unlock:
btrfs_free_reserved_data_space(inode, start, len);
WARN_ON(dio_data->reserve < len);
dio_data->reserve -= len;
+ dio_data->unsubmitted_oe_range_end = start + len;
current->journal_info = dio_data;
}
@@ -7992,22 +7991,22 @@ static void btrfs_endio_direct_read(struct bio *bio)
bio_put(bio);
}
-static void btrfs_endio_direct_write(struct bio *bio)
+static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
+ const u64 offset,
+ const u64 bytes,
+ const int uptodate)
{
- struct btrfs_dio_private *dip = bio->bi_private;
- struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_extent *ordered = NULL;
- u64 ordered_offset = dip->logical_offset;
- u64 ordered_bytes = dip->bytes;
- struct bio *dio_bio;
+ u64 ordered_offset = offset;
+ u64 ordered_bytes = bytes;
int ret;
again:
ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
&ordered_offset,
ordered_bytes,
- !bio->bi_error);
+ uptodate);
if (!ret)
goto out_test;
@@ -8020,13 +8019,22 @@ out_test:
* our bio might span multiple ordered extents. If we haven't
* completed the accounting for the whole dio, go back and try again
*/
- if (ordered_offset < dip->logical_offset + dip->bytes) {
- ordered_bytes = dip->logical_offset + dip->bytes -
- ordered_offset;
+ if (ordered_offset < offset + bytes) {
+ ordered_bytes = offset + bytes - ordered_offset;
ordered = NULL;
goto again;
}
- dio_bio = dip->dio_bio;
+}
+
+static void btrfs_endio_direct_write(struct bio *bio)
+{
+ struct btrfs_dio_private *dip = bio->bi_private;
+ struct bio *dio_bio = dip->dio_bio;
+
+ btrfs_endio_direct_write_update_ordered(dip->inode,
+ dip->logical_offset,
+ dip->bytes,
+ !bio->bi_error);
kfree(dip);
@@ -8334,6 +8342,21 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
dip->subio_endio = btrfs_subio_endio_read;
}
+ /*
+ * Reset the range for unsubmitted ordered extents (to a 0 length range)
+ * even if we fail to submit a bio, because in such case we do the
+ * corresponding error handling below and it must not be done a second
+ * time by btrfs_direct_IO().
+ */
+ if (write) {
+ struct btrfs_dio_data *dio_data = current->journal_info;
+
+ dio_data->unsubmitted_oe_range_end = dip->logical_offset +
+ dip->bytes;
+ dio_data->unsubmitted_oe_range_start =
+ dio_data->unsubmitted_oe_range_end;
+ }
+
ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
if (!ret)
return;
@@ -8362,24 +8385,15 @@ free_ordered:
dip = NULL;
io_bio = NULL;
} else {
- if (write) {
- struct btrfs_ordered_extent *ordered;
-
- ordered = btrfs_lookup_ordered_extent(inode,
- file_offset);
- set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
- /*
- * Decrements our ref on the ordered extent and removes
- * the ordered extent from the inode's ordered tree,
- * doing all the proper resource cleanup such as for the
- * reserved space and waking up any waiters for this
- * ordered extent (through btrfs_remove_ordered_extent).
- */
- btrfs_finish_ordered_io(ordered);
- } else {
+ if (write)
+ btrfs_endio_direct_write_update_ordered(inode,
+ file_offset,
+ dio_bio->bi_iter.bi_size,
+ 0);
+ else
unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
file_offset + dio_bio->bi_iter.bi_size - 1);
- }
+
dio_bio->bi_error = -EIO;
/*
* Releases and cleans up our dio_bio, no need to bio_put()
@@ -8479,6 +8493,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* originally calculated. Abuse current->journal_info for this.
*/
dio_data.reserve = round_up(count, root->sectorsize);
+ dio_data.unsubmitted_oe_range_start = (u64)offset;
+ dio_data.unsubmitted_oe_range_end = (u64)offset;
current->journal_info = &dio_data;
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags)) {
@@ -8497,6 +8513,19 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (dio_data.reserve)
btrfs_delalloc_release_space(inode, offset,
dio_data.reserve);
+ /*
+ * On error we might have left some ordered extents
+ * without submitting corresponding bios for them, so
+ * cleanup them up to avoid other tasks getting them
+ * and waiting for them to complete forever.
+ */
+ if (dio_data.unsubmitted_oe_range_start <
+ dio_data.unsubmitted_oe_range_end)
+ btrfs_endio_direct_write_update_ordered(inode,
+ dio_data.unsubmitted_oe_range_start,
+ dio_data.unsubmitted_oe_range_end -
+ dio_data.unsubmitted_oe_range_start,
+ 0);
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, offset,
count - (size_t)ret);
@@ -8614,7 +8643,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
}
if (!inode_evicting)
- lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+ lock_extent_bits(tree, page_start, page_end, &cached_state);
ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) {
/*
@@ -8652,7 +8681,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
btrfs_put_ordered_extent(ordered);
if (!inode_evicting) {
cached_state = NULL;
- lock_extent_bits(tree, page_start, page_end, 0,
+ lock_extent_bits(tree, page_start, page_end,
&cached_state);
}
}
@@ -8750,7 +8779,7 @@ again:
}
wait_on_page_writeback(page);
- lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
+ lock_extent_bits(io_tree, page_start, page_end, &cached_state);
set_page_extent_mapped(page);
/*
@@ -9418,14 +9447,10 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
delalloc_work = container_of(work, struct btrfs_delalloc_work,
work);
inode = delalloc_work->inode;
- if (delalloc_work->wait) {
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
- } else {
+ filemap_flush(inode->i_mapping);
+ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
filemap_flush(inode->i_mapping);
- if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- filemap_flush(inode->i_mapping);
- }
if (delalloc_work->delay_iput)
btrfs_add_delayed_iput(inode);
@@ -9435,7 +9460,7 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
}
struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
- int wait, int delay_iput)
+ int delay_iput)
{
struct btrfs_delalloc_work *work;
@@ -9446,7 +9471,6 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
init_completion(&work->completion);
INIT_LIST_HEAD(&work->list);
work->inode = inode;
- work->wait = wait;
work->delay_iput = delay_iput;
WARN_ON_ONCE(!inode);
btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
@@ -9494,7 +9518,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
}
spin_unlock(&root->delalloc_lock);
- work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+ work = btrfs_alloc_delalloc_work(inode, delay_iput);
if (!work) {
if (delay_iput)
btrfs_add_delayed_iput(inode);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index da94138eb85e..8bbecdaf58a3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -992,7 +992,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
u64 end = start + len - 1;
/* get the big lock and read metadata off disk */
- lock_extent_bits(io_tree, start, end, 0, &cached);
+ lock_extent_bits(io_tree, start, end, &cached);
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS);
@@ -1140,7 +1140,7 @@ again:
page_end = page_start + PAGE_CACHE_SIZE - 1;
while (1) {
lock_extent_bits(tree, page_start, page_end,
- 0, &cached_state);
+ &cached_state);
ordered = btrfs_lookup_ordered_extent(inode,
page_start);
unlock_extent_cached(tree, page_start, page_end,
@@ -1200,7 +1200,7 @@ again:
page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
- page_start, page_end - 1, 0, &cached_state);
+ page_start, page_end - 1, &cached_state);
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
@@ -4147,7 +4147,7 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
return -ENOMEM;
space_args.total_spaces = 0;
- dest = kmalloc(alloc_size, GFP_NOFS);
+ dest = kmalloc(alloc_size, GFP_KERNEL);
if (!dest)
return -ENOMEM;
dest_orig = dest;
@@ -4673,7 +4673,7 @@ locked:
goto out_bargs;
}
- bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+ bctl = kzalloc(sizeof(*bctl), GFP_KERNEL);
if (!bctl) {
ret = -ENOMEM;
goto out_bargs;
@@ -4759,7 +4759,7 @@ static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
goto out;
}
- bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
+ bargs = kzalloc(sizeof(*bargs), GFP_KERNEL);
if (!bargs) {
ret = -ENOMEM;
goto out;
@@ -5019,7 +5019,7 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
+ qsa = kzalloc(sizeof(*qsa), GFP_KERNEL);
if (!qsa)
return -ENOMEM;
@@ -5149,7 +5149,7 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file,
goto out;
}
- args64 = kmalloc(sizeof(*args64), GFP_NOFS);
+ args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
if (!args64) {
ret = -ENOMEM;
goto out;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b091d94ceef6..69987150ddfa 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -4211,7 +4211,7 @@ static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
io_tree = &BTRFS_I(inode)->io_tree;
- lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
+ lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
if (ordered) {
btrfs_put_ordered_extent(ordered);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 3367a3c6f214..f85ccf634ca1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -75,6 +75,23 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
list_del_init(&em->list);
free_extent_map(em);
}
+ /*
+ * If any block groups are found in ->deleted_bgs then it's
+ * because the transaction was aborted and a commit did not
+ * happen (things failed before writing the new superblock
+ * and calling btrfs_finish_extent_commit()), so we can not
+ * discard the physical locations of the block groups.
+ */
+ while (!list_empty(&transaction->deleted_bgs)) {
+ struct btrfs_block_group_cache *cache;
+
+ cache = list_first_entry(&transaction->deleted_bgs,
+ struct btrfs_block_group_cache,
+ bg_list);
+ list_del_init(&cache->bg_list);
+ btrfs_put_block_group_trimming(cache);
+ btrfs_put_block_group(cache);
+ }
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
}
@@ -274,7 +291,6 @@ loop:
cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock);
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
- spin_lock_init(&cur_trans->deleted_bgs_lock);
spin_lock_init(&cur_trans->dropped_roots_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 0da21ca9b3fb..64c8221b6165 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -77,8 +77,8 @@ struct btrfs_transaction {
*/
struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock;
+ /* Protected by spin lock fs_info->unused_bgs_lock. */
struct list_head deleted_bgs;
- spinlock_t deleted_bgs_lock;
spinlock_t dropped_roots_lock;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f31db4325339..cb65089127cc 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -89,6 +89,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
goto out;
}
btrfs_release_path(path);
+ /*
+ * We don't need a lock on a leaf. btrfs_realloc_node() will lock all
+ * leafs from path->nodes[1], so set lowest_level to 1 to avoid later
+ * a deadlock (attempting to write lock an already write locked leaf).
+ */
+ path->lowest_level = 1;
wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (wret < 0) {
@@ -99,9 +105,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
ret = 0;
goto out;
}
- path->slots[1] = btrfs_header_nritems(path->nodes[1]);
- next_key_ret = btrfs_find_next_key(root, path, &key, 1,
- min_trans);
+ /*
+ * The node at level 1 must always be locked when our path has
+ * keep_locks set and lowest_level is 1, regardless of the value of
+ * path->slots[1].
+ */
+ BUG_ON(path->locks[1] == 0);
ret = btrfs_realloc_node(trans, root,
path->nodes[1], 0,
&last_ret,
@@ -110,6 +119,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
WARN_ON(ret == -EAGAIN);
goto out;
}
+ /*
+ * Now that we reallocated the node we can find the next key. Note that
+ * btrfs_find_next_key() can release our path and do another search
+ * without COWing, this is because even with path->keep_locks = 1,
+ * btrfs_search_slot() / ctree.c:unlock_up() does not keeps a lock on a
+ * node when path->slots[node_level - 1] does not point to the last
+ * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore
+ * we search for the next key after reallocating our node.
+ */
+ path->slots[1] = btrfs_header_nritems(path->nodes[1]);
+ next_key_ret = btrfs_find_next_key(root, path, &key, 1,
+ min_trans);
if (next_key_ret == 0) {
memcpy(&root->defrag_progress, &key, sizeof(key));
ret = -EAGAIN;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 456452206609..a37cc0478bb2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,7 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
+static void btrfs_close_one_device(struct btrfs_device *device);
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
@@ -3548,12 +3549,11 @@ again:
ret = btrfs_force_chunk_alloc(trans, chunk_root,
BTRFS_BLOCK_GROUP_DATA);
+ btrfs_end_transaction(trans, chunk_root);
if (ret < 0) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
goto error;
}
-
- btrfs_end_transaction(trans, chunk_root);
chunk_reserved = 1;
}
@@ -4825,20 +4825,32 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
goto out;
}
+ /*
+ * Take the device list mutex to prevent races with the final phase of
+ * a device replace operation that replaces the device object associated
+ * with the map's stripes, because the device object's id can change
+ * at any time during that final phase of the device replace operation
+ * (dev-replace.c:btrfs_dev_replace_finishing()).
+ */
+ mutex_lock(&chunk_root->fs_info->fs_devices->device_list_mutex);
for (i = 0; i < map->num_stripes; i++) {
device = map->stripes[i].dev;
dev_offset = map->stripes[i].physical;
ret = btrfs_update_device(trans, device);
if (ret)
- goto out;
+ break;
ret = btrfs_alloc_dev_extent(trans, device,
chunk_root->root_key.objectid,
BTRFS_FIRST_CHUNK_TREE_OBJECTID,
chunk_offset, dev_offset,
stripe_size);
if (ret)
- goto out;
+ break;
+ }
+ if (ret) {
+ mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
+ goto out;
}
stripe = &chunk->stripe;
@@ -4851,6 +4863,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
stripe++;
}
+ mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
btrfs_set_stack_chunk_length(chunk, chunk_size);
btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
@@ -6466,11 +6479,11 @@ int btrfs_read_sys_array(struct btrfs_root *root)
sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
if (!sb)
return -ENOMEM;
- btrfs_set_buffer_uptodate(sb);
+ set_extent_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
/*
* The sb extent buffer is artifical and just used to read the system array.
- * btrfs_set_buffer_uptodate() call does not properly mark all it's
+ * set_extent_buffer_uptodate() call does not properly mark all it's
* pages up-to-date when the page is larger: extent does not cover the
* whole page and consequently check_page_uptodate does not find all
* the page's extents up-to-date (the hole beyond sb),
@@ -6950,7 +6963,7 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
}
}
-void btrfs_close_one_device(struct btrfs_device *device)
+static void btrfs_close_one_device(struct btrfs_device *device)
{
struct btrfs_fs_devices *fs_devices = device->fs_devices;
struct btrfs_device *new_device;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d5c84f6b1353..6a4375a7c588 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -566,6 +566,5 @@ static inline void unlock_chunks(struct btrfs_root *root)
struct list_head *btrfs_get_fs_uuids(void);
void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
-void btrfs_close_one_device(struct btrfs_device *device);
#endif
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 1fcd7b6e7564..24e8ff722730 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -494,7 +494,7 @@ static int btrfs_initxattrs(struct inode *inode,
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
- strlen(xattr->name) + 1, GFP_NOFS);
+ strlen(xattr->name) + 1, GFP_KERNEL);
if (!name) {
err = -ENOMEM;
break;