From 0c4d2d95d06e920e0c61707e62c7fffc9c57f63a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 5 Apr 2012 15:03:02 -0400 Subject: Btrfs: use i_version instead of our own sequence We've been keeping around the inode sequence number in hopes that somebody would use it, but nobody uses it and people actually use i_version which serves the same purpose, so use i_version where we used the incore inode's sequence number and that way the sequence is updated properly across the board, and not just in file write. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 61b16c641ce0..41a62e6954c2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2510,7 +2510,7 @@ static void btrfs_read_locked_inode(struct inode *inode) inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); - BTRFS_I(inode)->sequence = btrfs_inode_sequence(leaf, inode_item); + inode->i_version = btrfs_inode_sequence(leaf, inode_item); inode->i_generation = BTRFS_I(inode)->generation; inode->i_rdev = 0; rdev = btrfs_inode_rdev(leaf, inode_item); @@ -2594,7 +2594,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); - btrfs_set_inode_sequence(leaf, item, BTRFS_I(inode)->sequence); + btrfs_set_inode_sequence(leaf, item, inode->i_version); btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); @@ -2752,6 +2752,8 @@ err: goto out; btrfs_i_size_write(dir, dir->i_size - name_len * 2); + inode_inc_iversion(inode); + inode_inc_iversion(dir); inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); out: @@ -3089,6 +3091,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, } btrfs_i_size_write(dir, dir->i_size - name_len * 2); + inode_inc_iversion(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, dir); if (ret) @@ -3638,6 +3641,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid) { setattr_copy(inode, attr); + inode_inc_iversion(inode); err = btrfs_dirty_inode(inode); if (!err && attr->ia_valid & ATTR_MODE) @@ -4730,6 +4734,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->i_size + name_len * 2); + inode_inc_iversion(parent_inode); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, parent_inode); if (ret) @@ -4937,6 +4942,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, } btrfs_inc_nlink(inode); + inode_inc_iversion(inode); inode->i_ctime = CURRENT_TIME; ihold(inode); @@ -6894,7 +6900,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->root = NULL; ei->space_info = NULL; ei->generation = 0; - ei->sequence = 0; ei->last_trans = 0; ei->last_sub_trans = 0; ei->logged_trans = 0; @@ -7193,6 +7198,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode)) btrfs_add_ordered_operation(trans, root, old_inode); + inode_inc_iversion(old_dir); + inode_inc_iversion(new_dir); + inode_inc_iversion(old_inode); old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; @@ -7219,6 +7227,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, } if (new_inode) { + inode_inc_iversion(new_inode); new_inode->i_ctime = CURRENT_TIME; if (unlikely(btrfs_ino(new_inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { @@ -7490,6 +7499,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, cur_offset += ins.offset; *alloc_hint = ins.objectid + ins.offset; + inode_inc_iversion(inode); inode->i_ctime = CURRENT_TIME; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && -- cgit v1.2.3 From 5fd02043553b02867b29de1ac9fff2ec16b84def Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 2 May 2012 14:00:54 -0400 Subject: Btrfs: finish ordered extents in their own thread We noticed that the ordered extent completion doesn't really rely on having a page and that it could be done independantly of ending the writeback on a page. This patch makes us not do the threaded endio stuff for normal buffered writes and direct writes so we can end page writeback as soon as possible (in irq context) and only start threads to do the ordered work when it is actually done. Compression needs to be reworked some to take advantage of this as well, but atm it has to do a find_get_page in its endio handler so it must be done in its own thread. This makes direct writes quite a bit faster. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 12 --- fs/btrfs/extent_io.c | 15 +--- fs/btrfs/extent_io.h | 5 +- fs/btrfs/free-space-cache.c | 4 +- fs/btrfs/inode.c | 177 +++++++++++++++++++------------------------- fs/btrfs/ordered-data.c | 129 ++++++++++++++++++-------------- fs/btrfs/ordered-data.h | 13 +++- 7 files changed, 164 insertions(+), 191 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a7ffc88a7dbe..19f5b450f405 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3671,17 +3671,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) return 0; } -static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page, - u64 start, u64 end, - struct extent_state *state) -{ - struct super_block *sb = page->mapping->host->i_sb; - struct btrfs_fs_info *fs_info = btrfs_sb(sb); - btrfs_error(fs_info, -EIO, - "Error occured while writing out btree at %llu", start); - return -EIO; -} - static struct extent_io_ops btree_extent_io_ops = { .write_cache_pages_lock_hook = btree_lock_page_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, @@ -3689,5 +3678,4 @@ static struct extent_io_ops btree_extent_io_ops = { .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, - .writepage_io_failed_hook = btree_writepage_io_failed_hook, }; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 836fc37a437a..7af93435cee0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1172,9 +1172,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, cached_state, mask); } -static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, - u64 end, struct extent_state **cached_state, - gfp_t mask) +int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + struct extent_state **cached_state, gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, cached_state, mask); @@ -2221,17 +2220,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) uptodate = 0; } - if (!uptodate && tree->ops && - tree->ops->writepage_io_failed_hook) { - ret = tree->ops->writepage_io_failed_hook(NULL, page, - start, end, NULL); - /* Writeback already completed */ - if (ret == 0) - return 1; - } - if (!uptodate) { - clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); ClearPageUptodate(page); SetPageError(page); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b516c3b8dec6..4d8124b64577 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -75,9 +75,6 @@ struct extent_io_ops { unsigned long bio_flags); int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); - int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, - u64 start, u64 end, - struct extent_state *state); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int mirror); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, @@ -225,6 +222,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); +int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + struct extent_state **cached_state, gfp_t mask); int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 202008ec367d..cecf8df62481 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -972,9 +972,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, goto out; - ret = filemap_write_and_wait(inode->i_mapping); - if (ret) - goto out; + btrfs_wait_ordered_range(inode, 0, (u64)-1); key.objectid = BTRFS_FREE_SPACE_OBJECTID; key.offset = offset; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 41a62e6954c2..9a1b96fd672a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -89,7 +89,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { static int btrfs_setsize(struct inode *inode, loff_t newsize); static int btrfs_truncate(struct inode *inode); -static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); +static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); static noinline int cow_file_range(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, @@ -1572,11 +1572,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, if (btrfs_is_free_space_inode(root, inode)) metadata = 2; - ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); - if (ret) - return ret; - if (!(rw & REQ_WRITE)) { + ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); + if (ret) + return ret; + if (bio_flags & EXTENT_BIO_COMPRESSED) { return btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags); @@ -1815,25 +1815,24 @@ out: * an ordered extent if the range of bytes in the file it covers are * fully written. */ -static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) +static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) { + struct inode *inode = ordered_extent->inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans = NULL; - struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_state *cached_state = NULL; int compress_type = 0; int ret; bool nolock; - ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, - end - start + 1); - if (!ret) - return 0; - BUG_ON(!ordered_extent); /* Logic error */ - nolock = btrfs_is_free_space_inode(root, inode); + if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) { + ret = -EIO; + goto out; + } + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); @@ -1889,12 +1888,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->file_offset, ordered_extent->len); } - unlock_extent_cached(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, &cached_state, GFP_NOFS); + if (ret < 0) { btrfs_abort_transaction(trans, root, ret); - goto out; + goto out_unlock; } add_pending_csums(trans, inode, ordered_extent->file_offset, @@ -1905,10 +1902,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ret = btrfs_update_inode_fallback(trans, root, inode); if (ret) { /* -ENOMEM or corruption */ btrfs_abort_transaction(trans, root, ret); - goto out; + goto out_unlock; } } ret = 0; +out_unlock: + unlock_extent_cached(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1, &cached_state, GFP_NOFS); out: if (root != root->fs_info->tree_root) btrfs_delalloc_release_metadata(inode, ordered_extent->len); @@ -1919,26 +1920,57 @@ out: btrfs_end_transaction(trans, root); } + if (ret) + clear_extent_uptodate(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1, NULL, GFP_NOFS); + + /* + * This needs to be dont to make sure anybody waiting knows we are done + * upating everything for this ordered extent. + */ + btrfs_remove_ordered_extent(inode, ordered_extent); + /* once for us */ btrfs_put_ordered_extent(ordered_extent); /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - return 0; -out_unlock: - unlock_extent_cached(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + - ordered_extent->len - 1, &cached_state, GFP_NOFS); - goto out; + return ret; +} + +static void finish_ordered_fn(struct btrfs_work *work) +{ + struct btrfs_ordered_extent *ordered_extent; + ordered_extent = container_of(work, struct btrfs_ordered_extent, work); + btrfs_finish_ordered_io(ordered_extent); } static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_ordered_extent *ordered_extent = NULL; + struct btrfs_workers *workers; + trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); ClearPagePrivate2(page); - return btrfs_finish_ordered_io(page->mapping->host, start, end); + if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, + end - start + 1, uptodate)) + return 0; + + ordered_extent->work.func = finish_ordered_fn; + ordered_extent->work.flags = 0; + + if (btrfs_is_free_space_inode(root, inode)) + workers = &root->fs_info->endio_freespace_worker; + else + workers = &root->fs_info->endio_write_workers; + btrfs_queue_worker(workers, &ordered_extent->work); + + return 0; } /* @@ -5909,9 +5941,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) struct btrfs_dio_private *dip = bio->bi_private; struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered = NULL; - struct extent_state *cached_state = NULL; u64 ordered_offset = dip->logical_offset; u64 ordered_bytes = dip->bytes; int ret; @@ -5921,73 +5951,14 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) again: ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, - ordered_bytes); + ordered_bytes, !err); if (!ret) goto out_test; - BUG_ON(!ordered); - - trans = btrfs_join_transaction(root); - if (IS_ERR(trans)) { - err = -ENOMEM; - goto out; - } - trans->block_rsv = &root->fs_info->delalloc_block_rsv; - - if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { - ret = btrfs_ordered_update_i_size(inode, 0, ordered); - if (!ret) - err = btrfs_update_inode_fallback(trans, root, inode); - goto out; - } - - lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, - ordered->file_offset + ordered->len - 1, 0, - &cached_state); - - if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { - ret = btrfs_mark_extent_written(trans, inode, - ordered->file_offset, - ordered->file_offset + - ordered->len); - if (ret) { - err = ret; - goto out_unlock; - } - } else { - ret = insert_reserved_file_extent(trans, inode, - ordered->file_offset, - ordered->start, - ordered->disk_len, - ordered->len, - ordered->len, - 0, 0, 0, - BTRFS_FILE_EXTENT_REG); - unpin_extent_cache(&BTRFS_I(inode)->extent_tree, - ordered->file_offset, ordered->len); - if (ret) { - err = ret; - WARN_ON(1); - goto out_unlock; - } - } - - add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); - ret = btrfs_ordered_update_i_size(inode, 0, ordered); - if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) - btrfs_update_inode_fallback(trans, root, inode); - ret = 0; -out_unlock: - unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, - ordered->file_offset + ordered->len - 1, - &cached_state, GFP_NOFS); -out: - btrfs_delalloc_release_metadata(inode, ordered->len); - btrfs_end_transaction(trans, root); - ordered_offset = ordered->file_offset + ordered->len; - btrfs_put_ordered_extent(ordered); - btrfs_put_ordered_extent(ordered); - + ordered->work.func = finish_ordered_fn; + ordered->work.flags = 0; + btrfs_queue_worker(&root->fs_info->endio_write_workers, + &ordered->work); out_test: /* * our bio might span multiple ordered extents. If we haven't @@ -5996,12 +5967,12 @@ out_test: if (ordered_offset < dip->logical_offset + dip->bytes) { ordered_bytes = dip->logical_offset + dip->bytes - ordered_offset; + ordered = NULL; goto again; } out_done: bio->bi_private = dip->private; - kfree(dip->csums); kfree(dip); /* If we had an error make sure to clear the uptodate flag */ @@ -6069,9 +6040,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, int ret; bio_get(bio); - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); - if (ret) - goto err; + + if (!write) { + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); + if (ret) + goto err; + } if (skip_sum) goto map; @@ -6491,13 +6465,13 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) static void btrfs_invalidatepage(struct page *page, unsigned long offset) { + struct inode *inode = page->mapping->host; struct extent_io_tree *tree; struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; u64 page_start = page_offset(page); u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - /* * we have the page locked, so new writeback can't start, * and the dirty bit won't be cleared while we are here. @@ -6507,13 +6481,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ wait_on_page_writeback(page); - tree = &BTRFS_I(page->mapping->host)->io_tree; + tree = &BTRFS_I(inode)->io_tree; if (offset) { btrfs_releasepage(page, GFP_NOFS); return; } lock_extent_bits(tree, page_start, page_end, 0, &cached_state); - ordered = btrfs_lookup_ordered_extent(page->mapping->host, + ordered = btrfs_lookup_ordered_extent(inode, page_offset(page)); if (ordered) { /* @@ -6528,9 +6502,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) * whoever cleared the private bit is responsible * for the finish_ordered_io */ - if (TestClearPagePrivate2(page)) { - btrfs_finish_ordered_io(page->mapping->host, - page_start, page_end); + if (TestClearPagePrivate2(page) && + btrfs_dec_test_ordered_pending(inode, &ordered, page_start, + PAGE_CACHE_SIZE, 1)) { + btrfs_finish_ordered_io(ordered); } btrfs_put_ordered_extent(ordered); cached_state = NULL; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9565c0289164..9e138cdc36c5 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -196,7 +196,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->len = len; entry->disk_len = disk_len; entry->bytes_left = len; - entry->inode = inode; + entry->inode = igrab(inode); entry->compress_type = compress_type; if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) set_bit(type, &entry->flags); @@ -212,12 +212,12 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, trace_btrfs_ordered_extent_add(inode, entry); - spin_lock(&tree->lock); + spin_lock_irq(&tree->lock); node = tree_insert(&tree->tree, file_offset, &entry->rb_node); if (node) ordered_data_tree_panic(inode, -EEXIST, file_offset); - spin_unlock(&tree->lock); + spin_unlock_irq(&tree->lock); spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); list_add_tail(&entry->root_extent_list, @@ -264,9 +264,9 @@ void btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_inode_tree *tree; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock(&tree->lock); + spin_lock_irq(&tree->lock); list_add_tail(&sum->list, &entry->list); - spin_unlock(&tree->lock); + spin_unlock_irq(&tree->lock); } /* @@ -283,18 +283,19 @@ void btrfs_add_ordered_sum(struct inode *inode, */ int btrfs_dec_test_first_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, - u64 *file_offset, u64 io_size) + u64 *file_offset, u64 io_size, int uptodate) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; struct btrfs_ordered_extent *entry = NULL; int ret; + unsigned long flags; u64 dec_end; u64 dec_start; u64 to_dec; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock(&tree->lock); + spin_lock_irqsave(&tree->lock, flags); node = tree_search(tree, *file_offset); if (!node) { ret = 1; @@ -323,6 +324,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, (unsigned long long)to_dec); } entry->bytes_left -= to_dec; + if (!uptodate) + set_bit(BTRFS_ORDERED_IOERR, &entry->flags); + if (entry->bytes_left == 0) ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); else @@ -332,7 +336,7 @@ out: *cached = entry; atomic_inc(&entry->refs); } - spin_unlock(&tree->lock); + spin_unlock_irqrestore(&tree->lock, flags); return ret == 0; } @@ -347,15 +351,21 @@ out: */ int btrfs_dec_test_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, - u64 file_offset, u64 io_size) + u64 file_offset, u64 io_size, int uptodate) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; struct btrfs_ordered_extent *entry = NULL; + unsigned long flags; int ret; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock(&tree->lock); + spin_lock_irqsave(&tree->lock, flags); + if (cached && *cached) { + entry = *cached; + goto have_entry; + } + node = tree_search(tree, file_offset); if (!node) { ret = 1; @@ -363,6 +373,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, } entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); +have_entry: if (!offset_in_entry(entry, file_offset)) { ret = 1; goto out; @@ -374,6 +385,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, (unsigned long long)io_size); } entry->bytes_left -= io_size; + if (!uptodate) + set_bit(BTRFS_ORDERED_IOERR, &entry->flags); + if (entry->bytes_left == 0) ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); else @@ -383,7 +397,7 @@ out: *cached = entry; atomic_inc(&entry->refs); } - spin_unlock(&tree->lock); + spin_unlock_irqrestore(&tree->lock, flags); return ret == 0; } @@ -399,6 +413,8 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) trace_btrfs_ordered_extent_put(entry->inode, entry); if (atomic_dec_and_test(&entry->refs)) { + if (entry->inode) + btrfs_add_delayed_iput(entry->inode); while (!list_empty(&entry->list)) { cur = entry->list.next; sum = list_entry(cur, struct btrfs_ordered_sum, list); @@ -411,21 +427,22 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) /* * remove an ordered extent from the tree. No references are dropped - * and you must wake_up entry->wait. You must hold the tree lock - * while you call this function. + * and waiters are woken up. */ -static void __btrfs_remove_ordered_extent(struct inode *inode, - struct btrfs_ordered_extent *entry) +void btrfs_remove_ordered_extent(struct inode *inode, + struct btrfs_ordered_extent *entry) { struct btrfs_ordered_inode_tree *tree; struct btrfs_root *root = BTRFS_I(inode)->root; struct rb_node *node; tree = &BTRFS_I(inode)->ordered_tree; + spin_lock_irq(&tree->lock); node = &entry->rb_node; rb_erase(node, &tree->tree); tree->last = NULL; set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); + spin_unlock_irq(&tree->lock); spin_lock(&root->fs_info->ordered_extent_lock); list_del_init(&entry->root_extent_list); @@ -442,21 +459,6 @@ static void __btrfs_remove_ordered_extent(struct inode *inode, list_del_init(&BTRFS_I(inode)->ordered_operations); } spin_unlock(&root->fs_info->ordered_extent_lock); -} - -/* - * remove an ordered extent from the tree. No references are dropped - * but any waiters are woken. - */ -void btrfs_remove_ordered_extent(struct inode *inode, - struct btrfs_ordered_extent *entry) -{ - struct btrfs_ordered_inode_tree *tree; - - tree = &BTRFS_I(inode)->ordered_tree; - spin_lock(&tree->lock); - __btrfs_remove_ordered_extent(inode, entry); - spin_unlock(&tree->lock); wake_up(&entry->wait); } @@ -663,7 +665,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry = NULL; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock(&tree->lock); + spin_lock_irq(&tree->lock); node = tree_search(tree, file_offset); if (!node) goto out; @@ -674,7 +676,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, if (entry) atomic_inc(&entry->refs); out: - spin_unlock(&tree->lock); + spin_unlock_irq(&tree->lock); return entry; } @@ -690,7 +692,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, struct btrfs_ordered_extent *entry = NULL; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock(&tree->lock); + spin_lock_irq(&tree->lock); node = tree_search(tree, file_offset); if (!node) { node = tree_search(tree, file_offset + len); @@ -715,7 +717,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, out: if (entry) atomic_inc(&entry->refs); - spin_unlock(&tree->lock); + spin_unlock_irq(&tree->lock); return entry; } @@ -731,7 +733,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) struct btrfs_ordered_extent *entry = NULL; tree = &BTRFS_I(inode)->ordered_tree; - spin_lock(&tree->lock); + spin_lock_irq(&tree->lock); node = tree_search(tree, file_offset); if (!node) goto out; @@ -739,7 +741,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); atomic_inc(&entry->refs); out: - spin_unlock(&tree->lock); + spin_unlock_irq(&tree->lock); return entry; } @@ -765,7 +767,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, else offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); - spin_lock(&tree->lock); + spin_lock_irq(&tree->lock); disk_i_size = BTRFS_I(inode)->disk_i_size; /* truncate file */ @@ -803,15 +805,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, } node = prev; } - while (node) { + for (; node; node = rb_prev(node)) { test = rb_entry(node, struct btrfs_ordered_extent, rb_node); + + /* We treat this entry as if it doesnt exist */ + if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) + continue; if (test->file_offset + test->len <= disk_i_size) break; if (test->file_offset >= i_size) break; if (test->file_offset >= disk_i_size) goto out; - node = rb_prev(node); } new_i_size = min_t(u64, offset, i_size); @@ -829,17 +834,27 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, else node = rb_first(&tree->tree); } - i_size_test = 0; - if (node) { - /* - * do we have an area where IO might have finished - * between our ordered extent and the next one. - */ + + /* + * We are looking for an area between our current extent and the next + * ordered extent to update the i_size to. There are 3 cases here + * + * 1) We don't actually have anything and we can update to i_size. + * 2) We have stuff but they already did their i_size update so again we + * can just update to i_size. + * 3) We have an outstanding ordered extent so the most we can update + * our disk_i_size to is the start of the next offset. + */ + i_size_test = i_size; + for (; node; node = rb_next(node)) { test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - if (test->file_offset > offset) + + if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) + continue; + if (test->file_offset > offset) { i_size_test = test->file_offset; - } else { - i_size_test = i_size; + break; + } } /* @@ -853,15 +868,15 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, ret = 0; out: /* - * we need to remove the ordered extent with the tree lock held - * so that other people calling this function don't find our fully - * processed ordered entry and skip updating the i_size + * We need to do this because we can't remove ordered extents until + * after the i_disk_size has been updated and then the inode has been + * updated to reflect the change, so we need to tell anybody who finds + * this ordered extent that we've already done all the real work, we + * just haven't completed all the other work. */ if (ordered) - __btrfs_remove_ordered_extent(inode, ordered); - spin_unlock(&tree->lock); - if (ordered) - wake_up(&ordered->wait); + set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags); + spin_unlock_irq(&tree->lock); return ret; } @@ -886,7 +901,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, if (!ordered) return 1; - spin_lock(&tree->lock); + spin_lock_irq(&tree->lock); list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { if (disk_bytenr >= ordered_sum->bytenr) { num_sectors = ordered_sum->len / sectorsize; @@ -901,7 +916,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, } } out: - spin_unlock(&tree->lock); + spin_unlock_irq(&tree->lock); btrfs_put_ordered_extent(ordered); return ret; } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c355ad4dc1a6..e03c560d2997 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -74,6 +74,12 @@ struct btrfs_ordered_sum { #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ +#define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */ + +#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent + * has done its due diligence in updating + * the isize. */ + struct btrfs_ordered_extent { /* logical offset in the file */ u64 file_offset; @@ -113,6 +119,8 @@ struct btrfs_ordered_extent { /* a per root list of all the pending ordered extents */ struct list_head root_extent_list; + + struct btrfs_work work; }; @@ -143,10 +151,11 @@ void btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry); int btrfs_dec_test_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, - u64 file_offset, u64 io_size); + u64 file_offset, u64 io_size, int uptodate); int btrfs_dec_test_first_ordered_pending(struct inode *inode, struct btrfs_ordered_extent **cached, - u64 *file_offset, u64 io_size); + u64 *file_offset, u64 io_size, + int uptodate); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int type); int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, -- cgit v1.2.3 From 72ac3c0d7921f943d92d1ef42a549fb52e56817d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 23 May 2012 14:13:11 -0400 Subject: Btrfs: convert the inode bit field to use the actual bit operations Miao pointed this out while I was working on an orphan problem that messing with a bitfield where different ranges are protected by different locks doesn't work out right. Turns out we've been doing this forever where we have different parts of the bit field protected by either no lock at all or different locks which could cause all sorts of weird problems including the issue I was hitting. So instead make a runtime_flags thing that we use the normal bit operations on that are all atomic so we can keep having our no/different locking for the different flags and then make force_compress it's own thing so it can be treated normally. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 30 ++++++++++++++++-------------- fs/btrfs/delayed-inode.c | 4 ++-- fs/btrfs/disk-io.c | 3 ++- fs/btrfs/extent-tree.c | 11 ++++++----- fs/btrfs/file.c | 12 ++++++------ fs/btrfs/inode.c | 28 ++++++++++++---------------- 6 files changed, 44 insertions(+), 44 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 3771b8543a7e..6265edb219e2 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -24,6 +24,19 @@ #include "ordered-data.h" #include "delayed-inode.h" +/* + * ordered_data_close is set by truncate when a file that used + * to have good data has been truncated to zero. When it is set + * the btrfs file release call will add this inode to the + * ordered operations list so that we make sure to flush out any + * new data the application may have written before commit. + */ +#define BTRFS_INODE_ORDERED_DATA_CLOSE 0 +#define BTRFS_INODE_ORPHAN_META_RESERVED 1 +#define BTRFS_INODE_DUMMY 2 +#define BTRFS_INODE_IN_DEFRAG 3 +#define BTRFS_INODE_DELALLOC_META_RESERVED 4 + /* in memory btrfs inode */ struct btrfs_inode { /* which subvolume this inode belongs to */ @@ -78,6 +91,8 @@ struct btrfs_inode { /* the space_info for where this inode's data allocations are done */ struct btrfs_space_info *space_info; + unsigned long runtime_flags; + /* full 64 bit generation number, struct vfs_inode doesn't have a big * enough field for this. */ @@ -141,23 +156,10 @@ struct btrfs_inode { unsigned outstanding_extents; unsigned reserved_extents; - /* - * ordered_data_close is set by truncate when a file that used - * to have good data has been truncated to zero. When it is set - * the btrfs file release call will add this inode to the - * ordered operations list so that we make sure to flush out any - * new data the application may have written before commit. - */ - unsigned ordered_data_close:1; - unsigned orphan_meta_reserved:1; - unsigned dummy_inode:1; - unsigned in_defrag:1; - unsigned delalloc_meta_reserved:1; - /* * always compress this one file */ - unsigned force_compress:4; + unsigned force_compress; struct btrfs_delayed_node *delayed_node; diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index bcd40c7109fa..c18d0442ae6d 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -669,8 +669,8 @@ static int btrfs_delayed_inode_reserve_metadata( return ret; } else if (src_rsv == &root->fs_info->delalloc_block_rsv) { spin_lock(&BTRFS_I(inode)->lock); - if (BTRFS_I(inode)->delalloc_meta_reserved) { - BTRFS_I(inode)->delalloc_meta_reserved = 0; + if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, + &BTRFS_I(inode)->runtime_flags)) { spin_unlock(&BTRFS_I(inode)->lock); release = true; goto migrate; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 19f5b450f405..0cf8ef2b5b1a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2001,7 +2001,8 @@ int open_ctree(struct super_block *sb, BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); - BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; + set_bit(BTRFS_INODE_DUMMY, + &BTRFS_I(fs_info->btree_inode)->runtime_flags); insert_inode_hash(fs_info->btree_inode); spin_lock_init(&fs_info->block_group_cache_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 59ae191d4f93..1902726fa70a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4355,10 +4355,9 @@ static unsigned drop_outstanding_extent(struct inode *inode) BTRFS_I(inode)->outstanding_extents--; if (BTRFS_I(inode)->outstanding_extents == 0 && - BTRFS_I(inode)->delalloc_meta_reserved) { + test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, + &BTRFS_I(inode)->runtime_flags)) drop_inode_space = 1; - BTRFS_I(inode)->delalloc_meta_reserved = 0; - } /* * If we have more or the same amount of outsanding extents than we have @@ -4465,7 +4464,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * Add an item to reserve for updating the inode when we complete the * delalloc io. */ - if (!BTRFS_I(inode)->delalloc_meta_reserved) { + if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED, + &BTRFS_I(inode)->runtime_flags)) { nr_extents++; extra_reserve = 1; } @@ -4511,7 +4511,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) spin_lock(&BTRFS_I(inode)->lock); if (extra_reserve) { - BTRFS_I(inode)->delalloc_meta_reserved = 1; + set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, + &BTRFS_I(inode)->runtime_flags); nr_extents--; } BTRFS_I(inode)->reserved_extents += nr_extents; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index cfc0ab915d03..c9005f216975 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -103,7 +103,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode, goto exists; } } - BTRFS_I(inode)->in_defrag = 1; + set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); rb_link_node(&defrag->rb_node, parent, p); rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); return; @@ -131,7 +131,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (btrfs_fs_closing(root->fs_info)) return 0; - if (BTRFS_I(inode)->in_defrag) + if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) return 0; if (trans) @@ -148,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, defrag->root = root->root_key.objectid; spin_lock(&root->fs_info->defrag_inodes_lock); - if (!BTRFS_I(inode)->in_defrag) + if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) __btrfs_add_inode_defrag(inode, defrag); else kfree(defrag); @@ -252,7 +252,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) goto next; /* do a chunk of defrag */ - BTRFS_I(inode)->in_defrag = 0; + clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); range.start = defrag->last_offset; num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, defrag_batch); @@ -1465,8 +1465,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp) * flush down new bytes that may have been written if the * application were using truncate to replace a file in place. */ - if (BTRFS_I(inode)->ordered_data_close) { - BTRFS_I(inode)->ordered_data_close = 0; + if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, + &BTRFS_I(inode)->runtime_flags)) { btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) filemap_flush(inode->i_mapping); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9a1b96fd672a..91ad63901756 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2182,10 +2182,9 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) insert = 1; } - if (!BTRFS_I(inode)->orphan_meta_reserved) { - BTRFS_I(inode)->orphan_meta_reserved = 1; + if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, + &BTRFS_I(inode)->runtime_flags)) reserve = 1; - } spin_unlock(&root->orphan_lock); /* grab metadata reservation from transaction handle */ @@ -2233,10 +2232,9 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) delete_item = 1; } - if (BTRFS_I(inode)->orphan_meta_reserved) { - BTRFS_I(inode)->orphan_meta_reserved = 0; + if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, + &BTRFS_I(inode)->runtime_flags)) release_rsv = 1; - } spin_unlock(&root->orphan_lock); if (trans && delete_item) { @@ -3642,7 +3640,8 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) * any new writes get down to disk quickly. */ if (newsize == 0) - BTRFS_I(inode)->ordered_data_close = 1; + set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, + &BTRFS_I(inode)->runtime_flags); /* we don't support swapfiles, so vmtruncate shouldn't fail */ truncate_setsize(inode, newsize); @@ -4102,7 +4101,7 @@ static struct inode *new_simple_dir(struct super_block *s, BTRFS_I(inode)->root = root; memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); - BTRFS_I(inode)->dummy_inode = 1; + set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; inode->i_op = &btrfs_dir_ro_inode_operations; @@ -4406,7 +4405,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) int ret = 0; bool nolock = false; - if (BTRFS_I(inode)->dummy_inode) + if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) return 0; if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) @@ -4439,7 +4438,7 @@ int btrfs_dirty_inode(struct inode *inode) struct btrfs_trans_handle *trans; int ret; - if (BTRFS_I(inode)->dummy_inode) + if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags)) return 0; trans = btrfs_join_transaction(root); @@ -6752,7 +6751,8 @@ static int btrfs_truncate(struct inode *inode) * using truncate to replace the contents of the file will * end up with a zero length file after a crash. */ - if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) + if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, + &BTRFS_I(inode)->runtime_flags)) btrfs_add_ordered_operation(trans, root, inode); while (1) { @@ -6889,11 +6889,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->outstanding_extents = 0; ei->reserved_extents = 0; - ei->ordered_data_close = 0; - ei->orphan_meta_reserved = 0; - ei->dummy_inode = 0; - ei->in_defrag = 0; - ei->delalloc_meta_reserved = 0; + ei->runtime_flags = 0; ei->force_compress = BTRFS_COMPRESS_NONE; ei->delayed_node = NULL; -- cgit v1.2.3 From 8a35d95ff4680a456d3ce47df9638f33d4f54f20 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 23 May 2012 14:26:42 -0400 Subject: Btrfs: fix how we deal with the orphan block rsv Ceph was hitting this race where we would remove an inode from the per-root orphan list before we would release the space we had reserved for the inode. We actually don't need a list or anything, we just need to make sure the root doesn't try to free up the orphan reserve until after the inodes have released their reservations. So use an atomic counter instead of a list on the root and only decrement the counter after we've released our reservation. I've tested this as well as several others and we no longer see the warnings that you would see while running ceph. Thanks, Btrfs: fix how we deal with the orphan block rsv Ceph was hitting this race where we would remove an inode from the per-root orphan list before we would release the space we had reserved for the inode. We actually don't need a list or anything, we just need to make sure the root doesn't try to free up the orphan reserve until after the inodes have released their reservations. So use an atomic counter instead of a list on the root and only decrement the counter after we've released our reservation. I've tested this as well as several others and we no longer see the warnings that you would see while running ceph. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 4 +--- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/inode.c | 38 +++++++++++++++++++++----------------- 4 files changed, 24 insertions(+), 22 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6265edb219e2..ce2c9d60031e 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -36,6 +36,7 @@ #define BTRFS_INODE_DUMMY 2 #define BTRFS_INODE_IN_DEFRAG 3 #define BTRFS_INODE_DELALLOC_META_RESERVED 4 +#define BTRFS_INODE_HAS_ORPHAN_ITEM 5 /* in memory btrfs inode */ struct btrfs_inode { @@ -70,9 +71,6 @@ struct btrfs_inode { /* used to order data wrt metadata */ struct btrfs_ordered_inode_tree ordered_tree; - /* for keeping track of orphaned inodes */ - struct list_head i_orphan; - /* list of all the delalloc inodes in the FS. There are times we need * to write all the delalloc pages to disk, and this list is used * to walk them all. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8fd72331d600..aad2600718a3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1375,7 +1375,7 @@ struct btrfs_root { struct list_head root_list; spinlock_t orphan_lock; - struct list_head orphan_list; + atomic_t orphan_inodes; struct btrfs_block_rsv *orphan_block_rsv; int orphan_item_inserted; int orphan_cleanup_state; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0cf8ef2b5b1a..297e5a8ed93c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1153,7 +1153,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->orphan_block_rsv = NULL; INIT_LIST_HEAD(&root->dirty_list); - INIT_LIST_HEAD(&root->orphan_list); INIT_LIST_HEAD(&root->root_list); spin_lock_init(&root->orphan_lock); spin_lock_init(&root->inode_lock); @@ -1166,6 +1165,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, atomic_set(&root->log_commit[0], 0); atomic_set(&root->log_commit[1], 0); atomic_set(&root->log_writers, 0); + atomic_set(&root->orphan_inodes, 0); root->log_batch = 0; root->log_transid = 0; root->last_log_commit = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 91ad63901756..029892887fc1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2104,12 +2104,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_block_rsv *block_rsv; int ret; - if (!list_empty(&root->orphan_list) || + if (atomic_read(&root->orphan_inodes) || root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) return; spin_lock(&root->orphan_lock); - if (!list_empty(&root->orphan_list)) { + if (atomic_read(&root->orphan_inodes)) { spin_unlock(&root->orphan_lock); return; } @@ -2166,8 +2166,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) block_rsv = NULL; } - if (list_empty(&BTRFS_I(inode)->i_orphan)) { - list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); + if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags)) { #if 0 /* * For proper ENOSPC handling, we should do orphan @@ -2180,6 +2180,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) insert = 1; #endif insert = 1; + atomic_dec(&root->orphan_inodes); } if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED, @@ -2197,6 +2198,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); if (ret && ret != -EEXIST) { + clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); btrfs_abort_transaction(trans, root, ret); return ret; } @@ -2227,10 +2230,9 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) int ret = 0; spin_lock(&root->orphan_lock); - if (!list_empty(&BTRFS_I(inode)->i_orphan)) { - list_del_init(&BTRFS_I(inode)->i_orphan); + if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags)) delete_item = 1; - } if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &BTRFS_I(inode)->runtime_flags)) @@ -2242,8 +2244,10 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ } - if (release_rsv) + if (release_rsv) { btrfs_orphan_release_metadata(inode); + atomic_dec(&root->orphan_inodes); + } return 0; } @@ -2371,6 +2375,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = PTR_ERR(trans); goto out; } + printk(KERN_ERR "auto deleting %Lu\n", + found_key.objectid); ret = btrfs_del_orphan_item(trans, root, found_key.objectid); BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ @@ -2382,9 +2388,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * add this inode to the orphan list so btrfs_orphan_del does * the proper thing when we hit it */ - spin_lock(&root->orphan_lock); - list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); - spin_unlock(&root->orphan_lock); + set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags); /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { @@ -3706,7 +3711,8 @@ void btrfs_evict_inode(struct inode *inode) btrfs_wait_ordered_range(inode, 0, (u64)-1); if (root->fs_info->log_root_recovering) { - BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); + BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags)); goto no_delete; } @@ -6903,7 +6909,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) mutex_init(&ei->log_mutex); mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); - INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->delalloc_inodes); INIT_LIST_HEAD(&ei->ordered_operations); RB_CLEAR_NODE(&ei->rb_node); @@ -6948,13 +6953,12 @@ void btrfs_destroy_inode(struct inode *inode) spin_unlock(&root->fs_info->ordered_extent_lock); } - spin_lock(&root->orphan_lock); - if (!list_empty(&BTRFS_I(inode)->i_orphan)) { + if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + &BTRFS_I(inode)->runtime_flags)) { printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", (unsigned long long)btrfs_ino(inode)); - list_del_init(&BTRFS_I(inode)->i_orphan); + atomic_dec(&root->orphan_inodes); } - spin_unlock(&root->orphan_lock); while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); -- cgit v1.2.3 From 2adcac1a7331d93a17285804819caa96070b231f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 23 May 2012 16:10:14 -0400 Subject: Btrfs: fall back to non-inline if we don't have enough space If cow_file_range_inline fails with ENOSPC we abort the transaction which isn't very nice. This really shouldn't be happening anyways but there's no sense in making it a horrible error when we can easily just go allocate normal data space for this stuff. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 029892887fc1..92df0a5d1d94 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -257,10 +257,13 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, ret = insert_inline_extent(trans, root, inode, start, inline_len, compressed_size, compress_type, compressed_pages); - if (ret) { + if (ret && ret != -ENOSPC) { btrfs_abort_transaction(trans, root, ret); return ret; + } else if (ret == -ENOSPC) { + return 1; } + btrfs_delalloc_release_metadata(inode, end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); return 0; -- cgit v1.2.3 From e41f941a23115e84a8550b3d901a13a14b2edc2f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Mar 2012 09:46:47 -0400 Subject: Btrfs: move over to use ->update_time Btrfs had been doing it's own file_update_time so we could catch ENOSPC properly, so just update our btrfs_update_time to work with the new stuff and then we'll be fancy later. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 1 - fs/btrfs/file.c | 2 +- fs/btrfs/inode.c | 53 ++++++++++++++--------------------------------------- 3 files changed, 15 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8fd72331d600..ba8743b9a825 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2922,7 +2922,6 @@ int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); int btrfs_dirty_inode(struct inode *inode); -int btrfs_update_time(struct file *file); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 53bf2d764bbc..974beb84ed61 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1404,7 +1404,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } - err = btrfs_update_time(file); + err = file_update_time(file); if (err) { mutex_unlock(&inode->i_mutex); goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ceb7b9c9edcc..3c1723a9ae6f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4431,46 +4431,18 @@ int btrfs_dirty_inode(struct inode *inode) * This is a copy of file_update_time. We need this so we can return error on * ENOSPC for updating the inode in the case of file write and mmap writes. */ -int btrfs_update_time(struct file *file) +static int btrfs_update_time(struct inode *inode, struct timespec *now, + int flags) { - struct inode *inode = file->f_path.dentry->d_inode; - struct timespec now; - int ret; - enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; - - /* First try to exhaust all avenues to not sync */ - if (IS_NOCMTIME(inode)) - return 0; - - now = current_fs_time(inode->i_sb); - if (!timespec_equal(&inode->i_mtime, &now)) - sync_it = S_MTIME; - - if (!timespec_equal(&inode->i_ctime, &now)) - sync_it |= S_CTIME; - - if (IS_I_VERSION(inode)) - sync_it |= S_VERSION; - - if (!sync_it) - return 0; - - /* Finally allowed to write? Takes lock. */ - if (mnt_want_write_file(file)) - return 0; - - /* Only change inode inside the lock region */ - if (sync_it & S_VERSION) + if (flags & S_VERSION) inode_inc_iversion(inode); - if (sync_it & S_CTIME) - inode->i_ctime = now; - if (sync_it & S_MTIME) - inode->i_mtime = now; - ret = btrfs_dirty_inode(inode); - if (!ret) - mark_inode_dirty_sync(inode); - mnt_drop_write(file->f_path.mnt); - return ret; + if (flags & S_CTIME) + inode->i_ctime = *now; + if (flags & S_MTIME) + inode->i_mtime = *now; + if (flags & S_ATIME) + inode->i_atime = *now; + return btrfs_dirty_inode(inode); } /* @@ -6576,7 +6548,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); if (!ret) { - ret = btrfs_update_time(vma->vm_file); + ret = file_update_time(vma->vm_file); reserved = 1; } if (ret) { @@ -7647,6 +7619,7 @@ static const struct inode_operations btrfs_file_inode_operations = { .permission = btrfs_permission, .fiemap = btrfs_fiemap, .get_acl = btrfs_get_acl, + .update_time = btrfs_update_time, }; static const struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, @@ -7657,6 +7630,7 @@ static const struct inode_operations btrfs_special_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .get_acl = btrfs_get_acl, + .update_time = btrfs_update_time, }; static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, @@ -7670,6 +7644,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .get_acl = btrfs_get_acl, + .update_time = btrfs_update_time, }; const struct dentry_operations btrfs_dentry_operations = { -- cgit v1.2.3 From beb42dd793193a3d4e72970bfa73cd8810f63cea Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 30 May 2012 15:35:17 -0400 Subject: Btrfs: pass locked_page into extent_clear_unlock_delalloc if theres an error While doing my enospc work I got a transaction abortion that resulted in a panic when we tried to unlock_page() an already unlocked page. This is because we aren't calling extent_clear_unlock_delalloc with the locked page so it was unlocking all the pages in the range. This is wrong since __extent_writepage expects to have the page locked still unless we return *page_started as 1. This should keep us from panicing. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 92df0a5d1d94..ccd6355af73f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode, if (IS_ERR(trans)) { extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, + start, end, locked_page, EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | @@ -963,7 +963,7 @@ out: out_unlock: extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - start, end, NULL, + start, end, locked_page, EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | -- cgit v1.2.3 From 17ca04aff7e6171df684b7b65804df8830eb8c15 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 31 May 2012 15:58:55 -0400 Subject: Btrfs: unlock everything properly in the error case for nocow I was getting hung on umount when a transaction was aborted because a range of one of the free space inodes was still locked. This is because the nocow stuff doesn't unlock anything on error. This fixed the problem and I verified that is what was happening. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ccd6355af73f..b7f398c36cb7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1136,8 +1136,18 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); - if (!path) + if (!path) { + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + start, end, locked_page, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); return -ENOMEM; + } nolock = btrfs_is_free_space_inode(root, inode); @@ -1147,6 +1157,15 @@ static noinline int run_delalloc_nocow(struct inode *inode, trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + start, end, locked_page, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); btrfs_free_path(path); return PTR_ERR(trans); } @@ -1327,8 +1346,11 @@ out_check: } btrfs_release_path(path); - if (cur_offset <= end && cow_start == (u64)-1) + if (cur_offset <= end && cow_start == (u64)-1) { cow_start = cur_offset; + cur_offset = end; + } + if (cow_start != (u64)-1) { ret = cow_file_range(inode, locked_page, cow_start, end, page_started, nr_written, 1); @@ -1347,6 +1369,17 @@ error: if (!ret) ret = err; + if (ret && cur_offset < end) + extent_clear_unlock_delalloc(inode, + &BTRFS_I(inode)->io_tree, + cur_offset, end, locked_page, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); + btrfs_free_path(path); return ret; } -- cgit v1.2.3 From 8180ef8894fa402443205cff1e23417e8d3434df Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 8 Jun 2012 15:16:12 -0400 Subject: Btrfs: keep inode pinned when compressing writes A user reported lots of problems using compression on the new code and it turns out part of the problem was that igrab() was failing when we added a new ordered extent. This is because when writing out an inode under compression we immediately return without actually doing anything to the pages, and then in another thread at some point down the line actually do the ordered dance. The problem is between the point that we start writeback and we actually add the ordered extent we could be trying to reclaim the inode, which makes igrab() return NULL. So we need to do an igrab() when we create the async extent and then drop it when we are done with it. This makes sure we stay pinned in memory until the ordered extent can get a reference on it and we are good to go. With this patch we no longer panic in btrfs_finish_ordered_io(). Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b7f398c36cb7..06075043da5d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work) compress_file_range(async_cow->inode, async_cow->locked_page, async_cow->start, async_cow->end, async_cow, &num_added); - if (num_added == 0) + if (num_added == 0) { + iput(async_cow->inode); async_cow->inode = NULL; + } } /* @@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work) { struct async_cow *async_cow; async_cow = container_of(work, struct async_cow, work); + if (async_cow->inode) + iput(async_cow->inode); kfree(async_cow); } @@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); BUG_ON(!async_cow); /* -ENOMEM */ - async_cow->inode = inode; + async_cow->inode = igrab(inode); async_cow->root = root; async_cow->locked_page = locked_page; async_cow->start = start; -- cgit v1.2.3 From 7ddf5a42d311d74fd9f7373cb56def0843c219f8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 8 Jun 2012 15:26:47 -0400 Subject: Btrfs: call filemap_fdatawrite twice for compression I removed this in an earlier commit and I was wrong. Because compression can return from filemap_fdatawrite() without having actually set any of it's pages as writeback() it can make filemap_fdatawait() do essentially nothing, and then we won't find any ordered extents because they may not have been created yet. So not only does this make fsync() completely useless, but it will also screw up if you truncate on a non-page aligned offset since we zero out the end and then wait on ordered extents and then call drop caches. We can drop the cache before the io completes and then we try to unpin the extent we just wrote we won't find it and everything goes sideways. So fix this by putting it back and put a giant comment there to keep me from trying to remove it in the future. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/inode.c | 15 +++++++++------ fs/btrfs/ordered-data.c | 22 +++++++++++++++++++++- 3 files changed, 31 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index e616f8872e69..12394a90d60f 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -37,6 +37,7 @@ #define BTRFS_INODE_IN_DEFRAG 3 #define BTRFS_INODE_DELALLOC_META_RESERVED 4 #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 +#define BTRFS_INODE_HAS_ASYNC_EXTENT 6 /* in memory btrfs inode */ struct btrfs_inode { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 06075043da5d..7a090fb4eb98 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1398,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, int ret; struct btrfs_root *root = BTRFS_I(inode)->root; - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 1, nr_written); - else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) + } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); - else if (!btrfs_test_opt(root, COMPRESS) && - !(BTRFS_I(inode)->force_compress) && - !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) + } else if (!btrfs_test_opt(root, COMPRESS) && + !(BTRFS_I(inode)->force_compress) && + !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) { ret = cow_file_range(inode, locked_page, start, end, page_started, nr_written, 1); - else + } else { + set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags); ret = cow_file_range_async(inode, locked_page, start, end, page_started, nr_written); + } return ret; } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9e138cdc36c5..643335a4fe3c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) /* start IO across the range first to instantiate any delalloc * extents */ - filemap_write_and_wait_range(inode->i_mapping, start, orig_end); + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + + /* + * So with compression we will find and lock a dirty page and clear the + * first one as dirty, setup an async extent, and immediately return + * with the entire range locked but with nobody actually marked with + * writeback. So we can't just filemap_write_and_wait_range() and + * expect it to work since it will just kick off a thread to do the + * actual work. So we need to call filemap_fdatawrite_range _again_ + * since it will wait on the page lock, which won't be unlocked until + * after the pages have been marked as writeback and so we're good to go + * from there. We have to do this otherwise we'll miss the ordered + * extents and that results in badness. Please Josef, do not think you + * know better and pull this out at some point in the future, it is + * right and you are wrong. + */ + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + + filemap_fdatawait_range(inode->i_mapping, start, orig_end); end = orig_end; found = 0; -- cgit v1.2.3 From bc1782374b128103ae9689e0753e0610f35b6bfd Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 14 Jun 2012 02:23:18 -0600 Subject: Btrfs: fix missing inherited flag in rename When we move a file into a directory with compression flag, we need to inherite BTRFS_INODE_COMPRESS and clear BTRFS_INODE_NOCOMPRESS as well. But if we move a file into a directory without compression flag, we need to clear both of them. It is the way how our setflags deals with compression flag, so keep the same behaviour here. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7a090fb4eb98..3f2c8cbe5ba6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7122,10 +7122,13 @@ static void fixup_inode_flags(struct inode *dir, struct inode *inode) else b_inode->flags &= ~BTRFS_INODE_NODATACOW; - if (b_dir->flags & BTRFS_INODE_COMPRESS) + if (b_dir->flags & BTRFS_INODE_COMPRESS) { b_inode->flags |= BTRFS_INODE_COMPRESS; - else - b_inode->flags &= ~BTRFS_INODE_COMPRESS; + b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS; + } else { + b_inode->flags &= ~(BTRFS_INODE_COMPRESS | + BTRFS_INODE_NOCOMPRESS); + } } static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, -- cgit v1.2.3 From cb77fcd88569cd2b7b25ecd4086ea932a53be9b3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Jun 2012 12:19:48 -0600 Subject: Btrfs: delay iput with async extents There is some concern that these iput()'s could be the final iputs and could induce lockups on people waiting on writeback. This would happen in the rare case that we don't create ordered extents because of an error, but it is theoretically possible and we already have a mechanism to deal with this so just make them delayed iputs to negate any worry. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3f2c8cbe5ba6..4a4f2d59a64b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -987,7 +987,7 @@ static noinline void async_cow_start(struct btrfs_work *work) async_cow->start, async_cow->end, async_cow, &num_added); if (num_added == 0) { - iput(async_cow->inode); + btrfs_add_delayed_iput(async_cow->inode); async_cow->inode = NULL; } } @@ -1023,7 +1023,7 @@ static noinline void async_cow_free(struct btrfs_work *work) struct async_cow *async_cow; async_cow = container_of(work, struct async_cow, work); if (async_cow->inode) - iput(async_cow->inode); + btrfs_add_delayed_iput(async_cow->inode); kfree(async_cow); } -- cgit v1.2.3