summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c189
1 files changed, 133 insertions, 56 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c9018a05036e..01c21b6c6d43 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -20,6 +20,7 @@
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
+#include "rcu-string.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -186,7 +187,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
return parent;
}
- entry = rb_entry(node, struct tree_entry, rb_node);
rb_link_node(node, parent, p);
rb_insert_color(node, root);
return NULL;
@@ -413,7 +413,7 @@ static struct extent_state *next_state(struct extent_state *state)
/*
* utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1)
+ * it will optionally wake up any one waiting on this state (wake == 1).
*
* If no bits are set on the state struct after clearing things, the
* struct is freed and removed from the tree
@@ -570,10 +570,8 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
- clear_state_bit(tree, state, &bits, wake);
- if (last_end == (u64)-1)
- goto out;
- start = last_end + 1;
+ state = clear_state_bit(tree, state, &bits, wake);
+ goto next;
}
goto search_again;
}
@@ -781,7 +779,6 @@ hit_next:
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
- struct rb_node *next_node;
if (state->state & exclusive_bits) {
*failed_start = state->start;
err = -EEXIST;
@@ -789,20 +786,15 @@ hit_next:
}
set_state_bits(tree, state, &bits);
-
cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
-
start = last_end + 1;
- next_node = rb_next(&state->rb_node);
- if (next_node && start < end && prealloc && !need_resched()) {
- state = rb_entry(next_node, struct extent_state,
- rb_node);
- if (state->start == start)
- goto hit_next;
- }
+ state = next_state(state);
+ if (start < end && state && state->start == start &&
+ !need_resched())
+ goto hit_next;
goto search_again;
}
@@ -845,6 +837,10 @@ hit_next:
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
+ state = next_state(state);
+ if (start < end && state && state->start == start &&
+ !need_resched())
+ goto hit_next;
}
goto search_again;
}
@@ -994,21 +990,14 @@ hit_next:
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
- struct rb_node *next_node;
-
set_state_bits(tree, state, &bits);
- clear_state_bit(tree, state, &clear_bits, 0);
+ state = clear_state_bit(tree, state, &clear_bits, 0);
if (last_end == (u64)-1)
goto out;
-
start = last_end + 1;
- next_node = rb_next(&state->rb_node);
- if (next_node && start < end && prealloc && !need_resched()) {
- state = rb_entry(next_node, struct extent_state,
- rb_node);
- if (state->start == start)
- goto hit_next;
- }
+ if (start < end && state && state->start == start &&
+ !need_resched())
+ goto hit_next;
goto search_again;
}
@@ -1042,10 +1031,13 @@ hit_next:
goto out;
if (state->end <= end) {
set_state_bits(tree, state, &bits);
- clear_state_bit(tree, state, &clear_bits, 0);
+ state = clear_state_bit(tree, state, &clear_bits, 0);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
+ if (start < end && state && state->start == start &&
+ !need_resched())
+ goto hit_next;
}
goto search_again;
}
@@ -1173,9 +1165,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
cached_state, mask);
}
-static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached_state,
- gfp_t mask)
+int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached_state, gfp_t mask)
{
return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
cached_state, mask);
@@ -1293,7 +1284,7 @@ out:
* returned if we find something, and *start_ret and *end_ret are
* set to reflect the state struct that was found.
*
- * If nothing was found, 1 is returned, < 0 on error
+ * If nothing was found, 1 is returned. If found something, return 0.
*/
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, int bits)
@@ -1923,12 +1914,13 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
/* try to remap that extent elsewhere? */
bio_put(bio);
+ btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
}
- printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s "
- "sector %llu)\n", page->mapping->host->i_ino, start,
- dev->name, sector);
+ printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
+ "(dev %s sector %llu)\n", page->mapping->host->i_ino,
+ start, rcu_str_deref(dev->name), sector);
bio_put(bio);
return 0;
@@ -2222,17 +2214,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
uptodate = 0;
}
- if (!uptodate && tree->ops &&
- tree->ops->writepage_io_failed_hook) {
- ret = tree->ops->writepage_io_failed_hook(NULL, page,
- start, end, NULL);
- /* Writeback already completed */
- if (ret == 0)
- return 1;
- }
-
if (!uptodate) {
- clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
ClearPageUptodate(page);
SetPageError(page);
}
@@ -2347,10 +2329,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
- if (ret)
+ if (ret) {
+ /* no IO indicated but software detected errors
+ * in the block, either checksum errors or
+ * issues with the contents */
+ struct btrfs_root *root =
+ BTRFS_I(page->mapping->host)->root;
+ struct btrfs_device *device;
+
uptodate = 0;
- else
+ device = btrfs_find_device_for_logical(
+ root, start, mirror);
+ if (device)
+ btrfs_dev_stat_inc_and_print(device,
+ BTRFS_DEV_STAT_CORRUPTION_ERRS);
+ } else {
clean_io_failure(start, page);
+ }
}
if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
@@ -3164,7 +3159,7 @@ static int write_one_eb(struct extent_buffer *eb,
u64 offset = eb->start;
unsigned long i, num_pages;
int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
- int ret;
+ int ret = 0;
clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
num_pages = num_extent_pages(eb->start, eb->len);
@@ -3329,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
writepage_t writepage, void *data,
void (*flush_fn)(void *))
{
+ struct inode *inode = mapping->host;
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
@@ -3339,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
int scanned = 0;
int tag;
+ /*
+ * We have to hold onto the inode so that ordered extents can do their
+ * work when the IO finishes. The alternative to this is failing to add
+ * an ordered extent if the igrab() fails there and that is a huge pain
+ * to deal with, so instead just hold onto the inode throughout the
+ * writepages operation. If it fails here we are freeing up the inode
+ * anyway and we'd rather not waste our time writing out stuff that is
+ * going to be truncated anyway.
+ */
+ if (!igrab(inode))
+ return 0;
+
pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
@@ -3433,6 +3441,7 @@ retry:
index = 0;
goto retry;
}
+ btrfs_add_delayed_iput(inode);
return ret;
}
@@ -3930,6 +3939,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
eb->start = start;
eb->len = len;
eb->tree = tree;
+ eb->bflags = 0;
rwlock_init(&eb->lock);
atomic_set(&eb->write_locks, 0);
atomic_set(&eb->read_locks, 0);
@@ -3967,6 +3977,60 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
return eb;
}
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
+{
+ unsigned long i;
+ struct page *p;
+ struct extent_buffer *new;
+ unsigned long num_pages = num_extent_pages(src->start, src->len);
+
+ new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC);
+ if (new == NULL)
+ return NULL;
+
+ for (i = 0; i < num_pages; i++) {
+ p = alloc_page(GFP_ATOMIC);
+ BUG_ON(!p);
+ attach_extent_buffer_page(new, p);
+ WARN_ON(PageDirty(p));
+ SetPageUptodate(p);
+ new->pages[i] = p;
+ }
+
+ copy_extent_buffer(new, src, 0, 0, src->len);
+ set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
+ set_bit(EXTENT_BUFFER_DUMMY, &new->bflags);
+
+ return new;
+}
+
+struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
+{
+ struct extent_buffer *eb;
+ unsigned long num_pages = num_extent_pages(0, len);
+ unsigned long i;
+
+ eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC);
+ if (!eb)
+ return NULL;
+
+ for (i = 0; i < num_pages; i++) {
+ eb->pages[i] = alloc_page(GFP_ATOMIC);
+ if (!eb->pages[i])
+ goto err;
+ }
+ set_extent_buffer_uptodate(eb);
+ btrfs_set_header_nritems(eb, 0);
+ set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+
+ return eb;
+err:
+ for (i--; i > 0; i--)
+ __free_page(eb->pages[i]);
+ __free_extent_buffer(eb);
+ return NULL;
+}
+
static int extent_buffer_under_io(struct extent_buffer *eb)
{
return (atomic_read(&eb->io_pages) ||
@@ -3981,18 +4045,21 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
unsigned long start_idx)
{
unsigned long index;
+ unsigned long num_pages;
struct page *page;
+ int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
BUG_ON(extent_buffer_under_io(eb));
- index = num_extent_pages(eb->start, eb->len);
+ num_pages = num_extent_pages(eb->start, eb->len);
+ index = start_idx + num_pages;
if (start_idx >= index)
return;
do {
index--;
page = extent_buffer_page(eb, index);
- if (page) {
+ if (page && mapped) {
spin_lock(&page->mapping->private_lock);
/*
* We do this since we'll remove the pages after we've
@@ -4017,6 +4084,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
}
spin_unlock(&page->mapping->private_lock);
+ }
+ if (page) {
/* One for when we alloced the page */
page_cache_release(page);
}
@@ -4235,14 +4304,18 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
{
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
- struct extent_io_tree *tree = eb->tree;
+ if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) {
+ spin_unlock(&eb->refs_lock);
+ } else {
+ struct extent_io_tree *tree = eb->tree;
- spin_unlock(&eb->refs_lock);
+ spin_unlock(&eb->refs_lock);
- spin_lock(&tree->buffer_lock);
- radix_tree_delete(&tree->buffer,
- eb->start >> PAGE_CACHE_SHIFT);
- spin_unlock(&tree->buffer_lock);
+ spin_lock(&tree->buffer_lock);
+ radix_tree_delete(&tree->buffer,
+ eb->start >> PAGE_CACHE_SHIFT);
+ spin_unlock(&tree->buffer_lock);
+ }
/* Should be safe to release our pages at this point */
btrfs_release_extent_buffer_page(eb, 0);
@@ -4260,6 +4333,10 @@ void free_extent_buffer(struct extent_buffer *eb)
spin_lock(&eb->refs_lock);
if (atomic_read(&eb->refs) == 2 &&
+ test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
+ atomic_dec(&eb->refs);
+
+ if (atomic_read(&eb->refs) == 2 &&
test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
!extent_buffer_under_io(eb) &&
test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))