Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--  fs/btrfs/extent_io.c  185
1 file changed, 83 insertions(+), 102 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..68260180f587 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -17,12 +17,6 @@
#include "ctree.h"
#include "btrfs_inode.h"
-/* temporary define until extent_map moves out of btrfs */
-struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
- unsigned long extra_flags,
- void (*ctor)(void *, struct kmem_cache *,
- unsigned long));
-
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -50,20 +44,23 @@ struct extent_page_data {
/* tells writepage not to lock the state bits for this range
* it still does the unlocking
*/
- int extent_locked;
+ unsigned int extent_locked:1;
+
+ /* tells the submit_bio code to use a WRITE_SYNC */
+ unsigned int sync_io:1;
};
int __init extent_io_init(void)
{
- extent_state_cache = btrfs_cache_create("extent_state",
- sizeof(struct extent_state), 0,
- NULL);
+ extent_state_cache = kmem_cache_create("extent_state",
+ sizeof(struct extent_state), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_state_cache)
return -ENOMEM;
- extent_buffer_cache = btrfs_cache_create("extent_buffers",
- sizeof(struct extent_buffer), 0,
- NULL);
+ extent_buffer_cache = kmem_cache_create("extent_buffers",
+ sizeof(struct extent_buffer), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;
return 0;
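
Both caches created above need matching teardown when the module unloads. For context, a minimal sketch of the paired exit path; the name extent_io_exit mirrors the init naming, but the exact body shown here is an assumption rather than part of this patch:

void extent_io_exit(void)
{
	/* sketch: destroy in reverse creation order; the NULL checks are
	 * assumed defensive style, not taken from this patch */
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
}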
@@ -479,6 +476,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state;
struct extent_state *prealloc = NULL;
struct rb_node *node;
+ u64 last_end;
int err;
int set = 0;
@@ -501,6 +499,7 @@ again:
if (state->start > end)
goto out;
WARN_ON(state->end < start);
+ last_end = state->end;
/*
* | ---- desired range ---- |
@@ -527,9 +526,11 @@ again:
if (err)
goto out;
if (state->end <= end) {
- start = state->end + 1;
set |= clear_state_bit(tree, state, bits,
wake, delete);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
} else {
start = state->start;
}
@@ -555,8 +556,10 @@ again:
goto out;
}
- start = state->end + 1;
set |= clear_state_bit(tree, state, bits, wake, delete);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
goto search_again;
out:
@@ -710,8 +713,10 @@ again:
goto out;
}
set_state_bits(tree, state, bits);
- start = state->end + 1;
merge_state(tree, state);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
goto search_again;
}
@@ -745,8 +750,10 @@ again:
goto out;
if (state->end <= end) {
set_state_bits(tree, state, bits);
- start = state->end + 1;
merge_state(tree, state);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
} else {
start = state->start;
}
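
The last_end hunks above all guard the same two hazards: clear_state_bit() and merge_state() can free or merge the state record, so state->end must be captured before they run, and when a cached range ends at the very top of the u64 space, state->end + 1 wraps to zero and the search would restart from the beginning of the tree forever. A standalone illustration of the wraparound (plain C, not from the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t last_end = (uint64_t)-1; /* a state covering up to the max offset */
	uint64_t start = last_end + 1;    /* wraps to 0; the old loop would rescan forever */

	printf("start after wrap: %llu\n", (unsigned long long)start);
	return 0;
}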
@@ -1404,69 +1411,6 @@ out:
return total_bytes;
}
-#if 0
-/*
- * helper function to lock both pages and extents in the tree.
- * pages must be locked first.
- */
-static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
-{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- struct page *page;
- int err;
-
- while (index <= end_index) {
- page = grab_cache_page(tree->mapping, index);
- if (!page) {
- err = -ENOMEM;
- goto failed;
- }
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto failed;
- }
- index++;
- }
- lock_extent(tree, start, end, GFP_NOFS);
- return 0;
-
-failed:
- /*
- * we failed above in getting the page at 'index', so we undo here
- * up to but not including the page at 'index'
- */
- end_index = index;
- index = start >> PAGE_CACHE_SHIFT;
- while (index < end_index) {
- page = find_get_page(tree->mapping, index);
- unlock_page(page);
- page_cache_release(page);
- index++;
- }
- return err;
-}
-
-/*
- * helper function to unlock both pages and extents in the tree.
- */
-static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
-{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- struct page *page;
-
- while (index <= end_index) {
- page = find_get_page(tree->mapping, index);
- unlock_page(page);
- page_cache_release(page);
- index++;
- }
- unlock_extent(tree, start, end, GFP_NOFS);
- return 0;
-}
-#endif
-
/*
* set the private field for a given byte offset in the tree. If there isn't
* an extent_state there already, this does nothing.
@@ -2101,6 +2045,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
+static noinline void update_nr_written(struct page *page,
+ struct writeback_control *wbc,
+ unsigned long nr_written)
+{
+ wbc->nr_to_write -= nr_written;
+ if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+ wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+ page->mapping->writeback_index = page->index + nr_written;
+}
+
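
mapping->writeback_index is the cursor that range_cyclic writeback uses to resume scanning on its next pass, so keeping it current lets background flushing pick up after the pages just written instead of rescanning from offset zero. A sketch of the assumed consumer, modeled on write_cache_pages() in mm/page-writeback.c of this era:

/* assumed consumer (sketch, not part of this patch):
 *
 *	if (wbc->range_cyclic)
 *		index = mapping->writeback_index;  // resume where the last pass stopped
 *	...
 *	mapping->writeback_index = index;          // record progress for the next pass
 */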
/*
* the writepage semantics are similar to regular writepage. extent
* records are inserted to lock ranges in the tree, and as dirty areas
@@ -2136,8 +2090,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 delalloc_end;
int page_started;
int compressed;
+ int write_flags;
unsigned long nr_written = 0;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ write_flags = WRITE_SYNC_PLUG;
+ else
+ write_flags = WRITE;
+
WARN_ON(!PageLocked(page));
pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index ||
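
WRITE_SYNC_PLUG is chosen over plain WRITE_SYNC here because writepage submits pages one at a time. A summary of the flag semantics as I read them for this kernel generation (an assumption about include/linux/fs.h of the time, not something this patch defines):

/* assumed block-layer write flags, circa this kernel:
 *   WRITE            normal asynchronous write
 *   WRITE_SYNC       synchronous write, unplugs the queue on each submit
 *   WRITE_SYNC_PLUG  synchronous write that leaves the queue plugged, so
 *                    back-to-back writepage submissions can still merge
 */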
@@ -2164,6 +2124,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
delalloc_end = 0;
page_started = 0;
if (!epd->extent_locked) {
+ /*
+ * make sure the wbc mapping index is at least updated
+ * to this page.
+ */
+ update_nr_written(page, wbc, 0);
+
while (delalloc_end < page_end) {
nr_delalloc = find_lock_delalloc_range(inode, tree,
page,
@@ -2185,7 +2151,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
*/
if (page_started) {
ret = 0;
- goto update_nr_written;
+ /*
+ * we've unlocked the page, so we can't update
+ * the mapping's writeback index, just update
+ * nr_to_write.
+ */
+ wbc->nr_to_write -= nr_written;
+ goto done_unlocked;
}
}
lock_extent(tree, start, page_end, GFP_NOFS);
@@ -2198,13 +2170,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (ret == -EAGAIN) {
unlock_extent(tree, start, page_end, GFP_NOFS);
redirty_page_for_writepage(wbc, page);
+ update_nr_written(page, wbc, nr_written);
unlock_page(page);
ret = 0;
- goto update_nr_written;
+ goto done_unlocked;
}
}
- nr_written++;
+ /*
+ * we don't want to touch the inode after unlocking the page,
+ * so we update the mapping writeback index now
+ */
+ update_nr_written(page, wbc, nr_written + 1);
end = page_end;
if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
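
The comment in the hunk above is the key ordering constraint: once unlock_page() runs, the page can be truncated and its mapping torn down. An illustration of the sequence the patch avoids (not real code):

/* the hazard being avoided (illustration only):
 *
 *	unlock_page(page);
 *	...
 *	page->mapping->writeback_index = ...;  // too late: after the unlock the
 *	                                       // page may be truncated and
 *	                                       // page->mapping may be NULL
 */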
@@ -2314,9 +2291,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
(unsigned long long)end);
}
- ret = submit_extent_page(WRITE, tree, page, sector,
- iosize, pg_offset, bdev,
- &epd->bio, max_nr,
+ ret = submit_extent_page(write_flags, tree, page,
+ sector, iosize, pg_offset,
+ bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
0, 0, 0);
if (ret)
@@ -2336,11 +2313,8 @@ done:
unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
unlock_page(page);
-update_nr_written:
- wbc->nr_to_write -= nr_written;
- if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
- wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
- page->mapping->writeback_index = page->index + nr_written;
+done_unlocked:
+
return 0;
}
@@ -2460,15 +2434,23 @@ retry:
return ret;
}
-static noinline void flush_write_bio(void *data)
+static void flush_epd_write_bio(struct extent_page_data *epd)
{
- struct extent_page_data *epd = data;
if (epd->bio) {
- submit_one_bio(WRITE, epd->bio, 0, 0);
+ if (epd->sync_io)
+ submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
+ else
+ submit_one_bio(WRITE, epd->bio, 0, 0);
epd->bio = NULL;
}
}
+static noinline void flush_write_bio(void *data)
+{
+ struct extent_page_data *epd = data;
+ flush_epd_write_bio(epd);
+}
+
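flush_epd_write_bio() centralizes the final flush so every entry point honors sync_io the same way, while the old flush_write_bio() signature survives as the callback passed to extent_write_cache_pages(). The call pattern the remaining hunks converge on, sketched from the patch itself:

/* sketch of the pattern used by the callers below:
 *
 *	struct extent_page_data epd = {
 *		...
 *		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 *	};
 *	... __extent_writepage() / extent_write_cache_pages() fill epd.bio ...
 *	flush_epd_write_bio(&epd);  // submits with WRITE_SYNC when sync_io is set
 */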
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent,
struct writeback_control *wbc)
@@ -2480,23 +2462,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
.bdi = wbc->bdi,
- .sync_mode = WB_SYNC_NONE,
+ .sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
.range_start = page_offset(page) + PAGE_CACHE_SIZE,
.range_end = (loff_t)-1,
};
-
ret = __extent_writepage(page, wbc, &epd);
extent_write_cache_pages(tree, mapping, &wbc_writepages,
__extent_writepage, &epd, flush_write_bio);
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
@@ -2515,6 +2496,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 1,
+ .sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
.bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2522,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
start += PAGE_CACHE_SIZE;
}
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}
@@ -2556,13 +2537,13 @@ int extent_writepages(struct extent_io_tree *tree,
.tree = tree,
.get_extent = get_extent,
.extent_locked = 0,
+ .sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
ret = extent_write_cache_pages(tree, mapping, wbc,
__extent_writepage, &epd,
flush_write_bio);
- if (epd.bio)
- submit_one_bio(WRITE, epd.bio, 0, 0);
+ flush_epd_write_bio(&epd);
return ret;
}