summaryrefslogtreecommitdiffstats
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r--mm/filemap.c195
1 files changed, 124 insertions, 71 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 65b4b6e7f7bd..594d73fef8b4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -130,17 +130,8 @@ static int page_cache_tree_insert(struct address_space *mapping,
return -EEXIST;
mapping->nrexceptional--;
- if (!dax_mapping(mapping)) {
- if (shadowp)
- *shadowp = p;
- } else {
- /* DAX can replace empty locked entry with a hole */
- WARN_ON_ONCE(p !=
- dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
- /* Wakeup waiters for exceptional entry lock */
- dax_wake_mapping_entry_waiter(mapping, page->index, p,
- true);
- }
+ if (shadowp)
+ *shadowp = p;
}
__radix_tree_replace(&mapping->page_tree, node, slot, page,
workingset_update_node, mapping);
@@ -402,8 +393,7 @@ bool filemap_range_has_page(struct address_space *mapping,
{
pgoff_t index = start_byte >> PAGE_SHIFT;
pgoff_t end = end_byte >> PAGE_SHIFT;
- struct pagevec pvec;
- bool ret;
+ struct page *page;
if (end_byte < start_byte)
return false;
@@ -411,12 +401,10 @@ bool filemap_range_has_page(struct address_space *mapping,
if (mapping->nrpages == 0)
return false;
- pagevec_init(&pvec, 0);
- if (!pagevec_lookup(&pvec, mapping, index, 1))
+ if (!find_get_pages_range(mapping, &index, end, 1, &page))
return false;
- ret = (pvec.pages[0]->index <= end);
- pagevec_release(&pvec);
- return ret;
+ put_page(page);
+ return true;
}
EXPORT_SYMBOL(filemap_range_has_page);
@@ -476,6 +464,29 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
EXPORT_SYMBOL(filemap_fdatawait_range);
/**
+ * file_fdatawait_range - wait for writeback to complete
+ * @file: file pointing to address space structure to wait for
+ * @start_byte: offset in bytes where the range starts
+ * @end_byte: offset in bytes where the range ends (inclusive)
+ *
+ * Walk the list of under-writeback pages of the address space that file
+ * refers to, in the given range and wait for all of them. Check error
+ * status of the address space vs. the file->f_wb_err cursor and return it.
+ *
+ * Since the error status of the file is advanced by this function,
+ * callers are responsible for checking the return value and handling and/or
+ * reporting the error.
+ */
+int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte)
+{
+ struct address_space *mapping = file->f_mapping;
+
+ __filemap_fdatawait_range(mapping, start_byte, end_byte);
+ return file_check_and_advance_wb_err(file);
+}
+EXPORT_SYMBOL(file_fdatawait_range);
+
+/**
* filemap_fdatawait_keep_errors - wait for writeback without clearing errors
* @mapping: address space structure to wait for
*
@@ -489,45 +500,22 @@ EXPORT_SYMBOL(filemap_fdatawait_range);
*/
int filemap_fdatawait_keep_errors(struct address_space *mapping)
{
- loff_t i_size = i_size_read(mapping->host);
-
- if (i_size == 0)
- return 0;
-
- __filemap_fdatawait_range(mapping, 0, i_size - 1);
+ __filemap_fdatawait_range(mapping, 0, LLONG_MAX);
return filemap_check_and_keep_errors(mapping);
}
EXPORT_SYMBOL(filemap_fdatawait_keep_errors);
-/**
- * filemap_fdatawait - wait for all under-writeback pages to complete
- * @mapping: address space structure to wait for
- *
- * Walk the list of under-writeback pages of the given address space
- * and wait for all of them. Check error status of the address space
- * and return it.
- *
- * Since the error status of the address space is cleared by this function,
- * callers are responsible for checking the return value and handling and/or
- * reporting the error.
- */
-int filemap_fdatawait(struct address_space *mapping)
+static bool mapping_needs_writeback(struct address_space *mapping)
{
- loff_t i_size = i_size_read(mapping->host);
-
- if (i_size == 0)
- return 0;
-
- return filemap_fdatawait_range(mapping, 0, i_size - 1);
+ return (!dax_mapping(mapping) && mapping->nrpages) ||
+ (dax_mapping(mapping) && mapping->nrexceptional);
}
-EXPORT_SYMBOL(filemap_fdatawait);
int filemap_write_and_wait(struct address_space *mapping)
{
int err = 0;
- if ((!dax_mapping(mapping) && mapping->nrpages) ||
- (dax_mapping(mapping) && mapping->nrexceptional)) {
+ if (mapping_needs_writeback(mapping)) {
err = filemap_fdatawrite(mapping);
/*
* Even if the above returned error, the pages may be
@@ -566,8 +554,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
{
int err = 0;
- if ((!dax_mapping(mapping) && mapping->nrpages) ||
- (dax_mapping(mapping) && mapping->nrexceptional)) {
+ if (mapping_needs_writeback(mapping)) {
err = __filemap_fdatawrite_range(mapping, lstart, lend,
WB_SYNC_ALL);
/* See comment of filemap_write_and_wait() */
@@ -589,7 +576,7 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
void __filemap_set_wb_err(struct address_space *mapping, int err)
{
- errseq_t eseq = __errseq_set(&mapping->wb_err, err);
+ errseq_t eseq = errseq_set(&mapping->wb_err, err);
trace_filemap_set_wb_err(mapping, eseq);
}
@@ -633,6 +620,14 @@ int file_check_and_advance_wb_err(struct file *file)
trace_file_check_and_advance_wb_err(file, old);
spin_unlock(&file->f_lock);
}
+
+ /*
+ * We're mostly using this function as a drop in replacement for
+ * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect
+ * that the legacy code would have had on these flags.
+ */
+ clear_bit(AS_EIO, &mapping->flags);
+ clear_bit(AS_ENOSPC, &mapping->flags);
return err;
}
EXPORT_SYMBOL(file_check_and_advance_wb_err);
@@ -656,8 +651,7 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
int err = 0, err2;
struct address_space *mapping = file->f_mapping;
- if ((!dax_mapping(mapping) && mapping->nrpages) ||
- (dax_mapping(mapping) && mapping->nrexceptional)) {
+ if (mapping_needs_writeback(mapping)) {
err = __filemap_fdatawrite_range(mapping, lstart, lend,
WB_SYNC_ALL);
/* See comment of filemap_write_and_wait() */
@@ -923,13 +917,33 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
wait_queue_head_t *q = page_waitqueue(page);
struct wait_page_key key;
unsigned long flags;
+ wait_queue_entry_t bookmark;
key.page = page;
key.bit_nr = bit_nr;
key.page_match = 0;
+ bookmark.flags = 0;
+ bookmark.private = NULL;
+ bookmark.func = NULL;
+ INIT_LIST_HEAD(&bookmark.entry);
+
spin_lock_irqsave(&q->lock, flags);
- __wake_up_locked_key(q, TASK_NORMAL, &key);
+ __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
+
+ while (bookmark.flags & WQ_FLAG_BOOKMARK) {
+ /*
+ * Take a breather from holding the lock,
+ * allow pages that finish wake up asynchronously
+ * to acquire the lock and remove themselves
+ * from wait queue
+ */
+ spin_unlock_irqrestore(&q->lock, flags);
+ cpu_relax();
+ spin_lock_irqsave(&q->lock, flags);
+ __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
+ }
+
/*
* It is possible for other pages to have collided on the waitqueue
* hash, so in that case check for a page match. That prevents a long-
@@ -1566,23 +1580,29 @@ export:
}
/**
- * find_get_pages - gang pagecache lookup
+ * find_get_pages_range - gang pagecache lookup
* @mapping: The address_space to search
* @start: The starting page index
+ * @end: The final page index (inclusive)
* @nr_pages: The maximum number of pages
* @pages: Where the resulting pages are placed
*
- * find_get_pages() will search for and return a group of up to
- * @nr_pages pages in the mapping. The pages are placed at @pages.
- * find_get_pages() takes a reference against the returned pages.
+ * find_get_pages_range() will search for and return a group of up to @nr_pages
+ * pages in the mapping starting at index @start and up to index @end
+ * (inclusive). The pages are placed at @pages. find_get_pages_range() takes
+ * a reference against the returned pages.
*
* The search returns a group of mapping-contiguous pages with ascending
* indexes. There may be holes in the indices due to not-present pages.
+ * We also update @start to index the next page for the traversal.
*
- * find_get_pages() returns the number of pages which were found.
+ * find_get_pages_range() returns the number of pages which were found. If this
+ * number is smaller than @nr_pages, the end of specified range has been
+ * reached.
*/
-unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
- unsigned int nr_pages, struct page **pages)
+unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
+ pgoff_t end, unsigned int nr_pages,
+ struct page **pages)
{
struct radix_tree_iter iter;
void **slot;
@@ -1592,8 +1612,11 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
return 0;
rcu_read_lock();
- radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+ radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, *start) {
struct page *head, *page;
+
+ if (iter.index > end)
+ break;
repeat:
page = radix_tree_deref_slot(slot);
if (unlikely(!page))
@@ -1629,11 +1652,25 @@ repeat:
}
pages[ret] = page;
- if (++ret == nr_pages)
- break;
+ if (++ret == nr_pages) {
+ *start = pages[ret - 1]->index + 1;
+ goto out;
+ }
}
+ /*
+ * We come here when there is no page beyond @end. We take care to not
+ * overflow the index @start as it confuses some of the callers. This
+ * breaks the iteration when there is page at index -1 but that is
+ * already broken anyway.
+ */
+ if (end == (pgoff_t)-1)
+ *start = (pgoff_t)-1;
+ else
+ *start = end + 1;
+out:
rcu_read_unlock();
+
return ret;
}
@@ -1888,9 +1925,8 @@ static void shrink_readahead_size_eio(struct file *filp,
}
/**
- * do_generic_file_read - generic file read routine
- * @filp: the file to read
- * @ppos: current file position
+ * generic_file_buffered_read - generic file read routine
+ * @iocb: the iocb to read
* @iter: data destination
* @written: already copied
*
@@ -1900,12 +1936,14 @@ static void shrink_readahead_size_eio(struct file *filp,
* This is really ugly. But the goto's actually try to clarify some
* of the logic when it comes to error handling etc.
*/
-static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
+static ssize_t generic_file_buffered_read(struct kiocb *iocb,
struct iov_iter *iter, ssize_t written)
{
+ struct file *filp = iocb->ki_filp;
struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
struct file_ra_state *ra = &filp->f_ra;
+ loff_t *ppos = &iocb->ki_pos;
pgoff_t index;
pgoff_t last_index;
pgoff_t prev_index;
@@ -1938,6 +1976,8 @@ find_page:
page = find_get_page(mapping, index);
if (!page) {
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ goto would_block;
page_cache_sync_readahead(mapping,
ra, filp,
index, last_index - index);
@@ -1951,6 +1991,11 @@ find_page:
index, last_index - index);
}
if (!PageUptodate(page)) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ put_page(page);
+ goto would_block;
+ }
+
/*
* See comment in do_read_cache_page on why
* wait_on_page_locked is used to avoid unnecessarily
@@ -2132,6 +2177,8 @@ no_cached_page:
goto readpage;
}
+would_block:
+ error = -EAGAIN;
out:
ra->prev_pos = prev_index;
ra->prev_pos <<= PAGE_SHIFT;
@@ -2153,14 +2200,14 @@ out:
ssize_t
generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
- struct file *file = iocb->ki_filp;
- ssize_t retval = 0;
size_t count = iov_iter_count(iter);
+ ssize_t retval = 0;
if (!count)
goto out; /* skip atime */
if (iocb->ki_flags & IOCB_DIRECT) {
+ struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
loff_t size;
@@ -2201,7 +2248,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out;
}
- retval = do_generic_file_read(file, &iocb->ki_pos, iter, retval);
+ retval = generic_file_buffered_read(iocb, iter, retval);
out:
return retval;
}
@@ -2887,9 +2934,15 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
* we're writing. Either one is a pretty crazy thing to do,
* so we don't support it 100%. If this invalidation
* fails, tough, the write still worked...
+ *
+ * Most of the time we do not need this since dio_complete() will do
+ * the invalidation for us. However there are some file systems that
+ * do not end up with dio_complete() being called, so let's not break
+ * them by removing it completely
*/
- invalidate_inode_pages2_range(mapping,
- pos >> PAGE_SHIFT, end);
+ if (mapping->nrpages)
+ invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_SHIFT, end);
if (written > 0) {
pos += written;