diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 195 |
1 files changed, 124 insertions, 71 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 65b4b6e7f7bd..594d73fef8b4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -130,17 +130,8 @@ static int page_cache_tree_insert(struct address_space *mapping, return -EEXIST; mapping->nrexceptional--; - if (!dax_mapping(mapping)) { - if (shadowp) - *shadowp = p; - } else { - /* DAX can replace empty locked entry with a hole */ - WARN_ON_ONCE(p != - dax_radix_locked_entry(0, RADIX_DAX_EMPTY)); - /* Wakeup waiters for exceptional entry lock */ - dax_wake_mapping_entry_waiter(mapping, page->index, p, - true); - } + if (shadowp) + *shadowp = p; } __radix_tree_replace(&mapping->page_tree, node, slot, page, workingset_update_node, mapping); @@ -402,8 +393,7 @@ bool filemap_range_has_page(struct address_space *mapping, { pgoff_t index = start_byte >> PAGE_SHIFT; pgoff_t end = end_byte >> PAGE_SHIFT; - struct pagevec pvec; - bool ret; + struct page *page; if (end_byte < start_byte) return false; @@ -411,12 +401,10 @@ bool filemap_range_has_page(struct address_space *mapping, if (mapping->nrpages == 0) return false; - pagevec_init(&pvec, 0); - if (!pagevec_lookup(&pvec, mapping, index, 1)) + if (!find_get_pages_range(mapping, &index, end, 1, &page)) return false; - ret = (pvec.pages[0]->index <= end); - pagevec_release(&pvec); - return ret; + put_page(page); + return true; } EXPORT_SYMBOL(filemap_range_has_page); @@ -476,6 +464,29 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, EXPORT_SYMBOL(filemap_fdatawait_range); /** + * file_fdatawait_range - wait for writeback to complete + * @file: file pointing to address space structure to wait for + * @start_byte: offset in bytes where the range starts + * @end_byte: offset in bytes where the range ends (inclusive) + * + * Walk the list of under-writeback pages of the address space that file + * refers to, in the given range and wait for all of them. Check error + * status of the address space vs. the file->f_wb_err cursor and return it. + * + * Since the error status of the file is advanced by this function, + * callers are responsible for checking the return value and handling and/or + * reporting the error. + */ +int file_fdatawait_range(struct file *file, loff_t start_byte, loff_t end_byte) +{ + struct address_space *mapping = file->f_mapping; + + __filemap_fdatawait_range(mapping, start_byte, end_byte); + return file_check_and_advance_wb_err(file); +} +EXPORT_SYMBOL(file_fdatawait_range); + +/** * filemap_fdatawait_keep_errors - wait for writeback without clearing errors * @mapping: address space structure to wait for * @@ -489,45 +500,22 @@ EXPORT_SYMBOL(filemap_fdatawait_range); */ int filemap_fdatawait_keep_errors(struct address_space *mapping) { - loff_t i_size = i_size_read(mapping->host); - - if (i_size == 0) - return 0; - - __filemap_fdatawait_range(mapping, 0, i_size - 1); + __filemap_fdatawait_range(mapping, 0, LLONG_MAX); return filemap_check_and_keep_errors(mapping); } EXPORT_SYMBOL(filemap_fdatawait_keep_errors); -/** - * filemap_fdatawait - wait for all under-writeback pages to complete - * @mapping: address space structure to wait for - * - * Walk the list of under-writeback pages of the given address space - * and wait for all of them. Check error status of the address space - * and return it. - * - * Since the error status of the address space is cleared by this function, - * callers are responsible for checking the return value and handling and/or - * reporting the error. - */ -int filemap_fdatawait(struct address_space *mapping) +static bool mapping_needs_writeback(struct address_space *mapping) { - loff_t i_size = i_size_read(mapping->host); - - if (i_size == 0) - return 0; - - return filemap_fdatawait_range(mapping, 0, i_size - 1); + return (!dax_mapping(mapping) && mapping->nrpages) || + (dax_mapping(mapping) && mapping->nrexceptional); } -EXPORT_SYMBOL(filemap_fdatawait); int filemap_write_and_wait(struct address_space *mapping) { int err = 0; - if ((!dax_mapping(mapping) && mapping->nrpages) || - (dax_mapping(mapping) && mapping->nrexceptional)) { + if (mapping_needs_writeback(mapping)) { err = filemap_fdatawrite(mapping); /* * Even if the above returned error, the pages may be @@ -566,8 +554,7 @@ int filemap_write_and_wait_range(struct address_space *mapping, { int err = 0; - if ((!dax_mapping(mapping) && mapping->nrpages) || - (dax_mapping(mapping) && mapping->nrexceptional)) { + if (mapping_needs_writeback(mapping)) { err = __filemap_fdatawrite_range(mapping, lstart, lend, WB_SYNC_ALL); /* See comment of filemap_write_and_wait() */ @@ -589,7 +576,7 @@ EXPORT_SYMBOL(filemap_write_and_wait_range); void __filemap_set_wb_err(struct address_space *mapping, int err) { - errseq_t eseq = __errseq_set(&mapping->wb_err, err); + errseq_t eseq = errseq_set(&mapping->wb_err, err); trace_filemap_set_wb_err(mapping, eseq); } @@ -633,6 +620,14 @@ int file_check_and_advance_wb_err(struct file *file) trace_file_check_and_advance_wb_err(file, old); spin_unlock(&file->f_lock); } + + /* + * We're mostly using this function as a drop in replacement for + * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect + * that the legacy code would have had on these flags. + */ + clear_bit(AS_EIO, &mapping->flags); + clear_bit(AS_ENOSPC, &mapping->flags); return err; } EXPORT_SYMBOL(file_check_and_advance_wb_err); @@ -656,8 +651,7 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend) int err = 0, err2; struct address_space *mapping = file->f_mapping; - if ((!dax_mapping(mapping) && mapping->nrpages) || - (dax_mapping(mapping) && mapping->nrexceptional)) { + if (mapping_needs_writeback(mapping)) { err = __filemap_fdatawrite_range(mapping, lstart, lend, WB_SYNC_ALL); /* See comment of filemap_write_and_wait() */ @@ -923,13 +917,33 @@ static void wake_up_page_bit(struct page *page, int bit_nr) wait_queue_head_t *q = page_waitqueue(page); struct wait_page_key key; unsigned long flags; + wait_queue_entry_t bookmark; key.page = page; key.bit_nr = bit_nr; key.page_match = 0; + bookmark.flags = 0; + bookmark.private = NULL; + bookmark.func = NULL; + INIT_LIST_HEAD(&bookmark.entry); + spin_lock_irqsave(&q->lock, flags); - __wake_up_locked_key(q, TASK_NORMAL, &key); + __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark); + + while (bookmark.flags & WQ_FLAG_BOOKMARK) { + /* + * Take a breather from holding the lock, + * allow pages that finish wake up asynchronously + * to acquire the lock and remove themselves + * from wait queue + */ + spin_unlock_irqrestore(&q->lock, flags); + cpu_relax(); + spin_lock_irqsave(&q->lock, flags); + __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark); + } + /* * It is possible for other pages to have collided on the waitqueue * hash, so in that case check for a page match. That prevents a long- @@ -1566,23 +1580,29 @@ export: } /** - * find_get_pages - gang pagecache lookup + * find_get_pages_range - gang pagecache lookup * @mapping: The address_space to search * @start: The starting page index + * @end: The final page index (inclusive) * @nr_pages: The maximum number of pages * @pages: Where the resulting pages are placed * - * find_get_pages() will search for and return a group of up to - * @nr_pages pages in the mapping. The pages are placed at @pages. - * find_get_pages() takes a reference against the returned pages. + * find_get_pages_range() will search for and return a group of up to @nr_pages + * pages in the mapping starting at index @start and up to index @end + * (inclusive). The pages are placed at @pages. find_get_pages_range() takes + * a reference against the returned pages. * * The search returns a group of mapping-contiguous pages with ascending * indexes. There may be holes in the indices due to not-present pages. + * We also update @start to index the next page for the traversal. * - * find_get_pages() returns the number of pages which were found. + * find_get_pages_range() returns the number of pages which were found. If this + * number is smaller than @nr_pages, the end of specified range has been + * reached. */ -unsigned find_get_pages(struct address_space *mapping, pgoff_t start, - unsigned int nr_pages, struct page **pages) +unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start, + pgoff_t end, unsigned int nr_pages, + struct page **pages) { struct radix_tree_iter iter; void **slot; @@ -1592,8 +1612,11 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start, return 0; rcu_read_lock(); - radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, *start) { struct page *head, *page; + + if (iter.index > end) + break; repeat: page = radix_tree_deref_slot(slot); if (unlikely(!page)) @@ -1629,11 +1652,25 @@ repeat: } pages[ret] = page; - if (++ret == nr_pages) - break; + if (++ret == nr_pages) { + *start = pages[ret - 1]->index + 1; + goto out; + } } + /* + * We come here when there is no page beyond @end. We take care to not + * overflow the index @start as it confuses some of the callers. This + * breaks the iteration when there is page at index -1 but that is + * already broken anyway. + */ + if (end == (pgoff_t)-1) + *start = (pgoff_t)-1; + else + *start = end + 1; +out: rcu_read_unlock(); + return ret; } @@ -1888,9 +1925,8 @@ static void shrink_readahead_size_eio(struct file *filp, } /** - * do_generic_file_read - generic file read routine - * @filp: the file to read - * @ppos: current file position + * generic_file_buffered_read - generic file read routine + * @iocb: the iocb to read * @iter: data destination * @written: already copied * @@ -1900,12 +1936,14 @@ static void shrink_readahead_size_eio(struct file *filp, * This is really ugly. But the goto's actually try to clarify some * of the logic when it comes to error handling etc. */ -static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, +static ssize_t generic_file_buffered_read(struct kiocb *iocb, struct iov_iter *iter, ssize_t written) { + struct file *filp = iocb->ki_filp; struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; struct file_ra_state *ra = &filp->f_ra; + loff_t *ppos = &iocb->ki_pos; pgoff_t index; pgoff_t last_index; pgoff_t prev_index; @@ -1938,6 +1976,8 @@ find_page: page = find_get_page(mapping, index); if (!page) { + if (iocb->ki_flags & IOCB_NOWAIT) + goto would_block; page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); @@ -1951,6 +1991,11 @@ find_page: index, last_index - index); } if (!PageUptodate(page)) { + if (iocb->ki_flags & IOCB_NOWAIT) { + put_page(page); + goto would_block; + } + /* * See comment in do_read_cache_page on why * wait_on_page_locked is used to avoid unnecessarily @@ -2132,6 +2177,8 @@ no_cached_page: goto readpage; } +would_block: + error = -EAGAIN; out: ra->prev_pos = prev_index; ra->prev_pos <<= PAGE_SHIFT; @@ -2153,14 +2200,14 @@ out: ssize_t generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { - struct file *file = iocb->ki_filp; - ssize_t retval = 0; size_t count = iov_iter_count(iter); + ssize_t retval = 0; if (!count) goto out; /* skip atime */ if (iocb->ki_flags & IOCB_DIRECT) { + struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; loff_t size; @@ -2201,7 +2248,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) goto out; } - retval = do_generic_file_read(file, &iocb->ki_pos, iter, retval); + retval = generic_file_buffered_read(iocb, iter, retval); out: return retval; } @@ -2887,9 +2934,15 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) * we're writing. Either one is a pretty crazy thing to do, * so we don't support it 100%. If this invalidation * fails, tough, the write still worked... + * + * Most of the time we do not need this since dio_complete() will do + * the invalidation for us. However there are some file systems that + * do not end up with dio_complete() being called, so let's not break + * them by removing it completely */ - invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); + if (mapping->nrpages) + invalidate_inode_pages2_range(mapping, + pos >> PAGE_SHIFT, end); if (written > 0) { pos += written; |