summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-03-24 18:16:26 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2011-03-24 18:16:26 +0100
commit6c5103890057b1bb781b26b7aae38d33e4c517d8 (patch)
treee6e57961dcddcb5841acb34956e70b9dc696a880 /mm
parentMerge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells... (diff)
parentDocumentation/iostats.txt: bit-size reference etc. (diff)
downloadlinux-6c5103890057b1bb781b26b7aae38d33e4c517d8.tar.xz
linux-6c5103890057b1bb781b26b7aae38d33e4c517d8.zip
Merge branch 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block: (65 commits) Documentation/iostats.txt: bit-size reference etc. cfq-iosched: removing unnecessary think time checking cfq-iosched: Don't clear queue stats when preempt. blk-throttle: Reset group slice when limits are changed blk-cgroup: Only give unaccounted_time under debug cfq-iosched: Don't set active queue in preempt block: fix non-atomic access to genhd inflight structures block: attempt to merge with existing requests on plug flush block: NULL dereference on error path in __blkdev_get() cfq-iosched: Don't update group weights when on service tree fs: assign sb->s_bdi to default_backing_dev_info if the bdi is going away block: Require subsystems to explicitly allocate bio_set integrity mempool jbd2: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging jbd: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging fs: make fsync_buffers_list() plug mm: make generic_writepages() use plugging blk-cgroup: Add unaccounted time to timeslice_used. block: fixup plugging stubs for !CONFIG_BLOCK block: remove obsolete comments for blkdev_issue_zeroout. blktrace: Use rq->cmd_flags directly in blk_add_trace_rq. ... Fix up conflicts in fs/{aio.c,super.c}
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c8
-rw-r--r--mm/filemap.c74
-rw-r--r--mm/memory-failure.c8
-rw-r--r--mm/nommu.c4
-rw-r--r--mm/page-writeback.c10
-rw-r--r--mm/page_io.c2
-rw-r--r--mm/readahead.c18
-rw-r--r--mm/shmem.c1
-rw-r--r--mm/swap_state.c5
-rw-r--r--mm/swapfile.c37
-rw-r--r--mm/vmscan.c2
11 files changed, 35 insertions, 134 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 027100d30227..8fe9d3407921 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -14,17 +14,11 @@
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
-void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-EXPORT_SYMBOL(default_unplug_io_fn);
-
struct backing_dev_info default_backing_dev_info = {
.name = "default",
.ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
.state = 0,
.capabilities = BDI_CAP_MAP_COPY,
- .unplug_io_fn = default_unplug_io_fn,
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);
@@ -604,7 +598,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
spin_lock(&sb_lock);
list_for_each_entry(sb, &super_blocks, s_list) {
if (sb->s_bdi == bdi)
- sb->s_bdi = NULL;
+ sb->s_bdi = &default_backing_dev_info;
}
spin_unlock(&sb_lock);
}
diff --git a/mm/filemap.c b/mm/filemap.c
index f807afda86f2..04d1992fd86b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -164,45 +164,15 @@ void delete_from_page_cache(struct page *page)
}
EXPORT_SYMBOL(delete_from_page_cache);
-static int sync_page(void *word)
+static int sleep_on_page(void *word)
{
- struct address_space *mapping;
- struct page *page;
-
- page = container_of((unsigned long *)word, struct page, flags);
-
- /*
- * page_mapping() is being called without PG_locked held.
- * Some knowledge of the state and use of the page is used to
- * reduce the requirements down to a memory barrier.
- * The danger here is of a stale page_mapping() return value
- * indicating a struct address_space different from the one it's
- * associated with when it is associated with one.
- * After smp_mb(), it's either the correct page_mapping() for
- * the page, or an old page_mapping() and the page's own
- * page_mapping() has gone NULL.
- * The ->sync_page() address_space operation must tolerate
- * page_mapping() going NULL. By an amazing coincidence,
- * this comes about because none of the users of the page
- * in the ->sync_page() methods make essential use of the
- * page_mapping(), merely passing the page down to the backing
- * device's unplug functions when it's non-NULL, which in turn
- * ignore it for all cases but swap, where only page_private(page) is
- * of interest. When page_mapping() does go NULL, the entire
- * call stack gracefully ignores the page and returns.
- * -- wli
- */
- smp_mb();
- mapping = page_mapping(page);
- if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
- mapping->a_ops->sync_page(page);
io_schedule();
return 0;
}
-static int sync_page_killable(void *word)
+static int sleep_on_page_killable(void *word)
{
- sync_page(word);
+ sleep_on_page(word);
return fatal_signal_pending(current) ? -EINTR : 0;
}
@@ -558,12 +528,6 @@ struct page *__page_cache_alloc(gfp_t gfp)
EXPORT_SYMBOL(__page_cache_alloc);
#endif
-static int __sleep_on_page_lock(void *word)
-{
- io_schedule();
- return 0;
-}
-
/*
* In order to wait for pages to become available there must be
* waitqueues associated with pages. By using a hash table of
@@ -591,7 +555,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
if (test_bit(bit_nr, &page->flags))
- __wait_on_bit(page_waitqueue(page), &wait, sync_page,
+ __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_on_page_bit);
@@ -655,17 +619,12 @@ EXPORT_SYMBOL(end_page_writeback);
/**
* __lock_page - get a lock on the page, assuming we need to sleep to get it
* @page: the page to lock
- *
- * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
- * random driver's requestfn sets TASK_RUNNING, we could busywait. However
- * chances are that on the second loop, the block layer's plug list is empty,
- * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
*/
void __lock_page(struct page *page)
{
DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
- __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
+ __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_page);
@@ -675,24 +634,10 @@ int __lock_page_killable(struct page *page)
DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
return __wait_on_bit_lock(page_waitqueue(page), &wait,
- sync_page_killable, TASK_KILLABLE);
+ sleep_on_page_killable, TASK_KILLABLE);
}
EXPORT_SYMBOL_GPL(__lock_page_killable);
-/**
- * __lock_page_nosync - get a lock on the page, without calling sync_page()
- * @page: the page to lock
- *
- * Variant of lock_page that does not require the caller to hold a reference
- * on the page's mapping.
- */
-void __lock_page_nosync(struct page *page)
-{
- DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
- __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
- TASK_UNINTERRUPTIBLE);
-}
-
int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
{
@@ -1407,12 +1352,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long seg = 0;
size_t count;
loff_t *ppos = &iocb->ki_pos;
+ struct blk_plug plug;
count = 0;
retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
if (retval)
return retval;
+ blk_start_plug(&plug);
+
/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
if (filp->f_flags & O_DIRECT) {
loff_t size;
@@ -1485,6 +1433,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
break;
}
out:
+ blk_finish_plug(&plug);
return retval;
}
EXPORT_SYMBOL(generic_file_aio_read);
@@ -2596,11 +2545,13 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ struct blk_plug plug;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
mutex_lock(&inode->i_mutex);
+ blk_start_plug(&plug);
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
@@ -2611,6 +2562,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0 && ret > 0)
ret = err;
}
+ blk_finish_plug(&plug);
return ret;
}
EXPORT_SYMBOL(generic_file_aio_write);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index e0af336530c6..37feb9fec228 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -945,7 +945,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
collect_procs(ppage, &tokill);
if (hpage != ppage)
- lock_page_nosync(ppage);
+ lock_page(ppage);
ret = try_to_unmap(ppage, ttu);
if (ret != SWAP_SUCCESS)
@@ -1038,7 +1038,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
* Check "just unpoisoned", "filter hit", and
* "race with other subpage."
*/
- lock_page_nosync(hpage);
+ lock_page(hpage);
if (!PageHWPoison(hpage)
|| (hwpoison_filter(p) && TestClearPageHWPoison(p))
|| (p != hpage && TestSetPageHWPoison(hpage))) {
@@ -1088,7 +1088,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
* It's very difficult to mess with pages currently under IO
* and in many cases impossible, so we just avoid it here.
*/
- lock_page_nosync(hpage);
+ lock_page(hpage);
/*
* unpoison always clear PG_hwpoison inside page lock
@@ -1231,7 +1231,7 @@ int unpoison_memory(unsigned long pfn)
return 0;
}
- lock_page_nosync(page);
+ lock_page(page);
/*
* This test is racy because PG_hwpoison is set outside of page lock.
* That's acceptable because that won't trigger kernel panic. Instead,
diff --git a/mm/nommu.c b/mm/nommu.c
index e629143f9440..cb86e7d5e7f5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1842,10 +1842,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
}
EXPORT_SYMBOL(remap_vmalloc_range);
-void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-
unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 632b46479c94..31f698862420 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1040,11 +1040,17 @@ static int __writepage(struct page *page, struct writeback_control *wbc,
int generic_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
+ struct blk_plug plug;
+ int ret;
+
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
return 0;
- return write_cache_pages(mapping, wbc, __writepage, mapping);
+ blk_start_plug(&plug);
+ ret = write_cache_pages(mapping, wbc, __writepage, mapping);
+ blk_finish_plug(&plug);
+ return ret;
}
EXPORT_SYMBOL(generic_writepages);
@@ -1251,7 +1257,7 @@ int set_page_dirty_lock(struct page *page)
{
int ret;
- lock_page_nosync(page);
+ lock_page(page);
ret = set_page_dirty(page);
unlock_page(page);
return ret;
diff --git a/mm/page_io.c b/mm/page_io.c
index 2dee975bf469..dc76b4d0611e 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -106,7 +106,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
goto out;
}
if (wbc->sync_mode == WB_SYNC_ALL)
- rw |= REQ_SYNC | REQ_UNPLUG;
+ rw |= REQ_SYNC;
count_vm_event(PSWPOUT);
set_page_writeback(page);
unlock_page(page);
diff --git a/mm/readahead.c b/mm/readahead.c
index 77506a291a2d..2c0cc489e288 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -109,9 +109,12 @@ EXPORT_SYMBOL(read_cache_pages);
static int read_pages(struct address_space *mapping, struct file *filp,
struct list_head *pages, unsigned nr_pages)
{
+ struct blk_plug plug;
unsigned page_idx;
int ret;
+ blk_start_plug(&plug);
+
if (mapping->a_ops->readpages) {
ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
/* Clean up the remaining pages */
@@ -129,7 +132,10 @@ static int read_pages(struct address_space *mapping, struct file *filp,
page_cache_release(page);
}
ret = 0;
+
out:
+ blk_finish_plug(&plug);
+
return ret;
}
@@ -554,17 +560,5 @@ page_cache_async_readahead(struct address_space *mapping,
/* do read-ahead */
ondemand_readahead(mapping, ra, filp, true, offset, req_size);
-
-#ifdef CONFIG_BLOCK
- /*
- * Normally the current page is !uptodate and lock_page() will be
- * immediately called to implicitly unplug the device. However this
- * is not always true for RAID conifgurations, where data arrives
- * not strictly in their submission order. In this case we need to
- * explicitly kick off the IO.
- */
- if (PageUptodate(page))
- blk_run_backing_dev(mapping->backing_dev_info, NULL);
-#endif
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
diff --git a/mm/shmem.c b/mm/shmem.c
index 91ce9a1024d7..58da7c150ba6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -224,7 +224,6 @@ static const struct vm_operations_struct shmem_vm_ops;
static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
.ra_pages = 0, /* No readahead */
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
- .unplug_io_fn = default_unplug_io_fn,
};
static LIST_HEAD(shmem_swaplist);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5c8cfabbc9bc..46680461785b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -24,12 +24,10 @@
/*
* swapper_space is a fiction, retained to simplify the path through
- * vmscan's shrink_page_list, to make sync_page look nicer, and to allow
- * future use of radix_tree tags in the swap cache.
+ * vmscan's shrink_page_list.
*/
static const struct address_space_operations swap_aops = {
.writepage = swap_writepage,
- .sync_page = block_sync_page,
.set_page_dirty = __set_page_dirty_nobuffers,
.migratepage = migrate_page,
};
@@ -37,7 +35,6 @@ static const struct address_space_operations swap_aops = {
static struct backing_dev_info swap_backing_dev_info = {
.name = "swap",
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
- .unplug_io_fn = swap_unplug_io_fn,
};
struct address_space swapper_space = {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 039e61677635..8c6b3ce38f09 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -95,39 +95,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
}
/*
- * We need this because the bdev->unplug_fn can sleep and we cannot
- * hold swap_lock while calling the unplug_fn. And swap_lock
- * cannot be turned into a mutex.
- */
-static DECLARE_RWSEM(swap_unplug_sem);
-
-void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
-{
- swp_entry_t entry;
-
- down_read(&swap_unplug_sem);
- entry.val = page_private(page);
- if (PageSwapCache(page)) {
- struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
- struct backing_dev_info *bdi;
-
- /*
- * If the page is removed from swapcache from under us (with a
- * racy try_to_unuse/swapoff) we need an additional reference
- * count to avoid reading garbage from page_private(page) above.
- * If the WARN_ON triggers during a swapoff it maybe the race
- * condition and it's harmless. However if it triggers without
- * swapoff it signals a problem.
- */
- WARN_ON(page_count(page) <= 1);
-
- bdi = bdev->bd_inode->i_mapping->backing_dev_info;
- blk_run_backing_dev(bdi, page);
- }
- up_read(&swap_unplug_sem);
-}
-
-/*
* swapon tell device that all the old swap contents can be discarded,
* to allow the swap device to optimize its wear-levelling.
*/
@@ -1662,10 +1629,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
goto out_dput;
}
- /* wait for any unplug function to finish */
- down_write(&swap_unplug_sem);
- up_write(&swap_unplug_sem);
-
destroy_swap_extents(p);
if (p->flags & SWP_CONTINUED)
free_swap_count_continuations(p);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 060e4c191403..f73b8657c2d0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -358,7 +358,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
static void handle_write_error(struct address_space *mapping,
struct page *page, int error)
{
- lock_page_nosync(page);
+ lock_page(page);
if (page_mapping(page) == mapping)
mapping_set_error(mapping, error);
unlock_page(page);