summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Jan Kara <jack@suse.cz> 2017-11-16 02:37:33 +0100
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-11-16 03:21:06 +0100
commit aa65c29ce1b6e1990cd2c7d8004bbea7ff3aff38 (patch)
tree f3a3fe2b5a6430f04e721c4c35f9209fa6482e32 /mm
parent mm: factor out checks and accounting from __delete_from_page_cache() (diff)
download linux-aa65c29ce1b6e1990cd2c7d8004bbea7ff3aff38.tar.xz
linux-aa65c29ce1b6e1990cd2c7d8004bbea7ff3aff38.zip
mm: batch radix tree operations when truncating pages
Currently we remove pages from the radix tree one by one. To speed up page
cache truncation, lock several pages at once and free them in one go. This
allows us to batch radix tree operations in a more efficient way and also
save round-trips on mapping->tree_lock. As a result we gain about 20% speed
improvement in page cache truncation.

Data from a simple benchmark timing 10000 truncates of 1024 pages (on ext4
on ramdisk but the filesystem is barely visible in the profiles). The range
shows 1% and 95% percentiles of the measured times:

    4.14-rc2        4.14-rc2 + batched truncation
    248-256         209-219
    249-258         209-217
    248-255         211-239
    248-255         209-217
    247-256         210-218

[jack@suse.cz: convert delete_from_page_cache_batch() to pagevec]
  Link: http://lkml.kernel.org/r/20171018111648.13714-1-jack@suse.cz
[akpm@linux-foundation.org: move struct pagevec forward declaration to top-of-file]
Link: http://lkml.kernel.org/r/20171010151937.26984-8-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/filemap.c 83
-rw-r--r-- mm/truncate.c 20
2 files changed, 101 insertions, 2 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index a11b42189436..a470dd8cd05b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -304,6 +304,89 @@ void delete_from_page_cache(struct page *page)
}
EXPORT_SYMBOL(delete_from_page_cache);
+/*
+ * page_cache_tree_delete_batch - delete several pages from page cache
+ * @mapping: the mapping to which pages belong
+ * @pvec: pagevec with pages to delete
+ *
+ * The function walks over mapping->page_tree, starting at the index of the
+ * first page in @pvec, and clears the slots of the pages passed in @pvec. It
+ * expects @pvec to be non-empty and sorted by page index. Holes in @pvec are
+ * tolerated: exceptional entries and pages not in @pvec are left unmodified.
+ * Only THP head pages may be present in @pvec; after a head page is removed,
+ * the next HPAGE_PMD_NR - 1 non-exceptional slots are cleared as its tails.
+ *
+ * The function expects mapping->tree_lock to be held.
+ */
+static void
+page_cache_tree_delete_batch(struct address_space *mapping,
+ struct pagevec *pvec)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+ int total_pages = 0; /* slots cleared so far, tail slots included */
+ int i = 0, tail_pages = 0; /* next @pvec entry; tail slots still owed */
+ struct page *page;
+ pgoff_t start;
+
+ start = pvec->pages[0]->index; /* pages[0] is read unconditionally */
+ radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+ if (i >= pagevec_count(pvec) && !tail_pages)
+ break; /* all of @pvec consumed and no tail slots pending */
+ page = radix_tree_deref_slot_protected(slot,
+ &mapping->tree_lock);
+ if (radix_tree_exceptional_entry(page))
+ continue; /* exceptional entries stay in the tree */
+ if (!tail_pages) {
+ /*
+ * Some page got inserted in our range? Skip it. We
+ * have our pages locked so they are protected from
+ * being removed.
+ */
+ if (page != pvec->pages[i])
+ continue;
+ WARN_ON_ONCE(!PageLocked(page));
+ if (PageTransHuge(page) && !PageHuge(page))
+ tail_pages = HPAGE_PMD_NR - 1; /* clear its tails too */
+ page->mapping = NULL;
+ /*
+ * Leave page->index set: truncation lookup relies
+ * upon it
+ */
+ i++;
+ } else {
+ tail_pages--; /* slot counted as a tail of the last head */
+ }
+ radix_tree_clear_tags(&mapping->page_tree, iter.node, slot);
+ __radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL,
+ workingset_update_node, mapping);
+ total_pages++; /* one more slot replaced with NULL */
+ }
+ mapping->nrpages -= total_pages; /* single adjustment for the batch */
+}
+/* Delete all pages in @pvec from @mapping's page cache as one batch. */
+void delete_from_page_cache_batch(struct address_space *mapping,
+ struct pagevec *pvec)
+{
+ int i;
+ unsigned long flags;
+
+ if (!pagevec_count(pvec))
+ return; /* empty batch: nothing to do */
+
+ spin_lock_irqsave(&mapping->tree_lock, flags); /* taken once per batch */
+ for (i = 0; i < pagevec_count(pvec); i++) {
+ trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
+
+ unaccount_page_cache_page(mapping, pvec->pages[i]);
+ }
+ page_cache_tree_delete_batch(mapping, pvec); /* drop them from the tree */
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+
+ for (i = 0; i < pagevec_count(pvec); i++) /* after tree_lock is dropped */
+ page_cache_free_page(mapping, pvec->pages[i]);
+}
+
int filemap_check_errors(struct address_space *mapping)
{
int ret = 0;
diff --git a/mm/truncate.c b/mm/truncate.c
index 383a530d511e..4a39a3150ee2 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -294,6 +294,14 @@ void truncate_inode_pages_range(struct address_space *mapping,
while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
min(end - index, (pgoff_t)PAGEVEC_SIZE),
indices)) {
+ /*
+ * Pagevec array has exceptional entries and we may also fail
+ * to lock some pages. So we store pages that can be deleted
+ * in a new pagevec.
+ */
+ struct pagevec locked_pvec;
+
+ pagevec_init(&locked_pvec, 0);
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
@@ -315,9 +323,17 @@ void truncate_inode_pages_range(struct address_space *mapping,
unlock_page(page);
continue;
}
- truncate_inode_page(mapping, page);
- unlock_page(page);
+ if (page->mapping != mapping) {
+ unlock_page(page);
+ continue;
+ }
+ pagevec_add(&locked_pvec, page);
}
+ for (i = 0; i < pagevec_count(&locked_pvec); i++)
+ truncate_cleanup_page(mapping, locked_pvec.pages[i]);
+ delete_from_page_cache_batch(mapping, &locked_pvec);
+ for (i = 0; i < pagevec_count(&locked_pvec); i++)
+ unlock_page(locked_pvec.pages[i]);
pagevec_remove_exceptionals(&pvec);
pagevec_release(&pvec);
cond_resched();