summaryrefslogtreecommitdiffstats
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
authorMuchun Song <songmuchun@bytedance.com>2021-07-01 03:47:13 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2021-07-01 05:47:25 +0200
commitf41f2ed43ca5258d70d53290d1951a21621f95c8 (patch)
tree482b563f648b43445eecd0a7e8c4f145e39d21e8 /mm/hugetlb.c
parentmm: hugetlb: gather discrete indexes of tail page (diff)
downloadlinux-f41f2ed43ca5258d70d53290d1951a21621f95c8.tar.xz
linux-f41f2ed43ca5258d70d53290d1951a21621f95c8.zip
mm: hugetlb: free the vmemmap pages associated with each HugeTLB page
Every HugeTLB has more than one struct page structure. We __know__ that we only use the first 4 (__NR_USED_SUBPAGE) struct page structures to store metadata associated with each HugeTLB. There are a lot of struct page structures associated with each HugeTLB page. For tail pages, the value of compound_head is the same. So we can reuse first page of tail page structures. We map the virtual addresses of the remaining pages of tail page structures to the first tail page struct, and then free these page frames. Therefore, we need to reserve two pages as vmemmap areas. When we allocate a HugeTLB page from the buddy, we can free some vmemmap pages associated with each HugeTLB page. It is more appropriate to do it in the prep_new_huge_page(). The free_vmemmap_pages_per_hpage(), which indicates how many vmemmap pages associated with a HugeTLB page can be freed, returns zero for now, which means the feature is disabled. We will enable it once all the infrastructure is there. [willy@infradead.org: fix documentation warning] Link: https://lkml.kernel.org/r/20210615200242.1716568-5-willy@infradead.org Link: https://lkml.kernel.org/r/20210510030027.56044-5-songmuchun@bytedance.com Signed-off-by: Muchun Song <songmuchun@bytedance.com> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> Reviewed-by: Oscar Salvador <osalvador@suse.de> Tested-by: Chen Huang <chenhuang5@huawei.com> Tested-by: Bodeddula Balasubramaniam <bodeddub@amazon.com> Acked-by: Michal Hocko <mhocko@suse.com> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Andy Lutomirski <luto@kernel.org> Cc: Anshuman Khandual <anshuman.khandual@arm.com> Cc: Balbir Singh <bsingharora@gmail.com> Cc: Barry Song <song.bao.hua@hisilicon.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Rientjes <rientjes@google.com> Cc: HORIGUCHI NAOYA <naoya.horiguchi@nec.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Joao Martins <joao.m.martins@oracle.com> Cc: Joerg Roedel <jroedel@suse.de> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Matthew Wilcox <willy@infradead.org> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Mina Almasry <almasrymina@google.com> Cc: Oliver Neukum <oneukum@suse.com> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Xiongchun Duan <duanxiongchun@bytedance.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c22
1 files changed, 10 insertions, 12 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 103f1187043f..5f5493f0f003 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -41,6 +41,7 @@
#include <linux/node.h>
#include <linux/page_owner.h>
#include "internal.h"
+#include "hugetlb_vmemmap.h"
int hugetlb_max_hstate __read_mostly;
unsigned int default_hstate_idx;
@@ -1493,8 +1494,9 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
h->nr_huge_pages_node[nid]++;
}
-static void __prep_new_huge_page(struct page *page)
+static void __prep_new_huge_page(struct hstate *h, struct page *page)
{
+ free_huge_page_vmemmap(h, page);
INIT_LIST_HEAD(&page->lru);
set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
hugetlb_set_page_subpool(page, NULL);
@@ -1504,7 +1506,7 @@ static void __prep_new_huge_page(struct page *page)
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
{
- __prep_new_huge_page(page);
+ __prep_new_huge_page(h, page);
spin_lock_irq(&hugetlb_lock);
__prep_account_new_huge_page(h, nid);
spin_unlock_irq(&hugetlb_lock);
@@ -2351,14 +2353,15 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
/*
* Before dissolving the page, we need to allocate a new one for the
- * pool to remain stable. Using alloc_buddy_huge_page() allows us to
- * not having to deal with prep_new_huge_page() and avoids dealing of any
- * counters. This simplifies and let us do the whole thing under the
- * lock.
+ * pool to remain stable. Here, we allocate the page and 'prep' it
+ * by doing everything but actually updating counters and adding to
+ * the pool. This simplifies and let us do most of the processing
+ * under the lock.
*/
new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL);
if (!new_page)
return -ENOMEM;
+ __prep_new_huge_page(h, new_page);
retry:
spin_lock_irq(&hugetlb_lock);
@@ -2397,14 +2400,9 @@ retry:
remove_hugetlb_page(h, old_page, false);
/*
- * new_page needs to be initialized with the standard hugetlb
- * state. This is normally done by prep_new_huge_page() but
- * that takes hugetlb_lock which is already held so we need to
- * open code it here.
* Reference count trick is needed because allocator gives us
* referenced page but the pool requires pages with 0 refcount.
*/
- __prep_new_huge_page(new_page);
__prep_account_new_huge_page(h, nid);
page_ref_dec(new_page);
enqueue_huge_page(h, new_page);
@@ -2420,7 +2418,7 @@ retry:
free_new:
spin_unlock_irq(&hugetlb_lock);
- __free_pages(new_page, huge_page_order(h));
+ update_and_free_page(h, new_page);
return ret;
}