summaryrefslogtreecommitdiffstats
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 21:32:41 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 21:32:41 +0200
commit98931dd95fd489fcbfa97da563505a6f071d7c77 (patch)
tree44683fc4a92efa614acdca2742a7ff19d26da1e3 /mm/memory-failure.c
parentMerge tag 'kbuild-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/mas... (diff)
parentmm: kfence: use PAGE_ALIGNED helper (diff)
downloadlinux-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.xz
linux-98931dd95fd489fcbfa97da563505a6f071d7c77.zip
Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: "Almost all of MM here. A few things are still getting finished off, reviewed, etc. - Yang Shi has improved the behaviour of khugepaged collapsing of readonly file-backed transparent hugepages. - Johannes Weiner has arranged for zswap memory use to be tracked and managed on a per-cgroup basis. - Munchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for runtime enablement of the recent huge page vmemmap optimization feature. - Baolin Wang contributes a series to fix some issues around hugetlb pagetable invalidation. - Zhenwei Pi has fixed some interactions between hwpoisoned pages and virtualization. - Tong Tiangen has enabled the use of the presently x86-only page_table_check debugging feature on arm64 and riscv. - David Vernet has done some fixup work on the memcg selftests. - Peter Xu has taught userfaultfd to handle write protection faults against shmem- and hugetlbfs-backed files. - More DAMON development from SeongJae Park - adding online tuning of the feature and support for monitoring of fixed virtual address ranges. Also easier discovery of which monitoring operations are available. - Nadav Amit has done some optimization of TLB flushing during mprotect(). - Neil Brown continues to labor away at improving our swap-over-NFS support. - David Hildenbrand has some fixes to anon page COWing versus get_user_pages(). - Peng Liu fixed some errors in the core hugetlb code. - Joao Martins has reduced the amount of memory consumed by device-dax's compound devmaps. - Some cleanups of the arch-specific pagemap code from Anshuman Khandual. - Muchun Song has found and fixed some errors in the TLB flushing of transparent hugepages. - Roman Gushchin has done more work on the memcg selftests. ... and, of course, many smaller fixes and cleanups. Notably, the customary million cleanup serieses from Miaohe Lin" * tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits) mm: kfence: use PAGE_ALIGNED helper selftests: vm: add the "settings" file with timeout variable selftests: vm: add "test_hmm.sh" to TEST_FILES selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests selftests: vm: add migration to the .gitignore selftests/vm/pkeys: fix typo in comment ksm: fix typo in comment selftests: vm: add process_mrelease tests Revert "mm/vmscan: never demote for memcg reclaim" mm/kfence: print disabling or re-enabling message include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace" include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion" mm: fix a potential infinite loop in start_isolate_page_range() MAINTAINERS: add Muchun as co-maintainer for HugeTLB zram: fix Kconfig dependency warning mm/shmem: fix shmem folio swapoff hang cgroup: fix an error handling path in alloc_pagecache_max_30M() mm: damon: use HPAGE_PMD_SIZE tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate nodemask.h: fix compilation error with GCC12 ...
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c126
1 files changed, 63 insertions, 63 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d4a4adcca01f..b85661cbdc4a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -59,6 +59,7 @@
#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
#include <linux/shmem_fs.h>
+#include "swap.h"
#include "internal.h"
#include "ras/ras_event.h"
@@ -484,7 +485,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
struct anon_vma *av;
pgoff_t pgoff;
- av = folio_lock_anon_vma_read(folio);
+ av = folio_lock_anon_vma_read(folio, NULL);
if (av == NULL) /* Not actually mapped anymore */
return;
@@ -622,7 +623,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
+ struct hwp_walk *hwp = walk->private;
int ret = 0;
pte_t *ptep, *mapped_pte;
spinlock_t *ptl;
@@ -656,7 +657,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
+ struct hwp_walk *hwp = walk->private;
pte_t pte = huge_ptep_get(ptep);
struct hstate *h = hstate_vma(walk->vma);
@@ -733,7 +734,6 @@ static const char * const action_page_types[] = {
[MF_MSG_BUDDY] = "free buddy page",
[MF_MSG_DAX] = "dax page",
[MF_MSG_UNSPLIT_THP] = "unsplit thp",
- [MF_MSG_DIFFERENT_PAGE_SIZE] = "different page size",
[MF_MSG_UNKNOWN] = "unknown page",
};
@@ -1041,12 +1041,11 @@ static int me_huge_page(struct page_state *ps, struct page *p)
res = MF_FAILED;
unlock_page(hpage);
/*
- * migration entry prevents later access on error anonymous
- * hugepage, so we can free and dissolve it into buddy to
- * save healthy subpages.
+ * migration entry prevents later access on error hugepage,
+ * so we can free and dissolve it into buddy to save healthy
+ * subpages.
*/
- if (PageAnon(hpage))
- put_page(hpage);
+ put_page(hpage);
if (__page_handle_poison(p)) {
page_ref_inc(p);
res = MF_RECOVERED;
@@ -1133,6 +1132,7 @@ static void action_result(unsigned long pfn, enum mf_action_page_type type,
{
trace_memory_failure_event(pfn, type, result);
+ num_poisoned_pages_inc();
pr_err("Memory failure: %#lx: recovery action for %s: %s\n",
pfn, action_page_types[type], action_name[result]);
}
@@ -1179,13 +1179,11 @@ void ClearPageHWPoisonTakenOff(struct page *page)
*/
static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
{
- bool movable = false;
-
- /* Soft offline could mirgate non-LRU movable pages */
+ /* Soft offline could migrate non-LRU movable pages */
if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
- movable = true;
+ return true;
- return movable || PageLRU(page) || is_free_buddy_page(page);
+ return PageLRU(page) || is_free_buddy_page(page);
}
static int __get_hwpoison_page(struct page *page, unsigned long flags)
@@ -1521,7 +1519,9 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
if (flags & MF_COUNT_INCREASED) {
ret = 1;
count_increased = true;
- } else if (HPageFreed(head) || HPageMigratable(head)) {
+ } else if (HPageFreed(head)) {
+ ret = 0;
+ } else if (HPageMigratable(head)) {
ret = get_page_unless_zero(head);
if (ret)
count_increased = true;
@@ -1588,8 +1588,6 @@ retry:
goto out;
}
- num_poisoned_pages_inc();
-
/*
* Handling free hugepage. The possible race with hugepage allocation
* or demotion can be prevented by PageHWPoison flag.
@@ -1605,16 +1603,6 @@ retry:
return res == MF_RECOVERED ? 0 : -EBUSY;
}
- /*
- * The page could have changed compound pages due to race window.
- * If this happens just bail out.
- */
- if (!PageHuge(p) || compound_head(p) != head) {
- action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
- res = -EBUSY;
- goto out;
- }
-
page_flags = head->flags;
/*
@@ -1762,7 +1750,7 @@ static DEFINE_MUTEX(mf_mutex);
* enabled and no spinlocks hold.
*
* Return: 0 for successfully handled the memory error,
- * -EOPNOTSUPP for memory_filter() filtered the error event,
+ * -EOPNOTSUPP for hwpoison_filter() filtered the error event,
* < 0(except -EOPNOTSUPP) on failure.
*/
int memory_failure(unsigned long pfn, int flags)
@@ -1811,11 +1799,12 @@ try_again:
res = -EHWPOISON;
if (flags & MF_ACTION_REQUIRED)
res = kill_accessing_process(current, pfn, flags);
+ if (flags & MF_COUNT_INCREASED)
+ put_page(p);
goto unlock_mutex;
}
hpage = compound_head(p);
- num_poisoned_pages_inc();
/*
* We need/can do nothing about count=0 pages.
@@ -1839,7 +1828,6 @@ try_again:
/* We lost the race, try again */
if (retry) {
ClearPageHWPoison(p);
- num_poisoned_pages_dec();
retry = false;
goto try_again;
}
@@ -1902,8 +1890,7 @@ try_again:
*/
if (PageCompound(p)) {
if (retry) {
- if (TestClearPageHWPoison(p))
- num_poisoned_pages_dec();
+ ClearPageHWPoison(p);
unlock_page(p);
put_page(p);
flags &= ~MF_COUNT_INCREASED;
@@ -1925,8 +1912,7 @@ try_again:
page_flags = p->flags;
if (hwpoison_filter(p)) {
- if (TestClearPageHWPoison(p))
- num_poisoned_pages_dec();
+ TestClearPageHWPoison(p);
unlock_page(p);
put_page(p);
res = -EOPNOTSUPP;
@@ -2088,28 +2074,6 @@ core_initcall(memory_failure_init);
pr_info(fmt, pfn); \
})
-static inline int clear_page_hwpoison(struct ratelimit_state *rs, struct page *p)
-{
- if (TestClearPageHWPoison(p)) {
- unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
- page_to_pfn(p), rs);
- num_poisoned_pages_dec();
- return 1;
- }
- return 0;
-}
-
-static inline int unpoison_taken_off_page(struct ratelimit_state *rs,
- struct page *p)
-{
- if (put_page_back_buddy(p)) {
- unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
- page_to_pfn(p), rs);
- return 0;
- }
- return -EBUSY;
-}
-
/**
* unpoison_memory - Unpoison a previously poisoned page
* @pfn: Page number of the to be unpoisoned page
@@ -2127,6 +2091,7 @@ int unpoison_memory(unsigned long pfn)
struct page *page;
struct page *p;
int ret = -EBUSY;
+ int freeit = 0;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
@@ -2167,18 +2132,15 @@ int unpoison_memory(unsigned long pfn)
ret = get_hwpoison_page(p, MF_UNPOISON);
if (!ret) {
- if (clear_page_hwpoison(&unpoison_rs, page))
- ret = 0;
- else
- ret = -EBUSY;
+ ret = TestClearPageHWPoison(page) ? 0 : -EBUSY;
} else if (ret < 0) {
if (ret == -EHWPOISON) {
- ret = unpoison_taken_off_page(&unpoison_rs, p);
+ ret = put_page_back_buddy(p) ? 0 : -EBUSY;
} else
unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
pfn, &unpoison_rs);
} else {
- int freeit = clear_page_hwpoison(&unpoison_rs, p);
+ freeit = !!TestClearPageHWPoison(p);
put_page(page);
if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) {
@@ -2189,6 +2151,11 @@ int unpoison_memory(unsigned long pfn)
unlock_mutex:
mutex_unlock(&mf_mutex);
+ if (!ret || freeit) {
+ num_poisoned_pages_dec();
+ unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+ page_to_pfn(p), &unpoison_rs);
+ }
return ret;
}
EXPORT_SYMBOL(unpoison_memory);
@@ -2323,7 +2290,9 @@ static void put_ref_page(struct page *page)
* @pfn: pfn to soft-offline
* @flags: flags. Same as memory_failure().
*
- * Returns 0 on success, otherwise negated errno.
+ * Returns 0 on success
+ * -EOPNOTSUPP for hwpoison_filter() filtered the error event
+ * < 0 otherwise negated errno.
*
* Soft offline a page, by migration or invalidation,
* without killing anything. This is for the case when
@@ -2374,6 +2343,16 @@ retry:
ret = get_hwpoison_page(page, flags | MF_SOFT_OFFLINE);
put_online_mems();
+ if (hwpoison_filter(page)) {
+ if (ret > 0)
+ put_page(page);
+ else
+ put_ref_page(ref_page);
+
+ mutex_unlock(&mf_mutex);
+ return -EOPNOTSUPP;
+ }
+
if (ret > 0) {
ret = soft_offline_in_use_page(page);
} else if (ret == 0) {
@@ -2388,3 +2367,24 @@ retry:
return ret;
}
+
+void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
+{
+ int i;
+
+ /*
+ * A further optimization is to have per section refcounted
+ * num_poisoned_pages. But that would need more space per memmap, so
+ * for now just do a quick global check to speed up this routine in the
+ * absence of bad pages.
+ */
+ if (atomic_long_read(&num_poisoned_pages) == 0)
+ return;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (PageHWPoison(&memmap[i])) {
+ num_poisoned_pages_dec();
+ ClearPageHWPoison(&memmap[i]);
+ }
+ }
+}