Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--	mm/memory-failure.c	156
1 file changed, 89 insertions(+), 67 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 97a9ed8f87a9..dcb6bb9cf731 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -130,12 +130,6 @@ static int hwpoison_filter_dev(struct page *p)
 	    hwpoison_filter_dev_minor == ~0U)
 		return 0;
 
-	/*
-	 * page_mapping() does not accept slab pages.
-	 */
-	if (PageSlab(p))
-		return -EINVAL;
-
 	mapping = page_mapping(p);
 	if (mapping == NULL || mapping->host == NULL)
 		return -EINVAL;
@@ -258,16 +252,13 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
 	pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
 			pfn, t->comm, t->pid);
 
-	if (flags & MF_ACTION_REQUIRED) {
-		if (t == current)
-			ret = force_sig_mceerr(BUS_MCEERR_AR,
-					 (void __user *)tk->addr, addr_lsb);
-		else
-			/* Signal other processes sharing the page if they have PF_MCE_EARLY set. */
-			ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
-				addr_lsb, t);
-	} else {
+	if ((flags & MF_ACTION_REQUIRED) && (t == current))
+		ret = force_sig_mceerr(BUS_MCEERR_AR,
+				 (void __user *)tk->addr, addr_lsb);
+	else
 		/*
+		 * Signal other processes sharing the page if they have
+		 * PF_MCE_EARLY set.
 		 * Don't use force here, it's convenient if the signal
 		 * can be temporarily blocked.
 		 * This could cause a loop when the user sets SIGBUS
@@ -275,7 +266,6 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
 		 */
 		ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
 				      addr_lsb, t);  /* synchronous? */
-	}
 	if (ret < 0)
 		pr_info("Memory failure: Error sending signal to %s:%d: %d\n",
 			t->comm, t->pid, ret);
@@ -315,6 +305,7 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
 	pmd_t *pmd;
 	pte_t *pte;
 
+	VM_BUG_ON_VMA(address == -EFAULT, vma);
 	pgd = pgd_offset(vma->vm_mm, address);
 	if (!pgd_present(*pgd))
 		return 0;
@@ -487,12 +478,13 @@ static struct task_struct *task_early_kill(struct task_struct *tsk,
 
 static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 				int force_early)
 {
+	struct folio *folio = page_folio(page);
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
 	struct anon_vma *av;
 	pgoff_t pgoff;
 
-	av = page_lock_anon_vma_read(page);
+	av = folio_lock_anon_vma_read(folio);
 	if (av == NULL)	/* Not actually mapped anymore */
 		return;
@@ -707,8 +699,10 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
 			      (void *)&priv);
 	if (ret == 1 && priv.tk.addr)
 		kill_proc(&priv.tk, pfn, flags);
+	else
+		ret = 0;
 	mmap_read_unlock(p->mm);
-	return ret ? -EFAULT : -EHWPOISON;
+	return ret > 0 ? -EHWPOISON : -EFAULT;
 }
 
 static const char *action_name[] = {
@@ -739,6 +733,7 @@ static const char * const action_page_types[] = {
 	[MF_MSG_BUDDY]			= "free buddy page",
 	[MF_MSG_DAX]			= "dax page",
 	[MF_MSG_UNSPLIT_THP]		= "unsplit thp",
+	[MF_MSG_DIFFERENT_PAGE_SIZE]	= "different page size",
 	[MF_MSG_UNKNOWN]		= "unknown page",
 };
 
@@ -1182,12 +1177,18 @@ void ClearPageHWPoisonTakenOff(struct page *page)
  * does not return true for hugetlb or device memory pages, so it's assumed
  * to be called only in the context where we never have such pages.
  */
-static inline bool HWPoisonHandlable(struct page *page)
+static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
 {
-	return PageLRU(page) || __PageMovable(page) || is_free_buddy_page(page);
+	bool movable = false;
+
+	/* Soft offline could migrate non-LRU movable pages */
+	if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
+		movable = true;
+
+	return movable || PageLRU(page) || is_free_buddy_page(page);
 }
 
-static int __get_hwpoison_page(struct page *page)
+static int __get_hwpoison_page(struct page *page, unsigned long flags)
 {
 	struct page *head = compound_head(page);
 	int ret = 0;
@@ -1202,7 +1203,7 @@ static int __get_hwpoison_page(struct page *page)
 	 * for any unsupported type of page in order to reduce the risk of
 	 * unexpected races caused by taking a page refcount.
 	 */
-	if (!HWPoisonHandlable(head))
+	if (!HWPoisonHandlable(head, flags))
 		return -EBUSY;
 
 	if (get_page_unless_zero(head)) {
@@ -1227,7 +1228,7 @@ static int get_any_page(struct page *p, unsigned long flags)
 
 try_again:
 	if (!count_increased) {
-		ret = __get_hwpoison_page(p);
+		ret = __get_hwpoison_page(p, flags);
 		if (!ret) {
 			if (page_count(p)) {
 				/* We raced with an allocation, retry. */
@@ -1255,7 +1256,7 @@ try_again:
 		}
 	}
 
-	if (PageHuge(p) || HWPoisonHandlable(p)) {
+	if (PageHuge(p) || HWPoisonHandlable(p, flags)) {
 		ret = 1;
 	} else {
 		/*
@@ -1347,6 +1348,7 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
 static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 				  int flags, struct page *hpage)
 {
+	struct folio *folio = page_folio(hpage);
 	enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC;
 	struct address_space *mapping;
 	LIST_HEAD(tokill);
@@ -1411,26 +1413,22 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	if (kill)
 		collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
 
-	if (!PageHuge(hpage)) {
-		try_to_unmap(hpage, ttu);
+	if (PageHuge(hpage) && !PageAnon(hpage)) {
+		/*
+		 * For hugetlb pages in shared mappings, try_to_unmap
+		 * could potentially call huge_pmd_unshare.  Because of
+		 * this, take semaphore in write mode here and set
+		 * TTU_RMAP_LOCKED to indicate we have taken the lock
+		 * at this higher level.
+		 */
+		mapping = hugetlb_page_mapping_lock_write(hpage);
+		if (mapping) {
+			try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
+			i_mmap_unlock_write(mapping);
+		} else
+			pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
 	} else {
-		if (!PageAnon(hpage)) {
-			/*
-			 * For hugetlb pages in shared mappings, try_to_unmap
-			 * could potentially call huge_pmd_unshare.  Because of
-			 * this, take semaphore in write mode here and set
-			 * TTU_RMAP_LOCKED to indicate we have taken the lock
-			 * at this higher level.
-			 */
-			mapping = hugetlb_page_mapping_lock_write(hpage);
-			if (mapping) {
-				try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED);
-				i_mmap_unlock_write(mapping);
-			} else
-				pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
-		} else {
-			try_to_unmap(hpage, ttu);
-		}
+		try_to_unmap(folio, ttu);
 	}
 
 	unmap_success = !page_mapped(hpage);
@@ -1526,7 +1524,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 		if (TestClearPageHWPoison(head))
 			num_poisoned_pages_dec();
 		unlock_page(head);
-		return 0;
+		return -EOPNOTSUPP;
 	}
 	unlock_page(head);
 	res = MF_FAILED;
@@ -1543,8 +1541,27 @@
 	}
 
 	lock_page(head);
+
+	/*
+	 * The page could have changed compound pages due to a race window.
+	 * If this happens, just bail out.
+	 */
+	if (!PageHuge(p) || compound_head(p) != head) {
+		action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
+		res = -EBUSY;
+		goto out;
+	}
+
 	page_flags = head->flags;
 
+	if (hwpoison_filter(p)) {
+		if (TestClearPageHWPoison(head))
+			num_poisoned_pages_dec();
+		put_page(p);
+		res = -EOPNOTSUPP;
+		goto out;
+	}
+
 	/*
 	 * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
 	 * simply disable it. In order to make it work properly, we need
@@ -1613,7 +1630,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		goto out;
 
 	if (hwpoison_filter(page)) {
-		rc = 0;
+		rc = -EOPNOTSUPP;
 		goto unlock;
 	}
 
@@ -1638,7 +1655,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	 * SIGBUS (i.e. MF_MUST_KILL)
 	 */
 	flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
-	collect_procs(page, &tokill, flags & MF_ACTION_REQUIRED);
+	collect_procs(page, &tokill, true);
 
 	list_for_each_entry(tk, &tokill, nd)
 		if (tk->size_shift)
@@ -1653,7 +1670,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		start = (page->index << PAGE_SHIFT) & ~(size - 1);
 		unmap_mapping_range(page->mapping, start, size, 0);
 	}
-	kill_procs(&tokill, flags & MF_MUST_KILL, false, pfn, flags);
+	kill_procs(&tokill, true, false, pfn, flags);
 	rc = 0;
 unlock:
 	dax_unlock_page(page, cookie);
@@ -1682,12 +1699,15 @@ static DEFINE_MUTEX(mf_mutex);
  *
  * Must run in process context (e.g. a work queue) with interrupts
  * enabled and no spinlocks hold.
+ *
+ * Return: 0 if the memory error was successfully handled,
+ *         -EOPNOTSUPP if the error event was filtered by hwpoison_filter(),
+ *         < 0 (except -EOPNOTSUPP) on any other failure.
  */
 int memory_failure(unsigned long pfn, int flags)
 {
 	struct page *p;
 	struct page *hpage;
-	struct page *orig_head;
 	struct dev_pagemap *pgmap;
 	int res = 0;
 	unsigned long page_flags;
@@ -1733,7 +1753,7 @@ try_again:
 		goto unlock_mutex;
 	}
 
-	orig_head = hpage = compound_head(p);
+	hpage = compound_head(p);
 	num_poisoned_pages_inc();
 
 	/*
@@ -1814,10 +1834,21 @@ try_again:
 	lock_page(p);
 
 	/*
-	 * The page could have changed compound pages during the locking.
-	 * If this happens just bail out.
+	 * We only intend to deal with non-compound pages here.
+	 * However, the page could have changed compound pages due to
+	 * a race window. If this happens, we could try again to hopefully
+	 * handle the page next round.
 	 */
-	if (PageCompound(p) && compound_head(p) != orig_head) {
+	if (PageCompound(p)) {
+		if (retry) {
+			if (TestClearPageHWPoison(p))
+				num_poisoned_pages_dec();
+			unlock_page(p);
+			put_page(p);
+			flags &= ~MF_COUNT_INCREASED;
+			retry = false;
+			goto try_again;
+		}
 		action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
 		res = -EBUSY;
 		goto unlock_page;
@@ -1837,6 +1868,7 @@ try_again:
 			num_poisoned_pages_dec();
 		unlock_page(p);
 		put_page(p);
+		res = -EOPNOTSUPP;
 		goto unlock_mutex;
 	}
 
@@ -1845,7 +1877,7 @@
 	 * page_lock. We need wait writeback completion for this page or it
 	 * may trigger vfs BUG while evict inode.
 	 */
-	if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
+	if (!PageLRU(p) && !PageWriteback(p))
 		goto identify_page_state;
 
 	/*
@@ -2139,7 +2171,7 @@ static bool isolate_page(struct page *page, struct list_head *pagelist)
  */
 static int __soft_offline_page(struct page *page)
 {
-	int ret = 0;
+	long ret = 0;
 	unsigned long pfn = page_to_pfn(page);
 	struct page *hpage = compound_head(page);
 	char const *msg_page[] = {"page", "hugepage"};
@@ -2150,12 +2182,6 @@ static int __soft_offline_page(struct page *page)
 		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
 	};
 
-	/*
-	 * Check PageHWPoison again inside page lock because PageHWPoison
-	 * is set by memory_failure() outside page lock. Note that
-	 * memory_failure() also double-checks PageHWPoison inside page lock,
-	 * so there's no race between soft_offline_page() and memory_failure().
-	 */
 	lock_page(page);
 	if (!PageHuge(page))
 		wait_on_page_writeback(page);
@@ -2166,7 +2192,7 @@ static int __soft_offline_page(struct page *page)
 		return 0;
 	}
 
-	if (!PageHuge(page))
+	if (!PageHuge(page) && PageLRU(page) && !PageSwapCache(page))
 		/*
 		 * Try to invalidate first. This should work for
 		 * non dirty unmapped page cache pages.
@@ -2174,10 +2200,6 @@ static int __soft_offline_page(struct page *page)
 		ret = invalidate_inode_page(page);
 	unlock_page(page);
 
-	/*
-	 * RED-PEN would be better to keep it isolated here, but we
-	 * would need to fix isolation locking first.
-	 */
 	if (ret) {
 		pr_info("soft_offline: %#lx: invalidated\n", pfn);
 		page_handle_poison(page, false, true);
@@ -2196,7 +2218,7 @@ static int __soft_offline_page(struct page *page)
 		if (!list_empty(&pagelist))
 			putback_movable_pages(&pagelist);
 
-		pr_info("soft offline: %#lx: %s migration failed %d, type %pGp\n",
+		pr_info("soft offline: %#lx: %s migration failed %ld, type %pGp\n",
 			pfn, msg_page[huge], ret, &page->flags);
 		if (ret > 0)
 			ret = -EBUSY;
@@ -2288,7 +2310,7 @@ int soft_offline_page(unsigned long pfn, int flags)
 
 retry:
 	get_online_mems();
-	ret = get_hwpoison_page(page, flags);
+	ret = get_hwpoison_page(page, flags | MF_SOFT_OFFLINE);
 	put_online_mems();
 
 	if (ret > 0) {
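
Note on the kill_proc() rework above: collapsing the nested branches does not change the userspace contract. The task that actually consumed the poison still gets a forced BUS_MCEERR_AR SIGBUS, while other tasks mapping the page are only notified with BUS_MCEERR_AO if they set PF_MCE_EARLY. A userspace sketch of that opt-in follows; it is illustrative only and not part of the patch (the fallback si_code values are assumptions mirroring asm-generic/siginfo.h, and printf() in a signal handler is not async-signal-safe, kept here for brevity):

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>

#ifndef BUS_MCEERR_AR
#define BUS_MCEERR_AR	4	/* assumed: mirrors asm-generic/siginfo.h */
#define BUS_MCEERR_AO	5
#endif

static void sigbus_handler(int sig, siginfo_t *si, void *ucontext)
{
	/* AR: this task consumed the poison; AO: it merely maps the page. */
	const char *kind = si->si_code == BUS_MCEERR_AR ? "AR" : "AO";

	printf("SIGBUS (%s) at %p, lsb %d\n", kind, si->si_addr,
	       (int)si->si_addr_lsb);
}

int main(void)
{
	struct sigaction sa = {
		.sa_sigaction	= sigbus_handler,
		.sa_flags	= SA_SIGINFO,
	};

	sigaction(SIGBUS, &sa, NULL);
	/* Sets PF_MCE_EARLY: opt in to early BUS_MCEERR_AO notification. */
	prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
	pause();	/* wait for the machine-check path to signal us */
	return 0;
}

PR_MCE_KILL_EARLY is what sets PF_MCE_EARLY on the task, making it eligible for the send_sig_mceerr() path in kill_proc() above.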
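The new Return: kernel-doc on memory_failure() gives callers a three-way contract: 0 means the error was handled, -EOPNOTSUPP means hwpoison_filter() swallowed the event, and any other negative value is a real failure. A minimal in-kernel caller sketch, assuming only the declarations in include/linux/mm.h (the function name and messages are hypothetical):

#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/printk.h>

/* Hypothetical consumer of the new memory_failure() return contract. */
static void handle_poisoned_pfn(unsigned long pfn)
{
	int ret = memory_failure(pfn, MF_ACTION_REQUIRED);

	if (!ret)
		return;		/* error fully handled */
	if (ret == -EOPNOTSUPP)
		return;		/* event filtered by hwpoison_filter() */
	pr_warn("pfn %#lx: recovery failed: %d\n", pfn, ret);
}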
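HWPoisonHandlable() now takes the mf_flags, so non-LRU movable pages (__PageMovable()) are only treated as handlable when MF_SOFT_OFFLINE is set, which soft_offline_page() ORs in before calling get_hwpoison_page(): soft offline can migrate such pages, hard offline cannot. A standalone model of that gate, with the page predicates stubbed as booleans and the flag value assumed to mirror enum mf_flags:

#include <stdbool.h>
#include <stdio.h>

#define MF_SOFT_OFFLINE	(1 << 3)	/* assumed to mirror enum mf_flags */

/* lru/movable/buddy stand in for PageLRU(), __PageMovable() and
 * is_free_buddy_page(), which need a real struct page. */
static bool handlable(bool lru, bool movable, bool buddy, unsigned long flags)
{
	/* Only soft offline, which can migrate them, may take
	 * non-LRU movable pages. */
	if ((flags & MF_SOFT_OFFLINE) && movable)
		return true;
	return lru || buddy;
}

int main(void)
{
	/* A non-LRU movable page is refused for hard offline (prints 0)... */
	printf("hard offline: %d\n", handlable(false, true, false, 0));
	/* ...but accepted once soft_offline_page() ORs in the flag (prints 1). */
	printf("soft offline: %d\n",
	       handlable(false, true, false, MF_SOFT_OFFLINE));
	return 0;
}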