Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 113
1 file changed, 89 insertions, 24 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 20d93001ef93..148eafb8cbb1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -71,6 +71,8 @@
 #include <linux/dax.h>
 #include <linux/oom.h>
 #include <linux/numa.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
 
 #include <trace/events/kmem.h>
 
@@ -437,7 +439,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
	 * of a chain of data-dependent loads, meaning most CPUs (alpha
	 * being the notable exception) will already guarantee loads are
	 * seen in-order. See the alpha page table accessors for the
-	 * smp_read_barrier_depends() barriers in page table walking code.
+	 * smp_rmb() barriers in page table walking code.
	 */
	smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
 
@@ -1098,7 +1100,7 @@ again:
		}
 
		entry = pte_to_swp_entry(ptent);
-		if (non_swap_entry(entry) && is_device_private_entry(entry)) {
+		if (is_device_private_entry(entry)) {
			struct page *page = device_private_entry_to_page(entry);
 
			if (unlikely(details && details->check_mapping)) {
@@ -1800,7 +1802,7 @@ out_unlock:
  * @pfn: source kernel pfn
  * @pgprot: pgprot flags for the inserted page
  *
- * This is exactly like vmf_insert_pfn(), except that it allows drivers to
+ * This is exactly like vmf_insert_pfn(), except that it allows drivers
  * to override pgprot on a per-page basis.
  *
  * This only makes sense for IO mappings, and it makes no sense for
@@ -1936,7 +1938,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
  * @pfn: source kernel pfn
  * @pgprot: pgprot flags for the inserted page
  *
- * This is exactly like vmf_insert_mixed(), except that it allows drivers to
+ * This is exactly like vmf_insert_mixed(), except that it allows drivers
  * to override pgprot on a per-page basis.
  *
  * Typically this function should be used by drivers to set caching- and
@@ -2082,7 +2084,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
 /**
  * remap_pfn_range - remap kernel memory to userspace
  * @vma: user vma to map to
- * @addr: target user address to start at
+ * @addr: target page aligned user address to start at
  * @pfn: page frame number of kernel physical memory address
  * @size: size of mapping area
  * @prot: page protection flags for this mapping
@@ -2101,6 +2103,9 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
	unsigned long remap_pfn = pfn;
	int err;
 
+	if (WARN_ON_ONCE(!PAGE_ALIGNED(addr)))
+		return -EINVAL;
+
	/*
	 * Physically remapped pages are special. Tell the
	 * rest of the world about it:
@@ -2205,7 +2210,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 {
	pte_t *pte;
	int err = 0;
-	spinlock_t *uninitialized_var(ptl);
+	spinlock_t *ptl;
 
	if (create) {
		pte = (mm == &init_mm) ?
@@ -2406,8 +2411,6 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr = vmf->address;
 
-	debug_dma_assert_idle(src);
-
	if (likely(src)) {
		copy_user_highpage(dst, src, addr, vma);
		return true;
@@ -2713,7 +2716,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
		 */
		ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
		page_add_new_anon_rmap(new_page, vma, vmf->address, false);
-		lru_cache_add_active_or_unevictable(new_page, vma);
+		lru_cache_add_inactive_or_unevictable(new_page, vma);
		/*
		 * We call the notify macro here because, when using secondary
		 * mmu page tables (such as kvm shadow page tables), we want the
@@ -3071,6 +3074,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
	int locked;
	int exclusive = 0;
	vm_fault_t ret = 0;
+	void *shadow = NULL;
 
	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
		goto out;
@@ -3100,8 +3104,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
	if (!page) {
		struct swap_info_struct *si = swp_swap_info(entry);
 
-		if (si->flags & SWP_SYNCHRONOUS_IO &&
-		    __swap_count(entry) == 1) {
+		if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
+		    __swap_count(entry) == 1) {
			/* skip swapcache */
			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
							vmf->address);
@@ -3122,13 +3126,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
					goto out_page;
				}
 
-				/*
-				 * XXX: Move to lru_cache_add() when it
-				 * supports new vs putback
-				 */
-				spin_lock_irq(&page_pgdat(page)->lru_lock);
-				lru_note_cost_page(page);
-				spin_unlock_irq(&page_pgdat(page)->lru_lock);
+				shadow = get_shadow_from_swap_cache(entry);
+				if (shadow)
+					workingset_refault(page, shadow);
 
				lru_cache_add(page);
				swap_readpage(page, true);
@@ -3239,10 +3239,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
	/* ksm created a completely new copy */
	if (unlikely(page != swapcache && swapcache)) {
		page_add_new_anon_rmap(page, vma, vmf->address, false);
-		lru_cache_add_active_or_unevictable(page, vma);
+		lru_cache_add_inactive_or_unevictable(page, vma);
	} else {
		do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
-		activate_page(page);
	}
 
	swap_free(entry);
@@ -3387,7 +3386,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 
	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
	page_add_new_anon_rmap(page, vma, vmf->address, false);
-	lru_cache_add_active_or_unevictable(page, vma);
+	lru_cache_add_inactive_or_unevictable(page, vma);
 setpte:
	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
 
@@ -3645,7 +3644,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page)
	if (write && !(vma->vm_flags & VM_SHARED)) {
		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
		page_add_new_anon_rmap(page, vma, vmf->address, false);
-		lru_cache_add_active_or_unevictable(page, vma);
+		lru_cache_add_inactive_or_unevictable(page, vma);
	} else {
		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
		page_add_file_rmap(page, false);
@@ -4224,6 +4223,9 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
				vmf->flags & FAULT_FLAG_WRITE)) {
		update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
	} else {
+		/* Skip spurious TLB flush for retried page fault */
+		if (vmf->flags & FAULT_FLAG_TRIED)
+			goto unlock;
		/*
		 * This is needed only for protection faults but the arch code
		 * is not yet telling us if this is a protection fault or not.
@@ -4333,6 +4335,67 @@ retry_pud:
	return handle_pte_fault(&vmf);
 }
 
+/**
+ * mm_account_fault - Do page fault accountings
+ *
+ * @regs: the pt_regs struct pointer. When set to NULL, will skip accounting
+ *        of perf event counters, but we'll still do the per-task accounting to
+ *        the task who triggered this page fault.
+ * @address: the faulted address.
+ * @flags: the fault flags.
+ * @ret: the fault retcode.
+ *
+ * This will take care of most of the page fault accountings. Meanwhile, it
+ * will also include the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] perf counter
+ * updates. However note that the handling of PERF_COUNT_SW_PAGE_FAULTS should
+ * still be in per-arch page fault handlers at the entry of page fault.
+ */
+static inline void mm_account_fault(struct pt_regs *regs,
+				    unsigned long address, unsigned int flags,
+				    vm_fault_t ret)
+{
+	bool major;
+
+	/*
+	 * We don't do accounting for some specific faults:
+	 *
+	 * - Unsuccessful faults (e.g. when the address wasn't valid). That
+	 *   includes arch_vma_access_permitted() failing before reaching here.
+	 *   So this is not a "this many hardware page faults" counter. We
+	 *   should use the hw profiling for that.
+	 *
+	 * - Incomplete faults (VM_FAULT_RETRY). They will only be counted
+	 *   once they're completed.
+	 */
+	if (ret & (VM_FAULT_ERROR | VM_FAULT_RETRY))
+		return;
+
+	/*
+	 * We define the fault as a major fault when the final successful fault
+	 * is VM_FAULT_MAJOR, or if it retried (which implies that we couldn't
+	 * handle it immediately previously).
+	 */
+	major = (ret & VM_FAULT_MAJOR) || (flags & FAULT_FLAG_TRIED);
+
+	if (major)
+		current->maj_flt++;
+	else
+		current->min_flt++;
+
+	/*
+	 * If the fault is done for GUP, regs will be NULL. We only do the
+	 * accounting for the per thread fault counters who triggered the
+	 * fault, and we skip the perf event updates.
+	 */
+	if (!regs)
+		return;
+
+	if (major)
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+	else
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
+}
+
 /*
  * By the time we get here, we already hold the mm semaphore
  *
@@ -4340,7 +4403,7 @@ retry_pud:
  * return value. See filemap_fault() and __lock_page_or_retry().
  */
 vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags)
+		unsigned int flags, struct pt_regs *regs)
 {
	vm_fault_t ret;
 
@@ -4381,6 +4444,8 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
		mem_cgroup_oom_synchronize(false);
	}
 
+	mm_account_fault(regs, address, flags, ret);
+
	return ret;
 }
 EXPORT_SYMBOL_GPL(handle_mm_fault);
@@ -4654,7 +4719,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
		void *maddr;
		struct page *page = NULL;
 
-		ret = get_user_pages_remote(tsk, mm, addr, 1,
+		ret = get_user_pages_remote(mm, addr, 1,
				gup_flags, &page, &vma, NULL);
		if (ret <= 0) {
 #ifndef CONFIG_HAVE_IOREMAP_PROT
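
Note on the remap_pfn_range() hunks above: @addr is now required to be page aligned, and an unaligned address trips WARN_ON_ONCE() and fails with -EINVAL. Below is a minimal sketch of the usual calling pattern under that rule; my_mmap() and MY_MMIO_BASE are hypothetical names for illustration, not code from this diff.

/* Hypothetical driver ->mmap sketch, not part of this diff. */
#include <linux/fs.h>
#include <linux/mm.h>

#define MY_MMIO_BASE	0xfd000000UL	/* assumed MMIO physical base */

static int my_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/* vma->vm_start is page aligned, so the new PAGE_ALIGNED() check passes. */
	return remap_pfn_range(vma, vma->vm_start,
			       MY_MMIO_BASE >> PAGE_SHIFT,
			       size, vma->vm_page_prot);
}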
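
Note on mm_account_fault() and the new handle_mm_fault() signature: per-task maj_flt/min_flt updates and the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] events now happen in core code, driven by the pt_regs pointer the caller passes in (NULL for GUP-style callers). The following is a sketch of what an architecture fault path might look like after this change; arch_do_user_fault() and its surrounding context are assumptions, not code from this diff.

/* Hypothetical arch-side caller sketch, not part of this diff. */
#include <linux/mm.h>
#include <linux/perf_event.h>
#include <linux/ptrace.h>

static vm_fault_t arch_do_user_fault(struct vm_area_struct *vma,
				     unsigned long address,
				     unsigned int flags,
				     struct pt_regs *regs)
{
	/* The overall PERF_COUNT_SW_PAGE_FAULTS event stays in the arch handler. */
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	/*
	 * Passing regs lets the core mm bump current->maj_flt/min_flt and
	 * emit the MAJ/MIN perf events itself; a NULL regs (e.g. GUP) gets
	 * only the per-task counters.
	 */
	return handle_mm_fault(vma, address, flags, regs);
}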
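
Note on the last hunk: get_user_pages_remote() no longer takes a task_struct, only the target mm. A sketch of the resulting call shape, assuming the caller takes mmap_lock itself; peek_remote_page() is a made-up helper name, not part of this diff.

/* Hypothetical remote-access sketch, not part of this diff. */
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mmap_lock.h>

static int peek_remote_page(struct mm_struct *mm, unsigned long addr,
			    unsigned int gup_flags, struct page **pagep)
{
	struct vm_area_struct *vma;
	long ret;

	mmap_read_lock(mm);
	/* The task_struct argument is gone; only the mm identifies the target. */
	ret = get_user_pages_remote(mm, addr, 1, gup_flags, pagep, &vma, NULL);
	mmap_read_unlock(mm);

	return ret <= 0 ? -EFAULT : 0;
}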