Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 106
1 file changed, 67 insertions, 39 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d26162e81fea..abc1c5fb7222 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -832,8 +832,27 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
 	 * Only the process that called mmap() has reserves for
 	 * private mappings.
 	 */
-	if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
-		return true;
+	if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+		/*
+		 * Like the shared case above, a hole punch or truncate
+		 * could have been performed on the private mapping.
+		 * Examine the value of chg to determine if reserves
+		 * actually exist or were previously consumed.
+		 * Very Subtle - The value of chg comes from a previous
+		 * call to vma_needs_reserves().  The reserve map for
+		 * private mappings has different (opposite) semantics
+		 * than that of shared mappings.  vma_needs_reserves()
+		 * has already taken this difference in semantics into
+		 * account.  Therefore, the meaning of chg is the same
+		 * as in the shared case above.  Code could easily be
+		 * combined, but keeping it separate draws attention to
+		 * subtle differences.
+		 */
+		if (chg)
+			return false;
+		else
+			return true;
+	}
 
 	return false;
 }
@@ -1003,7 +1022,9 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 		((node = hstate_next_node_to_free(hs, mask)) || 1);	\
 		nr_nodes--)
 
-#if defined(CONFIG_X86_64) && ((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA))
+#if (defined(CONFIG_X86_64) || defined(CONFIG_S390)) && \
+	((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || \
+	defined(CONFIG_CMA))
 static void destroy_compound_gigantic_page(struct page *page,
 					unsigned int order)
 {
@@ -1011,6 +1032,7 @@ static void destroy_compound_gigantic_page(struct page *page,
 	int nr_pages = 1 << order;
 	struct page *p = page + 1;
 
+	atomic_set(compound_mapcount_ptr(page), 0);
 	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
 		clear_compound_head(p);
 		set_page_refcounted(p);
@@ -1816,6 +1838,25 @@ static long __vma_reservation_common(struct hstate *h,
 
 	if (vma->vm_flags & VM_MAYSHARE)
 		return ret;
+	else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && ret >= 0) {
+		/*
+		 * In most cases, reserves always exist for private mappings.
+		 * However, a file associated with mapping could have been
+		 * hole punched or truncated after reserves were consumed.
+		 * As subsequent fault on such a range will not use reserves.
+		 * Subtle - The reserve map for private mappings has the
+		 * opposite meaning than that of shared mappings.  If NO
+		 * entry is in the reserve map, it means a reservation exists.
+		 * If an entry exists in the reserve map, it means the
+		 * reservation has already been consumed.  As a result, the
+		 * return value of this routine is the opposite of the
+		 * value returned from reserve map manipulation routines above.
+		 */
+		if (ret)
+			return 0;
+		else
+			return 1;
+	}
 	else
 		return ret < 0 ? ret : 0;
 }
@@ -3138,7 +3179,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			    unsigned long start, unsigned long end,
 			    struct page *ref_page)
 {
-	int force_flush = 0;
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
 	pte_t *ptep;
@@ -3157,19 +3197,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	tlb_start_vma(tlb, vma);
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	address = start;
-again:
 	for (; address < end; address += sz) {
 		ptep = huge_pte_offset(mm, address);
 		if (!ptep)
 			continue;
 
 		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, &address, ptep))
-			goto unlock;
+		if (huge_pmd_unshare(mm, &address, ptep)) {
+			spin_unlock(ptl);
+			continue;
+		}
 
 		pte = huge_ptep_get(ptep);
-		if (huge_pte_none(pte))
-			goto unlock;
+		if (huge_pte_none(pte)) {
+			spin_unlock(ptl);
+			continue;
+		}
 
 		/*
 		 * Migrating hugepage or HWPoisoned hugepage is already
@@ -3177,7 +3220,8 @@ again:
 		 */
 		if (unlikely(!pte_present(pte))) {
 			huge_pte_clear(mm, address, ptep);
-			goto unlock;
+			spin_unlock(ptl);
+			continue;
 		}
 
 		page = pte_page(pte);
@@ -3187,9 +3231,10 @@ again:
 		 * are about to unmap is the actual page of interest.
 		 */
 		if (ref_page) {
-			if (page != ref_page)
-				goto unlock;
-
+			if (page != ref_page) {
+				spin_unlock(ptl);
+				continue;
+			}
 			/*
 			 * Mark the VMA as having unmapped its page so that
 			 * future faults in this VMA will fail rather than
@@ -3205,30 +3250,14 @@ again:
 
 		hugetlb_count_sub(pages_per_huge_page(h), mm);
 		page_remove_rmap(page, true);
-		force_flush = !__tlb_remove_page(tlb, page);
-		if (force_flush) {
-			address += sz;
-			spin_unlock(ptl);
-			break;
-		}
-		/* Bail out after unmapping reference page if supplied */
-		if (ref_page) {
-			spin_unlock(ptl);
-			break;
-		}
-unlock:
+
 		spin_unlock(ptl);
-	}
-	/*
-	 * mmu_gather ran out of room to batch pages, we break out of
-	 * the PTE lock to avoid doing the potential expensive TLB invalidate
-	 * and page-free while holding it.
-	 */
-	if (force_flush) {
-		force_flush = 0;
-		tlb_flush_mmu(tlb);
-		if (address < end && !ref_page)
-			goto again;
+		tlb_remove_page_size(tlb, page, huge_page_size(h));
+		/*
+		 * Bail out after unmapping reference page if supplied
+		 */
+		if (ref_page)
+			break;
 	}
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	tlb_end_vma(tlb, vma);
@@ -3344,7 +3373,7 @@ retry_avoidcopy:
 	/* If no-one else is actually using this page, avoid the copy
 	 * and just make the page writable */
 	if (page_mapcount(old_page) == 1 && PageAnon(old_page)) {
-		page_move_anon_rmap(old_page, vma, address);
+		page_move_anon_rmap(old_page, vma);
 		set_huge_ptep_writable(vma, address, ptep);
 		return 0;
 	}
@@ -4190,7 +4219,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 		if (saddr) {
 			spte = huge_pte_offset(svma->vm_mm, saddr);
 			if (spte) {
-				mm_inc_nr_pmds(mm);
 				get_page(virt_to_page(spte));
 				break;
 			}
@@ -4205,9 +4233,9 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (pud_none(*pud)) {
 		pud_populate(mm, pud,
 				(pmd_t *)((unsigned long)spte & PAGE_MASK));
+		mm_inc_nr_pmds(mm);
 	} else {
 		put_page(virt_to_page(spte));
-		mm_inc_nr_pmds(mm);
 	}
 	spin_unlock(ptl);
 out:
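For context on the reserve-map changes above, here is a minimal userspace sketch (not part of the patch) of the hole-punch scenario the vma_has_reserves() and __vma_reservation_common() comments describe. The hugetlbfs mount point /dev/hugepages, the file name "demo", and the 2 MB huge page size are assumptions about the test environment; adjust for other setups.

/*
 * hole_punch_private.c - sketch of a hole punch on a hugetlbfs file
 * backing a MAP_PRIVATE mapping.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE (2UL * 1024 * 1024)	/* assumes 2 MB huge pages */

int main(void)
{
	const char *path = "/dev/hugepages/demo";	/* hypothetical file */
	int fd = open(path, O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * A private mapping makes this process the HPAGE_RESV_OWNER:
	 * mmap() reserves one huge page for it.
	 */
	char *p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	p[0] = 1;	/* first fault consumes the reservation */

	/*
	 * Punching a hole drops the page backing the range along with
	 * its reserve map entries, so the kernel must not assume a
	 * reservation still exists on the next fault -- the case the
	 * chg checks added above are meant to detect.
	 */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      0, HPAGE_SIZE))
		perror("fallocate");

	p[0] = 2;	/* re-fault; may SIGBUS if the pool is exhausted */

	munmap(p, HPAGE_SIZE);
	close(fd);
	unlink(path);
	return 0;
}

After the punch, the second write fault must be treated as unreserved: it either allocates a fresh huge page from the pool or fails, rather than being charged against a reservation that no longer exists.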