From e5f15b45ddf3afa2bbbb10c7ea34fb32b6de0a0e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 18 Feb 2011 11:30:30 +0000 Subject: x86: Cleanup highmap after brk is concluded Now cleanup_highmap actually is in two steps: one is early in head64.c and only clears above _end; a second one is in init_memory_mapping() and tries to clean from _brk_end to _end. It should check if those boundaries are PMD_SIZE aligned but currently does not. Also init_memory_mapping() is called several times for numa or memory hotplug, so we really should not handle initial kernel mappings there. This patch moves cleanup_highmap() down after _brk_end is settled so we can do everything in one step. Also we honor max_pfn_mapped in the implementation of cleanup_highmap. Signed-off-by: Yinghai Lu Signed-off-by: Stefano Stabellini LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head64.c | 3 --- arch/x86/kernel/setup.c | 25 +++---------------------- arch/x86/mm/init_64.c | 11 ++++++----- 3 files changed, 9 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2d2673c28aff..5655c2272adb 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -77,9 +77,6 @@ void __init x86_64_start_kernel(char * real_mode_data) /* Make NULL pointers segfault */ zap_identity_mappings(); - /* Cleanup the over mapped high alias */ - cleanup_highmap(); - max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b176f2b1f45d..4a52a5f9afcb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -294,30 +294,11 @@ static void __init init_gbpages(void) else direct_gbpages = 0; } - -static void __init cleanup_highmap_brk_end(void) -{ - pud_t *pud; - pmd_t *pmd; - - mmu_cr4_features = read_cr4(); - - /* - * _brk_end cannot change anymore, but it and _end may be - * located on different 2M pages. cleanup_highmap(), however, - * can only consider _end when it runs, so destroy any - * mappings beyond _brk_end here. - */ - pud = pud_offset(pgd_offset_k(_brk_end), _brk_end); - pmd = pmd_offset(pud, _brk_end - 1); - while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) - pmd_clear(pmd); -} #else static inline void init_gbpages(void) { } -static inline void cleanup_highmap_brk_end(void) +static void __init cleanup_highmap(void) { } #endif @@ -330,8 +311,6 @@ static void __init reserve_brk(void) /* Mark brk area as locked down and no longer taking any new allocations */ _brk_start = 0; - - cleanup_highmap_brk_end(); } #ifdef CONFIG_BLK_DEV_INITRD @@ -950,6 +929,8 @@ void __init setup_arch(char **cmdline_p) */ reserve_brk(); + cleanup_highmap(); + memblock.current_limit = get_max_mapped(); memblock_x86_fill(); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a08a62cb136e..7026505a33ba 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -51,6 +51,7 @@ #include #include #include +#include static int __init parse_direct_gbpages_off(char *arg) { @@ -293,18 +294,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) * to the compile time generated pmds. This results in invalid pmds up * to the point where we hit the physaddr 0 mapping. * - * We limit the mappings to the region from _text to _end. _end is - * rounded up to the 2MB boundary. This catches the invalid pmds as + * We limit the mappings to the region from _text to _brk_end. _brk_end + * is rounded up to the 2MB boundary. This catches the invalid pmds as * well, as they are located before _text: */ void __init cleanup_highmap(void) { unsigned long vaddr = __START_KERNEL_map; - unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1; + unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); + unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; pmd_t *pmd = level2_kernel_pgt; - pmd_t *last_pmd = pmd + PTRS_PER_PMD; - for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { + for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { if (pmd_none(*pmd)) continue; if (vaddr < (unsigned long) _text || vaddr > end) -- cgit v1.2.3 From 14988a4d350ce3b41ecad4f63c4f44c56f5ae34d Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 18 Feb 2011 11:32:40 +0000 Subject: xen: set max_pfn_mapped to the last pfn mapped Do not set max_pfn_mapped to the end of the initial memory mappings, that also contain pages that don't belong in pfn space (like the mfn list). Set max_pfn_mapped to the last real pfn mapped in the initial memory mappings that is the pfn backing _end. Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/xen/mmu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a2d78ad35a55..6e27979506c1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1701,9 +1701,6 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { pte_t pte; - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - if (!pte_none(pte_page[pteidx])) continue; @@ -1761,6 +1758,12 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, pud_t *l3; pmd_t *l2; + /* max_pfn_mapped is the last pfn mapped in the initial memory + * mappings. Considering that on Xen after the kernel mappings we + * have the mappings of some pages that don't exist in pfn space, we + * set max_pfn_mapped to the last real pfn mapped. */ + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); @@ -1865,9 +1868,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, initial_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + - xen_start_info->nr_pt_frames * PAGE_SIZE + - 512*1024); + max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); -- cgit v1.2.3 From d8aa5ec3382e6a545b8f25178d1e0992d4927f19 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 9 Mar 2011 14:22:05 +0000 Subject: xen: update mask_rw_pte after kernel page tables init changes After "x86-64, mm: Put early page table high" already existing kernel page table pages can be mapped using early_ioremap too so we need to update mask_rw_pte to make sure these pages are still mapped RO. The reason why we have to do that is explain by the commit message of fef5ba797991f9335bcfc295942b684f9bf613a1: "Xen requires that all pages containing pagetable entries to be mapped read-only. If pages used for the initial pagetable are already mapped then we can change the mapping to RO. However, if they are initially unmapped, we need to make sure that when they are later mapped, they are also mapped RO. ..SNIP.. the pagetable setup code early_ioremaps the pages to write their entries, so we must make sure that mappings created in the early_ioremap fixmap area are mapped RW. (Those mappings are removed before the pages are presented to Xen as pagetable pages.)" We accomplish all this in mask_rw_pte by mapping RO all the pages mapped using early_ioremap apart from the last one that has been allocated because it is not a page table page yet (it has not been hooked into the page tables yet). Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk LKML-Reference: Signed-off-by: H. Peter Anvin --- arch/x86/xen/mmu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6e27979506c1..21058ad1e5e3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1488,10 +1488,12 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) /* * If the new pfn is within the range of the newly allocated * kernel pagetable, and it isn't being mapped into an - * early_ioremap fixmap slot, make sure it is RO. + * early_ioremap fixmap slot as a freshly allocated page, make sure + * it is RO. */ - if (!is_early_ioremap_ptep(ptep) && - pfn >= pgt_buf_start && pfn < pgt_buf_end) + if (((!is_early_ioremap_ptep(ptep) && + pfn >= pgt_buf_start && pfn < pgt_buf_end)) || + (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) pte = pte_wrprotect(pte); return pte; -- cgit v1.2.3