From d8b57bb700a73872fd06b891d7c9bc4cea1a6af4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Wed, 6 Feb 2008 22:39:43 +0100
Subject: x86: make spurious fault handler aware of large mappings

In very rare cases, on certain CPUs, we could end up in the spurious
fault handler and ignore a large pud/pmd mapping. The resulting pte
pointer points into the mapped physical space and dereferencing it
will fault recursively.

Make the code aware of large mappings and do the permission check on
the pmd/pud entry, when a large pud/pmd mapping is detected.

Signed-off-by: Thomas Gleixner
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/fault.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ad8b9733d6b3..d8ed4006b3d2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -428,6 +428,16 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
 }
 #endif
 
+static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+{
+	if ((error_code & PF_WRITE) && !pte_write(*pte))
+		return 0;
+	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+		return 0;
+
+	return 1;
+}
+
 /*
  * Handle a spurious fault caused by a stale TLB entry. This allows
  * us to lazily refresh the TLB when increasing the permissions of a
@@ -457,20 +467,21 @@ static int spurious_fault(unsigned long address,
 	if (!pud_present(*pud))
 		return 0;
 
+	if (pud_large(*pud))
+		return spurious_fault_check(error_code, (pte_t *) pud);
+
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
 		return 0;
 
+	if (pmd_large(*pmd))
+		return spurious_fault_check(error_code, (pte_t *) pmd);
+
 	pte = pte_offset_kernel(pmd, address);
 	if (!pte_present(*pte))
 		return 0;
 
-	if ((error_code & PF_WRITE) && !pte_write(*pte))
-		return 0;
-	if ((error_code & PF_INSTR) && !pte_exec(*pte))
-		return 0;
-
-	return 1;
+	return spurious_fault_check(error_code, pte);
 }
 
 /*
-- cgit v1.2.3

From 2d684cd6d9cf0c6a0e28978362671b6e2d8fb56c Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 6 Feb 2008 22:39:44 +0100
Subject: x86: remove X2 workaround

With the spurious handler fix, the X2 does not lock up anymore.

Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pageattr.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 16ce841f08d6..c870424aa9ad 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -260,17 +260,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	pgprot_t old_prot, new_prot;
 	int level, do_split = 1;
 
-	/*
-	 * An Athlon 64 X2 showed hard hangs if we tried to preserve
-	 * largepages and changed the PSE entry from RW to RO.
-	 *
-	 * As AMD CPUs have a long series of erratas in this area,
-	 * (and none of the known ones seem to explain this hang),
-	 * disable this code until the hang can be debugged:
-	 */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		return 1;
-
 	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Check for races, another CPU might have split this page
-- cgit v1.2.3

From 984bb80d94d891592ab16d4d129b988792752c7b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven
Date: Wed, 6 Feb 2008 22:39:45 +0100
Subject: x86: mark the .rodata section also NX

The .rodata section shouldn't just be read-only, but also
non-executable. This is free since we've broken up the 2MB page
already anyway.

Also update test_nx to check for this.
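As context for the init_64.c hunk below: the open-coded rounding
((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK is the usual
PAGE_ALIGN() idiom, and both helpers take a page count rather than a byte
length. A minimal usage sketch of that pattern, assuming only the
set_memory_ro()/set_memory_nx() API the patch itself uses:

	unsigned long start = PAGE_ALIGN((unsigned long)__start_rodata);
	unsigned long end   = (unsigned long)__end_rodata;

	/* both helpers take the number of pages, not bytes */
	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
	set_memory_nx(start, (end - start) >> PAGE_SHIFT);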
Signed-off-by: Arjan van de Ven
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/test_nx.c | 2 +-
 arch/x86/mm/init_64.c     | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 36c100c323aa..10b8a6f69f84 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -139,7 +139,6 @@ static int test_NX(void)
 	 * Until then, don't run them to avoid too many people getting scared
 	 * by the error message
 	 */
-#if 0
 
 #ifdef CONFIG_DEBUG_RODATA
 	/* Test 3: Check if the .rodata section is executable */
@@ -152,6 +151,7 @@ static int test_NX(void)
 	}
 #endif
 
+#if 0
 	/* Test 4: Check if the .data section of a module is executable */
 	if (test_address(&test_data)) {
 		printk(KERN_ERR "test_nx: .data section is executable\n");

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3a98d6f724ab..9b61c75a2355 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -591,10 +591,17 @@ void mark_rodata_ro(void)
 	if (end <= start)
 		return;
 
-	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
-
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
+	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+
+	/*
+	 * The rodata section (but not the kernel text!) should also be
+	 * not-executable.
+	 */
+	start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+	set_memory_nx(start, (end - start) >> PAGE_SHIFT);
 
 	rodata_test();
-- cgit v1.2.3

From cc842b82cc513ebc78bef6eeaacb5f6335851bcb Mon Sep 17 00:00:00 2001
From: Arjan van de Ven
Date: Wed, 6 Feb 2008 22:39:45 +0100
Subject: x86: remove spurious ifdefs from pageattr.c

The .rodata section really should just be read-only; the config option
is there to make breaking up the 2MB page an option (so people whose
machines give more performance for the 2MB case can opt to do so).

But when the page gets split anyway, this is no longer an issue, so
clean up the code and remove the ifdefs.

Signed-off-by: Arjan van de Ven
Signed-off-by: Ingo Molnar
---
 arch/x86/mm/pageattr.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index c870424aa9ad..8493c855582b 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -167,8 +167,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 	if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
 		pgprot_val(forbidden) |= _PAGE_NX;
 
-
-#ifdef CONFIG_DEBUG_RODATA
 	/* The .rodata section needs to be read-only */
 	if (within(address, (unsigned long)__start_rodata,
 				(unsigned long)__end_rodata))
@@ -179,7 +177,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 	if (within(address, virt_to_highmap(__start_rodata),
 				virt_to_highmap(__end_rodata)))
 		pgprot_val(forbidden) |= _PAGE_RW;
-#endif
 
 	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
-- cgit v1.2.3

From 971a52d66a3e87d4d2f5d3455e62680447cdb8e9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 6 Feb 2008 22:39:45 +0100
Subject: x86: delay CPA self-test and repeat it

Delay the CPA self-test so that any impact (corruption) of
user-space pagetables can be triggered. Repeat the test every
30 seconds.

This would have prevented the bug fixed by 8cb2a7c1e95e472b5,
at its source.
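The repeat mechanism described above is the stock periodic-kthread
pattern. A standalone sketch of it, with run_one_pass() as a hypothetical
stand-in for the actual test body:

#include <linux/kthread.h>
#include <linux/sched.h>

static int periodic_test(void *unused)
{
	while (!kthread_should_stop()) {
		/* sleep roughly 30 seconds between passes */
		schedule_timeout_interruptible(HZ * 30);
		if (run_one_pass() < 0)	/* hypothetical test body */
			break;
	}
	return 0;
}

static int __init start_periodic_test(void)
{
	/* kthread_run() is kthread_create() plus wake_up_process() */
	struct task_struct *p = kthread_run(periodic_test, NULL, "test-kthread");

	return IS_ERR(p) ? PTR_ERR(p) : 0;
}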
Signed-off-by: Ingo Molnar
---
 arch/x86/Kconfig.debug      |  4 +--
 arch/x86/mm/pageattr-test.c | 65 +++++++++++++++++++++++++++++++++++----------
 2 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2e1e3af28c3a..fa555148823d 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -220,9 +220,9 @@ config DEBUG_BOOT_PARAMS
 	  This option will cause struct boot_params to be exported via debugfs.
 
 config CPA_DEBUG
-	bool "CPA self test code"
+	bool "CPA self-test code"
 	depends on DEBUG_KERNEL
 	help
-	  Do change_page_attr self tests at boot.
+	  Do change_page_attr() self-tests every 30 seconds.
 
 endmenu

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 398f3a578dde..ed8201600354 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -5,6 +5,7 @@
  * and compares page tables forwards and afterwards.
  */
 #include
+#include
 #include
 #include
 #include
@@ -14,8 +15,13 @@
 #include
 #include
 
+/*
+ * Only print the results of the first pass:
+ */
+static __read_mostly int print = 1;
+
 enum {
-	NTEST = 4000,
+	NTEST = 400,
 #ifdef CONFIG_X86_64
 	LPS = (1 << PMD_SHIFT),
 #elif defined(CONFIG_X86_PAE)
@@ -31,7 +37,7 @@ struct split_state {
 	long min_exec, max_exec;
 };
 
-static __init int print_split(struct split_state *s)
+static int print_split(struct split_state *s)
 {
 	long i, expected, missed = 0;
 	int printed = 0;
@@ -82,10 +88,13 @@ static __init int print_split(struct split_state *s)
 				s->max_exec = addr;
 		}
 	}
-	printk(KERN_INFO
-		"CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
-		s->spg, s->lpg, s->gpg, s->exec,
-		s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed);
+	if (print) {
+		printk(KERN_INFO
+			" 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
+			s->spg, s->lpg, s->gpg, s->exec,
+			s->min_exec != ~0UL ? s->min_exec : 0,
+			s->max_exec, missed);
+	}
 
 	expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed;
 	if (expected != i) {
@@ -96,11 +105,11 @@
 	return err;
 }
 
-static unsigned long __initdata addr[NTEST];
-static unsigned int __initdata len[NTEST];
+static unsigned long addr[NTEST];
+static unsigned int len[NTEST];
 
 /* Change the global bit on random pages in the direct mapping */
-static __init int exercise_pageattr(void)
+static int pageattr_test(void)
 {
 	struct split_state sa, sb, sc;
 	unsigned long *bm;
@@ -110,7 +119,8 @@
 	int i, k;
 	int err;
 
-	printk(KERN_INFO "CPA exercising pageattr\n");
+	if (print)
+		printk(KERN_INFO "CPA self-test:\n");
 
 	bm = vmalloc((max_pfn_mapped + 7) / 8);
 	if (!bm) {
@@ -186,7 +196,6 @@ static __init int exercise_pageattr(void)
 
 	failed += print_split(&sb);
 
-	printk(KERN_INFO "CPA reverting everything\n");
 	for (i = 0; i < NTEST; i++) {
 		if (!addr[i])
 			continue;
@@ -214,12 +223,40 @@
 
 	failed += print_split(&sc);
 
 	if (failed) {
-		printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
+		printk(KERN_ERR "NOT PASSED. Please report.\n");
 		WARN_ON(1);
+		return -EINVAL;
 	} else {
-		printk(KERN_INFO "CPA selftests PASSED\n");
+		if (print)
+			printk(KERN_INFO "ok.\n");
 	}
 
 	return 0;
 }
-module_init(exercise_pageattr);
+
+static int do_pageattr_test(void *__unused)
+{
+	while (!kthread_should_stop()) {
+		schedule_timeout_interruptible(HZ*30);
+		if (pageattr_test() < 0)
+			break;
+		if (print)
+			print--;
+	}
+	return 0;
+}
+
+static int start_pageattr_test(void)
+{
+	struct task_struct *p;
+
+	p = kthread_create(do_pageattr_test, NULL, "pageattr-test");
+	if (!IS_ERR(p))
+		wake_up_process(p);
+	else
+		WARN_ON(1);
+
+	return 0;
+}
+
+module_init(start_pageattr_test);
-- cgit v1.2.3

From 58d5d0d8dd52cbca988af24b5692a20b00285543 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Wed, 6 Feb 2008 22:39:45 +0100
Subject: x86: fix deadlock, make pgd_lock irq-safe

lockdep just caught this one:

=================================
[ INFO: inconsistent lock state ]
2.6.24 #38
---------------------------------
inconsistent {in-softirq-W} -> {softirq-on-W} usage.
swapper/1 [HC0[0]:SC0[0]:HE1:SE1] takes:
 (pgd_lock){-+..}, at: [] mm_init+0x1da/0x250
{in-softirq-W} state was registered at:
  [] 0xffffffffffffffff
irq event stamp: 394559
hardirqs last enabled at (394559): [] get_page_from_freelist+0x30a/0x4c0
hardirqs last disabled at (394558): [] get_page_from_freelist+0x125/0x4c0
softirqs last enabled at (393952): [] __do_softirq+0xce/0xe0
softirqs last disabled at (393945): [] call_softirq+0x1c/0x30

other info that might help us debug this:
no locks held by swapper/1.

stack backtrace:
Pid: 1, comm: swapper Not tainted 2.6.24 #38

Call Trace:
 [] print_usage_bug+0x18b/0x190
 [] mark_lock+0x53d/0x560
 [] __lock_acquire+0x3ca/0xed0
 [] lock_acquire+0xa8/0xe0
 [] ? mm_init+0x1da/0x250
 [] _spin_lock+0x30/0x70
 [] mm_init+0x1da/0x250
 [] mm_alloc+0x39/0x50
 [] bprm_mm_init+0x2a/0x1a0
 [] do_execve+0x7b/0x220
 [] sys_execve+0x46/0x70
 [] kernel_execve+0x64/0xd0
 [] ? _stext+0x1e/0x20
 [] init_post+0x9a/0xf0
 [] ? trace_hardirqs_on_thunk+0x35/0x3a
 [] ? trace_hardirqs_on+0xba/0xd0
 [] ? child_rip+0xa/0x12
 [] ? restore_args+0x0/0x44
 [] ? child_rip+0x0/0x12

Turns out that pgd_lock has been used on 64-bit x86 in an irq-unsafe
way for almost two years, since commit 8c914cb704a11460e.
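The bug class is generic: a spinlock that is also taken from interrupt
context must be acquired with interrupts disabled, or an interrupt arriving
on the same CPU while the lock is held will spin on it forever. A minimal
sketch of the fix pattern applied in the diff that follows (lock name
illustrative):

static DEFINE_SPINLOCK(example_lock);	/* stands in for pgd_lock */

static void touch_shared_state(void)
{
	unsigned long flags;

	/*
	 * spin_lock_irqsave() disables local interrupts and remembers
	 * their previous state in 'flags'; a plain spin_lock() here
	 * could deadlock against an interrupt handler taking the lock.
	 */
	spin_lock_irqsave(&example_lock, flags);
	/* ... walk or modify the shared list ... */
	spin_unlock_irqrestore(&example_lock, flags);
}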
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 arch/x86/mm/fault.c          |  5 +++--
 include/asm-x86/pgalloc_64.h | 10 ++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d8ed4006b3d2..621afb6343dc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -958,11 +958,12 @@ void vmalloc_sync_all(void)
 	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
 		if (!test_bit(pgd_index(address), insync)) {
 			const pgd_t *pgd_ref = pgd_offset_k(address);
+			unsigned long flags;
 			struct page *page;
 
 			if (pgd_none(*pgd_ref))
 				continue;
-			spin_lock(&pgd_lock);
+			spin_lock_irqsave(&pgd_lock, flags);
 			list_for_each_entry(page, &pgd_list, lru) {
 				pgd_t *pgd;
 				pgd = (pgd_t *)page_address(page) + pgd_index(address);
@@ -971,7 +972,7 @@ void vmalloc_sync_all(void)
 				else
 					BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
 			}
-			spin_unlock(&pgd_lock);
+			spin_unlock_irqrestore(&pgd_lock, flags);
 			set_bit(pgd_index(address), insync);
 		}
 		if (address == start)

diff --git a/include/asm-x86/pgalloc_64.h b/include/asm-x86/pgalloc_64.h
index 315314ce4bfb..4f6220db22b1 100644
--- a/include/asm-x86/pgalloc_64.h
+++ b/include/asm-x86/pgalloc_64.h
@@ -42,19 +42,21 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 static inline void pgd_list_add(pgd_t *pgd)
 {
 	struct page *page = virt_to_page(pgd);
+	unsigned long flags;
 
-	spin_lock(&pgd_lock);
+	spin_lock_irqsave(&pgd_lock, flags);
 	list_add(&page->lru, &pgd_list);
-	spin_unlock(&pgd_lock);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
 	struct page *page = virt_to_page(pgd);
+	unsigned long flags;
 
-	spin_lock(&pgd_lock);
+	spin_lock_irqsave(&pgd_lock, flags);
 	list_del(&page->lru);
-	spin_unlock(&pgd_lock);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-- cgit v1.2.3

From 72a7fe3967dbf86cb34e24fbf1d957fe24d2f246 Mon Sep 17 00:00:00 2001
From: Bernhard Walle
Date: Thu, 7 Feb 2008 00:15:17 -0800
Subject: Introduce flags for reserve_bootmem()

This patchset adds a flags variable to reserve_bootmem() and uses the
BOOTMEM_EXCLUSIVE flag in crashkernel reservation code to detect
collisions between crashkernel area and already used memory.

This patch:

Change the reserve_bootmem() function to accept a new flag
BOOTMEM_EXCLUSIVE. If that flag is set, the function returns with
-EBUSY if the memory has already been reserved in the past. This is
to avoid conflicts.

Because that code runs before SMP initialisation, there's no race
condition inside reserve_bootmem_core().

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix powerpc build]
Signed-off-by: Bernhard Walle
Cc:
Cc: "Eric W. Biederman"
Cc: Vivek Goyal
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/alpha/kernel/core_irongate.c |  3 ++-
 arch/alpha/kernel/setup.c         |  5 +++--
 arch/alpha/mm/numa.c              |  5 +++--
 arch/arm/mm/init.c                |  4 ++--
 arch/arm/mm/mmu.c                 | 17 +++++++++++------
 arch/arm/mm/nommu.c               |  9 ++++++---
 arch/arm/plat-omap/fb.c           |  2 +-
 arch/avr32/kernel/setup.c         |  6 ++++--
 arch/blackfin/kernel/setup.c      |  2 +-
 arch/cris/kernel/setup.c          |  2 +-
 arch/frv/kernel/setup.c           | 16 ++++++++++------
 arch/h8300/kernel/setup.c         |  2 +-
 arch/ia64/mm/contig.c             |  2 +-
 arch/ia64/mm/discontig.c          |  4 ++--
 arch/m32r/kernel/setup.c          | 11 +++++++----
 arch/m32r/mm/discontig.c          |  5 +++--
 arch/m68k/atari/stram.c           |  2 +-
 arch/m68k/kernel/setup.c          |  3 ++-
 arch/m68knommu/kernel/setup.c     |  2 +-
 arch/mips/kernel/setup.c          |  4 ++--
 arch/mips/sgi-ip27/ip27-memory.c  |  3 ++-
 arch/parisc/mm/init.c             | 14 +++++++++-----
 arch/powerpc/mm/mem.c             |  8 +++++---
 arch/powerpc/mm/numa.c            |  2 +-
 arch/s390/kernel/setup.c          | 11 +++++++----
 arch/sh/kernel/setup.c            | 10 ++++++----
 arch/sh/mm/numa.c                 |  4 ++--
 arch/sparc/mm/init.c              |  6 +++---
 arch/sparc64/mm/init.c            |  8 ++++----
 arch/v850/kernel/anna.c           |  3 ++-
 arch/v850/kernel/as85ep1.c        |  3 ++-
 arch/v850/kernel/rte_ma1_cb.c     |  6 ++++--
 arch/v850/kernel/setup.c          | 12 ++++++++----
 arch/x86/kernel/mpparse_32.c      |  6 ++++--
 arch/x86/kernel/setup_32.c        | 18 ++++++++++--------
 arch/x86/kernel/setup_64.c        |  5 +++--
 arch/x86/mm/discontig_32.c        |  3 ++-
 arch/x86/mm/init_64.c             |  4 ++--
 arch/x86/mm/numa_64.c             |  5 +++--
 arch/x86/mm/srat_64.c             |  3 ++-
 include/asm-x86/mmzone_32.h       |  4 ++--
 include/linux/bootmem.h           | 17 +++++++++++++++--
 mm/bootmem.c                      | 27 +++++++++++++++++++++------
 43 files changed, 183 insertions(+), 105 deletions(-)

diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c
index e4a0bcf1d28b..a872078497be 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -241,7 +241,8 @@ albacore_init_arch(void)
 			size / 1024);
 	}
 #endif
-	reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop - pci_mem);
+	reserve_bootmem_node(NODE_DATA(0), pci_mem, memtop -
+			pci_mem, BOOTMEM_DEFAULT);
 	printk("irongate_init_arch: temporarily reserving "
 		"region %08lx-%08lx for PCI\n", pci_mem, memtop - 1);
 }

diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index beff6297f788..74c346625658 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -428,7 +428,8 @@ setup_memory(void *kernel_end)
 	}
 
 	/* Reserve the bootmap memory.  */
-	reserve_bootmem(PFN_PHYS(bootmap_start), bootmap_size);
+	reserve_bootmem(PFN_PHYS(bootmap_start), bootmap_size,
+			BOOTMEM_DEFAULT);
 	printk("reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -446,7 +447,7 @@ setup_memory(void *kernel_end)
 				   phys_to_virt(PFN_PHYS(max_low_pfn)));
 		} else {
 			reserve_bootmem(virt_to_phys((void *)initrd_start),
-					INITRD_SIZE);
+					INITRD_SIZE, BOOTMEM_DEFAULT);
 		}
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */

diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index e3e3806a6f25..10ab7833e83c 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -242,7 +242,8 @@ setup_memory_node(int nid, void *kernel_end)
 	}
 
 	/* Reserve the bootmap memory.
 	 */
-	reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(bootmap_start), bootmap_size);
+	reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(bootmap_start),
+			bootmap_size, BOOTMEM_DEFAULT);
 	printk(" reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
 
 	node_set_online(nid);
@@ -281,7 +282,7 @@ setup_memory(void *kernel_end)
 			nid = kvaddr_to_nid(initrd_start);
 			reserve_bootmem_node(NODE_DATA(nid),
 					     virt_to_phys((void *)initrd_start),
-					     INITRD_SIZE);
+					     INITRD_SIZE, BOOTMEM_DEFAULT);
 		}
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index c0ad7c0fbae0..ec00f26bffa4 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -239,7 +239,7 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 	 * Reserve the bootmem bitmap for this node.
 	 */
 	reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT,
-			     boot_pages << PAGE_SHIFT);
+			     boot_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/*
@@ -247,7 +247,7 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 	 */
 	if (node == initrd_node) {
 		reserve_bootmem_node(pgdat, phys_initrd_start,
-				     phys_initrd_size);
+				     phys_initrd_size, BOOTMEM_DEFAULT);
 		initrd_start = __phys_to_virt(phys_initrd_start);
 		initrd_end = initrd_start + phys_initrd_size;
 	}

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e5d61ee3d4a1..d41a75ed3dce 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -605,9 +605,11 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	 * Note that this can only be in node 0.
 	 */
 #ifdef CONFIG_XIP_KERNEL
-	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start);
+	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start,
+			BOOTMEM_DEFAULT);
 #else
-	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
+	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext,
+			BOOTMEM_DEFAULT);
 #endif
 
 	/*
@@ -615,7 +617,7 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	 * and can only be in node 0.
 	 */
 	reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
-			     PTRS_PER_PGD * sizeof(pgd_t));
+			     PTRS_PER_PGD * sizeof(pgd_t), BOOTMEM_DEFAULT);
 
 	/*
 	 * Hmm... This should go elsewhere, but we really really need to
@@ -638,8 +640,10 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 
 	/* H1940 and RX3715 need to reserve this for suspend */
 	if (machine_is_h1940() || machine_is_rx3715()) {
-		reserve_bootmem_node(pgdat, 0x30003000, 0x1000);
-		reserve_bootmem_node(pgdat, 0x30081000, 0x1000);
+		reserve_bootmem_node(pgdat, 0x30003000, 0x1000,
+				BOOTMEM_DEFAULT);
+		reserve_bootmem_node(pgdat, 0x30081000, 0x1000,
+				BOOTMEM_DEFAULT);
 	}
 
 #ifdef CONFIG_SA1111
@@ -650,7 +654,8 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
 #endif
 	if (res_size)
-		reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size);
+		reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size,
+				BOOTMEM_DEFAULT);
 }
 
 /*

diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 8cd3a60954f0..63c62fdea521 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -27,9 +27,11 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	 * Note that this can only be in node 0.
 	 */
 #ifdef CONFIG_XIP_KERNEL
-	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start);
+	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start,
+			BOOTMEM_DEFAULT);
 #else
-	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
+	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext,
+			BOOTMEM_DEFAULT);
 #endif
 
 	/*
@@ -37,7 +39,8 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 	 * some architectures which the DRAM is the exception vector to trap,
 	 * alloc_page breaks with error, although it is not NULL, but "0."
 	 */
-	reserve_bootmem_node(pgdat, CONFIG_VECTORS_BASE, PAGE_SIZE);
+	reserve_bootmem_node(pgdat, CONFIG_VECTORS_BASE, PAGE_SIZE,
+			BOOTMEM_DEFAULT);
 }
 
 /*

diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/plat-omap/fb.c
index ee40c1a0b83d..7854f19b77cf 100644
--- a/arch/arm/plat-omap/fb.c
+++ b/arch/arm/plat-omap/fb.c
@@ -207,7 +207,7 @@ void __init omapfb_reserve_sdram(void)
 			return;
 		}
 		if (rg.paddr)
-			reserve_bootmem(rg.paddr, rg.size);
+			reserve_bootmem(rg.paddr, rg.size, BOOTMEM_DEFAULT);
 		reserved += rg.size;
 		omapfb_config.mem_desc.region[i] = rg;
 		configured_regions++;

diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
index 4b4c1884e1c5..e66a07a928cd 100644
--- a/arch/avr32/kernel/setup.c
+++ b/arch/avr32/kernel/setup.c
@@ -489,7 +489,8 @@ static void __init setup_bootmem(void)
 
 		/* Reserve space for the bootmem bitmap... */
 		reserve_bootmem_node(NODE_DATA(node), PFN_PHYS(bootmap_pfn),
-				     bootmap_size);
+				     bootmap_size,
+				     BOOTMEM_DEFAULT);
 
 		/* ...and any other reserved regions. */
 		for (res = reserved; res; res = res->sibling) {
@@ -505,7 +506,8 @@ static void __init setup_bootmem(void)
 			    && res->end < PFN_PHYS(max_pfn))
 				reserve_bootmem_node(
 					NODE_DATA(node), res->start,
-					res->end - res->start + 1);
+					res->end - res->start + 1,
+					BOOTMEM_DEFAULT);
 		}
 
 		node_set_online(node);

diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index 462cae893757..6e106b3d7729 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -406,7 +406,7 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	free_bootmem(memory_start, memory_end - memory_start);
 
-	reserve_bootmem(memory_start, bootmap_size);
+	reserve_bootmem(memory_start, bootmap_size, BOOTMEM_DEFAULT);
 	/*
 	 * get kmalloc into gear
 	 */

diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c
index 65466c49d7a9..4da042e100a0 100644
--- a/arch/cris/kernel/setup.c
+++ b/arch/cris/kernel/setup.c
@@ -137,7 +137,7 @@ setup_arch(char **cmdline_p)
 	 * Arguments are start, size
 	 */
 
-	reserve_bootmem(PFN_PHYS(start_pfn), bootmap_size);
+	reserve_bootmem(PFN_PHYS(start_pfn), bootmap_size, BOOTMEM_DEFAULT);
 
 	/* paging_init() sets up the MMU and marks all pages as reserved */

diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index b38ae1fc15fd..6c01464db699 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -925,13 +925,15 @@ static void __init setup_linux_memory(void)
 #endif
 
 	/* take back the memory occupied by the kernel image and the bootmem alloc map */
-	reserve_bootmem(kstart, kend - kstart + bootmap_size);
+	reserve_bootmem(kstart, kend - kstart + bootmap_size,
+			BOOTMEM_DEFAULT);
 
 	/* reserve the memory occupied by the initial ramdisk */
#ifdef CONFIG_BLK_DEV_INITRD
 	if (LOADER_TYPE && INITRD_START) {
 		if (INITRD_START + INITRD_SIZE <= (low_top_pfn << PAGE_SHIFT)) {
-			reserve_bootmem(INITRD_START, INITRD_SIZE);
+			reserve_bootmem(INITRD_START, INITRD_SIZE,
+					BOOTMEM_DEFAULT);
 			initrd_start = INITRD_START + PAGE_OFFSET;
 			initrd_end = initrd_start + INITRD_SIZE;
 		}
@@ -986,9 +988,10 @@ static void __init setup_uclinux_memory(void)
 
 	/* now take back the bits the core kernel is occupying */
 #ifndef CONFIG_PROTECT_KERNEL
-	reserve_bootmem(kend, bootmap_size);
+	reserve_bootmem(kend, bootmap_size, BOOTMEM_DEFAULT);
 	reserve_bootmem((unsigned long) &__kernel_image_start,
-			kend - (unsigned long) &__kernel_image_start);
+			kend - (unsigned long) &__kernel_image_start,
+			BOOTMEM_DEFAULT);
 
 #else
 	dampr = __get_DAMPR(0);
@@ -996,14 +999,15 @@ static void __init setup_uclinux_memory(void)
 	dampr = (dampr >> 4) + 17;
 	dampr = 1 << dampr;
 
-	reserve_bootmem(__get_DAMPR(0) & xAMPRx_PPFN, dampr);
+	reserve_bootmem(__get_DAMPR(0) & xAMPRx_PPFN, dampr, BOOTMEM_DEFAULT);
 #endif
 
 	/* reserve some memory to do uncached DMA through if requested */
 #ifdef CONFIG_RESERVE_DMA_COHERENT
 	if (dma_coherent_mem_start)
 		reserve_bootmem(dma_coherent_mem_start,
-				dma_coherent_mem_end - dma_coherent_mem_start);
+				dma_coherent_mem_end - dma_coherent_mem_start,
+				BOOTMEM_DEFAULT);
 #endif
 } /* end setup_uclinux_memory() */

diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index b2e86d0255e6..cd3734614d9d 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -173,7 +173,7 @@ void __init setup_arch(char **cmdline_p)
 	 * the bootmem bitmap so we then reserve it after freeing it :-)
 	 */
 	free_bootmem(memory_start, memory_end - memory_start);
-	reserve_bootmem(memory_start, bootmap_size);
+	reserve_bootmem(memory_start, bootmap_size, BOOTMEM_DEFAULT);
 	/*
 	 * get kmalloc into gear
 	 */

diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 7e9c275ea148..344f64eca7a9 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -218,7 +218,7 @@ find_memory (void)
 
 	/* Free all available memory, then mark bootmem-map as being in use. */
 	efi_memmap_walk(filter_rsvd_memory, free_bootmem);
-	reserve_bootmem(bootmap_start, bootmap_size);
+	reserve_bootmem(bootmap_start, bootmap_size, BOOTMEM_DEFAULT);
 
 	find_initrd();

diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 0b567398f38e..ee5e68b2af94 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -299,12 +299,12 @@ static void __init reserve_pernode_space(void)
 		pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
 		size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
 		base = __pa(bdp->node_bootmem_map);
-		reserve_bootmem_node(pdp, base, size);
+		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
 
 		/* Now the per-node space */
 		size = mem_data[node].pernode_size;
 		base = __pa(mem_data[node].pernode_addr);
-		reserve_bootmem_node(pdp, base, size);
+		reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
 	}
 }

diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c
index d64814385d70..f1f5db0c4084 100644
--- a/arch/m32r/kernel/setup.c
+++ b/arch/m32r/kernel/setup.c
@@ -177,25 +177,28 @@ static unsigned long __init setup_memory(void)
 	 */
 	reserve_bootmem(CONFIG_MEMORY_START + PAGE_SIZE,
 		(PFN_PHYS(start_pfn) + bootmap_size + PAGE_SIZE - 1)
-		- CONFIG_MEMORY_START);
+		- CONFIG_MEMORY_START,
+		BOOTMEM_DEFAULT);
 
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
 	 * enabling clean reboots, SMP operation, laptop functions.
 	 */
-	reserve_bootmem(CONFIG_MEMORY_START, PAGE_SIZE);
+	reserve_bootmem(CONFIG_MEMORY_START, PAGE_SIZE, BOOTMEM_DEFAULT);
 
 	/*
 	 * reserve memory hole
 	 */
#ifdef CONFIG_MEMHOLE
-	reserve_bootmem(CONFIG_MEMHOLE_START, CONFIG_MEMHOLE_SIZE);
+	reserve_bootmem(CONFIG_MEMHOLE_START, CONFIG_MEMHOLE_SIZE,
+			BOOTMEM_DEFAULT);
 #endif
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (LOADER_TYPE && INITRD_START) {
 		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
-			reserve_bootmem(INITRD_START, INITRD_SIZE);
+			reserve_bootmem(INITRD_START, INITRD_SIZE,
+					BOOTMEM_DEFAULT);
 			initrd_start = INITRD_START + PAGE_OFFSET;
 			initrd_end = initrd_start + INITRD_SIZE;
 			printk("initrd:start[%08lx],size[%08lx]\n",

diff --git a/arch/m32r/mm/discontig.c b/arch/m32r/mm/discontig.c
index c7efdb0aefc5..07c1af7dc0e2 100644
--- a/arch/m32r/mm/discontig.c
+++ b/arch/m32r/mm/discontig.c
@@ -91,7 +91,8 @@ unsigned long __init setup_memory(void)
 			PFN_PHYS(mp->pages));
 
 		reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(mp->start_pfn),
-			PFN_PHYS(mp->free_pfn - mp->start_pfn) + bootmap_size);
+			PFN_PHYS(mp->free_pfn - mp->start_pfn) + bootmap_size,
+			BOOTMEM_DEFAULT);
 
 		if (max_low_pfn < max_pfn)
 			max_low_pfn = max_pfn;
@@ -104,7 +105,7 @@ unsigned long __init setup_memory(void)
 	if (LOADER_TYPE && INITRD_START) {
 		if (INITRD_START + INITRD_SIZE <= PFN_PHYS(max_low_pfn)) {
 			reserve_bootmem_node(NODE_DATA(0), INITRD_START,
-				INITRD_SIZE);
+				INITRD_SIZE, BOOTMEM_DEFAULT);
 			initrd_start = INITRD_START + PAGE_OFFSET;
 			initrd_end = initrd_start + INITRD_SIZE;
 			printk("initrd:start[%08lx],size[%08lx]\n",

diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c
index 8dda6515887a..0055a6c06f75 100644
--- a/arch/m68k/atari/stram.c
+++ b/arch/m68k/atari/stram.c
@@ -154,7 +154,7 @@ void __init atari_stram_reserve_pages(void *start_mem)
 	/* always reserve first page of ST-RAM, the first 2 kB are
 	 * supervisor-only! */
 	if (!kernel_in_stram)
-		reserve_bootmem (0, PAGE_SIZE);
+		reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
 
 }

diff --git a/arch/m68k/kernel/setup.c b/arch/m68k/kernel/setup.c
index ed3a4caec620..9a06c48edcb3 100644
--- a/arch/m68k/kernel/setup.c
+++ b/arch/m68k/kernel/setup.c
@@ -323,7 +323,8 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (m68k_ramdisk.size) {
 		reserve_bootmem_node(__virt_to_node(phys_to_virt(m68k_ramdisk.addr)),
-				     m68k_ramdisk.addr, m68k_ramdisk.size);
+				     m68k_ramdisk.addr, m68k_ramdisk.size,
+				     BOOTMEM_DEFAULT);
 		initrd_start = (unsigned long)phys_to_virt(m68k_ramdisk.addr);
 		initrd_end = initrd_start + m68k_ramdisk.size;
 		printk("initrd: %08lx - %08lx\n", initrd_start, initrd_end);

diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c
index 81507c53d4a9..156c6c662c7e 100644
--- a/arch/m68knommu/kernel/setup.c
+++ b/arch/m68knommu/kernel/setup.c
@@ -203,7 +203,7 @@ void __init setup_arch(char **cmdline_p)
 	 * the bootmem bitmap so we then reserve it after freeing it :-)
 	 */
 	free_bootmem(memory_start, memory_end - memory_start);
-	reserve_bootmem(memory_start, bootmap_size);
+	reserve_bootmem(memory_start, bootmap_size, BOOTMEM_DEFAULT);
 
 	/*
 	 * Get kmalloc into gear.

diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index c032409cba9b..39f3dfe134fb 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -232,7 +232,7 @@ static void __init finalize_initrd(void)
 		goto disable;
 	}
 
-	reserve_bootmem(__pa(initrd_start), size);
+	reserve_bootmem(__pa(initrd_start), size, BOOTMEM_DEFAULT);
 	initrd_below_start_ok = 1;
 
 	printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
@@ -413,7 +413,7 @@ static void __init bootmem_init(void)
 	/*
 	 * Reserve the bootmap memory.
 	 */
-	reserve_bootmem(PFN_PHYS(mapstart), bootmap_size);
+	reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT);
 
 	/*
 	 * Reserve initrd memory if needed.

diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index e5e023f50a07..bf438d02366e 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -465,7 +465,8 @@ static void __init node_mem_init(cnodeid_t node)
 	free_bootmem_node(NODE_DATA(node), slot_firstpfn << PAGE_SHIFT,
 			(slot_lastpfn - slot_firstpfn) << PAGE_SHIFT);
 	reserve_bootmem_node(NODE_DATA(node), slot_firstpfn << PAGE_SHIFT,
-		((slot_freepfn - slot_firstpfn) << PAGE_SHIFT) + bootmap_size);
+		((slot_freepfn - slot_firstpfn) << PAGE_SHIFT) + bootmap_size,
+		BOOTMEM_DEFAULT);
 }
 
 /*

diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index aa875fa43488..eb80f5e33d7d 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -315,11 +315,13 @@ static void __init setup_bootmem(void)
 #define PDC_CONSOLE_IO_IODC_SIZE 32768
 
 	reserve_bootmem_node(NODE_DATA(0), 0UL,
-			(unsigned long)(PAGE0->mem_free + PDC_CONSOLE_IO_IODC_SIZE));
+			(unsigned long)(PAGE0->mem_free +
+				PDC_CONSOLE_IO_IODC_SIZE), BOOTMEM_DEFAULT);
 	reserve_bootmem_node(NODE_DATA(0), __pa((unsigned long)_text),
-			(unsigned long)(_end - _text));
+			(unsigned long)(_end - _text), BOOTMEM_DEFAULT);
 	reserve_bootmem_node(NODE_DATA(0), (bootmap_start_pfn << PAGE_SHIFT),
-			((bootmap_pfn - bootmap_start_pfn) << PAGE_SHIFT));
+			((bootmap_pfn - bootmap_start_pfn) << PAGE_SHIFT),
+			BOOTMEM_DEFAULT);
 
 #ifndef CONFIG_DISCONTIGMEM
 
@@ -328,7 +330,8 @@ static void __init setup_bootmem(void)
 	for (i = 0; i < npmem_holes; i++) {
 		reserve_bootmem_node(NODE_DATA(0),
 				(pmem_holes[i].start_pfn << PAGE_SHIFT),
-				(pmem_holes[i].pages << PAGE_SHIFT));
+				(pmem_holes[i].pages << PAGE_SHIFT),
+				BOOTMEM_DEFAULT);
 	}
 #endif
 
@@ -346,7 +349,8 @@ static void __init setup_bootmem(void)
 			initrd_below_start_ok = 1;
 			printk(KERN_INFO "initrd: reserving %08lx-%08lx (mem_max %08lx)\n",
 				__pa(initrd_start), __pa(initrd_start) + initrd_reserve, mem_max);
-			reserve_bootmem_node(NODE_DATA(0),__pa(initrd_start), initrd_reserve);
+			reserve_bootmem_node(NODE_DATA(0), __pa(initrd_start),
+					initrd_reserve, BOOTMEM_DEFAULT);
 		}
 	}
 #endif

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e8122447f019..ff5debf5eedd 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -220,12 +220,13 @@ void __init do_init_bootmem(void)
 			lmb_size_bytes(&lmb.reserved, i) - 1;
 		if (addr < total_lowmem)
 			reserve_bootmem(lmb.reserved.region[i].base,
-					lmb_size_bytes(&lmb.reserved, i));
+					lmb_size_bytes(&lmb.reserved, i),
+					BOOTMEM_DEFAULT);
 		else if (lmb.reserved.region[i].base < total_lowmem) {
 			unsigned long adjusted_size = total_lowmem -
 				      lmb.reserved.region[i].base;
 			reserve_bootmem(lmb.reserved.region[i].base,
-					adjusted_size);
+					adjusted_size, BOOTMEM_DEFAULT);
 		}
 	}
 #else
@@ -234,7 +235,8 @@ void __init do_init_bootmem(void)
 	/* reserve the sections we're already using */
 	for (i = 0; i < lmb.reserved.cnt; i++)
 		reserve_bootmem(lmb.reserved.region[i].base,
-				lmb_size_bytes(&lmb.reserved, i));
+				lmb_size_bytes(&lmb.reserved, i),
+				BOOTMEM_DEFAULT);
 
 #endif
 	/* XXX need to clip this if using highmem? */

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index c12adc3ddffd..bc60322d2436 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -675,7 +675,7 @@ void __init do_init_bootmem(void)
 
 			dbg("reserve_bootmem %lx %lx\n", physbase, size);
 			reserve_bootmem_node(NODE_DATA(nid), physbase,
-					     size);
+					     size, BOOTMEM_DEFAULT);
 		}
 	}

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 29ae165d1749..f9f8779022a0 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -649,21 +649,24 @@ setup_memory(void)
 	/*
 	 * Reserve memory used for lowcore/command line/kernel image.
 	 */
-	reserve_bootmem(0, (unsigned long)_ehead);
+	reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT);
 	reserve_bootmem((unsigned long)_stext,
-			PFN_PHYS(start_pfn) - (unsigned long)_stext);
+			PFN_PHYS(start_pfn) - (unsigned long)_stext,
+			BOOTMEM_DEFAULT);
 	/*
 	 * Reserve the bootmem bitmap itself as well. We do this in two
 	 * steps (first step was init_bootmem()) because this catches
 	 * the (very unlikely) case of us accidentally initializing the
 	 * bootmem allocator with an invalid RAM area.
 	 */
-	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size);
+	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
+			BOOTMEM_DEFAULT);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (INITRD_START && INITRD_SIZE) {
 		if (INITRD_START + INITRD_SIZE <= memory_end) {
-			reserve_bootmem(INITRD_START, INITRD_SIZE);
+			reserve_bootmem(INITRD_START, INITRD_SIZE,
+					BOOTMEM_DEFAULT);
 			initrd_start = INITRD_START;
 			initrd_end = initrd_start + INITRD_SIZE;
 		} else {

diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 855cdf9d85b1..af10db90a554 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -148,7 +148,8 @@ static void __init reserve_crashkernel(void)
 					(unsigned long)(free_mem >> 20));
 			crashk_res.start = crash_base;
 			crashk_res.end   = crash_base + crash_size - 1;
-			reserve_bootmem(crash_base, crash_size);
+			reserve_bootmem(crash_base, crash_size,
+					BOOTMEM_DEFAULT);
 		} else
 			printk(KERN_INFO "crashkernel reservation failed - "
 					"you have to specify a base address\n");
@@ -184,13 +185,14 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
 	 * an invalid RAM area.
 	 */
 	reserve_bootmem(__MEMORY_START+PAGE_SIZE,
-		(PFN_PHYS(free_pfn)+bootmap_size+PAGE_SIZE-1)-__MEMORY_START);
+		(PFN_PHYS(free_pfn)+bootmap_size+PAGE_SIZE-1)-__MEMORY_START,
+		BOOTMEM_DEFAULT);
 
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
 	 * enabling clean reboots, SMP operation, laptop functions.
 	 */
-	reserve_bootmem(__MEMORY_START, PAGE_SIZE);
+	reserve_bootmem(__MEMORY_START, PAGE_SIZE, BOOTMEM_DEFAULT);
 
 	sparse_memory_present_with_active_regions(0);
 
@@ -200,7 +202,7 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
 	if (LOADER_TYPE && INITRD_START) {
 		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
 			reserve_bootmem(INITRD_START + __MEMORY_START,
-					INITRD_SIZE);
+					INITRD_SIZE, BOOTMEM_DEFAULT);
 			initrd_start = INITRD_START + PAGE_OFFSET +
 					__MEMORY_START;
 			initrd_end = initrd_start + INITRD_SIZE;

diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 8aff065dd307..2de7302724fc 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -80,9 +80,9 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 
 	/* Reserve the pgdat and bootmap space with the bootmem allocator */
 	reserve_bootmem_node(NODE_DATA(nid), start_pfn << PAGE_SHIFT,
-			     sizeof(struct pglist_data));
+			     sizeof(struct pglist_data), BOOTMEM_DEFAULT);
 	reserve_bootmem_node(NODE_DATA(nid), free_pfn << PAGE_SHIFT,
-			     bootmap_pages << PAGE_SHIFT);
+			     bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
 
 	/* It's up */
 	node_set_online(nid);

diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c
index a1bef07755a9..b89837accc88 100644
--- a/arch/sparc/mm/init.c
+++ b/arch/sparc/mm/init.c
@@ -259,7 +259,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 	if (initrd_start) {
 		/* Reserve the initrd image area. */
 		size = initrd_end - initrd_start;
-		reserve_bootmem(initrd_start, size);
+		reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT);
 		*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
 
 		initrd_start = (initrd_start - phys_base) + PAGE_OFFSET;
@@ -268,7 +268,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 #endif
 	/* Reserve the kernel text/data/bss. */
 	size = (start_pfn << PAGE_SHIFT) - phys_base;
-	reserve_bootmem(phys_base, size);
+	reserve_bootmem(phys_base, size, BOOTMEM_DEFAULT);
 	*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
 
 	/* Reserve the bootmem map.   We do not account for it
@@ -276,7 +276,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 	 * in free_all_bootmem.
 	 */
 	size = bootmap_size;
-	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size);
+	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
 	*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
 
 	return max_pfn;

diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 523e993ee90c..e726c45645ff 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -997,7 +997,7 @@ static unsigned long __init bootmem_init(unsigned long *pages_avail,
 		prom_printf("reserve_bootmem(initrd): base[%llx] size[%lx]\n",
 			initrd_start, initrd_end);
 #endif
-		reserve_bootmem(initrd_start, size);
+		reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT);
 
 		initrd_start += PAGE_OFFSET;
 		initrd_end += PAGE_OFFSET;
@@ -1007,7 +1007,7 @@ static unsigned long __init bootmem_init(unsigned long *pages_avail,
 #ifdef CONFIG_DEBUG_BOOTMEM
 	prom_printf("reserve_bootmem(kernel): base[%lx] size[%lx]\n", kern_base, kern_size);
 #endif
-	reserve_bootmem(kern_base, kern_size);
+	reserve_bootmem(kern_base, kern_size, BOOTMEM_DEFAULT);
 	*pages_avail -= PAGE_ALIGN(kern_size) >> PAGE_SHIFT;
 
 	/* Add back in the initmem pages.
 	 */
@@ -1024,7 +1024,7 @@ static unsigned long __init bootmem_init(unsigned long *pages_avail,
 	prom_printf("reserve_bootmem(bootmap): base[%lx] size[%lx]\n",
 		    (bootmap_pfn << PAGE_SHIFT), size);
 #endif
-	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size);
+	reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
 
 	for (i = 0; i < pavail_ents; i++) {
 		unsigned long start_pfn, end_pfn;
@@ -1489,7 +1489,7 @@ static void __init taint_real_pages(void)
 					goto do_next_page;
 				}
 			}
-			reserve_bootmem(old_start, PAGE_SIZE);
+			reserve_bootmem(old_start, PAGE_SIZE, BOOTMEM_DEFAULT);
 
 			do_next_page:
 			old_start += PAGE_SIZE;

diff --git a/arch/v850/kernel/anna.c b/arch/v850/kernel/anna.c
index 0e429041a117..5978a25170fb 100644
--- a/arch/v850/kernel/anna.c
+++ b/arch/v850/kernel/anna.c
@@ -85,7 +85,8 @@ void __init mach_reserve_bootmem ()
 	/* The space between SRAM and SDRAM is filled with duplicate
 	   images of SRAM.  Prevent the kernel from using them.  */
 	reserve_bootmem (SRAM_ADDR + SRAM_SIZE,
-			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE));
+			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE),
+			 BOOTMEM_DEFAULT);
 }
 
 void mach_gettimeofday (struct timespec *tv)

diff --git a/arch/v850/kernel/as85ep1.c b/arch/v850/kernel/as85ep1.c
index 18437bc5c3ad..b525ecf3aea4 100644
--- a/arch/v850/kernel/as85ep1.c
+++ b/arch/v850/kernel/as85ep1.c
@@ -116,7 +116,8 @@ void __init mach_reserve_bootmem ()
 	if (SDRAM_ADDR < RAM_END && SDRAM_ADDR > RAM_START)
 		/* We can't use the space between SRAM and SDRAM, so
 		   prevent the kernel from trying.  */
-		reserve_bootmem (SRAM_END, SDRAM_ADDR - SRAM_END);
+		reserve_bootmem(SRAM_END, SDRAM_ADDR - SRAM_END,
+				BOOTMEM_DEFAULT);
 }
 
 void mach_gettimeofday (struct timespec *tv)

diff --git a/arch/v850/kernel/rte_ma1_cb.c b/arch/v850/kernel/rte_ma1_cb.c
index 9a716f946421..08abf3d5f8df 100644
--- a/arch/v850/kernel/rte_ma1_cb.c
+++ b/arch/v850/kernel/rte_ma1_cb.c
@@ -46,13 +46,15 @@ void __init mach_reserve_bootmem ()
 {
 #ifdef CONFIG_RTE_CB_MULTI
 	/* Prevent the kernel from touching the monitor's scratch RAM.  */
-	reserve_bootmem (MON_SCRATCH_ADDR, MON_SCRATCH_SIZE);
+	reserve_bootmem(MON_SCRATCH_ADDR, MON_SCRATCH_SIZE,
+			BOOTMEM_DEFAULT);
 #endif
 
 	/* The space between SRAM and SDRAM is filled with duplicate
 	   images of SRAM.  Prevent the kernel from using them.  */
 	reserve_bootmem (SRAM_ADDR + SRAM_SIZE,
-			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE));
+			 SDRAM_ADDR - (SRAM_ADDR + SRAM_SIZE),
+			 BOOTMEM_DEFAULT);
 }
 
 void mach_gettimeofday (struct timespec *tv)

diff --git a/arch/v850/kernel/setup.c b/arch/v850/kernel/setup.c
index a914f244f494..a0a8456a8430 100644
--- a/arch/v850/kernel/setup.c
+++ b/arch/v850/kernel/setup.c
@@ -241,15 +241,18 @@ init_bootmem_alloc (unsigned long ram_start, unsigned long ram_len)
 	if (kram_end > kram_start)
 		/* Reserve the RAM part of the kernel's address space, so it
 		   doesn't get allocated.  */
-		reserve_bootmem (kram_start, kram_end - kram_start);
+		reserve_bootmem(kram_start, kram_end - kram_start,
+				BOOTMEM_DEFAULT);
 
 	if (intv_in_ram && !intv_in_kram)
 		/* Reserve the interrupt vector space.  */
-		reserve_bootmem (intv_start, intv_end - intv_start);
+		reserve_bootmem(intv_start, intv_end - intv_start,
+				BOOTMEM_DEFAULT);
 
 	if (bootmap >= ram_start && bootmap < ram_end)
 		/* Reserve the bootmap space.  */
-		reserve_bootmem (bootmap, bootmap_len);
+		reserve_bootmem(bootmap, bootmap_len,
+				BOOTMEM_DEFAULT);
 
 	/* Reserve the memory used by the root filesystem image if it's
 	   in RAM.
 	 */
@@ -257,7 +260,8 @@ init_bootmem_alloc (unsigned long ram_start, unsigned long ram_len)
 	    && (unsigned long)&_root_fs_image_start >= ram_start
 	    && (unsigned long)&_root_fs_image_start < ram_end)
 		reserve_bootmem ((unsigned long)&_root_fs_image_start,
-				 &_root_fs_image_end - &_root_fs_image_start);
+				 &_root_fs_image_end - &_root_fs_image_start,
+				 BOOTMEM_DEFAULT);
 
 	/* Let the platform-dependent code reserve some too.  */
 	if (mrb)

diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index 67009cdd5eca..f349e68e45a0 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -736,7 +736,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
 			smp_found_config = 1;
 			printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
 				mpf, virt_to_phys(mpf));
-			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+			reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
+					BOOTMEM_DEFAULT);
 			if (mpf->mpf_physptr) {
 				/*
 				 * We cannot access to MPC table to compute
@@ -751,7 +752,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
 				unsigned long end = max_low_pfn * PAGE_SIZE;
 				if (mpf->mpf_physptr + size > end)
 					size = end - mpf->mpf_physptr;
-				reserve_bootmem(mpf->mpf_physptr, size);
+				reserve_bootmem(mpf->mpf_physptr, size,
+						BOOTMEM_DEFAULT);
 			}
 
 			mpf_found = mpf;

diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 62adc5f20be5..d1d8c347cc0b 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -390,7 +390,7 @@ static void __init reserve_ebda_region(void)
 	unsigned int addr;
 	addr = get_bios_ebda();
 	if (addr)
-		reserve_bootmem(addr, PAGE_SIZE);
+		reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT);
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -484,7 +484,8 @@ static void __init reserve_crashkernel(void)
 				(unsigned long)(total_mem >> 20));
 			crashk_res.start = crash_base;
 			crashk_res.end   = crash_base + crash_size - 1;
-			reserve_bootmem(crash_base, crash_size);
+			reserve_bootmem(crash_base, crash_size,
+					BOOTMEM_DEFAULT);
 		} else
 			printk(KERN_INFO "crashkernel reservation failed - "
 					"you have to specify a base address\n");
@@ -525,7 +526,7 @@ static void __init reserve_initrd(void)
 	}
 	if (ramdisk_end <= end_of_lowmem) {
 		/* All in lowmem, easy case */
-		reserve_bootmem(ramdisk_image, ramdisk_size);
+		reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
 		initrd_start = ramdisk_image + PAGE_OFFSET;
 		initrd_end = initrd_start+ramdisk_size;
 		return;
@@ -536,7 +537,7 @@ static void __init reserve_initrd(void)
 
 	/* Note: this includes all the lowmem currently occupied by
 	   the initrd, we rely on that fact to keep the data intact. */
-	reserve_bootmem(ramdisk_here, ramdisk_size);
+	reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
 	initrd_start = ramdisk_here + PAGE_OFFSET;
 	initrd_end   = initrd_start + ramdisk_size;
 
@@ -606,13 +607,14 @@ void __init setup_bootmem_allocator(void)
 	 * bootmem allocator with an invalid RAM area.
 	 */
 	reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
-			 bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
+			 bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
+			 BOOTMEM_DEFAULT);
 
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
 	 * enabling clean reboots, SMP operation, laptop functions.
 	 */
-	reserve_bootmem(0, PAGE_SIZE);
+	reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
 
 	/* reserve EBDA region, it's a 4K region */
 	reserve_ebda_region();
@@ -622,7 +624,7 @@ void __init setup_bootmem_allocator(void)
 	   unless you have no PS/2 mouse plugged in. */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
 	    boot_cpu_data.x86 == 6)
-		reserve_bootmem(0xa0000 - 4096, 4096);
+		reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT);
 
 #ifdef CONFIG_SMP
 	/*
@@ -630,7 +632,7 @@ void __init setup_bootmem_allocator(void)
 	 * FIXME: Don't need the extra page at 4K, but need to fix
 	 * trampoline before removing it. (see the GDT stuff)
 	 */
-	reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
+	reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
 #endif
 #ifdef CONFIG_ACPI_SLEEP
 	/*

diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c8939dfddfba..8345c3b12f05 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -189,7 +189,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
 	e820_register_active_regions(0, start_pfn, end_pfn);
 	free_bootmem_with_active_regions(0, end_pfn);
-	reserve_bootmem(bootmap, bootmap_size);
+	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
 #endif
 
@@ -238,7 +238,8 @@ static void __init reserve_crashkernel(void)
 				(unsigned long)(free_mem >> 20));
 		crashk_res.start = crash_base;
 		crashk_res.end   = crash_base + crash_size - 1;
-		reserve_bootmem(crash_base, crash_size);
+		reserve_bootmem(crash_base, crash_size,
+				BOOTMEM_DEFAULT);
 	} else
 		printk(KERN_INFO "crashkernel reservation failed - "
 				"you have to specify a base address\n");

diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 04b1d20e2613..c394ca0720b8 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -391,7 +391,8 @@ unsigned long __init setup_memory(void)
 void __init numa_kva_reserve(void)
 {
 	if (kva_pages)
-		reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages));
+		reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
+				BOOTMEM_DEFAULT);
 }
 
 void __init zone_sizes_init(void)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9b61c75a2355..5fe880fc305d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -644,9 +644,9 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 
 	/* Should check here against the e820 map to avoid double free */
 #ifdef CONFIG_NUMA
-	reserve_bootmem_node(NODE_DATA(nid), phys, len);
+	reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
 #else
-	reserve_bootmem(phys, len);
+	reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #endif
 	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
 		dma_reserve += len / PAGE_SIZE;

diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 5a02bf4c91ec..1aecc658cd7d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -238,9 +238,10 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 
 	free_bootmem_with_active_regions(nodeid, end);
 
-	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
+	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
+			BOOTMEM_DEFAULT);
 	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
-			bootmap_pages<= sidx; i--)
+		clear_bit(i, bdata->node_bootmem_map);
+
+	return ret;
 }
 
 static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
@@ -374,9 +388,9 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 }
 
 void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
-				 unsigned long size)
+				 unsigned long size, int flags)
 {
-	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
 }
 
 void __init
free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
@@ -398,9 +412,10 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 }
 
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-void __init reserve_bootmem(unsigned long addr, unsigned long size)
+int __init reserve_bootmem(unsigned long addr, unsigned long size,
+			   int flags)
 {
-	reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size);
+	return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags);
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-- cgit v1.2.3

From 2f569afd9ced9ebec9a6eb3dbf6f83429be0a7b4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Fri, 8 Feb 2008 04:22:04 -0800
Subject: CONFIG_HIGHPTE vs. sub-page page tables.

Background: I've implemented 1K/2K page tables for s390. These
sub-page page tables are required to properly support the s390
virtualization instruction with KVM. The SIE instruction requires
that the page tables have 256 page table entries (pte) followed by
256 page status table entries (pgste). The pgstes are only required
if the process is using the SIE instruction. The pgstes are updated
by the hardware and by the hypervisor for a number of reasons, one of
them is dirty and reference bit tracking. To avoid wasting memory the
standard pte table allocation should return 1K/2K (31/64 bit) and
2K/4K if the process is using SIE.

Problem: Page size on s390 is 4K, page table size is 1K or 2K. That
means the s390 version for pte_alloc_one cannot return a pointer to a
struct page. Trouble is that with the CONFIG_HIGHPTE feature on x86
pte_alloc_one cannot return a pointer to a pte either, since that
would require more than 32 bit for the return value of pte_alloc_one
(and the pte * would not be accessible since it's not kmapped).

Solution: The only solution I found to this dilemma is a new typedef:
a pgtable_t. For s390 pgtable_t will be a (pte *) - to be introduced
with a later patch. For everybody else it will be a (struct page *).
The additional problem with the initialization of the ptl lock and the
NR_PAGETABLE accounting is solved with a constructor pgtable_page_ctor
and a destructor pgtable_page_dtor. The page table allocation and free
functions need to call these two whenever a page table page is
allocated or freed. pmd_populate will get a pgtable_t instead of a
struct page pointer. To get the pgtable_t back from a pmd entry that
has been installed with pmd_populate a new function pmd_pgtable is
added. It replaces the pmd_page call in free_pte_range and
apply_to_pte_range.
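In the common struct-page case, the constructor/destructor contract
described above reduces to the following shape — a sketch of the pattern
the per-architecture pte_alloc_one()/pte_free() implementations in the
diffs below follow (function names here are illustrative):

static pgtable_t example_pte_alloc_one(struct mm_struct *mm,
				       unsigned long address)
{
	struct page *pte;

	pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
	if (pte)
		pgtable_page_ctor(pte);	/* init ptl lock, NR_PAGETABLE */
	return pte;
}

static void example_pte_free(struct mm_struct *mm, pgtable_t pte)
{
	pgtable_page_dtor(pte);		/* undo the ctor's accounting */
	__free_page(pte);
}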
Signed-off-by: Martin Schwidefsky
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/frv/mm/pgalloc.c               |  8 +++++---
 arch/powerpc/mm/pgtable_32.c        | 14 ++++++++------
 arch/ppc/mm/pgtable.c               |  9 ++++++---
 arch/s390/mm/pgtable.c              |  2 ++
 arch/sparc/mm/srmmu.c               | 10 +++++++---
 arch/sparc/mm/sun4c.c               | 14 ++++++++++----
 arch/um/kernel/mem.c                |  4 +++-
 arch/x86/mm/pgtable_32.c            |  5 ++++-
 include/asm-alpha/page.h            |  2 ++
 include/asm-alpha/pgalloc.h         | 22 ++++++++++++++--------
 include/asm-arm/page.h              |  2 ++
 include/asm-arm/pgalloc.h           |  9 ++++++---
 include/asm-avr32/page.h            |  1 +
 include/asm-avr32/pgalloc.h         | 16 ++++++++++++----
 include/asm-cris/page.h             |  1 +
 include/asm-cris/pgalloc.h          | 14 ++++++++++----
 include/asm-frv/page.h              |  1 +
 include/asm-frv/pgalloc.h           | 12 +++++++++---
 include/asm-ia64/page.h             |  2 ++
 include/asm-ia64/pgalloc.h          | 20 ++++++++++++++------
 include/asm-m32r/page.h             |  1 +
 include/asm-m32r/pgalloc.h          | 10 ++++++----
 include/asm-m68k/motorola_pgalloc.h | 14 ++++++++------
 include/asm-m68k/page.h             |  1 +
 include/asm-m68k/sun3_pgalloc.h     | 17 ++++++++++++-----
 include/asm-mips/page.h             |  1 +
 include/asm-mips/pgalloc.h          | 17 ++++++++++++-----
 include/asm-parisc/page.h           |  1 +
 include/asm-parisc/pgalloc.h        | 11 +++++++++--
 include/asm-powerpc/page.h          |  2 ++
 include/asm-powerpc/pgalloc-32.h    |  6 ++++--
 include/asm-powerpc/pgalloc-64.h    | 27 ++++++++++++++++++++-------
 include/asm-ppc/pgalloc.h           |  6 ++++--
 include/asm-s390/page.h             |  2 ++
 include/asm-s390/pgalloc.h          |  3 ++-
 include/asm-s390/tlb.h              |  2 +-
 include/asm-sh/page.h               |  2 ++
 include/asm-sh/pgalloc.h            | 27 ++++++++++++++++++++-------
 include/asm-sparc/page.h            |  2 ++
 include/asm-sparc/pgalloc.h         |  5 +++--
 include/asm-sparc64/page.h          |  2 ++
 include/asm-sparc64/pgalloc.h       | 19 ++++++++++++++-----
 include/asm-um/page.h               |  2 ++
 include/asm-um/pgalloc.h            | 12 +++++++++---
 include/asm-x86/page_32.h           |  2 ++
 include/asm-x86/page_64.h           |  2 ++
 include/asm-x86/pgalloc_32.h        |  6 ++++--
 include/asm-x86/pgalloc_64.h        | 22 +++++++++++++++++-----
 include/asm-xtensa/page.h           |  1 +
 include/asm-xtensa/pgalloc.h        | 17 ++++++++++++-----
 include/linux/mm.h                  | 14 +++++++++++++-
 mm/memory.c                         | 32 +++++++++++++++-----------------
 mm/vmalloc.c                        |  2 +-
 53 files changed, 326 insertions(+), 132 deletions(-)

diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index 1a2e5c8d03a9..66f616fb4860 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -28,7 +28,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *page;
 
@@ -37,9 +37,11 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 #else
 	page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
 #endif
-	if (page)
+	if (page) {
 		clear_highpage(page);
-	flush_dcache_page(page);
+		pgtable_page_ctor(page);
+		flush_dcache_page(page);
+	}
 	return page;
 }

diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index f80f90c4d58b..ac3390f81900 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -107,19 +107,20 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
 	return pte;
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *ptepage;
 
 #ifdef CONFIG_HIGHPTE
-	gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+	gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT | __GFP_ZERO;
 #else
-	gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
+	gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
 #endif
 
 	ptepage = alloc_pages(flags, 0);
-	if (ptepage)
-		clear_highpage(ptepage);
+	if (!ptepage)
+		return NULL;
+	pgtable_page_ctor(ptepage);
 	return ptepage;
 }
 
@@ -131,11 +132,12 @@ void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-void pte_free(struct mm_struct *mm, struct page *ptepage)
+void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 {
 #ifdef CONFIG_SMP
 	hash_page_sync();
 #endif
+	pgtable_page_dtor(ptepage);
 	__free_page(ptepage);
 }

diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
index 409fcaa4994a..03a79bff1271 100644
--- a/arch/ppc/mm/pgtable.c
+++ b/arch/ppc/mm/pgtable.c
@@ -95,7 +95,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
 	return pte;
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *ptepage;
 
@@ -106,8 +106,10 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 #endif
 
 	ptepage = alloc_pages(flags, 0);
-	if (ptepage)
+	if (ptepage) {
 		clear_highpage(ptepage);
+		pgtable_page_ctor(ptepage);
+	}
 	return ptepage;
 }
 
@@ -119,11 +121,12 @@ void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-void pte_free(struct mm_struct *mm, struct page *ptepage)
+void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 {
 #ifdef CONFIG_SMP
 	hash_page_sync();
 #endif
+	pgtable_page_dtor(ptepage);
 	__free_page(ptepage);
 }

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index e60e0ae13402..019f518cd5a0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -78,6 +78,7 @@ unsigned long *page_table_alloc(int noexec)
 		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		page->index = (addr_t) table;
 	}
+	pgtable_page_ctor(page);
 	table = (unsigned long *) page_to_phys(page);
 	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 	return table;
@@ -87,6 +88,7 @@ void page_table_free(unsigned long *table)
 {
 	unsigned long *shadow = get_shadow_pte(table);
 
+	pgtable_page_dtor(virt_to_page(table));
 	if (shadow)
 		free_page((unsigned long) shadow);
 	free_page((unsigned long) table);

diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index dc98e3844a0a..23d3291a3e81 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -489,14 +489,17 @@ srmmu_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return (pte_t *)srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
 }
 
-static struct page *
+static pgtable_t
 srmmu_pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	unsigned long pte;
+	struct page *page;
 
 	if ((pte = (unsigned long)srmmu_pte_alloc_one_kernel(mm, address)) == 0)
 		return NULL;
-	return pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT );
+	page = pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT );
+	pgtable_page_ctor(page);
+	return page;
 }
 
 static void srmmu_free_pte_fast(pte_t *pte)
@@ -504,10 +507,11 @@ static void srmmu_free_pte_fast(pte_t *pte)
 	srmmu_free_nocache((unsigned long)pte, PTE_SIZE);
 }
 
-static void srmmu_pte_free(struct page *pte)
+static void srmmu_pte_free(pgtable_t pte)
 {
 	unsigned long p;
 
+	pgtable_page_dtor(pte);
 	p = (unsigned long)page_address(pte);	/* Cached address (for test) */
 	if (p == 0)
 		BUG();

diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index 0729305f2f59..c0442e8c4b15 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -1947,12 +1947,17 @@ static pte_t *sun4c_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
 	return pte;
 }
 
-static struct page *sun4c_pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static pgtable_t sun4c_pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = sun4c_pte_alloc_one_kernel(mm, address);
+	pte_t *pte;
+	struct page *page;
+
+	pte = sun4c_pte_alloc_one_kernel(mm, address);
 	if (pte == NULL)
 		return NULL;
-	return virt_to_page(pte);
+	page = virt_to_page(pte);
+	pgtable_page_ctor(page);
+	return page;
 }
 
 static inline void sun4c_free_pte_fast(pte_t *pte)
@@ -1962,8 +1967,9 @@ static inline void sun4c_free_pte_fast(pte_t *pte)
 	pgtable_cache_size++;
 }
 
-static void sun4c_pte_free(struct page *pte)
+static void sun4c_pte_free(pgtable_t pte)
 {
+	pgtable_page_dtor(pte);
 	sun4c_free_pte_fast(page_address(pte));
 }

diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index d872fdce1d7e..2627ce82e918 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -354,11 +354,13 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
 
 	pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	if (pte)
+		pgtable_page_ctor(pte);
 	return pte;
 }

diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6c1914622a88..73aba7125203 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -183,7 +183,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
 }
 
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
 
@@ -192,6 +192,8 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 #else
 	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
 #endif
+	if (pte)
+		pgtable_page_ctor(pte);
 	return pte;
 }
 
@@ -365,6 +367,7 @@ void check_pgt_cache(void)
 
 void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
+	pgtable_page_dtor(pte);
 	paravirt_release_pt(page_to_pfn(pte));
 	tlb_remove_page(tlb, pte);
 }

diff --git a/include/asm-alpha/page.h b/include/asm-alpha/page.h
index 05f09f997d82..22ff9762d17b 100644
--- a/include/asm-alpha/page.h
+++ b/include/asm-alpha/page.h
@@ -62,6 +62,8 @@ typedef unsigned long pgprot_t;
 
 #endif /* STRICT_MM_TYPECHECKS */
 
+typedef struct page *pgtable_t;
+
 #ifdef USE_48_BIT_KSEG
 #define PAGE_OFFSET		0xffff800000000000UL
 #else

diff --git a/include/asm-alpha/pgalloc.h b/include/asm-alpha/pgalloc.h
index fdbedacc7375..fd090155dccd 100644
--- a/include/asm-alpha/pgalloc.h
+++ b/include/asm-alpha/pgalloc.h
@@ -11,10 +11,11 @@
  */
 
 static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
+pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte)
 {
 	pmd_set(pmd, (pte_t *)(page_to_pa(pte) + PAGE_OFFSET));
 }
+#define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
@@ -57,18 +58,23 @@ pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	free_page((unsigned long)pte);
 }
 
-static inline struct page *
-pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t
+pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = pte_alloc_one_kernel(mm, addr);
-	if
(pte) - return virt_to_page(pte); - return NULL; + pte_t *pte = pte_alloc_one_kernel(mm, address); + struct page *page; + + if (!pte) + return NULL; + page = virt_to_page(pte); + pgtable_page_ctor(page); + return page; } static inline void -pte_free(struct mm_struct *mm, struct page *page) +pte_free(struct mm_struct *mm, pgtable_t page) { + pgtable_page_dtor(page); __free_page(page); } diff --git a/include/asm-arm/page.h b/include/asm-arm/page.h index 31ff12f4ffb7..c86f68ee6511 100644 --- a/include/asm-arm/page.h +++ b/include/asm-arm/page.h @@ -171,6 +171,8 @@ typedef unsigned long pgprot_t; #endif /* STRICT_MM_TYPECHECKS */ +typedef struct page *pgtable_t; + #endif /* CONFIG_MMU */ #include diff --git a/include/asm-arm/pgalloc.h b/include/asm-arm/pgalloc.h index fb6c6e3222bd..163b0305dd76 100644 --- a/include/asm-arm/pgalloc.h +++ b/include/asm-arm/pgalloc.h @@ -66,7 +66,7 @@ pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) return pte; } -static inline struct page * +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr) { struct page *pte; @@ -75,6 +75,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) if (pte) { void *page = page_address(pte); clean_dcache_area(page, sizeof(pte_t) * PTRS_PER_PTE); + pgtable_page_ctor(pte); } return pte; @@ -91,8 +92,9 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) } } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); __free_page(pte); } @@ -123,10 +125,11 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) } static inline void -pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep) +pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { __pmd_populate(pmdp, page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE); } +#define pmd_pgtable(pmd) pmd_page(pmd) #endif /* CONFIG_MMU */ diff --git a/include/asm-avr32/page.h b/include/asm-avr32/page.h index ee23499cec34..5582968feee8 100644 --- a/include/asm-avr32/page.h +++ b/include/asm-avr32/page.h @@ -34,6 +34,7 @@ extern void copy_page(void *to, void *from); typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct page *pgtable_t; #define pte_val(x) ((x).pte) #define pgd_val(x) ((x).pgd) diff --git a/include/asm-avr32/pgalloc.h b/include/asm-avr32/pgalloc.h index b77e364b4c44..51fc1f6e4b17 100644 --- a/include/asm-avr32/pgalloc.h +++ b/include/asm-avr32/pgalloc.h @@ -17,10 +17,11 @@ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd, - struct page *pte) + pgtable_t pte) { set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte))); } +#define pmd_pgtable(pmd) pmd_page(pmd) /* * Allocate and free page tables @@ -51,7 +52,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *pte; pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); - + if (!pte) + return NULL; + pgtable_page_ctor(pte); return pte; } @@ -60,12 +63,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) free_page((unsigned long)pte); } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); __free_page(pte); } -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) \ +do { \ + 
pgtable_page_dtor(pte); \ + tlb_remove_page((tlb), pte); \ +} while (0) #define check_pgt_cache() do { } while(0) diff --git a/include/asm-cris/page.h b/include/asm-cris/page.h index 3b0156c46311..c45bb1ef397c 100644 --- a/include/asm-cris/page.h +++ b/include/asm-cris/page.h @@ -26,6 +26,7 @@ typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct page *pgtable_t; #endif #define pte_val(x) ((x).pte) diff --git a/include/asm-cris/pgalloc.h b/include/asm-cris/pgalloc.h index 8ddd66f81773..a1ba761d0573 100644 --- a/include/asm-cris/pgalloc.h +++ b/include/asm-cris/pgalloc.h @@ -6,6 +6,7 @@ #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) #define pmd_populate(mm, pmd, pte) pmd_set(pmd, page_address(pte)) +#define pmd_pgtable(pmd) pmd_page(pmd) /* * Allocate and free page tables. @@ -27,10 +28,11 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad return pte; } -static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + pgtable_page_ctor(pte); return pte; } @@ -39,13 +41,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) free_page((unsigned long)pte); } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); __free_page(pte); } - -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) \ +do { \ + pgtable_page_dtor(pte); \ + tlb_remove_page((tlb), pte); \ +} while (0) #define check_pgt_cache() do { } while (0) diff --git a/include/asm-frv/page.h b/include/asm-frv/page.h index cacc045700de..c2c1e89e747d 100644 --- a/include/asm-frv/page.h +++ b/include/asm-frv/page.h @@ -25,6 +25,7 @@ typedef struct { unsigned long ste[64];} pmd_t; typedef struct { pmd_t pue[1]; } pud_t; typedef struct { pud_t pge[1]; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct page *pgtable_t; #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).ste[0]) diff --git a/include/asm-frv/pgalloc.h b/include/asm-frv/pgalloc.h index e89620ef08ca..971e6addb009 100644 --- a/include/asm-frv/pgalloc.h +++ b/include/asm-frv/pgalloc.h @@ -25,6 +25,7 @@ do { \ __set_pmd((PMD), page_to_pfn(PAGE) << PAGE_SHIFT | _PAGE_TABLE); \ } while(0) +#define pmd_pgtable(pmd) pmd_page(pmd) /* * Allocate and free page tables. 
@@ -35,19 +36,24 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *); extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); -extern struct page *pte_alloc_one(struct mm_struct *, unsigned long); +extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long); static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); __free_page(pte); } -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) \ +do { \ + pgtable_page_dtor(pte); \ + tlb_remove_page((tlb),(pte)); \ +} while (0) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h index 8a8aa3fd7cd4..4999a6c63775 100644 --- a/include/asm-ia64/page.h +++ b/include/asm-ia64/page.h @@ -185,6 +185,7 @@ get_order (unsigned long size) #endif typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; + typedef struct page *pgtable_t; # define pte_val(x) ((x).pte) # define pmd_val(x) ((x).pmd) @@ -206,6 +207,7 @@ get_order (unsigned long size) typedef unsigned long pmd_t; typedef unsigned long pgd_t; typedef unsigned long pgprot_t; + typedef struct page *pgtable_t; # endif # define pte_val(x) (x) diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h index 556d988123ac..b9ac1a6fc216 100644 --- a/include/asm-ia64/pgalloc.h +++ b/include/asm-ia64/pgalloc.h @@ -70,10 +70,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd) static inline void -pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, struct page *pte) +pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte) { pmd_val(*pmd_entry) = page_to_phys(pte); } +#define pmd_pgtable(pmd) pmd_page(pmd) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) @@ -81,11 +82,17 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) pmd_val(*pmd_entry) = __pa(pte); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, - unsigned long addr) +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr) { - void *pg = quicklist_alloc(0, GFP_KERNEL, NULL); - return pg ? 
virt_to_page(pg) : NULL; + struct page *page; + void *pg; + + pg = quicklist_alloc(0, GFP_KERNEL, NULL); + if (!pg) + return NULL; + page = virt_to_page(pg); + pgtable_page_ctor(page); + return page; } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, @@ -94,8 +101,9 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, return quicklist_alloc(0, GFP_KERNEL, NULL); } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); quicklist_free_page(0, NULL, pte); } diff --git a/include/asm-m32r/page.h b/include/asm-m32r/page.h index 05d43bbbf940..8a677f3fca68 100644 --- a/include/asm-m32r/page.h +++ b/include/asm-m32r/page.h @@ -28,6 +28,7 @@ typedef struct { unsigned long pgd; } pgd_t; #define PTE_MASK PAGE_MASK typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct page *pgtable_t; #define pmd_val(x) ((x).pmd) #define pgd_val(x) ((x).pgd) diff --git a/include/asm-m32r/pgalloc.h b/include/asm-m32r/pgalloc.h index e5921adfad1b..f11a2b909cdb 100644 --- a/include/asm-m32r/pgalloc.h +++ b/include/asm-m32r/pgalloc.h @@ -9,10 +9,11 @@ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) static __inline__ void pmd_populate(struct mm_struct *mm, pmd_t *pmd, - struct page *pte) + pgtable_t pte) { set_pmd(pmd, __pmd(_PAGE_TABLE + page_to_phys(pte))); } +#define pmd_pgtable(pmd) pmd_page(pmd) /* * Allocate and free page tables. @@ -37,12 +38,12 @@ static __inline__ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, return pte; } -static __inline__ struct page *pte_alloc_one(struct mm_struct *mm, +static __inline__ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte = alloc_page(GFP_KERNEL|__GFP_ZERO); - + pgtable_page_ctor(pte); return pte; } @@ -51,8 +52,9 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) free_page((unsigned long)pte); } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); __free_page(pte); } diff --git a/include/asm-m68k/motorola_pgalloc.h b/include/asm-m68k/motorola_pgalloc.h index 500ec9b8b189..d08bf6261df8 100644 --- a/include/asm-m68k/motorola_pgalloc.h +++ b/include/asm-m68k/motorola_pgalloc.h @@ -7,7 +7,6 @@ extern pmd_t *get_pointer_table(void); extern int free_pointer_table(pmd_t *); - static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; @@ -28,7 +27,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) free_page((unsigned long) pte); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); pte_t *pte; @@ -43,19 +42,21 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long add nocache_page(pte); } kunmap(pte); - + pgtable_page_ctor(page); return page; } -static inline void pte_free(struct mm_struct *mm, struct page *page) +static inline void pte_free(struct mm_struct *mm, pgtable_t page) { + pgtable_page_dtor(page); cache_page(kmap(page)); kunmap(page); __free_page(page); } -static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *page) +static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page) { + pgtable_page_dtor(page); cache_page(kmap(page)); kunmap(page); 
__free_page(page); @@ -94,10 +95,11 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t * pmd_set(pmd, pte); } -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page) +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page) { pmd_set(pmd, page_address(page)); } +#define pmd_pgtable(pmd) pmd_page(pmd) static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) { diff --git a/include/asm-m68k/page.h b/include/asm-m68k/page.h index 3f29e2a03a43..880c2cbff8a6 100644 --- a/include/asm-m68k/page.h +++ b/include/asm-m68k/page.h @@ -91,6 +91,7 @@ typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd[16]; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct page *pgtable_t; #define pte_val(x) ((x).pte) #define pmd_val(x) ((&x)->pmd[0]) diff --git a/include/asm-m68k/sun3_pgalloc.h b/include/asm-m68k/sun3_pgalloc.h index a5a91e72714b..d4c83f143816 100644 --- a/include/asm-m68k/sun3_pgalloc.h +++ b/include/asm-m68k/sun3_pgalloc.h @@ -26,12 +26,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) free_page((unsigned long) pte); } -static inline void pte_free(struct mm_struct *mm, struct page *page) +static inline void pte_free(struct mm_struct *mm, pgtable_t page) { + pgtable_page_dtor(page); __free_page(page); } -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) \ +do { \ + pgtable_page_dtor(pte); \ + tlb_remove_page((tlb), pte); \ +} while (0) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) @@ -45,8 +50,8 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, return (pte_t *) (page); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, - unsigned long address) +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) { struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); @@ -54,6 +59,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return NULL; clear_highpage(page); + pgtable_page_ctor(page); return page; } @@ -63,10 +69,11 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t * pmd_val(*pmd) = __pa((unsigned long)pte); } -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page) +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page) { pmd_val(*pmd) = __pa((unsigned long)page_address(page)); } +#define pmd_pgtable(pmd) pmd_page(pmd) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff --git a/include/asm-mips/page.h b/include/asm-mips/page.h index 635aa44d2290..8735aa0b8963 100644 --- a/include/asm-mips/page.h +++ b/include/asm-mips/page.h @@ -90,6 +90,7 @@ typedef struct { unsigned long pte; } pte_t; #define pte_val(x) ((x).pte) #define __pte(x) ((pte_t) { (x) } ) #endif +typedef struct page *pgtable_t; /* * For 3-level pagetables we defines these ourselves, for 2-level the diff --git a/include/asm-mips/pgalloc.h b/include/asm-mips/pgalloc.h index c4efeced8396..1275831dda29 100644 --- a/include/asm-mips/pgalloc.h +++ b/include/asm-mips/pgalloc.h @@ -20,10 +20,11 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, - struct page *pte) + pgtable_t pte) { set_pmd(pmd, __pmd((unsigned long)page_address(pte))); } +#define pmd_pgtable(pmd) 
 pmd_page(pmd) /* * Initialize a new pmd table with invalid pointers. @@ -79,9 +80,10 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *pte; pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); - if (pte) + if (pte) { clear_highpage(pte); - + pgtable_page_ctor(pte); + } return pte; } @@ -90,12 +92,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) free_pages((unsigned long)pte, PTE_ORDER); } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); __free_pages(pte, PTE_ORDER); } -#define __pte_free_tlb(tlb, pte) tlb_remove_page((tlb), (pte)) +#define __pte_free_tlb(tlb,pte) \ +do { \ + pgtable_page_dtor(pte); \ + tlb_remove_page((tlb), pte); \ +} while (0) #ifdef CONFIG_32BIT diff --git a/include/asm-parisc/page.h b/include/asm-parisc/page.h index b08d9151c71e..27d50b859541 100644 --- a/include/asm-parisc/page.h +++ b/include/asm-parisc/page.h @@ -91,6 +91,7 @@ typedef unsigned long pgprot_t; #endif /* STRICT_MM_TYPECHECKS */ +typedef struct page *pgtable_t; typedef struct __physmem_range { unsigned long start_pfn; diff --git a/include/asm-parisc/pgalloc.h b/include/asm-parisc/pgalloc.h index aab66f1bea14..3996dfc30a3f 100644 --- a/include/asm-parisc/pgalloc.h +++ b/include/asm-parisc/pgalloc.h @@ -115,11 +115,14 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) #define pmd_populate(mm, pmd, pte_page) \ pmd_populate_kernel(mm, pmd, page_address(pte_page)) +#define pmd_pgtable(pmd) pmd_page(pmd) -static inline struct page * +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + if (page) + pgtable_page_ctor(page); return page; } @@ -135,7 +138,11 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) free_page((unsigned long)pte); } -#define pte_free(mm, page) pte_free_kernel(page_address(page)) +static inline void pte_free(struct mm_struct *mm, struct page *pte) +{ + pgtable_page_dtor(pte); + pte_free_kernel(mm, page_address(pte)); +} #define check_pgt_cache() do { } while (0) diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h index 61e3725bbd37..df47bbb6ea13 100644 --- a/include/asm-powerpc/page.h +++ b/include/asm-powerpc/page.h @@ -190,6 +190,8 @@ extern int page_is_ram(unsigned long pfn); struct vm_area_struct; +typedef struct page *pgtable_t; + #include #endif /* __ASSEMBLY__ */ diff --git a/include/asm-powerpc/pgalloc-32.h b/include/asm-powerpc/pgalloc-32.h index c162a4c37b39..58c07147b3ea 100644 --- a/include/asm-powerpc/pgalloc-32.h +++ b/include/asm-powerpc/pgalloc-32.h @@ -22,17 +22,19 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); (pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT) #define pmd_populate(mm, pmd, pte) \ (pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT) +#define pmd_pgtable(pmd) pmd_page(pmd) #else #define pmd_populate_kernel(mm, pmd, pte) \ (pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT) #define pmd_populate(mm, pmd, pte) \ (pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT) +#define pmd_pgtable(pmd) pmd_page(pmd) #endif extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr); -extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr); +extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr); extern void pte_free_kernel(struct mm_struct *mm, 
pte_t *pte); -extern void pte_free(struct mm_struct *mm, struct page *pte); +extern void pte_free(struct mm_struct *mm, pgtable_t pte); #define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte)) diff --git a/include/asm-powerpc/pgalloc-64.h b/include/asm-powerpc/pgalloc-64.h index 5afae8593931..68980990f62a 100644 --- a/include/asm-powerpc/pgalloc-64.h +++ b/include/asm-powerpc/pgalloc-64.h @@ -58,6 +58,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) #define pmd_populate(mm, pmd, pte_page) \ pmd_populate_kernel(mm, pmd, page_address(pte_page)) #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte)) +#define pmd_pgtable(pmd) pmd_page(pmd) #else /* CONFIG_PPC_64K_PAGES */ @@ -72,6 +73,7 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, #define pmd_populate(mm, pmd, pte_page) \ pmd_populate_kernel(mm, pmd, page_address(pte_page)) +#define pmd_pgtable(pmd) pmd_page(pmd) #endif /* CONFIG_PPC_64K_PAGES */ @@ -92,11 +94,18 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, - unsigned long address) +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) { - pte_t *pte = pte_alloc_one_kernel(mm, address); - return pte ? virt_to_page(pte) : NULL; + struct page *page; + pte_t *pte; + + pte = pte_alloc_one_kernel(mm, address); + if (!pte) + return NULL; + page = virt_to_page(pte); + pgtable_page_ctor(page); + return page; } static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) @@ -104,8 +113,9 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) free_page((unsigned long)pte); } -static inline void pte_free(struct mm_struct *mm, struct page *ptepage) +static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) { + pgtable_page_dtor(ptepage); __free_page(ptepage); } @@ -136,9 +146,12 @@ static inline void pgtable_free(pgtable_free_t pgf) extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); -#define __pte_free_tlb(tlb, ptepage) \ +#define __pte_free_tlb(tlb,ptepage) \ +do { \ + pgtable_page_dtor(ptepage); \ pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ - PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)) + PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \ +} while (0) #define __pmd_free_tlb(tlb, pmd) \ pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) diff --git a/include/asm-ppc/pgalloc.h b/include/asm-ppc/pgalloc.h index 7c39a95829c7..fd4d1d74cfb1 100644 --- a/include/asm-ppc/pgalloc.h +++ b/include/asm-ppc/pgalloc.h @@ -23,17 +23,19 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); (pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT) #define pmd_populate(mm, pmd, pte) \ (pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT) +#define pmd_pgtable(pmd) pmd_page(pmd) #else #define pmd_populate_kernel(mm, pmd, pte) \ (pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT) #define pmd_populate(mm, pmd, pte) \ (pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT) +#define pmd_pgtable(pmd) pmd_page(pmd) #endif extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr); -extern struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr); +extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr); extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte); -extern void 
pte_free(struct mm_struct *mm, struct page *pte); +extern void pte_free(struct mm_struct *mm, pgtable_t pte); #define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte)) diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h index a55f9d979dfb..7f29a981f48c 100644 --- a/include/asm-s390/page.h +++ b/include/asm-s390/page.h @@ -109,6 +109,8 @@ typedef struct { unsigned long pgd; } pgd_t; #endif /* __s390x__ */ +typedef struct page *pgtable_t; + #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index 6f6619ba8980..900d44807e10 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h @@ -132,7 +132,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) } static inline void -pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page) +pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page) { pte_t *pte = (pte_t *)page_to_phys(page); pmd_t *shadow_pmd = get_shadow_table(pmd); @@ -142,6 +142,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page) if (shadow_pmd && shadow_pte) pmd_populate_kernel(mm, shadow_pmd, shadow_pte); } +#define pmd_pgtable(pmd) pmd_page(pmd) /* * page table entry allocation/free routines. diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h index 985de2b88279..3c8177fa9e06 100644 --- a/include/asm-s390/tlb.h +++ b/include/asm-s390/tlb.h @@ -95,7 +95,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) * pte_free_tlb frees a pte table and clears the CRSTE for the * page table from the tlb. */ -static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page) +static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t page) { if (!tlb->fullmm) { tlb->array[tlb->nr_ptes++] = page; diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h index e0fe02950f52..134562dc8c45 100644 --- a/include/asm-sh/page.h +++ b/include/asm-sh/page.h @@ -100,6 +100,8 @@ typedef struct { unsigned long pgd; } pgd_t; #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) +typedef struct page *pgtable_t; + #endif /* !__ASSEMBLY__ */ /* diff --git a/include/asm-sh/pgalloc.h b/include/asm-sh/pgalloc.h index 59ca16d77a1d..84dd2db7104c 100644 --- a/include/asm-sh/pgalloc.h +++ b/include/asm-sh/pgalloc.h @@ -14,10 +14,11 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, - struct page *pte) + pgtable_t pte) { set_pmd(pmd, __pmd((unsigned long)page_address(pte))); } +#define pmd_pgtable(pmd) pmd_page(pmd) static inline void pgd_ctor(void *x) { @@ -47,11 +48,18 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, - unsigned long address) +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) { - void *pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); - return pg ? 
virt_to_page(pg) : NULL; + struct page *page; + void *pg; + + pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL); + if (!pg) + return NULL; + page = virt_to_page(pg); + pgtable_page_ctor(page); + return page; } static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) @@ -59,12 +67,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) quicklist_free(QUICK_PT, NULL, pte); } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); quicklist_free_page(QUICK_PT, NULL, pte); } -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) \ +do { \ + pgtable_page_dtor(pte); \ + tlb_remove_page((tlb), (pte)); \ +} while (0) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff --git a/include/asm-sparc/page.h b/include/asm-sparc/page.h index cbc48c0c4e15..39ccf2da297c 100644 --- a/include/asm-sparc/page.h +++ b/include/asm-sparc/page.h @@ -123,6 +123,8 @@ typedef unsigned long iopgprot_t; #endif +typedef struct page *pgtable_t; + extern unsigned long sparc_unmapped_base; BTFIXUPDEF_SETHI(sparc_unmapped_base) diff --git a/include/asm-sparc/pgalloc.h b/include/asm-sparc/pgalloc.h index b5fbdd36447f..6292cd00e5af 100644 --- a/include/asm-sparc/pgalloc.h +++ b/include/asm-sparc/pgalloc.h @@ -50,10 +50,11 @@ BTFIXUPDEF_CALL(void, free_pmd_fast, pmd_t *) BTFIXUPDEF_CALL(void, pmd_populate, pmd_t *, struct page *) #define pmd_populate(MM, PMD, PTE) BTFIXUP_CALL(pmd_populate)(PMD, PTE) +#define pmd_pgtable(pmd) pmd_page(pmd) BTFIXUPDEF_CALL(void, pmd_set, pmd_t *, pte_t *) #define pmd_populate_kernel(MM, PMD, PTE) BTFIXUP_CALL(pmd_set)(PMD, PTE) -BTFIXUPDEF_CALL(struct page *, pte_alloc_one, struct mm_struct *, unsigned long) +BTFIXUPDEF_CALL(pgtable_t , pte_alloc_one, struct mm_struct *, unsigned long) #define pte_alloc_one(mm, address) BTFIXUP_CALL(pte_alloc_one)(mm, address) BTFIXUPDEF_CALL(pte_t *, pte_alloc_one_kernel, struct mm_struct *, unsigned long) #define pte_alloc_one_kernel(mm, addr) BTFIXUP_CALL(pte_alloc_one_kernel)(mm, addr) @@ -61,7 +62,7 @@ BTFIXUPDEF_CALL(pte_t *, pte_alloc_one_kernel, struct mm_struct *, unsigned long BTFIXUPDEF_CALL(void, free_pte_fast, pte_t *) #define pte_free_kernel(mm, pte) BTFIXUP_CALL(free_pte_fast)(pte) -BTFIXUPDEF_CALL(void, pte_free, struct page *) +BTFIXUPDEF_CALL(void, pte_free, pgtable_t ) #define pte_free(mm, pte) BTFIXUP_CALL(pte_free)(pte) #define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index cdf950e017ee..e93a482aa24a 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -104,6 +104,8 @@ typedef unsigned long pgprot_t; #endif /* (STRICT_MM_TYPECHECKS) */ +typedef struct page *pgtable_t; + #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? 
\ (_AC(0x0000000070000000,UL)) : \ (_AC(0xfffff80000000000,UL) + (1UL << 32UL))) diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index b48f73c2274e..3ee2d406373b 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -43,11 +43,18 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, return quicklist_alloc(0, GFP_KERNEL, NULL); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, - unsigned long address) +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) { - void *pg = quicklist_alloc(0, GFP_KERNEL, NULL); - return pg ? virt_to_page(pg) : NULL; + struct page *page; + void *pg; + + pg = quicklist_alloc(0, GFP_KERNEL, NULL); + if (!pg) + return NULL; + page = virt_to_page(pg); + pgtable_page_ctor(page); + return page; } static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) @@ -55,8 +62,9 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) quicklist_free(0, NULL, pte); } -static inline void pte_free(struct mm_struct *mm, struct page *ptepage) +static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) { + pgtable_page_dtor(ptepage); quicklist_free_page(0, NULL, ptepage); } @@ -64,6 +72,7 @@ static inline void pte_free(struct mm_struct *mm, struct page *ptepage) #define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) #define pmd_populate(MM,PMD,PTE_PAGE) \ pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) +#define pmd_pgtable(pmd) pmd_page(pmd) static inline void check_pgt_cache(void) { diff --git a/include/asm-um/page.h b/include/asm-um/page.h index fe2374d705d1..381f96b1c825 100644 --- a/include/asm-um/page.h +++ b/include/asm-um/page.h @@ -79,6 +79,8 @@ typedef unsigned long phys_t; typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct page *pgtable_t; + #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) diff --git a/include/asm-um/pgalloc.h b/include/asm-um/pgalloc.h index 4f3e62b02861..9062a6e72241 100644 --- a/include/asm-um/pgalloc.h +++ b/include/asm-um/pgalloc.h @@ -18,6 +18,7 @@ set_pmd(pmd, __pmd(_PAGE_TABLE + \ ((unsigned long long)page_to_pfn(pte) << \ (unsigned long long) PAGE_SHIFT))) +#define pmd_pgtable(pmd) pmd_page(pmd) /* * Allocate and free page tables. 
@@ -26,19 +27,24 @@ extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); -extern struct page *pte_alloc_one(struct mm_struct *, unsigned long); +extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long); static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long) pte); } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); __free_page(pte); } -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) \ +do { \ + pgtable_page_dtor(pte); \ + tlb_remove_page((tlb),(pte)); \ +} while (0) #ifdef CONFIG_3_LEVEL_PGTABLES diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index a6fd10f230d2..ba715d9798b0 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -50,6 +50,8 @@ typedef unsigned long phys_addr_t; typedef union { pteval_t pte, pte_low; } pte_t; typedef pte_t boot_pte_t; +typedef struct page *pgtable_t; + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_X86_PAE */ diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h index dcf0c0746075..f7393bc516ef 100644 --- a/include/asm-x86/page_64.h +++ b/include/asm-x86/page_64.h @@ -71,6 +71,8 @@ typedef unsigned long pgdval_t; typedef unsigned long pgprotval_t; typedef unsigned long phys_addr_t; +typedef struct page *pgtable_t; + typedef struct { pteval_t pte; } pte_t; #define vmemmap ((struct page *)VMEMMAP_START) diff --git a/include/asm-x86/pgalloc_32.h b/include/asm-x86/pgalloc_32.h index bab12718a913..6bea6e5b5ee5 100644 --- a/include/asm-x86/pgalloc_32.h +++ b/include/asm-x86/pgalloc_32.h @@ -31,6 +31,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *p paravirt_alloc_pt(mm, pfn); set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); } +#define pmd_pgtable(pmd) pmd_page(pmd) /* * Allocate and free page tables. 
@@ -39,15 +40,16 @@ extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); extern pte_t *pte_alloc_one_kernel(struct mm_struct *, unsigned long); -extern struct page *pte_alloc_one(struct mm_struct *, unsigned long); +extern pgtable_t pte_alloc_one(struct mm_struct *, unsigned long); static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); __free_page(pte); } diff --git a/include/asm-x86/pgalloc_64.h b/include/asm-x86/pgalloc_64.h index 4f6220db22b1..8d6722320dcc 100644 --- a/include/asm-x86/pgalloc_64.h +++ b/include/asm-x86/pgalloc_64.h @@ -12,6 +12,8 @@ #define pgd_populate(mm, pgd, pud) \ set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud))) +#define pmd_pgtable(pmd) pmd_page(pmd) + static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) { set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT))); @@ -91,12 +93,17 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad return (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - void *p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + struct page *page; + void *p; + + p = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); if (!p) return NULL; - return virt_to_page(p); + page = virt_to_page(p); + pgtable_page_ctor(page); + return page; } /* Should really implement gc for free page table pages. This could be @@ -108,12 +115,17 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) free_page((unsigned long)pte); } -static inline void pte_free(struct mm_struct *mm, struct page *pte) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { + pgtable_page_dtor(pte); __free_page(pte); } -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#define __pte_free_tlb(tlb,pte) \ +do { \ + pgtable_page_dtor((pte)); \ + tlb_remove_page((tlb), (pte)); \ +} while (0) #define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) #define __pud_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) diff --git a/include/asm-xtensa/page.h b/include/asm-xtensa/page.h index 1adedbf41d01..80a6ae0dd259 100644 --- a/include/asm-xtensa/page.h +++ b/include/asm-xtensa/page.h @@ -98,6 +98,7 @@ typedef struct { unsigned long pte; } pte_t; /* page table entry */ typedef struct { unsigned long pgd; } pgd_t; /* PGD table entry */ typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct page *pgtable_t; #define pte_val(x) ((x).pte) #define pgd_val(x) ((x).pgd) diff --git a/include/asm-xtensa/pgalloc.h b/include/asm-xtensa/pgalloc.h index 1d51ba5463f9..8d1544eb461e 100644 --- a/include/asm-xtensa/pgalloc.h +++ b/include/asm-xtensa/pgalloc.h @@ -24,6 +24,7 @@ (pmd_val(*(pmdp)) = ((unsigned long)ptep)) #define pmd_populate(mm, pmdp, page) \ (pmd_val(*(pmdp)) = ((unsigned long)page_to_virt(page))) +#define pmd_pgtable(pmd) pmd_page(pmd) static inline pgd_t* pgd_alloc(struct mm_struct *mm) @@ -46,10 +47,14 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, - unsigned long addr) +static inline 
 pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long addr) { - return virt_to_page(pte_alloc_one_kernel(mm, addr)); + struct page *page; + + page = virt_to_page(pte_alloc_one_kernel(mm, addr)); + pgtable_page_ctor(page); + return page; } static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) @@ -57,10 +62,12 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) kmem_cache_free(pgtable_cache, pte); } -static inline void pte_free(struct mm_struct *mm, struct page *page) +static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { - kmem_cache_free(pgtable_cache, page_address(page)); + pgtable_page_dtor(pte); + kmem_cache_free(pgtable_cache, page_address(pte)); } +#define pmd_pgtable(pmd) pmd_page(pmd) #endif /* __KERNEL__ */ #endif /* _XTENSA_PGALLOC_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 89d7c691b93a..e8abb3814209 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -894,6 +894,18 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */ +static inline void pgtable_page_ctor(struct page *page) +{ + pte_lock_init(page); + inc_zone_page_state(page, NR_PAGETABLE); +} + +static inline void pgtable_page_dtor(struct page *page) +{ + pte_lock_deinit(page); + dec_zone_page_state(page, NR_PAGETABLE); +} + #define pte_offset_map_lock(mm, pmd, address, ptlp) \ ({ \ spinlock_t *__ptl = pte_lockptr(mm, pmd); \ @@ -1136,7 +1148,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ -typedef int (*pte_fn_t)(pte_t *pte, struct page *pmd_page, unsigned long addr, +typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); diff --git a/mm/memory.c b/mm/memory.c index 153a54b2013c..e5628a5fd678 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -134,11 +134,9 @@ void pmd_clear_bad(pmd_t *pmd) */ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd) { - struct page *page = pmd_page(*pmd); + pgtable_t token = pmd_pgtable(*pmd); pmd_clear(pmd); - pte_lock_deinit(page); - pte_free_tlb(tlb, page); - dec_zone_page_state(page, NR_PAGETABLE); + pte_free_tlb(tlb, token); tlb->mm->nr_ptes--; } @@ -309,21 +307,19 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma, int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { - struct page *new = pte_alloc_one(mm, address); + pgtable_t new = pte_alloc_one(mm, address); if (!new) return -ENOMEM; - pte_lock_init(new); spin_lock(&mm->page_table_lock); - if (pmd_present(*pmd)) { /* Another has populated it */ - pte_lock_deinit(new); - pte_free(mm, new); - } else { + if (!pmd_present(*pmd)) { /* Has another populated it ? */ mm->nr_ptes++; - inc_zone_page_state(new, NR_PAGETABLE); pmd_populate(mm, pmd, new); + new = NULL; } spin_unlock(&mm->page_table_lock); + if (new) + pte_free(mm, new); return 0; } @@ -334,11 +330,13 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) return -ENOMEM; spin_lock(&init_mm.page_table_lock); - if (pmd_present(*pmd)) /* Another has populated it */ - pte_free_kernel(&init_mm, new); - else + if (!pmd_present(*pmd)) { /* Has another populated it ? 
*/ pmd_populate_kernel(&init_mm, pmd, new); + new = NULL; + } spin_unlock(&init_mm.page_table_lock); + if (new) + pte_free_kernel(&init_mm, new); return 0; } @@ -1390,7 +1388,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, { pte_t *pte; int err; - struct page *pmd_page; + pgtable_t token; spinlock_t *uninitialized_var(ptl); pte = (mm == &init_mm) ? @@ -1401,10 +1399,10 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, BUG_ON(pmd_huge(*pmd)); - pmd_page = pmd_page(*pmd); + token = pmd_pgtable(*pmd); do { - err = fn(pte, pmd_page, addr, data); + err = fn(pte, token, addr, data); if (err) break; } while (pte++, addr += PAGE_SIZE, addr != end); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0536dde139d1..950c0be9ca81 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -820,7 +820,7 @@ void __attribute__((weak)) vmalloc_sync_all(void) } -static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) +static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data) { /* apply_to_page_range() does all the hard work. */ return 0; -- cgit v1.2.3 From da7bfc50f5cb54aeee8147dca0c1de9d487cb5e0 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 9 Feb 2008 23:24:08 +0100 Subject: x86: sparse warnings in pageattr.c Adjust the definition of lookup_address to take an unsigned int level argument. Adjust callers in xen/mmu.c that pass in a dummy variable. Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 8 +++++--- arch/x86/xen/mmu.c | 6 +++--- include/asm-x86/pgtable.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 8493c855582b..eb2a54415a77 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -191,7 +191,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) * or when the present bit is not set. Otherwise we would return a * pointer to a nonexisting mapping. 
*/ -pte_t *lookup_address(unsigned long address, int *level) +pte_t *lookup_address(unsigned long address, unsigned int *level) { pgd_t *pgd = pgd_offset_k(address); pud_t *pud; @@ -255,7 +255,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, unsigned long nextpage_addr, numpages, pmask, psize, flags; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot; - int level, do_split = 1; + int do_split = 1; + unsigned int level; spin_lock_irqsave(&pgd_lock, flags); /* @@ -406,7 +407,8 @@ out_unlock: static int __change_page_attr(unsigned long address, struct cpa_data *cpa) { - int level, do_split, err; + int do_split, err; + unsigned int level; struct page *kpte_page; pte_t *kpte; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 45aa771e73a9..0144395448ae 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -58,7 +58,7 @@ xmaddr_t arbitrary_virt_to_machine(unsigned long address) { - int level; + unsigned int level; pte_t *pte = lookup_address(address, &level); unsigned offset = address & PAGE_MASK; @@ -71,7 +71,7 @@ void make_lowmem_page_readonly(void *vaddr) { pte_t *pte, ptev; unsigned long address = (unsigned long)vaddr; - int level; + unsigned int level; pte = lookup_address(address, &level); BUG_ON(pte == NULL); @@ -86,7 +86,7 @@ void make_lowmem_page_readwrite(void *vaddr) { pte_t *pte, ptev; unsigned long address = (unsigned long)vaddr; - int level; + unsigned int level; pte = lookup_address(address, &level); BUG_ON(pte == NULL); diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 44c0a4f1b1eb..174b87738714 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -255,7 +255,7 @@ enum { * NOTE: the return type is pte_t but if the pmd is PSE then we return it * as a pte too. */ -extern pte_t *lookup_address(unsigned long address, int *level); +extern pte_t *lookup_address(unsigned long address, unsigned int *level); /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) -- cgit v1.2.3 From bfc734b24671b2639218ae2ef53af91dfd30b6c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 9 Feb 2008 23:24:09 +0100 Subject: x86: avoid unused variable warning in mm/init_64.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5fe880fc305d..620d2b6b6bf4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -532,9 +532,9 @@ void __init mem_init(void) void free_init_pages(char *what, unsigned long begin, unsigned long end) { - unsigned long addr; + unsigned long addr = begin; - if (begin >= end) + if (addr >= end) return; /* @@ -549,7 +549,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) #else printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); - for (addr = begin; addr < end; addr += PAGE_SIZE) { + for (; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); memset((void *)(addr & ~(PAGE_SIZE-1)), -- cgit v1.2.3 From 551889a6e2a24a9c06fd453ea03b57b7746ffdc0 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Sat, 9 Feb 2008 23:24:09 +0100 Subject: x86: construct 32-bit boot time page tables in native format. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specifically, the boot time page tables in a CONFIG_X86_PAE=y enabled kernel are in PAE format. early_ioremap is updated to use the standard page table accessors. Clear any mappings beyond max_low_pfn from the boot page tables in native_pagetable_setup_start because the initial mappings can extend beyond the range of physical memory and into the vmalloc area. Derived from patches by Eric Biederman and H. Peter Anvin. [ jeremy@goop.org: PAE swapper_pg_dir needs to be page-sized fix ] Signed-off-by: Ian Campbell Cc: H. Peter Anvin Cc: Eric W. Biederman Cc: Andi Kleen Cc: Mika Penttilä Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head_32.S | 151 +++++++++++++++++++++++++++++++++---------- arch/x86/kernel/setup_32.c | 4 ++ arch/x86/mm/init_32.c | 72 ++++++++------------- arch/x86/mm/ioremap.c | 55 +++++++++------- include/asm-x86/page_32.h | 1 - include/asm-x86/pgtable_32.h | 4 -- 6 files changed, 178 insertions(+), 109 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 5d8c5730686b..74ef4a41f224 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -19,6 +19,10 @@ #include #include #include +#include + +/* Physical address */ +#define pa(X) ((X) - __PAGE_OFFSET) /* * References to members of the new_cpu_data structure. */ @@ -80,10 +84,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_ */ .section .text.head,"ax",@progbits ENTRY(startup_32) - /* check to see if KEEP_SEGMENTS flag is meaningful */ - cmpw $0x207, BP_version(%esi) - jb 1f - /* test KEEP_SEGMENTS flag to see if the bootloader is asking us to not reload segments */ testb $(1<<6), BP_loadflags(%esi) @@ -92,7 +92,7 @@ ENTRY(startup_32) /* * Set segments to known values. */ -1: lgdt boot_gdt_descr - __PAGE_OFFSET + lgdt pa(boot_gdt_descr) movl $(__BOOT_DS),%eax movl %eax,%ds movl %eax,%es @@ -105,8 +105,8 @@ ENTRY(startup_32) */ cld xorl %eax,%eax - movl $__bss_start - __PAGE_OFFSET,%edi - movl $__bss_stop - __PAGE_OFFSET,%ecx + movl $pa(__bss_start),%edi + movl $pa(__bss_stop),%ecx subl %edi,%ecx shrl $2,%ecx rep ; stosl @@ -118,31 +118,32 @@ ENTRY(startup_32) * (kexec on panic case). Hence copy out the parameters before initializing * page tables. */ - movl $(boot_params - __PAGE_OFFSET),%edi + movl $pa(boot_params),%edi movl $(PARAM_SIZE/4),%ecx cld rep movsl - movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi + movl pa(boot_params) + NEW_CL_POINTER,%esi andl %esi,%esi jz 1f # No command line - movl $(boot_command_line - __PAGE_OFFSET),%edi + movl $pa(boot_command_line),%edi movl $(COMMAND_LINE_SIZE/4),%ecx rep movsl 1: #ifdef CONFIG_PARAVIRT - cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET) + /* This can only trip for a broken bootloader... */ + cmpw $0x207, pa(boot_params + BP_version) jb default_entry /* Paravirt-compatible boot parameters. Look to see what architecture we're booting under. */ - movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax + movl pa(boot_params + BP_hardware_subarch), %eax cmpl $num_subarch_entries, %eax jae bad_subarch - movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax + movl pa(subarch_entries)(,%eax,4), %eax subl $__PAGE_OFFSET, %eax jmp *%eax @@ -170,17 +171,68 @@ num_subarch_entries = (. 
- subarch_entries) / 4 * Mappings are created both at virtual address 0 (identity mapping) * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END. * - * Warning: don't use %esi or the stack in this code. However, %esp - * can be used as a GPR if you really need it... + * Note that the stack is not yet set up! */ -page_pde_offset = (__PAGE_OFFSET >> 20); +#define PTE_ATTR 0x007 /* PRESENT+RW+USER */ +#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ +#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */ default_entry: - movl $(pg0 - __PAGE_OFFSET), %edi - movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ +#ifdef CONFIG_X86_PAE + + /* + * In PAE mode swapper_pg_dir is statically defined to contain enough + * entries to cover the VMSPLIT option (that is the top 1, 2 or 3 + * entries). The identity mapping is handled by pointing two PGD + * entries to the first kernel PMD. + * + * Note the upper half of each PMD or PTE are always zero at + * this stage. + */ + +#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */ + + xorl %ebx,%ebx /* %ebx is kept at zero */ + + movl $pa(pg0), %edi + movl $pa(swapper_pg_pmd), %edx + movl $PTE_ATTR, %eax +10: + leal PDE_ATTR(%edi),%ecx /* Create PMD entry */ + movl %ecx,(%edx) /* Store PMD entry */ + /* Upper half already zero */ + addl $8,%edx + movl $512,%ecx +11: + stosl + xchgl %eax,%ebx + stosl + xchgl %eax,%ebx + addl $0x1000,%eax + loop 11b + + /* + * End condition: we must map up to and including INIT_MAP_BEYOND_END + * bytes beyond the end of our own page tables. + */ + leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp + cmpl %ebp,%eax + jb 10b +1: + movl %edi,pa(init_pg_tables_end) + + /* Do early initialization of the fixmap area */ + movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax + movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8) +#else /* Not PAE */ + +page_pde_offset = (__PAGE_OFFSET >> 20); + + movl $pa(pg0), %edi + movl $pa(swapper_pg_dir), %edx + movl $PTE_ATTR, %eax 10: - leal 0x007(%edi),%ecx /* Create PDE entry */ + leal PDE_ATTR(%edi),%ecx /* Create PDE entry */ movl %ecx,(%edx) /* Store identity PDE entry */ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ addl $4,%edx @@ -189,19 +241,20 @@ default_entry: stosl addl $0x1000,%eax loop 11b - /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ - /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ - leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp + /* + * End condition: we must map up to and including INIT_MAP_BEYOND_END + * bytes beyond the end of our own page tables; the +0x007 is + * the attribute bits + */ + leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp cmpl %ebp,%eax jb 10b - movl %edi,(init_pg_tables_end - __PAGE_OFFSET) - - /* Do an early initialization of the fixmap area */ - movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax - addl $0x67, %eax /* 0x67 == _PAGE_TABLE */ - movl %eax, 4092(%edx) + movl %edi,pa(init_pg_tables_end) + /* Do early initialization of the fixmap area */ + movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax + movl %eax,pa(swapper_pg_dir+0xffc) +#endif jmp 3f /* * Non-boot CPU entry point; entered from trampoline.S @@ -241,7 +294,7 @@ ENTRY(startup_32_smp) * NOTE! We have to correct for the fact that we're * not yet offset PAGE_OFFSET.. 
*/ -#define cr4_bits mmu_cr4_features-__PAGE_OFFSET +#define cr4_bits pa(mmu_cr4_features) movl cr4_bits,%edx andl %edx,%edx jz 6f @@ -276,10 +329,10 @@ ENTRY(startup_32_smp) /* * Enable paging */ - movl $swapper_pg_dir-__PAGE_OFFSET,%eax + movl $pa(swapper_pg_dir),%eax movl %eax,%cr3 /* set the page table pointer.. */ movl %cr0,%eax - orl $0x80000000,%eax + orl $X86_CR0_PG,%eax movl %eax,%cr0 /* ..and set paging (PG) bit */ ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ 1: @@ -552,16 +605,44 @@ ENTRY(_stext) */ .section ".bss.page_aligned","wa" .align PAGE_SIZE_asm +#ifdef CONFIG_X86_PAE +ENTRY(swapper_pg_pmd) + .fill 1024*KPMDS,4,0 +#else ENTRY(swapper_pg_dir) .fill 1024,4,0 -ENTRY(swapper_pg_pmd) +#endif +ENTRY(swapper_pg_fixmap) .fill 1024,4,0 ENTRY(empty_zero_page) .fill 4096,1,0 - /* * This starts the data section. */ +#ifdef CONFIG_X86_PAE +.section ".data.page_aligned","wa" + /* Page-aligned for the benefit of paravirt? */ + .align PAGE_SIZE_asm +ENTRY(swapper_pg_dir) + .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */ +# if KPMDS == 3 + .long pa(swapper_pg_pmd+PGD_ATTR),0 + .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 + .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0 +# elif KPMDS == 2 + .long 0,0 + .long pa(swapper_pg_pmd+PGD_ATTR),0 + .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0 +# elif KPMDS == 1 + .long 0,0 + .long 0,0 + .long pa(swapper_pg_pmd+PGD_ATTR),0 +# else +# error "Kernel PMDs should be 1, 2 or 3" +# endif + .align PAGE_SIZE_asm /* needs to be page-sized too */ +#endif + .data ENTRY(stack_start) .long init_thread_union+THREAD_SIZE diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index d1d8c347cc0b..691ab4cb167b 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -154,7 +154,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); +#ifndef CONFIG_X86_PAE unsigned long mmu_cr4_features; +#else +unsigned long mmu_cr4_features = X86_CR4_PAE; +#endif /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d1bc04006d16..54aba3cf9efe 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -46,6 +46,7 @@ #include #include #include +#include unsigned int __VMALLOC_RESERVE = 128 << 20; @@ -328,44 +329,38 @@ pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; void __init native_pagetable_setup_start(pgd_t *base) { -#ifdef CONFIG_X86_PAE - int i; + unsigned long pfn, va; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; /* - * Init entries of the first-level page table to the - * zero page, if they haven't already been set up. - * - * In a normal native boot, we'll be running on a - * pagetable rooted in swapper_pg_dir, but not in PAE - * mode, so this will end up clobbering the mappings - * for the lower 24Mbytes of the address space, - * without affecting the kernel address space. + * Remove any mappings which extend past the end of physical + * memory from the boot time page table: */ - for (i = 0; i < USER_PTRS_PER_PGD; i++) - set_pgd(&base[i], - __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); - - /* Make sure kernel address space is empty so that a pagetable - will be allocated for it. 
*/ - memset(&base[USER_PTRS_PER_PGD], 0, - KERNEL_PGD_PTRS * sizeof(pgd_t)); -#else + for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { + va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); + pgd = base + pgd_index(va); + if (!pgd_present(*pgd)) + break; + + pud = pud_offset(pgd, va); + pmd = pmd_offset(pud, va); + if (!pmd_present(*pmd)) + break; + + pte = pte_offset_kernel(pmd, va); + if (!pte_present(*pte)) + break; + + pte_clear(NULL, va, pte); + } + paravirt_alloc_pt(&init_mm, __pa(base) >> PAGE_SHIFT); -#endif } void __init native_pagetable_setup_done(pgd_t *base) { -#ifdef CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. - */ - set_pgd(&base[0], base[USER_PTRS_PER_PGD]); -#endif } /* @@ -374,9 +369,8 @@ void __init native_pagetable_setup_done(pgd_t *base) * the boot process. * * If we're booting on native hardware, this will be a pagetable - * constructed in arch/i386/kernel/head.S, and not running in PAE mode - * (even if we'll end up running in PAE). The root of the pagetable - * will be swapper_pg_dir. + * constructed in arch/x86/kernel/head_32.S. The root of the + * pagetable will be swapper_pg_dir. * * If we're booting paravirtualized under a hypervisor, then there are * more options: we may already be running PAE, and the pagetable may @@ -537,14 +531,6 @@ void __init paging_init(void) load_cr3(swapper_pg_dir); -#ifdef CONFIG_X86_PAE - /* - * We will bail out later - printk doesn't work right now so - * the user would just see a hanging kernel. - */ - if (cpu_has_pae) - set_in_cr4(X86_CR4_PAE); -#endif __flush_tlb_all(); kmap_init(); @@ -675,10 +661,6 @@ void __init mem_init(void) BUG_ON((unsigned long)high_memory > VMALLOC_START); #endif /* double-sanity-check paranoia */ -#ifdef CONFIG_X86_PAE - if (!cpu_has_pae) - panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); -#endif if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index ee6648fe6b15..1106b7f477bd 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -260,41 +260,46 @@ static int __init early_ioremap_debug_setup(char *str) early_param("early_ioremap_debug", early_ioremap_debug_setup); static __initdata int after_paging_init; -static __initdata unsigned long bm_pte[1024] +static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __attribute__((aligned(PAGE_SIZE))); -static inline unsigned long * __init early_ioremap_pgd(unsigned long addr) +static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { - return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023); + pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)]; + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); + + return pmd; } -static inline unsigned long * __init early_ioremap_pte(unsigned long addr) +static inline pte_t * __init early_ioremap_pte(unsigned long addr) { - return bm_pte + ((addr >> PAGE_SHIFT) & 1023); + return &bm_pte[pte_index(addr)]; } void __init early_ioremap_init(void) { - unsigned long *pgd; + pmd_t *pmd; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_init()\n"); - pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN)); - *pgd = __pa(bm_pte) | _PAGE_TABLE; + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); + set_pmd(pmd, __pmd(__pa(bm_pte) | _PAGE_TABLE)); + /* - * The boot-ioremap range spans multiple pgds, for which + * The boot-ioremap range spans multiple pmds, for which * we are not prepared: */ - if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) { + if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { WARN_ON(1); - printk(KERN_WARNING "pgd %p != %p\n", - pgd,
early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))); + printk(KERN_WARNING "pmd %p != %p\n", + pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - fix_to_virt(FIX_BTMAP_BEGIN)); + fix_to_virt(FIX_BTMAP_BEGIN)); printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n", - fix_to_virt(FIX_BTMAP_END)); + fix_to_virt(FIX_BTMAP_END)); printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END); printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n", @@ -304,28 +309,29 @@ void __init early_ioremap_init(void) void __init early_ioremap_clear(void) { - unsigned long *pgd; + pmd_t *pmd; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_clear()\n"); - pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN)); - *pgd = 0; - paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT); + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); + pmd_clear(pmd); + paravirt_release_pt(__pa(pmd) >> PAGE_SHIFT); __flush_tlb_all(); } void __init early_ioremap_reset(void) { enum fixed_addresses idx; - unsigned long *pte, phys, addr; + unsigned long addr, phys; + pte_t *pte; after_paging_init = 1; for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) { addr = fix_to_virt(idx); pte = early_ioremap_pte(addr); - if (*pte & _PAGE_PRESENT) { - phys = *pte & PAGE_MASK; + if (pte_present(*pte)) { + phys = pte_val(*pte) & PAGE_MASK; set_fixmap(idx, phys); } } @@ -334,7 +340,8 @@ void __init early_ioremap_reset(void) static void __init __early_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) { - unsigned long *pte, addr = __fix_to_virt(idx); + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; if (idx >= __end_of_fixed_addresses) { BUG(); @@ -342,9 +349,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, } pte = early_ioremap_pte(addr); if (pgprot_val(flags)) - *pte = (phys & PAGE_MASK) | pgprot_val(flags); + set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); else - *pte = 0; + pte_clear(NULL, addr, pte); __flush_tlb_one(addr); } diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index 984998a30741..5f7257fd589b 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -48,7 +48,6 @@ typedef unsigned long pgprotval_t; typedef unsigned long phys_addr_t; typedef union { pteval_t pte, pte_low; } pte_t; -typedef pte_t boot_pte_t; #endif /* __ASSEMBLY__ */ #endif /* CONFIG_X86_PAE */ diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 80dd438642f6..a842c7222b1e 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -52,10 +52,6 @@ void paging_init(void); #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) -#define TWOLEVEL_PGDIR_SHIFT 22 -#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) -#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) - /* Just any arbitrary offset to the start of the vmalloc VM area: the * current 8MB value just means that there will be a 8MB "hole" after the * physical memory until the kernel virtual memory starts. That means that -- cgit v1.2.3 From b6fbb669c8ef3a112121697ca901c290ccd35eb2 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Sat, 9 Feb 2008 23:24:09 +0100 Subject: x86: fix early_ioremap pagetable ops Some important parts of f6df72e71eba621b2f5c49b3a763116fac748f6e got dropped along the way, reintroduce them. Only affects paravirt guests. 
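[ editor's note: the early_ioremap rewrite above replaces the open-coded "(addr >> 22) & 1023" arithmetic with the standard pgd/pud/pmd/pte accessors. The following is a self-contained userspace model of the two index layouts involved; the constants are illustrative assumptions for the sketch, not kernel definitions: ]

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12

	/* the old helper: two-level, non-PAE layout (10/10/12 split) */
	static unsigned legacy_pgd_index(uint32_t addr)
	{
		return (addr >> 22) & 1023;
	}

	/* the PAE layout behind the standard accessors (2/9/9/12 split) */
	static unsigned pae_pgd_index(uint32_t addr)
	{
		return addr >> 30;
	}

	static unsigned pae_pmd_index(uint32_t addr)
	{
		return (addr >> 21) & 511;
	}

	static unsigned pae_pte_index(uint32_t addr)
	{
		return (addr >> PAGE_SHIFT) & 511;
	}

	int main(void)
	{
		uint32_t addr = 0xffc00000u;	/* a fixmap-like high address */

		printf("legacy: pgd=%u\n", legacy_pgd_index(addr));
		printf("pae:    pgd=%u pmd=%u pte=%u\n",
		       pae_pgd_index(addr), pae_pmd_index(addr),
		       pae_pte_index(addr));
		return 0;
	}

[ the same virtual address lands in PGD slot 1023 of a two-level table, but decomposes into pgd 3 / pmd 510 under PAE, which is why the generic walk is needed once the boot tables are in native format. ]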
Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/ioremap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 1106b7f477bd..a4897a85268a 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -286,7 +286,7 @@ void __init early_ioremap_init(void) pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); - set_pmd(pmd, __pmd(__pa(bm_pte) | _PAGE_TABLE)); + pmd_populate_kernel(&init_mm, pmd, bm_pte); /* * The boot-ioremap range spans multiple pmds, for which @@ -316,7 +316,7 @@ void __init early_ioremap_clear(void) pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); pmd_clear(pmd); - paravirt_release_pt(__pa(pmd) >> PAGE_SHIFT); + paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT); __flush_tlb_all(); } -- cgit v1.2.3 From 76ebd0548df6ee48586e9b80d8fc2f58aa5fb51c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 9 Feb 2008 23:24:09 +0100 Subject: x86: introduce page pool in cpa DEBUG_PAGEALLOC was not possible on 64-bit due to its early-bootup hardcoded reliance on PSE pages, and the unrobustness of the runtime splitup of large pages. The splitup ended in recursive calls to alloc_pages() when a page for a pte split was requested. Avoid the recursion with a preallocated page pool, which is used to split up large mappings and gets refilled in the return path of kernel_map_pages after the split has been done. The size of the page pool is adjusted to the available memory. This part just implements the page pool and the initialization w/o using it yet. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 2 ++ arch/x86/mm/init_64.c | 2 ++ arch/x86/mm/pageattr.c | 82 +++++++++++++++++++++++++++++++++++++++++++- include/asm-x86/cacheflush.h | 2 ++ 4 files changed, 87 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 54aba3cf9efe..8106bba41ecb 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -664,6 +664,8 @@ void __init mem_init(void) if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); + cpa_init(); + /* * Subtle. SMP is doing it's boot stuff late (because it has to * fork idle threads) - but it also needs low mappings for the diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 620d2b6b6bf4..b59fc238151f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -528,6 +528,8 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10); + + cpa_init(); } void free_init_pages(char *what, unsigned long begin, unsigned long end) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index eb2a54415a77..831462c3bc35 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -336,6 +337,77 @@ out_unlock: return do_split; } +static LIST_HEAD(page_pool); +static unsigned long pool_size, pool_pages, pool_low; +static unsigned long pool_used, pool_failed, pool_refill; + +static void cpa_fill_pool(void) +{ + struct page *p; + gfp_t gfp = GFP_KERNEL; + + /* Do not allocate from interrupt context */ + if (in_irq() || irqs_disabled()) + return; + /* + * Check unlocked. It does not matter when we have one more + * page in the pool.
The bit lock avoids recursive pool + * allocations: + */ + if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill)) + return; + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* + * We could do: + * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL; + * but this fails on !PREEMPT kernels + */ + gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; +#endif + + while (pool_pages < pool_size) { + p = alloc_pages(gfp, 0); + if (!p) { + pool_failed++; + break; + } + spin_lock_irq(&pgd_lock); + list_add(&p->lru, &page_pool); + pool_pages++; + spin_unlock_irq(&pgd_lock); + } + clear_bit_unlock(0, &pool_refill); +} + +#define SHIFT_MB (20 - PAGE_SHIFT) +#define ROUND_MB_GB ((1 << 10) - 1) +#define SHIFT_MB_GB 10 +#define POOL_PAGES_PER_GB 16 + +void __init cpa_init(void) +{ + struct sysinfo si; + unsigned long gb; + + si_meminfo(&si); + /* + * Calculate the number of pool pages: + * + * Convert totalram (nr of pages) to MiB and round to the next + * GiB. Shift MiB to Gib and multiply the result by + * POOL_PAGES_PER_GB: + */ + gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; + pool_size = POOL_PAGES_PER_GB * gb; + pool_low = pool_size; + + cpa_fill_pool(); + printk(KERN_DEBUG + "CPA: page pool initialized %lu of %lu pages preallocated\n", + pool_pages, pool_size); +} + static int split_large_page(pte_t *kpte, unsigned long address) { unsigned long flags, pfn, pfninc = 1; @@ -600,7 +672,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, * Check whether we really changed something: */ if (!cpa.flushtlb) - return ret; + goto out; /* * No need to flush, when we did not set any of the caching @@ -619,6 +691,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, else cpa_flush_all(cache); +out: + cpa_fill_pool(); return ret; } @@ -772,6 +846,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable) * but that can deadlock->flush only current cpu: */ __flush_tlb_all(); + + /* + * Try to refill the page pool here. We can do this only after + * the tlb flush. + */ + cpa_fill_pool(); } #endif diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index 8dd8c5e3cc7f..6a22212b4b20 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -44,6 +44,8 @@ int set_memory_np(unsigned long addr, int numpages); void clflush_cache_range(void *addr, unsigned int size); +void cpa_init(void); + #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); #endif -- cgit v1.2.3 From eb5b5f024c40f02e9b0f3801173769a726f170fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 9 Feb 2008 23:24:09 +0100 Subject: x86: cpa, use page pool Switch the split page code to use the page pool. We do this unconditionally to avoid different behaviour with and without DEBUG_PAGEALLOC enabled. 
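[ editor's note: the pool sizing arithmetic in cpa_init() above is easy to misread; here is a standalone userspace demo of the same computation, under the assumption of 4 KiB pages (PAGE_SHIFT == 12): ]

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define SHIFT_MB (20 - PAGE_SHIFT)	/* pages -> MiB */
	#define ROUND_MB_GB ((1 << 10) - 1)	/* round MiB up to the next GiB */
	#define SHIFT_MB_GB 10			/* MiB -> GiB */
	#define POOL_PAGES_PER_GB 16

	static unsigned long pool_size_for(unsigned long totalram_pages)
	{
		unsigned long gb = ((totalram_pages >> SHIFT_MB) + ROUND_MB_GB)
					>> SHIFT_MB_GB;
		return POOL_PAGES_PER_GB * gb;
	}

	int main(void)
	{
		/* 512 MiB of RAM = 131072 pages, rounds up to 1 GiB -> 16 pages */
		printf("512 MiB -> %lu pool pages\n", pool_size_for(131072));
		/* 4 GiB of RAM = 1048576 pages -> 64 pages */
		printf("4 GiB   -> %lu pool pages\n", pool_size_for(1048576));
		return 0;
	}

[ so a machine always reserves at least POOL_PAGES_PER_GB pages, and the pool grows linearly with every additional gigabyte of RAM. ]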
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 831462c3bc35..e5d29a112d00 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -411,20 +411,29 @@ void __init cpa_init(void) static int split_large_page(pte_t *kpte, unsigned long address) { unsigned long flags, pfn, pfninc = 1; - gfp_t gfp_flags = GFP_KERNEL; unsigned int i, level; pte_t *pbase, *tmp; pgprot_t ref_prot; struct page *base; -#ifdef CONFIG_DEBUG_PAGEALLOC - gfp_flags = GFP_ATOMIC | __GFP_NOWARN; -#endif - base = alloc_pages(gfp_flags, 0); - if (!base) + /* + * Get a page from the pool. The pool list is protected by the + * pgd_lock, which we have to take anyway for the split + * operation: + */ + spin_lock_irqsave(&pgd_lock, flags); + if (list_empty(&page_pool)) { + spin_unlock_irqrestore(&pgd_lock, flags); return -ENOMEM; + } + + base = list_first_entry(&page_pool, struct page, lru); + list_del(&base->lru); + pool_pages--; + + if (pool_pages < pool_low) + pool_low = pool_pages; - spin_lock_irqsave(&pgd_lock, flags); /* * Check for races, another CPU might have split this page * up for us already: @@ -469,11 +478,17 @@ static int split_large_page(pte_t *kpte, unsigned long address) base = NULL; out_unlock: + /* + * If we dropped out via the lookup_address check under + * pgd_lock then stick the page back into the pool: + */ + if (base) { + list_add(&base->lru, &page_pool); + pool_pages++; + } else + pool_used++; spin_unlock_irqrestore(&pgd_lock, flags); - if (base) - __free_pages(base, 0); - return 0; } -- cgit v1.2.3 From fac84939609a683503947f41eb93e1917d026263 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 9 Feb 2008 23:24:09 +0100 Subject: x86: cpa, strict range check in try_preserve_large_page() Right now, we check only the first 4k page for static required protections. This does not take overlapping regions into account. So we might end up setting the wrong permissions/protections for other parts of this large page. This can be optimized further, but correctness is the important part. 
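[ editor's note: a userspace sketch of the "check the whole range" logic this patch adds to try_preserve_large_page(). The protection rule below is a toy stand-in, not the kernel's static_protections(): ]

	#include <stdbool.h>
	#include <stdio.h>

	#define PAGE_SIZE 0x1000UL
	#define PROT_RW 0x2UL
	#define PROT_NX 0x4UL

	/* toy rule: addresses in a pretend .rodata window lose RW */
	static unsigned long static_protections(unsigned long prot,
						unsigned long address)
	{
		if (address >= 0x400000UL && address < 0x600000UL)
			prot &= ~PROT_RW;
		return prot;
	}

	static bool can_preserve(unsigned long address, unsigned long numpages,
				 unsigned long new_prot)
	{
		unsigned long addr = address + PAGE_SIZE;
		unsigned long i;

		new_prot = static_protections(new_prot, address);
		for (i = 1; i < numpages; i++, addr += PAGE_SIZE) {
			if (static_protections(new_prot, addr) != new_prot)
				return false;	/* one page differs: must split */
		}
		return true;
	}

	int main(void)
	{
		printf("all inside window:     %d\n",
		       can_preserve(0x400000UL, 16, PROT_RW | PROT_NX));
		printf("straddles window edge: %d\n",
		       can_preserve(0x3ff000UL, 16, PROT_RW | PROT_NX));
		return 0;
	}

[ checking only the first 4k page, as the old code did, would report the second range as preservable even though one of its pages needs different bits. ]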
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e5d29a112d00..440210a2277d 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -253,10 +253,10 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, struct cpa_data *cpa) { - unsigned long nextpage_addr, numpages, pmask, psize, flags; + unsigned long nextpage_addr, numpages, pmask, psize, flags, addr; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot; - int do_split = 1; + int i, do_split = 1; unsigned int level; spin_lock_irqsave(&pgd_lock, flags); @@ -303,6 +303,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); new_prot = static_protections(new_prot, address); + /* + * We need to check the full range, whether + * static_protection() requires a different pgprot for one of + * the pages in the range we try to preserve: + */ + addr = address + PAGE_SIZE; + for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) { + pgprot_t chk_prot = static_protections(new_prot, addr); + + if (pgprot_val(chk_prot) != pgprot_val(new_prot)) + goto out_unlock; + } + /* * If there are no changes, return. maxpages has been updated * above: -- cgit v1.2.3 From 81772fea4110f7ce8083d52503c9c4ddaa50f75b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 10 Feb 2008 23:57:36 +0100 Subject: x86: remove over noisy debug printk pageattr-test.c contains a noisy debug printk that people reported. The condition under which it prints (randomly tapping into a mem_map[] hole and not being able to c_p_a() there) is valid behavior and not interesting to report. Remove it. Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- arch/x86/mm/pageattr-test.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index ed8201600354..75f1b109aae8 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -40,7 +40,6 @@ struct split_state { static int print_split(struct split_state *s) { long i, expected, missed = 0; - int printed = 0; int err = 0; s->lpg = s->gpg = s->spg = s->exec = 0; @@ -53,12 +52,6 @@ static int print_split(struct split_state *s) pte = lookup_address(addr, &level); if (!pte) { - if (!printed) { - dump_pagetable(addr); - printk(KERN_INFO "CPA %lx no pte level %d\n", - addr, level); - printed = 1; - } missed++; i++; continue; -- cgit v1.2.3 From 37cc8d7f963ba2deec29c9b68716944516a3244f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 13 Feb 2008 16:20:35 +0100 Subject: x86/early_ioremap: don't assume we're using swapper_pg_dir At the early stages of boot, before the kernel pagetable has been fully initialized, a Xen kernel will still be running off the Xen-provided pagetables rather than swapper_pg_dir[]. Therefore, readback cr3 to determine the base of the pagetable rather than assuming swapper_pg_dir[]. 
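[ editor's note: a minimal userspace model of the fix described above — derive the page-table root from the current cr3 value instead of a compile-time global. read_cr3() is simulated here; in the kernel it reads the control register and __va() adds PAGE_OFFSET: ]

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t boot_dir[1024];		/* hypervisor-provided root */
	static uint32_t swapper_dir[1024];	/* the kernel's own root */
	static uintptr_t fake_cr3;		/* stands in for %cr3 */

	static uintptr_t read_cr3(void) { return fake_cr3; }

	/* __va() is the identity here */
	static uint32_t *va(uintptr_t pa) { return (uint32_t *)pa; }

	static uint32_t *pagetable_base(void)
	{
		/* don't assume swapper_pg_dir: trust cr3 instead */
		return va(read_cr3());
	}

	int main(void)
	{
		fake_cr3 = (uintptr_t)boot_dir;
		printf("early boot root ok: %d\n", pagetable_base() == boot_dir);

		fake_cr3 = (uintptr_t)swapper_dir;
		printf("final root ok:      %d\n", pagetable_base() == swapper_dir);
		return 0;
	}

[ with the old code, the first lookup would have walked swapper_dir even while cr3 still pointed at the hypervisor-provided tables. ]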
Signed-off-by: Jeremy Fitzhardinge Tested-by: Jody Belka Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index a4897a85268a..9f42d7e9c158 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -265,7 +265,9 @@ static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { - pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)]; + /* Don't assume we're using swapper_pg_dir at this point */ + pgd_t *base = __va(read_cr3()); + pgd_t *pgd = &base[pgd_index(addr)]; pud_t *pud = pud_offset(pgd, addr); pmd_t *pmd = pmd_offset(pud, addr); -- cgit v1.2.3 From 5d3c8b21e22712137db6bbd246d1bdcbe0a09914 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Feb 2008 16:20:35 +0100 Subject: x86: CPA: fix gbpages support in try_preserve_large_page [ mingo@elte.hu: while gbpages cannot be enabled on mainline currently, keep the code uptodate and this fix is easy enough. ] Use correct page sizes and masks for GB pages in try_preserve_large_page() This prevents a boot hang on a GB capable system with CONFIG_DIRECT_GBPAGES enabled. Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 440210a2277d..bd61ed13f9cf 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -275,8 +275,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, break; #ifdef CONFIG_X86_64 case PG_LEVEL_1G: - psize = PMD_PAGE_SIZE; - pmask = PMD_PAGE_MASK; + psize = PUD_PAGE_SIZE; + pmask = PUD_PAGE_MASK; break; #endif default: -- cgit v1.2.3 From cae30f8270005940902c5807146fbaa36875e6e9 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 13 Feb 2008 23:31:31 +0200 Subject: x86: make dump_pagetable() static dump_pagetable() can now become static. Signed-off-by: Adrian Bunk Acked-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 2 +- include/asm-x86/kdebug.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 621afb6343dc..fdc667422df9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -186,7 +186,7 @@ static int bad_address(void *p) } #endif -void dump_pagetable(unsigned long address) +static void dump_pagetable(unsigned long address) { #ifdef CONFIG_X86_32 __typeof__(pte_val(__pte(0))) page; diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index dd442a1632c0..99dcbafa1511 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -31,7 +31,6 @@ extern void show_trace(struct task_struct *t, struct pt_regs *regs, unsigned long *sp, unsigned long bp); extern void __show_regs(struct pt_regs *regs); extern void show_regs(struct pt_regs *regs); -extern void dump_pagetable(unsigned long); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -- cgit v1.2.3 From 7bfeab9af95565e38a97fbcfb631e5b140241187 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 12 Feb 2008 12:12:01 -0800 Subject: x86: include proper prototypes for rodata_test extern should not appear in C files. 
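[ editor's note: a minimal two-file sketch of the pattern this cleanup enforces — the declaration lives in a shared header included by both the definer and every user, so the compiler can catch mismatches; file and symbol names here are illustrative, not the kernel's: ]

	/* rodata_demo.h */
	extern const int rodata_test_data;
	int rodata_test(void);

	/* rodata_demo.c */
	#include <stdio.h>
	#include "rodata_demo.h"

	const int rodata_test_data = 0xC3;

	int rodata_test(void)
	{
		/* with the extern in the header, a mismatched type or
		 * return value becomes a compile error rather than
		 * silent undefined behaviour */
		return rodata_test_data == 0xC3;
	}

	int main(void)
	{
		printf("rodata_test: %d\n", rodata_test());
		return 0;
	}
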
Also, the definitions do not match the prototype currently, not sure what way you want to go with this, I've switched the prototype to return int, but I can see going to the void return as well. Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/test_rodata.c | 2 +- arch/x86/mm/init_32.c | 1 + arch/x86/mm/init_64.c | 1 + include/asm-x86/cacheflush.h | 7 +++++-- 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c index 4c163772000e..c29e235792af 100644 --- a/arch/x86/kernel/test_rodata.c +++ b/arch/x86/kernel/test_rodata.c @@ -10,8 +10,8 @@ * of the License. */ #include +#include #include -extern int rodata_test_data; int rodata_test(void) { diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8106bba41ecb..ee1091a46964 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -47,6 +47,7 @@ #include #include #include +#include unsigned int __VMALLOC_RESERVE = 128 << 20; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b59fc238151f..a4a9cccdd4f2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -45,6 +45,7 @@ #include #include #include +#include const struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index 6a22212b4b20..5396c212d8c0 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -48,12 +48,15 @@ void cpa_init(void); #ifdef CONFIG_DEBUG_RODATA void mark_rodata_ro(void); +extern const int rodata_test_data; #endif + #ifdef CONFIG_DEBUG_RODATA_TEST -void rodata_test(void); +int rodata_test(void); #else -static inline void rodata_test(void) +static inline int rodata_test(void) { + return 0; } #endif -- cgit v1.2.3 From 69b1415e934fc1a796a817e32d84162ae65962f5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Feb 2008 11:04:50 +0100 Subject: x86: cpa: ensure page alignment the cpa API is page aligned - warn about any weird alignments. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bd61ed13f9cf..5d2259468d3c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -688,6 +688,15 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, if (!pgprot_val(mask_set) && !pgprot_val(mask_clr)) return 0; + /* Ensure we are PAGE_SIZE aligned */ + if (addr & ~PAGE_MASK) { + addr &= PAGE_MASK; + /* + * People should not be passing in unaligned addresses: + */ + WARN_ON_ONCE(1); + } + cpa.vaddr = addr; cpa.numpages = numpages; cpa.mask_set = mask_set; -- cgit v1.2.3 From f8d8406bcb58ff70e97b71c35ff5be90c54fc3d0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Feb 2008 14:09:53 +0100 Subject: x86: cpa, fix out of date comment Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 5d2259468d3c..4119379f80ff 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -870,8 +870,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable) return; /* - * The return value is ignored - the calls cannot fail, - * large pages are disabled at boot time: + * The return value is ignored as the calls cannot fail. 
+ Large pages are kept enabled at boot time, and are + split up quickly with DEBUG_PAGEALLOC. If a split + fails here (due to temporary memory shortage), no damage + is done because we just keep the large page intact up + to the next attempt when it will likely be split up: */ if (enable) __set_pages_p(page, numpages); -- cgit v1.2.3
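[ editor's note: to close, a userspace model of the alignment guard added earlier in change_page_attr_set_clr() — unaligned addresses are rounded down to a page boundary and flagged; the kernel uses WARN_ON_ONCE(1) where this demo prints: ]

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PAGE_MASK (~((1UL << PAGE_SHIFT) - 1))

	static unsigned long cpa_align(unsigned long addr)
	{
		if (addr & ~PAGE_MASK) {
			addr &= PAGE_MASK;
			/* people should not pass in unaligned addresses */
			fprintf(stderr, "cpa: unaligned address, rounding down\n");
		}
		return addr;
	}

	int main(void)
	{
		printf("%#lx\n", cpa_align(0x12345678UL)); /* -> 0x12345000 */
		printf("%#lx\n", cpa_align(0x12345000UL)); /* already aligned */
		return 0;
	}
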