diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/mm/amdtopology_64.c (renamed from arch/x86/mm/k8topology_64.c) | 12 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 63 | ||||
-rw-r--r-- | arch/x86/mm/highmem_32.c | 76 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 65 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 1 | ||||
-rw-r--r-- | arch/x86/mm/iomap_32.c | 43 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/error.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 29 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 33 | ||||
-rw-r--r-- | arch/x86/mm/setup_nx.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/srat_32.c | 1 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 10 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 7 |
15 files changed, 190 insertions, 159 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 55543397a8a7..09df2f9a3d69 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -23,7 +23,7 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o -obj-$(CONFIG_K8_NUMA) += k8topology_64.o +obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/amdtopology_64.c index 804a3b6c6e14..51fae9cfdecb 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/amdtopology_64.c @@ -1,8 +1,8 @@ /* - * AMD K8 NUMA support. + * AMD NUMA support. * Discover the memory map and associated nodes. * - * This version reads it directly from the K8 northbridge. + * This version reads it directly from the AMD northbridge. * * Copyright 2002,2003 Andi Kleen, SuSE Labs. */ @@ -57,7 +57,7 @@ static __init void early_get_boot_cpu_id(void) { /* * need to get the APIC ID of the BSP so can use that to - * create apicid_to_node in k8_scan_nodes() + * create apicid_to_node in amd_scan_nodes() */ #ifdef CONFIG_X86_MPPARSE /* @@ -69,7 +69,7 @@ static __init void early_get_boot_cpu_id(void) early_init_lapic_mapping(); } -int __init k8_get_nodes(struct bootnode *physnodes) +int __init amd_get_nodes(struct bootnode *physnodes) { int i; int ret = 0; @@ -82,7 +82,7 @@ int __init k8_get_nodes(struct bootnode *physnodes) return ret; } -int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) +int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) { unsigned long start = PFN_PHYS(start_pfn); unsigned long end = PFN_PHYS(end_pfn); @@ -194,7 +194,7 @@ int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) return 0; } -int __init k8_scan_nodes(void) +int __init amd_scan_nodes(void) { unsigned int bits; unsigned int cores; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 79b0b372d2d0..7d90ceb882a4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -11,6 +11,7 @@ #include <linux/kprobes.h> /* __kprobes, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ #include <linux/perf_event.h> /* perf_sw_event */ +#include <linux/hugetlb.h> /* hstate_index_to_shift */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/pgalloc.h> /* pgd_*(), ... */ @@ -160,15 +161,20 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) static void force_sig_info_fault(int si_signo, int si_code, unsigned long address, - struct task_struct *tsk) + struct task_struct *tsk, int fault) { + unsigned lsb = 0; siginfo_t info; info.si_signo = si_signo; info.si_errno = 0; info.si_code = si_code; info.si_addr = (void __user *)address; - info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0; + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; + info.si_addr_lsb = lsb; force_sig_info(si_signo, &info, tsk); } @@ -722,7 +728,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, tsk->thread.error_code = error_code | (address >= TASK_SIZE); tsk->thread.trap_no = 14; - force_sig_info_fault(SIGSEGV, si_code, address, tsk); + force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); return; } @@ -807,14 +813,14 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, tsk->thread.trap_no = 14; #ifdef CONFIG_MEMORY_FAILURE - if (fault & VM_FAULT_HWPOISON) { + if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { printk(KERN_ERR "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", tsk->comm, tsk->pid, address); code = BUS_MCEERR_AR; } #endif - force_sig_info_fault(SIGBUS, code, address, tsk); + force_sig_info_fault(SIGBUS, code, address, tsk, fault); } static noinline void @@ -824,7 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (fault & VM_FAULT_OOM) { out_of_memory(regs, error_code, address); } else { - if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON)) + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| + VM_FAULT_HWPOISON_LARGE)) do_sigbus(regs, error_code, address, fault); else BUG(); @@ -912,9 +919,9 @@ spurious_fault(unsigned long error_code, unsigned long address) int show_unhandled_signals = 1; static inline int -access_error(unsigned long error_code, int write, struct vm_area_struct *vma) +access_error(unsigned long error_code, struct vm_area_struct *vma) { - if (write) { + if (error_code & PF_WRITE) { /* write, present and write, not present: */ if (unlikely(!(vma->vm_flags & VM_WRITE))) return 1; @@ -949,8 +956,10 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) struct task_struct *tsk; unsigned long address; struct mm_struct *mm; - int write; int fault; + int write = error_code & PF_WRITE; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | + (write ? FAULT_FLAG_WRITE : 0); tsk = current; mm = tsk->mm; @@ -1061,6 +1070,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) bad_area_nosemaphore(regs, error_code, address); return; } +retry: down_read(&mm->mmap_sem); } else { /* @@ -1104,9 +1114,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) * we can handle it.. */ good_area: - write = error_code & PF_WRITE; - - if (unlikely(access_error(error_code, write, vma))) { + if (unlikely(access_error(error_code, vma))) { bad_area_access_error(regs, error_code, address); return; } @@ -1116,21 +1124,34 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault: */ - fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) { mm_fault_error(regs, error_code, address, fault); return; } - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, - regs, address); + /* + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. + */ + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + regs, address); + } + if (fault & VM_FAULT_RETRY) { + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. */ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + goto retry; + } } check_v8086_mode(regs, address, tsk); diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 5e8fa12ef861..b49962662101 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -9,6 +9,7 @@ void *kmap(struct page *page) return page_address(page); return kmap_high(page); } +EXPORT_SYMBOL(kmap); void kunmap(struct page *page) { @@ -18,6 +19,7 @@ void kunmap(struct page *page) return; kunmap_high(page); } +EXPORT_SYMBOL(kunmap); /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because @@ -27,10 +29,10 @@ void kunmap(struct page *page) * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) +void *kmap_atomic_prot(struct page *page, pgprot_t prot) { - enum fixed_addresses idx; unsigned long vaddr; + int idx, type; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ pagefault_disable(); @@ -38,8 +40,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) if (!PageHighMem(page)) return page_address(page); - debug_kmap_atomic(type); - + type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); @@ -47,44 +48,57 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) return (void *)vaddr; } +EXPORT_SYMBOL(kmap_atomic_prot); + +void *__kmap_atomic(struct page *page) +{ + return kmap_atomic_prot(page, kmap_prot); +} +EXPORT_SYMBOL(__kmap_atomic); -void *kmap_atomic(struct page *page, enum km_type type) +/* + * This is the same as kmap_atomic() but can map memory that doesn't + * have a struct page associated with it. + */ +void *kmap_atomic_pfn(unsigned long pfn) { - return kmap_atomic_prot(page, type, kmap_prot); + return kmap_atomic_prot_pfn(pfn, kmap_prot); } +EXPORT_SYMBOL_GPL(kmap_atomic_pfn); -void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - - /* - * Force other mappings to Oops if they'll try to access this pte - * without first remap it. Keeping stale mappings around is a bad idea - * also, in case the page changes cacheability attributes or becomes - * a protected page in a hypervisor. - */ - if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + + if (vaddr >= __fix_to_virt(FIX_KMAP_END) && + vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { + int idx, type; + + type = kmap_atomic_idx(); + idx = type + KM_TYPE_NR * smp_processor_id(); + +#ifdef CONFIG_DEBUG_HIGHMEM + WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); +#endif + /* + * Force other mappings to Oops if they'll try to access this + * pte without first remap it. Keeping stale mappings around + * is a bad idea also, in case the page changes cacheability + * attributes or becomes a protected page in a hypervisor. + */ kpte_clear_flush(kmap_pte-idx, vaddr); - else { + kmap_atomic_idx_pop(); + } #ifdef CONFIG_DEBUG_HIGHMEM + else { BUG_ON(vaddr < PAGE_OFFSET); BUG_ON(vaddr >= (unsigned long)high_memory); -#endif } +#endif pagefault_enable(); } - -/* - * This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) -{ - return kmap_atomic_prot_pfn(pfn, type, kmap_prot); -} -EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ +EXPORT_SYMBOL(__kunmap_atomic); struct page *kmap_atomic_to_page(void *ptr) { @@ -98,12 +112,6 @@ struct page *kmap_atomic_to_page(void *ptr) pte = kmap_pte - (idx - FIX_KMAP_BEGIN); return pte_page(*pte); } - -EXPORT_SYMBOL(kmap); -EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic_notypecheck); -EXPORT_SYMBOL(kmap_atomic_prot); EXPORT_SYMBOL(kmap_atomic_to_page); void __init set_highmem_pages_init(void) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c0e28a13de7d..947f42abe820 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) /* * We just marked the kernel text read only above, now that * we are going to free part of that, we need to make that - * writeable first. + * writeable and non-executable first. */ + set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5d0a6711c282..f89b5bb4e93f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) static inline int is_kernel_text(unsigned long addr) { - if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) + if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) return 1; return 0; } @@ -528,48 +528,6 @@ static void __init pagetable_init(void) permanent_kmaps_init(pgd_base); } -#ifdef CONFIG_ACPI_SLEEP -/* - * ACPI suspend needs this for resume, because things like the intel-agp - * driver might have split up a kernel 4MB mapping. - */ -char swsusp_pg_dir[PAGE_SIZE] - __attribute__ ((aligned(PAGE_SIZE))); - -static inline void save_pg_dir(void) -{ - copy_page(swsusp_pg_dir, swapper_pg_dir); -} -#else /* !CONFIG_ACPI_SLEEP */ -static inline void save_pg_dir(void) -{ -} -#endif /* !CONFIG_ACPI_SLEEP */ - -void zap_low_mappings(bool early) -{ - int i; - - /* - * Zap initial low-memory mappings. - * - * Note that "pgd_clear()" doesn't do it for - * us, because pgd_clear() is a no-op on i386. - */ - for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) { -#ifdef CONFIG_X86_PAE - set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); -#else - set_pgd(swapper_pg_dir+i, __pgd(0)); -#endif - } - - if (early) - __flush_tlb(); - else - flush_tlb_all(); -} - pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); EXPORT_SYMBOL_GPL(__supported_pte_mask); @@ -882,9 +840,6 @@ void __init mem_init(void) if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); - - save_pg_dir(); - zap_low_mappings(true); } #ifdef CONFIG_MEMORY_HOTPLUG @@ -957,6 +912,23 @@ void set_kernel_text_ro(void) set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); } +static void mark_nxdata_nx(void) +{ + /* + * When this called, init has already been executed and released, + * so everything past _etext sould be NX. + */ + unsigned long start = PFN_ALIGN(_etext); + /* + * This comes from is_kernel_text upper limit. Also HPAGE where used: + */ + unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; + + if (__supported_pte_mask & _PAGE_NX) + printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10); + set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT); +} + void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_text); @@ -991,6 +963,7 @@ void mark_rodata_ro(void) printk(KERN_INFO "Testing CPA: write protecting again\n"); set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); #endif + mark_nxdata_nx(); } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 84346200e783..71a59296af80 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -51,7 +51,6 @@ #include <asm/numa.h> #include <asm/cacheflush.h> #include <asm/init.h> -#include <linux/bootmem.h> static int __init parse_direct_gbpages_off(char *arg) { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 72fc70cf6184..7b179b499fa3 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -48,21 +48,20 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) } EXPORT_SYMBOL_GPL(iomap_create_wc); -void -iomap_free(resource_size_t base, unsigned long size) +void iomap_free(resource_size_t base, unsigned long size) { io_free_memtype(base, base + size); } EXPORT_SYMBOL_GPL(iomap_free); -void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { - enum fixed_addresses idx; unsigned long vaddr; + int idx, type; pagefault_disable(); - debug_kmap_atomic(type); + type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); @@ -72,10 +71,10 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) } /* - * Map 'pfn' using fixed map 'type' and protections 'prot' + * Map 'pfn' using protections 'prot' */ void __iomem * -iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { /* * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. @@ -86,24 +85,34 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) prot = PAGE_KERNEL_UC_MINUS; - return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, type, prot); + return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, prot); } EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); void -iounmap_atomic(void __iomem *kvaddr, enum km_type type) +iounmap_atomic(void __iomem *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - /* - * Force other mappings to Oops if they'll try to access this pte - * without first remap it. Keeping stale mappings around is a bad idea - * also, in case the page changes cacheability attributes or becomes - * a protected page in a hypervisor. - */ - if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + if (vaddr >= __fix_to_virt(FIX_KMAP_END) && + vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { + int idx, type; + + type = kmap_atomic_idx(); + idx = type + KM_TYPE_NR * smp_processor_id(); + +#ifdef CONFIG_DEBUG_HIGHMEM + WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); +#endif + /* + * Force other mappings to Oops if they'll try to access this + * pte without first remap it. Keeping stale mappings around + * is a bad idea also, in case the page changes cacheability + * attributes or becomes a protected page in a hypervisor. + */ kpte_clear_flush(kmap_pte-idx, vaddr); + kmap_atomic_idx_pop(); + } pagefault_enable(); } diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c index af3b6c8a436f..704a37cedddb 100644 --- a/arch/x86/mm/kmemcheck/error.c +++ b/arch/x86/mm/kmemcheck/error.c @@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state, e->trace.entries = e->trace_entries; e->trace.max_entries = ARRAY_SIZE(e->trace_entries); e->trace.skip = 0; - save_stack_trace_bp(&e->trace, regs->bp); + save_stack_trace_regs(&e->trace, regs); /* Round address down to nearest 16 bytes */ shadow_copy = kmemcheck_shadow_lookup(address diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 60f498511dd6..7762a517d69d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -178,11 +178,8 @@ static void * __init early_node_mem(int nodeid, unsigned long start, /* extend the search scope */ end = max_pfn_mapped << PAGE_SHIFT; - if (end > (MAX_DMA32_PFN<<PAGE_SHIFT)) - start = MAX_DMA32_PFN<<PAGE_SHIFT; - else - start = MAX_DMA_PFN<<PAGE_SHIFT; - mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align); + start = MAX_DMA_PFN << PAGE_SHIFT; + mem = memblock_find_in_range(start, end, size, align); if (mem != MEMBLOCK_ERROR) return __va(mem); @@ -267,7 +264,7 @@ static struct bootnode physnodes[MAX_NUMNODES] __initdata; static char *cmdline __initdata; static int __init setup_physnodes(unsigned long start, unsigned long end, - int acpi, int k8) + int acpi, int amd) { int nr_nodes = 0; int ret = 0; @@ -277,13 +274,13 @@ static int __init setup_physnodes(unsigned long start, unsigned long end, if (acpi) nr_nodes = acpi_get_nodes(physnodes); #endif -#ifdef CONFIG_K8_NUMA - if (k8) - nr_nodes = k8_get_nodes(physnodes); +#ifdef CONFIG_AMD_NUMA + if (amd) + nr_nodes = amd_get_nodes(physnodes); #endif /* * Basic sanity checking on the physical node map: there may be errors - * if the SRAT or K8 incorrectly reported the topology or the mem= + * if the SRAT or AMD code incorrectly reported the topology or the mem= * kernel parameter is used. */ for (i = 0; i < nr_nodes; i++) { @@ -552,7 +549,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) * numa=fake command-line option. */ static int __init numa_emulation(unsigned long start_pfn, - unsigned long last_pfn, int acpi, int k8) + unsigned long last_pfn, int acpi, int amd) { u64 addr = start_pfn << PAGE_SHIFT; u64 max_addr = last_pfn << PAGE_SHIFT; @@ -560,7 +557,7 @@ static int __init numa_emulation(unsigned long start_pfn, int num_nodes; int i; - num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); + num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd); /* * If the numa=fake command-line contains a 'M' or 'G', it represents * the fixed node size. Otherwise, if it is just a single number N, @@ -605,7 +602,7 @@ static int __init numa_emulation(unsigned long start_pfn, #endif /* CONFIG_NUMA_EMU */ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, - int acpi, int k8) + int acpi, int amd) { int i; @@ -613,7 +610,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, nodes_clear(node_online_map); #ifdef CONFIG_NUMA_EMU - if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) + if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd)) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); @@ -627,8 +624,8 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, nodes_clear(node_online_map); #endif -#ifdef CONFIG_K8_NUMA - if (!numa_off && k8 && !k8_scan_nodes()) +#ifdef CONFIG_AMD_NUMA + if (!numa_off && amd && !amd_scan_nodes()) return; nodes_clear(node_possible_map); nodes_clear(node_online_map); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 532e7933d606..8b830ca14ac4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -13,6 +13,7 @@ #include <linux/pfn.h> #include <linux/percpu.h> #include <linux/gfp.h> +#include <linux/pci.h> #include <asm/e820.h> #include <asm/processor.h> @@ -255,13 +256,16 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, unsigned long pfn) { pgprot_t forbidden = __pgprot(0); + pgprot_t required = __pgprot(0); /* * The BIOS area between 640k and 1Mb needs to be executable for * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. */ - if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) +#ifdef CONFIG_PCI_BIOS + if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_NX; +#endif /* * The kernel text needs to be executable for obvious reasons @@ -278,6 +282,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; + /* + * .data and .bss should always be writable. + */ + if (within(address, (unsigned long)_sdata, (unsigned long)_edata) || + within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop)) + pgprot_val(required) |= _PAGE_RW; #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) /* @@ -317,6 +327,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, #endif prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); + prot = __pgprot(pgprot_val(prot) | pgprot_val(required)); return prot; } @@ -393,7 +404,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, { unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; pte_t new_pte, old_pte, *tmp; - pgprot_t old_prot, new_prot; + pgprot_t old_prot, new_prot, req_prot; int i, do_split = 1; unsigned int level; @@ -438,10 +449,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * We are safe now. Check whether the new pgprot is the same: */ old_pte = *kpte; - old_prot = new_prot = pte_pgprot(old_pte); + old_prot = new_prot = req_prot = pte_pgprot(old_pte); - pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); - pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); + pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); /* * old_pte points to the large page base address. So we need @@ -450,17 +461,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); cpa->pfn = pfn; - new_prot = static_protections(new_prot, address, pfn); + new_prot = static_protections(req_prot, address, pfn); /* * We need to check the full range, whether * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: */ - addr = address + PAGE_SIZE; - pfn++; - for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) { - pgprot_t chk_prot = static_protections(new_prot, addr, pfn); + addr = address & pmask; + pfn = pte_pfn(old_pte); + for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { + pgprot_t chk_prot = static_protections(req_prot, addr, pfn); if (pgprot_val(chk_prot) != pgprot_val(new_prot)) goto out_unlock; @@ -483,7 +494,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * that we limited the number of possible pages already to * the number of pages in the large page. */ - if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { + if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) { /* * The address is aligned and the number of pages * covers the full page. diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index a3250aa34086..410531d3c292 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c @@ -41,7 +41,7 @@ void __init x86_report_nx(void) { if (!cpu_has_nx) { printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " - "missing in CPU or disabled in BIOS!\n"); + "missing in CPU!\n"); } else { #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) if (disable_nx) { diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index a17dffd136c1..f16434568a51 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -92,6 +92,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity) /* mark this node as "seen" in node bitmap */ BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); + /* don't need to check apic_id here, because it is always 8 bits */ apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index a35cb9d8b060..171a0aacb99a 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) } apic_id = pa->apic_id; + if (apic_id >= MAX_LOCAL_APIC) { + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); + return; + } apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; @@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; else apic_id = pa->apic_id; + + if (apic_id >= MAX_LOCAL_APIC) { + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); + return; + } + apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 49358481c733..6acc724d5d8f 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, static void __cpuinit calculate_tlb_offset(void) { - int cpu, node, nr_node_vecs; + int cpu, node, nr_node_vecs, idx = 0; /* * we are changing tlb_vector_offset for each CPU in runtime, but this * will not cause inconsistency, as the write is atomic under X86. we @@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void) nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; for_each_online_node(node) { - int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * + int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs; int cpu_offset = 0; for_each_cpu(cpu, cpumask_of_node(node)) { @@ -248,10 +248,11 @@ static void __cpuinit calculate_tlb_offset(void) cpu_offset++; cpu_offset = cpu_offset % nr_node_vecs; } + idx++; } } -static int tlb_cpuhp_notify(struct notifier_block *n, +static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n, unsigned long action, void *hcpu) { switch (action & 0xf) { |