diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-01-04 09:43:42 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-01-04 09:43:42 +0100 |
commit | bc030d6cb9532877c1c5a3f5e7123344fa24a285 (patch) | |
tree | d223d410b868b80d4c0deec192d354a5d06b201a /arch/x86/mm | |
parent | x86, acpi: Parse all SRAT cpu entries even above the cpu number limitation (diff) | |
parent | Linux 2.6.37-rc8 (diff) | |
download | linux-bc030d6cb9532877c1c5a3f5e7123344fa24a285.tar.xz linux-bc030d6cb9532877c1c5a3f5e7123344fa24a285.zip |
Merge commit 'v2.6.37-rc8' into x86/apic
Conflicts:
arch/x86/include/asm/io_apic.h
Merge reason: move to a fresh -rc, resolve the conflict.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/fault.c | 63 | ||||
-rw-r--r-- | arch/x86/mm/highmem_32.c | 76 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 45 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 1 | ||||
-rw-r--r-- | arch/x86/mm/iomap_32.c | 43 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 7 |
7 files changed, 116 insertions, 126 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 79b0b372d2d0..7d90ceb882a4 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -11,6 +11,7 @@ #include <linux/kprobes.h> /* __kprobes, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ #include <linux/perf_event.h> /* perf_sw_event */ +#include <linux/hugetlb.h> /* hstate_index_to_shift */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/pgalloc.h> /* pgd_*(), ... */ @@ -160,15 +161,20 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) static void force_sig_info_fault(int si_signo, int si_code, unsigned long address, - struct task_struct *tsk) + struct task_struct *tsk, int fault) { + unsigned lsb = 0; siginfo_t info; info.si_signo = si_signo; info.si_errno = 0; info.si_code = si_code; info.si_addr = (void __user *)address; - info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0; + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; + info.si_addr_lsb = lsb; force_sig_info(si_signo, &info, tsk); } @@ -722,7 +728,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, tsk->thread.error_code = error_code | (address >= TASK_SIZE); tsk->thread.trap_no = 14; - force_sig_info_fault(SIGSEGV, si_code, address, tsk); + force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); return; } @@ -807,14 +813,14 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, tsk->thread.trap_no = 14; #ifdef CONFIG_MEMORY_FAILURE - if (fault & VM_FAULT_HWPOISON) { + if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { printk(KERN_ERR "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", tsk->comm, tsk->pid, address); code = BUS_MCEERR_AR; } #endif - force_sig_info_fault(SIGBUS, code, address, tsk); + force_sig_info_fault(SIGBUS, code, address, tsk, fault); } static noinline void @@ -824,7 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (fault & VM_FAULT_OOM) { out_of_memory(regs, error_code, address); } else { - if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON)) + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| + VM_FAULT_HWPOISON_LARGE)) do_sigbus(regs, error_code, address, fault); else BUG(); @@ -912,9 +919,9 @@ spurious_fault(unsigned long error_code, unsigned long address) int show_unhandled_signals = 1; static inline int -access_error(unsigned long error_code, int write, struct vm_area_struct *vma) +access_error(unsigned long error_code, struct vm_area_struct *vma) { - if (write) { + if (error_code & PF_WRITE) { /* write, present and write, not present: */ if (unlikely(!(vma->vm_flags & VM_WRITE))) return 1; @@ -949,8 +956,10 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) struct task_struct *tsk; unsigned long address; struct mm_struct *mm; - int write; int fault; + int write = error_code & PF_WRITE; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | + (write ? FAULT_FLAG_WRITE : 0); tsk = current; mm = tsk->mm; @@ -1061,6 +1070,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) bad_area_nosemaphore(regs, error_code, address); return; } +retry: down_read(&mm->mmap_sem); } else { /* @@ -1104,9 +1114,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) * we can handle it.. */ good_area: - write = error_code & PF_WRITE; - - if (unlikely(access_error(error_code, write, vma))) { + if (unlikely(access_error(error_code, vma))) { bad_area_access_error(regs, error_code, address); return; } @@ -1116,21 +1124,34 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault: */ - fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) { mm_fault_error(regs, error_code, address, fault); return; } - if (fault & VM_FAULT_MAJOR) { - tsk->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, - regs, address); - } else { - tsk->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, - regs, address); + /* + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. + */ + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + regs, address); + } + if (fault & VM_FAULT_RETRY) { + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. */ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + goto retry; + } } check_v8086_mode(regs, address, tsk); diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 5e8fa12ef861..b49962662101 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -9,6 +9,7 @@ void *kmap(struct page *page) return page_address(page); return kmap_high(page); } +EXPORT_SYMBOL(kmap); void kunmap(struct page *page) { @@ -18,6 +19,7 @@ void kunmap(struct page *page) return; kunmap_high(page); } +EXPORT_SYMBOL(kunmap); /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because @@ -27,10 +29,10 @@ void kunmap(struct page *page) * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) +void *kmap_atomic_prot(struct page *page, pgprot_t prot) { - enum fixed_addresses idx; unsigned long vaddr; + int idx, type; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ pagefault_disable(); @@ -38,8 +40,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) if (!PageHighMem(page)) return page_address(page); - debug_kmap_atomic(type); - + type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); @@ -47,44 +48,57 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) return (void *)vaddr; } +EXPORT_SYMBOL(kmap_atomic_prot); + +void *__kmap_atomic(struct page *page) +{ + return kmap_atomic_prot(page, kmap_prot); +} +EXPORT_SYMBOL(__kmap_atomic); -void *kmap_atomic(struct page *page, enum km_type type) +/* + * This is the same as kmap_atomic() but can map memory that doesn't + * have a struct page associated with it. + */ +void *kmap_atomic_pfn(unsigned long pfn) { - return kmap_atomic_prot(page, type, kmap_prot); + return kmap_atomic_prot_pfn(pfn, kmap_prot); } +EXPORT_SYMBOL_GPL(kmap_atomic_pfn); -void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - - /* - * Force other mappings to Oops if they'll try to access this pte - * without first remap it. Keeping stale mappings around is a bad idea - * also, in case the page changes cacheability attributes or becomes - * a protected page in a hypervisor. - */ - if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + + if (vaddr >= __fix_to_virt(FIX_KMAP_END) && + vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { + int idx, type; + + type = kmap_atomic_idx(); + idx = type + KM_TYPE_NR * smp_processor_id(); + +#ifdef CONFIG_DEBUG_HIGHMEM + WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); +#endif + /* + * Force other mappings to Oops if they'll try to access this + * pte without first remap it. Keeping stale mappings around + * is a bad idea also, in case the page changes cacheability + * attributes or becomes a protected page in a hypervisor. + */ kpte_clear_flush(kmap_pte-idx, vaddr); - else { + kmap_atomic_idx_pop(); + } #ifdef CONFIG_DEBUG_HIGHMEM + else { BUG_ON(vaddr < PAGE_OFFSET); BUG_ON(vaddr >= (unsigned long)high_memory); -#endif } +#endif pagefault_enable(); } - -/* - * This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) -{ - return kmap_atomic_prot_pfn(pfn, type, kmap_prot); -} -EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ +EXPORT_SYMBOL(__kunmap_atomic); struct page *kmap_atomic_to_page(void *ptr) { @@ -98,12 +112,6 @@ struct page *kmap_atomic_to_page(void *ptr) pte = kmap_pte - (idx - FIX_KMAP_BEGIN); return pte_page(*pte); } - -EXPORT_SYMBOL(kmap); -EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic_notypecheck); -EXPORT_SYMBOL(kmap_atomic_prot); EXPORT_SYMBOL(kmap_atomic_to_page); void __init set_highmem_pages_init(void) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5d0a6711c282..0e969f9f401b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -528,48 +528,6 @@ static void __init pagetable_init(void) permanent_kmaps_init(pgd_base); } -#ifdef CONFIG_ACPI_SLEEP -/* - * ACPI suspend needs this for resume, because things like the intel-agp - * driver might have split up a kernel 4MB mapping. - */ -char swsusp_pg_dir[PAGE_SIZE] - __attribute__ ((aligned(PAGE_SIZE))); - -static inline void save_pg_dir(void) -{ - copy_page(swsusp_pg_dir, swapper_pg_dir); -} -#else /* !CONFIG_ACPI_SLEEP */ -static inline void save_pg_dir(void) -{ -} -#endif /* !CONFIG_ACPI_SLEEP */ - -void zap_low_mappings(bool early) -{ - int i; - - /* - * Zap initial low-memory mappings. - * - * Note that "pgd_clear()" doesn't do it for - * us, because pgd_clear() is a no-op on i386. - */ - for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) { -#ifdef CONFIG_X86_PAE - set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); -#else - set_pgd(swapper_pg_dir+i, __pgd(0)); -#endif - } - - if (early) - __flush_tlb(); - else - flush_tlb_all(); -} - pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); EXPORT_SYMBOL_GPL(__supported_pte_mask); @@ -882,9 +840,6 @@ void __init mem_init(void) if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); - - save_pg_dir(); - zap_low_mappings(true); } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 84346200e783..71a59296af80 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -51,7 +51,6 @@ #include <asm/numa.h> #include <asm/cacheflush.h> #include <asm/init.h> -#include <linux/bootmem.h> static int __init parse_direct_gbpages_off(char *arg) { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 72fc70cf6184..7b179b499fa3 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -48,21 +48,20 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) } EXPORT_SYMBOL_GPL(iomap_create_wc); -void -iomap_free(resource_size_t base, unsigned long size) +void iomap_free(resource_size_t base, unsigned long size) { io_free_memtype(base, base + size); } EXPORT_SYMBOL_GPL(iomap_free); -void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { - enum fixed_addresses idx; unsigned long vaddr; + int idx, type; pagefault_disable(); - debug_kmap_atomic(type); + type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); @@ -72,10 +71,10 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) } /* - * Map 'pfn' using fixed map 'type' and protections 'prot' + * Map 'pfn' using protections 'prot' */ void __iomem * -iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { /* * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. @@ -86,24 +85,34 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) prot = PAGE_KERNEL_UC_MINUS; - return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, type, prot); + return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, prot); } EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); void -iounmap_atomic(void __iomem *kvaddr, enum km_type type) +iounmap_atomic(void __iomem *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - /* - * Force other mappings to Oops if they'll try to access this pte - * without first remap it. Keeping stale mappings around is a bad idea - * also, in case the page changes cacheability attributes or becomes - * a protected page in a hypervisor. - */ - if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + if (vaddr >= __fix_to_virt(FIX_KMAP_END) && + vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { + int idx, type; + + type = kmap_atomic_idx(); + idx = type + KM_TYPE_NR * smp_processor_id(); + +#ifdef CONFIG_DEBUG_HIGHMEM + WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); +#endif + /* + * Force other mappings to Oops if they'll try to access this + * pte without first remap it. Keeping stale mappings around + * is a bad idea also, in case the page changes cacheability + * attributes or becomes a protected page in a hypervisor. + */ kpte_clear_flush(kmap_pte-idx, vaddr); + kmap_atomic_idx_pop(); + } pagefault_enable(); } diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 60f498511dd6..7ffc9b727efd 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -178,11 +178,8 @@ static void * __init early_node_mem(int nodeid, unsigned long start, /* extend the search scope */ end = max_pfn_mapped << PAGE_SHIFT; - if (end > (MAX_DMA32_PFN<<PAGE_SHIFT)) - start = MAX_DMA32_PFN<<PAGE_SHIFT; - else - start = MAX_DMA_PFN<<PAGE_SHIFT; - mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align); + start = MAX_DMA_PFN << PAGE_SHIFT; + mem = memblock_find_in_range(start, end, size, align); if (mem != MEMBLOCK_ERROR) return __va(mem); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 49358481c733..6acc724d5d8f 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -223,7 +223,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, static void __cpuinit calculate_tlb_offset(void) { - int cpu, node, nr_node_vecs; + int cpu, node, nr_node_vecs, idx = 0; /* * we are changing tlb_vector_offset for each CPU in runtime, but this * will not cause inconsistency, as the write is atomic under X86. we @@ -239,7 +239,7 @@ static void __cpuinit calculate_tlb_offset(void) nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes; for_each_online_node(node) { - int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) * + int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs; int cpu_offset = 0; for_each_cpu(cpu, cpumask_of_node(node)) { @@ -248,10 +248,11 @@ static void __cpuinit calculate_tlb_offset(void) cpu_offset++; cpu_offset = cpu_offset % nr_node_vecs; } + idx++; } } -static int tlb_cpuhp_notify(struct notifier_block *n, +static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n, unsigned long action, void *hcpu) { switch (action & 0xf) { |