diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 6 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 70 | ||||
-rw-r--r-- | arch/x86/mm/highmem_32.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 18 | ||||
-rw-r--r-- | arch/x86/mm/init_32.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 13 | ||||
-rw-r--r-- | arch/x86/mm/iomap_32.c | 27 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 90 | ||||
-rw-r--r-- | arch/x86/mm/kmemcheck/kmemcheck.c | 14 | ||||
-rw-r--r-- | arch/x86/mm/mmap.c | 17 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 99 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 358 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/physaddr.c | 70 | ||||
-rw-r--r-- | arch/x86/mm/physaddr.h | 10 | ||||
-rw-r--r-- | arch/x86/mm/srat_32.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 21 |
18 files changed, 534 insertions, 306 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index eefdeee8a871..9b5a9f59a478 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,5 +1,9 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o gup.o + pat.o pgtable.o physaddr.o gup.o + +# Make sure __phys_addr has no stackprotector +nostackp := $(call cc-option, -fno-stack-protector) +CFLAGS_physaddr.o := $(nostackp) obj-$(CONFIG_SMP) += tlb.o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 78a5fff857be..82728f2c6d55 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -10,7 +10,7 @@ #include <linux/bootmem.h> /* max_low_pfn */ #include <linux/kprobes.h> /* __kprobes, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ -#include <linux/perf_counter.h> /* perf_swcounter_event */ +#include <linux/perf_event.h> /* perf_sw_event */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/pgalloc.h> /* pgd_*(), ... */ @@ -285,26 +285,25 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address, tsk->thread.screen_bitmap |= 1 << bit; } -static void dump_pagetable(unsigned long address) +static bool low_pfn(unsigned long pfn) { - __typeof__(pte_val(__pte(0))) page; + return pfn < max_low_pfn; +} - page = read_cr3(); - page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; +static void dump_pagetable(unsigned long address) +{ + pgd_t *base = __va(read_cr3()); + pgd_t *pgd = &base[pgd_index(address)]; + pmd_t *pmd; + pte_t *pte; #ifdef CONFIG_X86_PAE - printk("*pdpt = %016Lx ", page); - if ((page >> PAGE_SHIFT) < max_low_pfn - && page & _PAGE_PRESENT) { - page &= PAGE_MASK; - page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) - & (PTRS_PER_PMD - 1)]; - printk(KERN_CONT "*pde = %016Lx ", page); - page &= ~_PAGE_NX; - } -#else - printk("*pde = %08lx ", page); + printk("*pdpt = %016Lx ", pgd_val(*pgd)); + if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd)) + goto out; #endif + pmd = pmd_offset(pud_offset(pgd, address), address); + printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd)); /* * We must not directly access the pte in the highpte @@ -312,16 +311,12 @@ static void dump_pagetable(unsigned long address) * And let's rather not kmap-atomic the pte, just in case * it's allocated already: */ - if ((page >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT) - && !(page & _PAGE_PSE)) { - - page &= PAGE_MASK; - page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) - & (PTRS_PER_PTE - 1)]; - printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); - } + if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd)) + goto out; + pte = pte_offset_kernel(pmd, address); + printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); +out: printk("\n"); } @@ -426,10 +421,11 @@ static noinline int vmalloc_fault(unsigned long address) } static const char errata93_warning[] = -KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" -KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" -KERN_ERR "******* Please consider a BIOS update.\n" -KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; +KERN_ERR +"******* Your BIOS seems to not contain a fix for K8 errata #93\n" +"******* Working around it, but it may cause SEGVs or burn power.\n" +"******* Please consider a BIOS update.\n" +"******* Disabling USB legacy in the BIOS may also help.\n"; /* * No vm86 mode in 64-bit mode: @@ -449,16 +445,12 @@ static int bad_address(void *p) static void dump_pagetable(unsigned long address) { - pgd_t *pgd; + pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK); + pgd_t *pgd = base + pgd_index(address); pud_t *pud; pmd_t *pmd; pte_t *pte; - pgd = (pgd_t *)read_cr3(); - - pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); - - pgd += pgd_index(address); if (bad_address(pgd)) goto bad; @@ -696,7 +688,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, if (!printk_ratelimit()) return; - printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", + printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx", task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, task_pid_nr(tsk), address, (void *)regs->ip, (void *)regs->sp, error_code); @@ -1025,7 +1017,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -1122,11 +1114,11 @@ good_area: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 58f621e81919..63a6ba66cbe0 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -24,7 +24,7 @@ void kunmap(struct page *page) * no global lock is needed and because the kmap code must perform a global TLB * invalidation when the kmap pool wraps. * - * However when holding an atomic kmap is is not legal to sleep, so atomic + * However when holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) @@ -103,6 +103,8 @@ EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); EXPORT_SYMBOL(kunmap_atomic); +EXPORT_SYMBOL(kmap_atomic_prot); +EXPORT_SYMBOL(kmap_atomic_to_page); void __init set_highmem_pages_init(void) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f53b57e4086f..0607119cef94 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -12,6 +12,7 @@ #include <asm/system.h> #include <asm/tlbflush.h> #include <asm/tlb.h> +#include <asm/proto.h> DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -177,20 +178,6 @@ static int __meminit save_mr(struct map_range *mr, int nr_range, return nr_range; } -#ifdef CONFIG_X86_64 -static void __init init_gbpages(void) -{ - if (direct_gbpages && cpu_has_gbpages) - printk(KERN_INFO "Using GB pages for direct mapping\n"); - else - direct_gbpages = 0; -} -#else -static inline void init_gbpages(void) -{ -} -#endif - /* * Setup the direct mapping of the physical memory at PAGE_OFFSET. * This runs before bootmem is initialized and gets pages directly from @@ -210,9 +197,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); - if (!after_bootmem) - init_gbpages(); - #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) /* * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 3cd7711bb949..b49b4f67453d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -84,7 +84,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) #ifdef CONFIG_X86_PAE if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { if (after_bootmem) - pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE); else pmd_table = (pmd_t *)alloc_low_page(); paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); @@ -116,7 +116,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) #endif if (!page_table) page_table = - (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); + (pte_t *)alloc_bootmem_pages(PAGE_SIZE); } else page_table = (pte_t *)alloc_low_page(); @@ -892,7 +892,7 @@ void __init mem_init(void) printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, " "%dk reserved, %dk data, %dk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + nr_free_pages() << (PAGE_SHIFT-10), num_physpages << (PAGE_SHIFT-10), codesize >> 10, reservedpages << (PAGE_SHIFT-10), diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index c4378f4fd4a5..810bd31e7f5f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -598,6 +598,15 @@ void __init paging_init(void) sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); + + /* + * clear the default setting with node 0 + * note: don't use nodes_clear here, that is really clearing when + * numa support is not compiled in, and later node_set_state + * will not set it back. + */ + node_clear_state(0, N_NORMAL_MEMORY); + free_area_init_nodes(max_zone_pfns); } @@ -678,7 +687,7 @@ void __init mem_init(void) printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + nr_free_pages() << (PAGE_SHIFT-10), max_pfn << (PAGE_SHIFT-10), codesize >> 10, absent_pages << (PAGE_SHIFT-10), @@ -787,7 +796,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, return ret; #else - reserve_bootmem(phys, len, BOOTMEM_DEFAULT); + reserve_bootmem(phys, len, flags); #endif if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84ca121e..84e236ce76ba 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -21,7 +21,7 @@ #include <linux/module.h> #include <linux/highmem.h> -int is_io_mapping_possible(resource_size_t base, unsigned long size) +static int is_io_mapping_possible(resource_size_t base, unsigned long size) { #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) /* There is no way to map greater than 1 << 32 address without PAE */ @@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) #endif return 1; } -EXPORT_SYMBOL_GPL(is_io_mapping_possible); + +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) +{ + unsigned long flag = _PAGE_CACHE_WC; + int ret; + + if (!is_io_mapping_possible(base, size)) + return -EINVAL; + + ret = io_reserve_memtype(base, base + size, &flag); + if (ret) + return ret; + + *prot = __pgprot(__PAGE_KERNEL | flag); + return 0; +} +EXPORT_SYMBOL_GPL(iomap_create_wc); + +void +iomap_free(resource_size_t base, unsigned long size) +{ + io_free_memtype(base, base + size); +} +EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 8a450930834f..334e63ca7b2b 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -22,77 +22,7 @@ #include <asm/pgalloc.h> #include <asm/pat.h> -static inline int phys_addr_valid(resource_size_t addr) -{ -#ifdef CONFIG_PHYS_ADDR_T_64BIT - return !(addr >> boot_cpu_data.x86_phys_bits); -#else - return 1; -#endif -} - -#ifdef CONFIG_X86_64 - -unsigned long __phys_addr(unsigned long x) -{ - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); - x += phys_base; - } else { - VIRTUAL_BUG_ON(x < PAGE_OFFSET); - x -= PAGE_OFFSET; - VIRTUAL_BUG_ON(!phys_addr_valid(x)); - } - return x; -} -EXPORT_SYMBOL(__phys_addr); - -bool __virt_addr_valid(unsigned long x) -{ - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - if (x >= KERNEL_IMAGE_SIZE) - return false; - x += phys_base; - } else { - if (x < PAGE_OFFSET) - return false; - x -= PAGE_OFFSET; - if (!phys_addr_valid(x)) - return false; - } - - return pfn_valid(x >> PAGE_SHIFT); -} -EXPORT_SYMBOL(__virt_addr_valid); - -#else - -#ifdef CONFIG_DEBUG_VIRTUAL -unsigned long __phys_addr(unsigned long x) -{ - /* VMALLOC_* aren't constants */ - VIRTUAL_BUG_ON(x < PAGE_OFFSET); - VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); - return x - PAGE_OFFSET; -} -EXPORT_SYMBOL(__phys_addr); -#endif - -bool __virt_addr_valid(unsigned long x) -{ - if (x < PAGE_OFFSET) - return false; - if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) - return false; - if (x >= FIXADDR_START) - return false; - return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); -} -EXPORT_SYMBOL(__virt_addr_valid); - -#endif +#include "physaddr.h" int page_is_ram(unsigned long pagenr) { @@ -228,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, retval = reserve_memtype(phys_addr, (u64)phys_addr + size, prot_val, &new_prot_val); if (retval) { - pr_debug("Warning: reserve_memtype returned %d\n", retval); + printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); return NULL; } if (prot_val != new_prot_val) { - /* - * Do not fallback to certain memory types with certain - * requested type: - * - request is uc-, return cannot be write-back - * - request is uc-, return cannot be write-combine - * - request is write-combine, return cannot be write-back - */ - if ((prot_val == _PAGE_CACHE_UC_MINUS && - (new_prot_val == _PAGE_CACHE_WB || - new_prot_val == _PAGE_CACHE_WC)) || - (prot_val == _PAGE_CACHE_WC && - new_prot_val == _PAGE_CACHE_WB)) { - pr_debug( + if (!is_new_memtype_allowed(phys_addr, size, + prot_val, new_prot_val)) { + printk(KERN_ERR "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", (unsigned long long)phys_addr, (unsigned long long)(phys_addr + size), diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index 5b99004fb4be..8cc183344140 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -328,6 +328,20 @@ static void kmemcheck_read_strict(struct pt_regs *regs, kmemcheck_shadow_set(shadow, size); } +bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) +{ + enum kmemcheck_shadow status; + void *shadow; + + shadow = kmemcheck_shadow_lookup(addr); + if (!shadow) + return true; + + status = kmemcheck_shadow_test(shadow, size); + + return status == KMEMCHECK_SHADOW_INITIALIZED; +} + /* Access may cross page boundary */ static void kmemcheck_read(struct pt_regs *regs, unsigned long addr, unsigned int size) diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 165829600566..c8191defc38a 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -29,13 +29,26 @@ #include <linux/random.h> #include <linux/limits.h> #include <linux/sched.h> +#include <asm/elf.h> + +static unsigned int stack_maxrandom_size(void) +{ + unsigned int max = 0; + if ((current->flags & PF_RANDOMIZE) && + !(current->personality & ADDR_NO_RANDOMIZE)) { + max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; + } + + return max; +} + /* * Top of mmap area (just below the process stack). * - * Leave an at least ~128 MB hole. + * Leave an at least ~128 MB hole with possible stack randomization. */ -#define MIN_GAP (128*1024*1024) +#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size()) #define MAX_GAP (TASK_SIZE/6*5) /* diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3cfe9ced8a4c..24952fdc7e40 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -11,6 +11,8 @@ #include <linux/interrupt.h> #include <linux/seq_file.h> #include <linux/debugfs.h> +#include <linux/pfn.h> +#include <linux/percpu.h> #include <asm/e820.h> #include <asm/processor.h> @@ -590,9 +592,12 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) unsigned int level; pte_t *kpte, old_pte; - if (cpa->flags & CPA_PAGES_ARRAY) - address = (unsigned long)page_address(cpa->pages[cpa->curpage]); - else if (cpa->flags & CPA_ARRAY) + if (cpa->flags & CPA_PAGES_ARRAY) { + struct page *page = cpa->pages[cpa->curpage]; + if (unlikely(PageHighMem(page))) + return 0; + address = (unsigned long)page_address(page); + } else if (cpa->flags & CPA_ARRAY) address = cpa->vaddr[cpa->curpage]; else address = *cpa->vaddr; @@ -681,8 +686,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias); static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; - int ret = 0; - unsigned long temp_cpa_vaddr, vaddr; + unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); + unsigned long vaddr; + int ret; if (cpa->pfn >= max_pfn_mapped) return 0; @@ -695,9 +701,12 @@ static int cpa_process_alias(struct cpa_data *cpa) * No need to redo, when the primary call touched the direct * mapping already: */ - if (cpa->flags & CPA_PAGES_ARRAY) - vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]); - else if (cpa->flags & CPA_ARRAY) + if (cpa->flags & CPA_PAGES_ARRAY) { + struct page *page = cpa->pages[cpa->curpage]; + if (unlikely(PageHighMem(page))) + return 0; + vaddr = (unsigned long)page_address(page); + } else if (cpa->flags & CPA_ARRAY) vaddr = cpa->vaddr[cpa->curpage]; else vaddr = *cpa->vaddr; @@ -706,42 +715,37 @@ static int cpa_process_alias(struct cpa_data *cpa) PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) { alias_cpa = *cpa; - temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); - alias_cpa.vaddr = &temp_cpa_vaddr; + alias_cpa.vaddr = &laddr; alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - ret = __change_page_attr_set_clr(&alias_cpa, 0); + if (ret) + return ret; } #ifdef CONFIG_X86_64 - if (ret) - return ret; /* - * No need to redo, when the primary call touched the high - * mapping already: - */ - if (within(vaddr, (unsigned long) _text, _brk_end)) - return 0; - - /* - * If the physical address is inside the kernel map, we need + * If the primary call didn't touch the high mapping already + * and the physical address is inside the kernel map, we need * to touch the high mapped kernel as well: */ - if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) - return 0; - - alias_cpa = *cpa; - temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; - alias_cpa.vaddr = &temp_cpa_vaddr; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + if (!within(vaddr, (unsigned long)_text, _brk_end) && + within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) { + unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + + __START_KERNEL_map - phys_base; + alias_cpa = *cpa; + alias_cpa.vaddr = &temp_cpa_vaddr; + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - /* - * The high mapping range is imprecise, so ignore the return value. - */ - __change_page_attr_set_clr(&alias_cpa, 0); + /* + * The high mapping range is imprecise, so ignore the + * return value. + */ + __change_page_attr_set_clr(&alias_cpa, 0); + } #endif - return ret; + + return 0; } static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) @@ -801,6 +805,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, { struct cpa_data cpa; int ret, cache, checkalias; + unsigned long baddr = 0; /* * Check, if we are requested to change a not supported @@ -832,6 +837,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, */ WARN_ON_ONCE(1); } + /* + * Save address for cache flush. *addr is modified in the call + * to __change_page_attr_set_clr() below. + */ + baddr = *addr; } /* Must avoid aliasing mappings in the highmem code */ @@ -879,7 +889,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cpa_flush_array(addr, numpages, cache, cpa.flags, pages); } else - cpa_flush_range(*addr, numpages, cache); + cpa_flush_range(baddr, numpages, cache); } else cpa_flush_all(cache); @@ -982,12 +992,15 @@ EXPORT_SYMBOL(set_memory_array_uc); int _set_memory_wc(unsigned long addr, int numpages) { int ret; + unsigned long addr_copy = addr; + ret = change_page_attr_set(&addr, numpages, __pgprot(_PAGE_CACHE_UC_MINUS), 0); - if (!ret) { - ret = change_page_attr_set(&addr, numpages, - __pgprot(_PAGE_CACHE_WC), 0); + ret = change_page_attr_set_clr(&addr_copy, numpages, + __pgprot(_PAGE_CACHE_WC), + __pgprot(_PAGE_CACHE_MASK), + 0, 0, NULL); } return ret; } @@ -1104,7 +1117,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray) int free_idx; for (i = 0; i < addrinarray; i++) { - start = (unsigned long)page_address(pages[i]); + if (PageHighMem(pages[i])) + continue; + start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) goto err_out; @@ -1117,7 +1132,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray) err_out: free_idx = i; for (i = 0; i < free_idx; i++) { - start = (unsigned long)page_address(pages[i]); + if (PageHighMem(pages[i])) + continue; + start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; free_memtype(start, end); } @@ -1146,7 +1163,9 @@ int set_pages_array_wb(struct page **pages, int addrinarray) return retval; for (i = 0; i < addrinarray; i++) { - start = (unsigned long)page_address(pages[i]); + if (PageHighMem(pages[i])) + continue; + start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; free_memtype(start, end); } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e6718bb28065..7257cf3decf9 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -15,6 +15,7 @@ #include <linux/gfp.h> #include <linux/mm.h> #include <linux/fs.h> +#include <linux/rbtree.h> #include <asm/cacheflush.h> #include <asm/processor.h> @@ -148,11 +149,10 @@ static char *cattr_name(unsigned long flags) * areas). All the aliases have the same cache attributes of course. * Zero attributes are represented as holes. * - * Currently the data structure is a list because the number of mappings - * are expected to be relatively small. If this should be a problem - * it could be changed to a rbtree or similar. + * The data structure is a list that is also organized as an rbtree + * sorted on the start address of memtype range. * - * memtype_lock protects the whole list. + * memtype_lock protects both the linear list and rbtree. */ struct memtype { @@ -160,11 +160,53 @@ struct memtype { u64 end; unsigned long type; struct list_head nd; + struct rb_node rb; }; +static struct rb_root memtype_rbroot = RB_ROOT; static LIST_HEAD(memtype_list); static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ +static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) +{ + struct rb_node *node = root->rb_node; + struct memtype *last_lower = NULL; + + while (node) { + struct memtype *data = container_of(node, struct memtype, rb); + + if (data->start < start) { + last_lower = data; + node = node->rb_right; + } else if (data->start > start) { + node = node->rb_left; + } else + return data; + } + + /* Will return NULL if there is no entry with its start <= start */ + return last_lower; +} + +static void memtype_rb_insert(struct rb_root *root, struct memtype *data) +{ + struct rb_node **new = &(root->rb_node); + struct rb_node *parent = NULL; + + while (*new) { + struct memtype *this = container_of(*new, struct memtype, rb); + + parent = *new; + if (data->start <= this->start) + new = &((*new)->rb_left); + else if (data->start > this->start) + new = &((*new)->rb_right); + } + + rb_link_node(&data->rb, parent, new); + rb_insert_color(&data->rb, root); +} + /* * Does intersection of PAT memory type and MTRR memory type and returns * the resulting memory type as PAT understands it. @@ -218,9 +260,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) return -EBUSY; } -static struct memtype *cached_entry; -static u64 cached_start; - static int pat_pagerange_is_ram(unsigned long start, unsigned long end) { int ram_page = 0, not_rampage = 0; @@ -249,63 +288,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) } /* - * For RAM pages, mark the pages as non WB memory type using - * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or - * set_memory_wc() on a RAM page at a time before marking it as WB again. - * This is ok, because only one driver will be owning the page and - * doing set_memory_*() calls. + * For RAM pages, we use page flags to mark the pages with appropriate type. + * Here we do two pass: + * - Find the memtype of all the pages in the range, look for any conflicts + * - In case of no conflicts, set the new memtype for pages in the range * - * For now, we use PageNonWB to track that the RAM page is being mapped - * as non WB. In future, we will have to use one more flag - * (or some other mechanism in page_struct) to distinguish between - * UC and WC mapping. + * Caller must hold memtype_lock for atomicity. */ static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, unsigned long *new_type) { struct page *page; - u64 pfn, end_pfn; + u64 pfn; + + if (req_type == _PAGE_CACHE_UC) { + /* We do not support strong UC */ + WARN_ON_ONCE(1); + req_type = _PAGE_CACHE_UC_MINUS; + } for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { - page = pfn_to_page(pfn); - if (page_mapped(page) || PageNonWB(page)) - goto out; + unsigned long type; - SetPageNonWB(page); + page = pfn_to_page(pfn); + type = get_page_memtype(page); + if (type != -1) { + printk(KERN_INFO "reserve_ram_pages_type failed " + "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", + start, end, type, req_type); + if (new_type) + *new_type = type; + + return -EBUSY; + } } - return 0; -out: - end_pfn = pfn; - for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { + if (new_type) + *new_type = req_type; + + for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { page = pfn_to_page(pfn); - ClearPageNonWB(page); + set_page_memtype(page, req_type); } - - return -EINVAL; + return 0; } static int free_ram_pages_type(u64 start, u64 end) { struct page *page; - u64 pfn, end_pfn; + u64 pfn; for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { page = pfn_to_page(pfn); - if (page_mapped(page) || !PageNonWB(page)) - goto out; - - ClearPageNonWB(page); + set_page_memtype(page, -1); } return 0; - -out: - end_pfn = pfn; - for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { - page = pfn_to_page(pfn); - SetPageNonWB(page); - } - return -EINVAL; } /* @@ -339,6 +376,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_type) { if (req_type == -1) *new_type = _PAGE_CACHE_WB; + else if (req_type == _PAGE_CACHE_WC) + *new_type = _PAGE_CACHE_UC_MINUS; else *new_type = req_type & _PAGE_CACHE_MASK; } @@ -364,11 +403,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, *new_type = actual_type; is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) - return reserve_ram_pages_type(start, end, req_type, - new_type); - else if (is_range_ram < 0) + if (is_range_ram == 1) { + + spin_lock(&memtype_lock); + err = reserve_ram_pages_type(start, end, req_type, new_type); + spin_unlock(&memtype_lock); + + return err; + } else if (is_range_ram < 0) { return -EINVAL; + } new = kmalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) @@ -380,17 +424,11 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, spin_lock(&memtype_lock); - if (cached_entry && start >= cached_start) - entry = cached_entry; - else - entry = list_entry(&memtype_list, struct memtype, nd); - /* Search for existing mapping that overlaps the current range */ where = NULL; - list_for_each_entry_continue(entry, &memtype_list, nd) { + list_for_each_entry(entry, &memtype_list, nd) { if (end <= entry->start) { where = entry->nd.prev; - cached_entry = list_entry(where, struct memtype, nd); break; } else if (start <= entry->start) { /* end > entry->start */ err = chk_conflict(new, entry, new_type); @@ -398,8 +436,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, dprintk("Overlap at 0x%Lx-0x%Lx\n", entry->start, entry->end); where = entry->nd.prev; - cached_entry = list_entry(where, - struct memtype, nd); } break; } else if (start < entry->end) { /* start > entry->start */ @@ -407,8 +443,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (!err) { dprintk("Overlap at 0x%Lx-0x%Lx\n", entry->start, entry->end); - cached_entry = list_entry(entry->nd.prev, - struct memtype, nd); /* * Move to right position in the linked @@ -436,13 +470,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return err; } - cached_start = start; - if (where) list_add(&new->nd, where); else list_add_tail(&new->nd, &memtype_list); + memtype_rb_insert(&memtype_rbroot, new); + spin_unlock(&memtype_lock); dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", @@ -454,7 +488,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, int free_memtype(u64 start, u64 end) { - struct memtype *entry; + struct memtype *entry, *saved_entry; int err = -EINVAL; int is_range_ram; @@ -466,23 +500,58 @@ int free_memtype(u64 start, u64 end) return 0; is_range_ram = pat_pagerange_is_ram(start, end); - if (is_range_ram == 1) - return free_ram_pages_type(start, end); - else if (is_range_ram < 0) + if (is_range_ram == 1) { + + spin_lock(&memtype_lock); + err = free_ram_pages_type(start, end); + spin_unlock(&memtype_lock); + + return err; + } else if (is_range_ram < 0) { return -EINVAL; + } spin_lock(&memtype_lock); - list_for_each_entry(entry, &memtype_list, nd) { + + entry = memtype_rb_search(&memtype_rbroot, start); + if (unlikely(entry == NULL)) + goto unlock_ret; + + /* + * Saved entry points to an entry with start same or less than what + * we searched for. Now go through the list in both directions to look + * for the entry that matches with both start and end, with list stored + * in sorted start address + */ + saved_entry = entry; + list_for_each_entry_from(entry, &memtype_list, nd) { if (entry->start == start && entry->end == end) { - if (cached_entry == entry || cached_start == start) - cached_entry = NULL; + rb_erase(&entry->rb, &memtype_rbroot); + list_del(&entry->nd); + kfree(entry); + err = 0; + break; + } else if (entry->start > start) { + break; + } + } + + if (!err) + goto unlock_ret; + entry = saved_entry; + list_for_each_entry_reverse(entry, &memtype_list, nd) { + if (entry->start == start && entry->end == end) { + rb_erase(&entry->rb, &memtype_rbroot); list_del(&entry->nd); kfree(entry); err = 0; break; + } else if (entry->start < start) { + break; } } +unlock_ret: spin_unlock(&memtype_lock); if (err) { @@ -496,6 +565,101 @@ int free_memtype(u64 start, u64 end) } +/** + * lookup_memtype - Looksup the memory type for a physical address + * @paddr: physical address of which memory type needs to be looked up + * + * Only to be called when PAT is enabled + * + * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or + * _PAGE_CACHE_UC + */ +static unsigned long lookup_memtype(u64 paddr) +{ + int rettype = _PAGE_CACHE_WB; + struct memtype *entry; + + if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) + return rettype; + + if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { + struct page *page; + spin_lock(&memtype_lock); + page = pfn_to_page(paddr >> PAGE_SHIFT); + rettype = get_page_memtype(page); + spin_unlock(&memtype_lock); + /* + * -1 from get_page_memtype() implies RAM page is in its + * default state and not reserved, and hence of type WB + */ + if (rettype == -1) + rettype = _PAGE_CACHE_WB; + + return rettype; + } + + spin_lock(&memtype_lock); + + entry = memtype_rb_search(&memtype_rbroot, paddr); + if (entry != NULL) + rettype = entry->type; + else + rettype = _PAGE_CACHE_UC_MINUS; + + spin_unlock(&memtype_lock); + return rettype; +} + +/** + * io_reserve_memtype - Request a memory type mapping for a region of memory + * @start: start (physical address) of the region + * @end: end (physical address) of the region + * @type: A pointer to memtype, with requested type. On success, requested + * or any other compatible type that was available for the region is returned + * + * On success, returns 0 + * On failure, returns non-zero + */ +int io_reserve_memtype(resource_size_t start, resource_size_t end, + unsigned long *type) +{ + resource_size_t size = end - start; + unsigned long req_type = *type; + unsigned long new_type; + int ret; + + WARN_ON_ONCE(iomem_map_sanity_check(start, size)); + + ret = reserve_memtype(start, end, req_type, &new_type); + if (ret) + goto out_err; + + if (!is_new_memtype_allowed(start, size, req_type, new_type)) + goto out_free; + + if (kernel_map_sync_memtype(start, size, new_type) < 0) + goto out_free; + + *type = new_type; + return 0; + +out_free: + free_memtype(start, end); + ret = -EBUSY; +out_err: + return ret; +} + +/** + * io_free_memtype - Release a memory type mapping for a region of memory + * @start: start (physical address) of the region + * @end: end (physical address) of the region + */ +void io_free_memtype(resource_size_t start, resource_size_t end) +{ + free_memtype(start, end); +} + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -577,7 +741,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) { unsigned long id_sz; - if (!pat_enabled || base >= __pa(high_memory)) + if (base >= __pa(high_memory)) return 0; id_sz = (__pa(high_memory) < base + size) ? @@ -612,18 +776,37 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, is_ram = pat_pagerange_is_ram(paddr, paddr + size); /* - * reserve_pfn_range() doesn't support RAM pages. Maintain the current - * behavior with RAM pages by returning success. + * reserve_pfn_range() for RAM pages. We do not refcount to keep + * track of number of mappings of RAM pages. We can assert that + * the type requested matches the type of first page in the range. */ - if (is_ram != 0) + if (is_ram) { + if (!pat_enabled) + return 0; + + flags = lookup_memtype(paddr); + if (want_flags != flags) { + printk(KERN_WARNING + "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", + current->comm, current->pid, + cattr_name(want_flags), + (unsigned long long)paddr, + (unsigned long long)(paddr + size), + cattr_name(flags)); + *vma_prot = __pgprot((pgprot_val(*vma_prot) & + (~_PAGE_CACHE_MASK)) | + flags); + } return 0; + } ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); if (ret) return ret; if (flags != want_flags) { - if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) { + if (strict_prot || + !is_new_memtype_allowed(paddr, size, want_flags, flags)) { free_memtype(paddr, paddr + size); printk(KERN_ERR "%s:%d map pfn expected mapping type %s" " for %Lx-%Lx, got %s\n", @@ -677,14 +860,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; - if (!pat_enabled) - return 0; - - /* - * For now, only handle remap_pfn_range() vmas where - * is_linear_pfn_mapping() == TRUE. Handling of - * vm_insert_pfn() is TBD. - */ if (is_linear_pfn_mapping(vma)) { /* * reserve the whole chunk covered by vma. We need the @@ -712,23 +887,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, unsigned long pfn, unsigned long size) { + unsigned long flags; resource_size_t paddr; unsigned long vma_size = vma->vm_end - vma->vm_start; - if (!pat_enabled) - return 0; - - /* - * For now, only handle remap_pfn_range() vmas where - * is_linear_pfn_mapping() == TRUE. Handling of - * vm_insert_pfn() is TBD. - */ if (is_linear_pfn_mapping(vma)) { /* reserve the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; return reserve_pfn_range(paddr, vma_size, prot, 0); } + if (!pat_enabled) + return 0; + + /* for vm_insert_pfn and friends, we set prot based on lookup */ + flags = lookup_memtype(pfn << PAGE_SHIFT); + *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | + flags); + return 0; } @@ -743,14 +919,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, resource_size_t paddr; unsigned long vma_size = vma->vm_end - vma->vm_start; - if (!pat_enabled) - return; - - /* - * For now, only handle remap_pfn_range() vmas where - * is_linear_pfn_mapping() == TRUE. Handling of - * vm_insert_pfn() is TBD. - */ if (is_linear_pfn_mapping(vma)) { /* free the whole chunk starting from vm_pgoff */ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; @@ -826,7 +994,7 @@ static int memtype_seq_show(struct seq_file *seq, void *v) return 0; } -static struct seq_operations memtype_seq_ops = { +static const struct seq_operations memtype_seq_ops = { .start = memtype_seq_start, .next = memtype_seq_next, .stop = memtype_seq_stop, diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8e43bdd45456..ed34f5e35999 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -25,7 +25,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) return pte; } -void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) +void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { pgtable_page_dtor(pte); paravirt_release_pte(page_to_pfn(pte)); @@ -33,14 +33,14 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) } #if PAGETABLE_LEVELS > 2 -void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); tlb_remove_page(tlb, virt_to_page(pmd)); } #if PAGETABLE_LEVELS > 3 -void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) +void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); tlb_remove_page(tlb, virt_to_page(pud)); @@ -329,7 +329,6 @@ void __init reserve_top_address(unsigned long reserve) printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", (int)-reserve); __FIXADDR_TOP = -reserve - PAGE_SIZE; - __VMALLOC_RESERVE += reserve; #endif } diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c new file mode 100644 index 000000000000..d2e2735327b4 --- /dev/null +++ b/arch/x86/mm/physaddr.c @@ -0,0 +1,70 @@ +#include <linux/mmdebug.h> +#include <linux/module.h> +#include <linux/mm.h> + +#include <asm/page.h> + +#include "physaddr.h" + +#ifdef CONFIG_X86_64 + +unsigned long __phys_addr(unsigned long x) +{ + if (x >= __START_KERNEL_map) { + x -= __START_KERNEL_map; + VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); + x += phys_base; + } else { + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + x -= PAGE_OFFSET; + VIRTUAL_BUG_ON(!phys_addr_valid(x)); + } + return x; +} +EXPORT_SYMBOL(__phys_addr); + +bool __virt_addr_valid(unsigned long x) +{ + if (x >= __START_KERNEL_map) { + x -= __START_KERNEL_map; + if (x >= KERNEL_IMAGE_SIZE) + return false; + x += phys_base; + } else { + if (x < PAGE_OFFSET) + return false; + x -= PAGE_OFFSET; + if (!phys_addr_valid(x)) + return false; + } + + return pfn_valid(x >> PAGE_SHIFT); +} +EXPORT_SYMBOL(__virt_addr_valid); + +#else + +#ifdef CONFIG_DEBUG_VIRTUAL +unsigned long __phys_addr(unsigned long x) +{ + /* VMALLOC_* aren't constants */ + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); + return x - PAGE_OFFSET; +} +EXPORT_SYMBOL(__phys_addr); +#endif + +bool __virt_addr_valid(unsigned long x) +{ + if (x < PAGE_OFFSET) + return false; + if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) + return false; + if (x >= FIXADDR_START) + return false; + return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); +} +EXPORT_SYMBOL(__virt_addr_valid); + +#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/mm/physaddr.h b/arch/x86/mm/physaddr.h new file mode 100644 index 000000000000..a3cd5a0c97b3 --- /dev/null +++ b/arch/x86/mm/physaddr.h @@ -0,0 +1,10 @@ +#include <asm/processor.h> + +static inline int phys_addr_valid(resource_size_t addr) +{ +#ifdef CONFIG_PHYS_ADDR_T_64BIT + return !(addr >> boot_cpu_data.x86_phys_bits); +#else + return 1; +#endif +} diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 29a0e37114f8..6f8aa33031c7 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c @@ -215,7 +215,7 @@ int __init get_memcfg_from_srat(void) goto out_fail; if (num_memory_chunks == 0) { - printk(KERN_WARNING + printk(KERN_DEBUG "could not find any ACPI SRAT memory areas.\n"); goto out_fail; } @@ -277,7 +277,7 @@ int __init get_memcfg_from_srat(void) } return 1; out_fail: - printk(KERN_ERR "failed to get NUMA memory information from SRAT" + printk(KERN_DEBUG "failed to get NUMA memory information from SRAT" " table\n"); return 0; } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 2dfcbf9df2ae..dbb5381f7b3b 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -79,8 +79,10 @@ static __init void bad_srat(void) acpi_numa = -1; for (i = 0; i < MAX_LOCAL_APIC; i++) apicid_to_node[i] = NUMA_NO_NODE; - for (i = 0; i < MAX_NUMNODES; i++) - nodes_add[i].start = nodes[i].end = 0; + for (i = 0; i < MAX_NUMNODES; i++) { + nodes[i].start = nodes[i].end = 0; + nodes_add[i].start = nodes_add[i].end = 0; + } remove_all_active_ranges(); } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 821e97017e95..c814e144a3f0 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -183,18 +183,17 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, f->flush_mm = mm; f->flush_va = va; - cpumask_andnot(to_cpumask(f->flush_cpumask), - cpumask, cpumask_of(smp_processor_id())); - - /* - * We have to send the IPI only to - * CPUs affected. - */ - apic->send_IPI_mask(to_cpumask(f->flush_cpumask), - INVALIDATE_TLB_VECTOR_START + sender); + if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) { + /* + * We have to send the IPI only to + * CPUs affected. + */ + apic->send_IPI_mask(to_cpumask(f->flush_cpumask), + INVALIDATE_TLB_VECTOR_START + sender); - while (!cpumask_empty(to_cpumask(f->flush_cpumask))) - cpu_relax(); + while (!cpumask_empty(to_cpumask(f->flush_cpumask))) + cpu_relax(); + } f->flush_mm = NULL; f->flush_va = 0; |