Diffstat (limited to 'arch/powerpc/mm')
 arch/powerpc/mm/8xx_mmu.c              |   2
 arch/powerpc/mm/copro_fault.c          |   2
 arch/powerpc/mm/fault.c                |  28
 arch/powerpc/mm/hash_native_64.c       |  15
 arch/powerpc/mm/hash_utils_64.c        |  34
 arch/powerpc/mm/hugetlbpage.c          |  26
 arch/powerpc/mm/init_32.c              |   7
 arch/powerpc/mm/init_64.c              |   8
 arch/powerpc/mm/mem.c                  |  25
 arch/powerpc/mm/mmu_context_book3s64.c |  24
 arch/powerpc/mm/mmu_context_nohash.c   |  15
 arch/powerpc/mm/mmu_decl.h             |   1
 arch/powerpc/mm/numa.c                 |  36
 arch/powerpc/mm/pgtable-book3s64.c     |   8
 arch/powerpc/mm/pgtable-hash64.c       |   6
 arch/powerpc/mm/pgtable-radix.c        | 218
 arch/powerpc/mm/pgtable_32.c           |   2
 arch/powerpc/mm/pgtable_64.c           |   5
 arch/powerpc/mm/pkeys.c                |  17
 arch/powerpc/mm/slb.c                  | 108
 arch/powerpc/mm/slb_low.S              |  19
 arch/powerpc/mm/slice.c                | 485
 arch/powerpc/mm/tlb-radix.c            |  14
 arch/powerpc/mm/tlb_hash64.c           |   2
 24 files changed, 662 insertions(+), 445 deletions(-)
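
The largest rework below is the slice code (slice.c, with matching changes in hash_utils_64.c and slb_low.S). Its per-mm page-size map stores one 4-bit MMU page-size index per slice, packed two slices to a byte, which is why the hunks keep computing (psizes[index >> 1] >> ((index & 1) * 4)) & 0xf. A minimal standalone sketch of that packing follows; the helper names are illustrative, not kernel API:

	#include <stdio.h>

	/*
	 * Two 4-bit page-size indices per byte, as in the kernel's
	 * low_slices_psize[] / high_slices_psize[] arrays.
	 */
	static unsigned int slice_psize_get(const unsigned char *psizes,
					    unsigned long index)
	{
		unsigned long mask_index = index & 0x1;	/* low or high nibble */

		return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
	}

	static void slice_psize_set(unsigned char *psizes, unsigned long index,
				    unsigned int psize)
	{
		unsigned long mask_index = index & 0x1;

		psizes[index >> 1] = (psizes[index >> 1] &
				      ~(0xf << (mask_index * 4))) |
				     (psize << (mask_index * 4));
	}

	int main(void)
	{
		unsigned char psizes[8] = { 0 };	/* room for 16 slices */

		slice_psize_set(psizes, 5, 4);	/* 4: a hypothetical psize index */
		printf("slice 5 psize = %u\n", slice_psize_get(psizes, 5));
		return 0;
	}

This packing is also what makes the memset() in slice_init_new_context_exec() below work: writing (psize << 4) | psize to every byte initialises both slices of each pair in one call.
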
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 849f50cd62f2..cf77d755246d 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd) mtspr(SPRN_M_TW, __pa(pgd) - offset); /* Update context */ - mtspr(SPRN_M_CASID, id); + mtspr(SPRN_M_CASID, id - 1); /* sync */ mb(); } diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 697b70ad1195..7d0945bd3a61 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -112,7 +112,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) return 1; psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); - vsid = get_vsid(mm->context.id, ea, ssize); + vsid = get_user_vsid(&mm->context, ea, ssize); vsidkey = SLB_VSID_USER; break; case VMALLOC_REGION_ID: diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 866446cf2d9a..c01d627e687a 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -297,7 +297,12 @@ static bool access_error(bool is_write, bool is_exec, if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) return true; - + /* + * We should ideally do the vma pkey access check here. But in the + * fault path, handle_mm_fault() also does the same check. To avoid + * these multiple checks, we skip it here and handle access error due + * to pkeys later. + */ return false; } @@ -518,25 +523,16 @@ good_area: #ifdef CONFIG_PPC_MEM_KEYS /* - * if the HPTE is not hashed, hardware will not detect - * a key fault. Lets check if we failed because of a - * software detected key fault. + * we skipped checking for access error due to key earlier. + * Check that using handle_mm_fault error return. */ if (unlikely(fault & VM_FAULT_SIGSEGV) && - !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, - is_exec, 0)) { - /* - * The PGD-PDT...PMD-PTE tree may not have been fully setup. - * Hence we cannot walk the tree to locate the PTE, to locate - * the key. Hence let's use vma_pkey() to get the key; instead - * of get_mm_addr_key(). 
- */ + !arch_vma_access_permitted(vma, is_write, is_exec, 0)) { + int pkey = vma_pkey(vma); - if (likely(pkey)) { - up_read(&mm->mmap_sem); - return bad_key_fault_exception(regs, address, pkey); - } + up_read(&mm->mmap_sem); + return bad_key_fault_exception(regs, address, pkey); } #endif /* CONFIG_PPC_MEM_KEYS */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 656933c85925..1d049c78c82a 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -866,18 +866,6 @@ static void native_flush_hash_range(unsigned long number, int local) local_irq_restore(flags); } -static int native_register_proc_table(unsigned long base, unsigned long page_size, - unsigned long table_size) -{ - unsigned long patb1 = base << 25; /* VSID */ - - patb1 |= (page_size << 5); /* sllp */ - patb1 |= table_size; - - partition_tb->patb1 = cpu_to_be64(patb1); - return 0; -} - void __init hpte_init_native(void) { mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; @@ -889,7 +877,4 @@ void __init hpte_init_native(void) mmu_hash_ops.hpte_clear_all = native_hpte_clear; mmu_hash_ops.flush_hash_range = native_flush_hash_range; mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate; - - if (cpu_has_feature(CPU_FTR_ARCH_300)) - register_process_table = native_register_proc_table; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index cf290d415dcd..0bd3790d35df 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -132,9 +132,10 @@ EXPORT_SYMBOL(mmu_hash_ops); * is provided by the firmware. */ -/* Pre-POWER4 CPUs (4k pages only) +/* + * Fallback (4k pages only) */ -static struct mmu_psize_def mmu_psize_defaults_old[] = { +static struct mmu_psize_def mmu_psize_defaults[] = { [MMU_PAGE_4K] = { .shift = 12, .sllp = 0, @@ -554,8 +555,8 @@ static void __init htab_scan_page_sizes(void) mmu_psize_set_default_penc(); /* Default to 4K pages only */ - memcpy(mmu_psize_defs, mmu_psize_defaults_old, - sizeof(mmu_psize_defaults_old)); + memcpy(mmu_psize_defs, mmu_psize_defaults, + sizeof(mmu_psize_defaults)); /* * Try to find the available page sizes in the device-tree @@ -781,7 +782,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size) } } -int hash__create_section_mapping(unsigned long start, unsigned long end) +int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) { int rc = htab_bolt_mapping(start, end, __pa(start), pgprot_val(PAGE_KERNEL), mmu_linear_psize, @@ -875,6 +876,12 @@ static void __init htab_initialize(void) /* Using a hypervisor which owns the htab */ htab_address = NULL; _SDR1 = 0; + /* + * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall + * to inform the hypervisor that we wish to use the HPT. 
+ */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + register_process_table(0, 0, 0); #ifdef CONFIG_FA_DUMP /* * If firmware assisted dump is active firmware preserves @@ -1110,19 +1117,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) #ifdef CONFIG_PPC_MM_SLICES static unsigned int get_paca_psize(unsigned long addr) { - u64 lpsizes; - unsigned char *hpsizes; + unsigned char *psizes; unsigned long index, mask_index; if (addr < SLICE_LOW_TOP) { - lpsizes = get_paca()->mm_ctx_low_slices_psize; + psizes = get_paca()->mm_ctx_low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); - return (lpsizes >> (index * 4)) & 0xF; + } else { + psizes = get_paca()->mm_ctx_high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); } - hpsizes = get_paca()->mm_ctx_high_slices_psize; - index = GET_HIGH_SLICE_INDEX(addr); mask_index = index & 0x1; - return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF; + return (psizes[index >> 1] >> (mask_index * 4)) & 0xF; } #else @@ -1262,7 +1268,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, } psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); - vsid = get_vsid(mm->context.id, ea, ssize); + vsid = get_user_vsid(&mm->context, ea, ssize); break; case VMALLOC_REGION_ID: vsid = get_kernel_vsid(ea, mmu_kernel_ssize); @@ -1527,7 +1533,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Get VSID */ ssize = user_segment_size(ea); - vsid = get_vsid(mm->context.id, ea, ssize); + vsid = get_user_vsid(&mm->context, ea, ssize); if (!vsid) return; /* diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 3a08d211d2ee..f1153f8254e3 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -122,9 +122,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_PGD_SHIFT PGDIR_SHIFT #define HUGEPD_PUD_SHIFT PUD_SHIFT -#else -#define HUGEPD_PGD_SHIFT PUD_SHIFT -#define HUGEPD_PUD_SHIFT PMD_SHIFT #endif /* @@ -553,9 +550,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); +#ifdef CONFIG_PPC_RADIX_MMU if (radix_enabled()) return radix__hugetlb_get_unmapped_area(file, addr, len, pgoff, flags); +#endif return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); } #endif @@ -563,10 +562,12 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { #ifdef CONFIG_PPC_MM_SLICES - unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); /* With radix we don't use slice, so derive it from vma*/ - if (!radix_enabled()) + if (!radix_enabled()) { + unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); + return 1UL << mmu_psize_to_shift(psize); + } #endif return vma_kernel_pagesize(vma); } @@ -663,15 +664,26 @@ static int __init hugetlbpage_init(void) shift = mmu_psize_to_shift(psize); - if (add_huge_page_size(1ULL << shift) < 0) +#ifdef CONFIG_PPC_BOOK3S_64 + if (shift > PGDIR_SHIFT) continue; - + else if (shift > PUD_SHIFT) + pdshift = PGDIR_SHIFT; + else if (shift > PMD_SHIFT) + pdshift = PUD_SHIFT; + else + pdshift = PMD_SHIFT; +#else if (shift < HUGEPD_PUD_SHIFT) pdshift = PMD_SHIFT; else if (shift < HUGEPD_PGD_SHIFT) pdshift = PUD_SHIFT; else pdshift = PGDIR_SHIFT; +#endif + + if (add_huge_page_size(1ULL << shift) < 0) + continue; /* * if we have pdshift and 
shift value same, we don't * use pgt cache for hugepd. diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 6419b33ca309..3e59e5d64b01 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -88,18 +88,13 @@ void MMU_init(void); int __map_without_bats; int __map_without_ltlbs; -/* - * This tells the system to allow ioremapping memory marked as reserved. - */ -int __allow_ioremap_reserved; - /* max amount of low RAM to map in */ unsigned long __max_low_memory = MAX_LOW_MEM; /* * Check for command-line options that affect what MMU_init will do. */ -void __init MMU_setup(void) +static void __init MMU_setup(void) { /* Check for nobats option (used in mapin_ram). */ if (strstr(boot_command_line, "nobats")) { diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index fdb424a29f03..51ce091914f9 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -68,12 +68,6 @@ #include "mmu_decl.h" -#ifdef CONFIG_PPC_BOOK3S_64 -#if H_PGTABLE_RANGE > USER_VSID_RANGE -#warning Limited user VSID range means pagetable space is wasted -#endif -#endif /* CONFIG_PPC_BOOK3S_64 */ - phys_addr_t memstart_addr = ~0; EXPORT_SYMBOL_GPL(memstart_addr); phys_addr_t kernstart_addr; @@ -372,7 +366,7 @@ static int __init parse_disable_radix(char *p) { bool val; - if (strlen(p) == 0) + if (!p) val = true; else if (kstrtobool(p, &val)) return -EINVAL; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index fe8c61149fb8..737f8a4632cc 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -82,17 +82,7 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) int page_is_ram(unsigned long pfn) { -#ifndef CONFIG_PPC64 /* XXX for now */ - return pfn < max_pfn; -#else - unsigned long paddr = (pfn << PAGE_SHIFT); - struct memblock_region *reg; - - for_each_memblock(memory, reg) - if (paddr >= reg->base && paddr < (reg->base + reg->size)) - return 1; - return 0; -#endif + return memblock_is_memory(__pfn_to_phys(pfn)); } pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, @@ -117,7 +107,7 @@ int memory_add_physaddr_to_nid(u64 start) } #endif -int __weak create_section_mapping(unsigned long start, unsigned long end) +int __weak create_section_mapping(unsigned long start, unsigned long end, int nid) { return -ENODEV; } @@ -127,7 +117,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, +int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; @@ -137,7 +127,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, resize_hpt_for_hotplug(memblock_phys_mem_size()); start = (unsigned long)__va(start); - rc = create_section_mapping(start, start + size); + rc = create_section_mapping(start, start + size, nid); if (rc) { pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); @@ -148,7 +138,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -212,7 +202,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, 
EXPORT_SYMBOL_GPL(walk_system_ram_range); #ifndef CONFIG_NEED_MULTIPLE_NODES -void __init initmem_init(void) +void __init mem_topology_setup(void) { max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; min_low_pfn = MEMORY_START >> PAGE_SHIFT; @@ -224,7 +214,10 @@ void __init initmem_init(void) * memblock_regions */ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); +} +void __init initmem_init(void) +{ /* XXX need to clip this if using highmem? */ sparse_memory_present_with_active_regions(0); sparse_init(); diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 3f980baade4c..b75194dff64c 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -94,13 +94,6 @@ static int hash__init_new_context(struct mm_struct *mm) return index; /* - * In the case of exec, use the default limit, - * otherwise inherit it from the mm we are duplicating. - */ - if (!mm->context.slb_addr_limit) - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; - - /* * The old code would re-promote on fork, we don't do that when using * slices as it could cause problem promoting slices that have been * forced down to 4K. @@ -115,7 +108,7 @@ static int hash__init_new_context(struct mm_struct *mm) * check against 0 is OK. */ if (mm->context.id == 0) - slice_set_user_psize(mm, mmu_virtual_psize); + slice_init_new_context_exec(mm); subpage_prot_init_new_context(mm); @@ -186,6 +179,19 @@ void __destroy_context(int context_id) } EXPORT_SYMBOL_GPL(__destroy_context); +static void destroy_contexts(mm_context_t *ctx) +{ + int index, context_id; + + spin_lock(&mmu_context_lock); + for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) { + context_id = ctx->extended_id[index]; + if (context_id) + ida_remove(&mmu_context_ida, context_id); + } + spin_unlock(&mmu_context_lock); +} + #ifdef CONFIG_PPC_64K_PAGES static void destroy_pagetable_page(struct mm_struct *mm) { @@ -224,7 +230,7 @@ void destroy_context(struct mm_struct *mm) else subpage_prot_free(mm); destroy_pagetable_page(mm); - __destroy_context(mm->context.id); + destroy_contexts(&mm->context); mm->context.id = MMU_NO_CONTEXT; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 4554d6527682..be8f5c9d4d08 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -331,6 +331,17 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) { pr_hard("initing context for mm @%p\n", mm); +#ifdef CONFIG_PPC_MM_SLICES + /* + * We have MMU_NO_CONTEXT set to be ~0. Hence check + * explicitly against context.id == 0. This ensures that we properly + * initialize context slice details for newly allocated mm's (which will + * have id == 0) and don't alter context slice inherited via fork (which + * will have id != 0). 
+ */ + if (mm->context.id == 0) + slice_init_new_context_exec(mm); +#endif mm->context.id = MMU_NO_CONTEXT; mm->context.active = 0; return 0; @@ -428,8 +439,8 @@ void __init mmu_context_init(void) * -- BenH */ if (mmu_has_feature(MMU_FTR_TYPE_8xx)) { - first_context = 0; - last_context = 15; + first_context = 1; + last_context = 16; no_selective_tlbil = true; } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { first_context = 1; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 57fbc554c785..c4c0a09a7775 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -98,7 +98,6 @@ extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, pgprot_t prot); extern int __map_without_bats; -extern int __allow_ioremap_reserved; extern unsigned int rtas_data, rtas_size; struct hash_pte; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index edd8d0bc9364..57a5029b4521 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -831,18 +831,13 @@ out: of_node_put(rtas); } -void __init initmem_init(void) +void __init mem_topology_setup(void) { - int nid, cpu; - - max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; - max_pfn = max_low_pfn; + int cpu; if (parse_numa_properties()) setup_nonnuma(); - memblock_dump_all(); - /* * Modify the set of possible NUMA nodes to reflect information * available about the set of online nodes, and the set of nodes @@ -853,6 +848,23 @@ void __init initmem_init(void) find_possible_nodes(); + setup_node_to_cpumask_map(); + + reset_numa_cpu_lookup_table(); + + for_each_present_cpu(cpu) + numa_setup_cpu(cpu); +} + +void __init initmem_init(void) +{ + int nid; + + max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; + max_pfn = max_low_pfn; + + memblock_dump_all(); + for_each_online_node(nid) { unsigned long start_pfn, end_pfn; @@ -863,10 +875,6 @@ void __init initmem_init(void) sparse_init(); - setup_node_to_cpumask_map(); - - reset_numa_cpu_lookup_table(); - /* * We need the numa_cpu_lookup_table to be accurate for all CPUs, * even before we online them, so that we can use cpu_to_{node,mem} @@ -876,8 +884,6 @@ void __init initmem_init(void) */ cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare", ppc_numa_cpu_prepare, ppc_numa_cpu_dead); - for_each_present_cpu(cpu) - numa_setup_cpu(cpu); } static int __init early_numa(char *p) @@ -1105,7 +1111,7 @@ static void setup_cpu_associativity_change_counters(void) for_each_possible_cpu(cpu) { int i; u8 *counts = vphn_cpu_change_counts[cpu]; - volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; + volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; for (i = 0; i < distance_ref_points_depth; i++) counts[i] = hypervisor_counts[i]; @@ -1131,7 +1137,7 @@ static int update_cpu_associativity_changes_mask(void) for_each_possible_cpu(cpu) { int i, changed = 0; u8 *counts = vphn_cpu_change_counts[cpu]; - volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts; + volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts; for (i = 0; i < distance_ref_points_depth; i++) { if (hypervisor_counts[i] != counts[i]) { diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 422e80253a33..518518fb7c45 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -155,15 +155,15 @@ void mmu_cleanup_all(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int create_section_mapping(unsigned long start, unsigned long end) +int __meminit 
create_section_mapping(unsigned long start, unsigned long end, int nid) { if (radix_enabled()) - return radix__create_section_mapping(start, end); + return radix__create_section_mapping(start, end, nid); - return hash__create_section_mapping(start, end); + return hash__create_section_mapping(start, end, nid); } -int remove_section_mapping(unsigned long start, unsigned long end) +int __meminit remove_section_mapping(unsigned long start, unsigned long end) { if (radix_enabled()) return radix__remove_section_mapping(start, end); diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index 469808e77e58..199bfda5f0d9 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -24,6 +24,10 @@ #define CREATE_TRACE_POINTS #include <trace/events/thp.h> +#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE)) +#warning Limited user VSID range means pagetable space is wasted +#endif + #ifdef CONFIG_SPARSEMEM_VMEMMAP /* * vmemmap is the starting address of the virtual address space where @@ -320,7 +324,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, if (!is_kernel_addr(addr)) { ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); + vsid = get_user_vsid(&mm->context, addr, ssize); WARN_ON(vsid == 0); } else { vsid = get_kernel_vsid(addr, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 2e10a964e290..f1891e215e39 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -48,20 +48,88 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz return 0; } -static __ref void *early_alloc_pgtable(unsigned long size) +static __ref void *early_alloc_pgtable(unsigned long size, int nid, + unsigned long region_start, unsigned long region_end) { + unsigned long pa = 0; void *pt; - pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE)); + if (region_start || region_end) /* has region hint */ + pa = memblock_alloc_range(size, size, region_start, region_end, + MEMBLOCK_NONE); + else if (nid != -1) /* has node hint */ + pa = memblock_alloc_base_nid(size, size, + MEMBLOCK_ALLOC_ANYWHERE, + nid, MEMBLOCK_NONE); + + if (!pa) + pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE); + + BUG_ON(!pa); + + pt = __va(pa); memset(pt, 0, size); return pt; } -int radix__map_kernel_page(unsigned long ea, unsigned long pa, +static int early_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, - unsigned int map_page_size) + unsigned int map_page_size, + int nid, + unsigned long region_start, unsigned long region_end) { + unsigned long pfn = pa >> PAGE_SHIFT; + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pgdp = pgd_offset_k(ea); + if (pgd_none(*pgdp)) { + pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, + region_start, region_end); + pgd_populate(&init_mm, pgdp, pudp); + } + pudp = pud_offset(pgdp, ea); + if (map_page_size == PUD_SIZE) { + ptep = (pte_t *)pudp; + goto set_the_pte; + } + if (pud_none(*pudp)) { + pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid, + region_start, region_end); + pud_populate(&init_mm, pudp, pmdp); + } + pmdp = pmd_offset(pudp, ea); + if (map_page_size == PMD_SIZE) { + ptep = pmdp_ptep(pmdp); + goto set_the_pte; + } + if (!pmd_present(*pmdp)) { + ptep = early_alloc_pgtable(PAGE_SIZE, nid, + region_start, region_end); + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + ptep = pte_offset_kernel(pmdp, ea); + 
+set_the_pte: + set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); + smp_wmb(); + return 0; +} + +/* + * nid, region_start, and region_end are hints to try to place the page + * table memory in the same node or region. + */ +static int __map_kernel_page(unsigned long ea, unsigned long pa, + pgprot_t flags, + unsigned int map_page_size, + int nid, + unsigned long region_start, unsigned long region_end) +{ + unsigned long pfn = pa >> PAGE_SHIFT; pgd_t *pgdp; pud_t *pudp; pmd_t *pmdp; @@ -70,61 +138,48 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa, * Make sure task size is correct as per the max adddr */ BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE); - if (slab_is_available()) { - pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); - if (!pudp) - return -ENOMEM; - if (map_page_size == PUD_SIZE) { - ptep = (pte_t *)pudp; - goto set_the_pte; - } - pmdp = pmd_alloc(&init_mm, pudp, ea); - if (!pmdp) - return -ENOMEM; - if (map_page_size == PMD_SIZE) { - ptep = pmdp_ptep(pmdp); - goto set_the_pte; - } - ptep = pte_alloc_kernel(pmdp, ea); - if (!ptep) - return -ENOMEM; - } else { - pgdp = pgd_offset_k(ea); - if (pgd_none(*pgdp)) { - pudp = early_alloc_pgtable(PUD_TABLE_SIZE); - BUG_ON(pudp == NULL); - pgd_populate(&init_mm, pgdp, pudp); - } - pudp = pud_offset(pgdp, ea); - if (map_page_size == PUD_SIZE) { - ptep = (pte_t *)pudp; - goto set_the_pte; - } - if (pud_none(*pudp)) { - pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); - BUG_ON(pmdp == NULL); - pud_populate(&init_mm, pudp, pmdp); - } - pmdp = pmd_offset(pudp, ea); - if (map_page_size == PMD_SIZE) { - ptep = pmdp_ptep(pmdp); - goto set_the_pte; - } - if (!pmd_present(*pmdp)) { - ptep = early_alloc_pgtable(PAGE_SIZE); - BUG_ON(ptep == NULL); - pmd_populate_kernel(&init_mm, pmdp, ptep); - } - ptep = pte_offset_kernel(pmdp, ea); + + if (unlikely(!slab_is_available())) + return early_map_kernel_page(ea, pa, flags, map_page_size, + nid, region_start, region_end); + + /* + * Should make page table allocation functions be able to take a + * node, so we can place kernel page tables on the right nodes after + * boot. 
+ */ + pgdp = pgd_offset_k(ea); + pudp = pud_alloc(&init_mm, pgdp, ea); + if (!pudp) + return -ENOMEM; + if (map_page_size == PUD_SIZE) { + ptep = (pte_t *)pudp; + goto set_the_pte; + } + pmdp = pmd_alloc(&init_mm, pudp, ea); + if (!pmdp) + return -ENOMEM; + if (map_page_size == PMD_SIZE) { + ptep = pmdp_ptep(pmdp); + goto set_the_pte; } + ptep = pte_alloc_kernel(pmdp, ea); + if (!ptep) + return -ENOMEM; set_the_pte: - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags)); + set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); smp_wmb(); return 0; } +int radix__map_kernel_page(unsigned long ea, unsigned long pa, + pgprot_t flags, + unsigned int map_page_size) +{ + return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0); +} + #ifdef CONFIG_STRICT_KERNEL_RWX void radix__change_memory_range(unsigned long start, unsigned long end, unsigned long clear) @@ -211,7 +266,8 @@ static inline void __meminit print_mapping(unsigned long start, } static int __meminit create_physical_mapping(unsigned long start, - unsigned long end) + unsigned long end, + int nid) { unsigned long vaddr, addr, mapping_size = 0; pgprot_t prot; @@ -267,7 +323,7 @@ retry: else prot = PAGE_KERNEL; - rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size); + rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end); if (rc) return rc; } @@ -276,7 +332,7 @@ retry: return 0; } -static void __init radix_init_pgtable(void) +void __init radix_init_pgtable(void) { unsigned long rts_field; struct memblock_region *reg; @@ -286,9 +342,16 @@ static void __init radix_init_pgtable(void) /* * Create the linear mapping, using standard page size for now */ - for_each_memblock(memory, reg) + for_each_memblock(memory, reg) { + /* + * The memblock allocator is up at this point, so the + * page tables will be allocated within the range. No + * need or a node (which we don't have yet). + */ WARN_ON(create_physical_mapping(reg->base, - reg->base + reg->size)); + reg->base + reg->size, + -1)); + } /* Find out how many PID bits are supported */ if (cpu_has_feature(CPU_FTR_HVMODE)) { @@ -317,7 +380,7 @@ static void __init radix_init_pgtable(void) * host. */ BUG_ON(PRTB_SIZE_SHIFT > 36); - process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); + process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0); /* * Fill in the process table. 
*/ @@ -575,12 +638,8 @@ void __init radix__early_init_mmu(void) #ifdef CONFIG_PCI pci_io_base = ISA_IO_BASE; #endif - - /* - * For now radix also use the same frag size - */ - __pte_frag_nr = H_PTE_FRAG_NR; - __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT; + __pte_frag_nr = RADIX_PTE_FRAG_NR; + __pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT; if (!firmware_has_feature(FW_FEATURE_LPAR)) { radix_init_native(); @@ -695,7 +754,7 @@ struct change_mapping_params { unsigned long aligned_end; }; -static int stop_machine_change_mapping(void *data) +static int __meminit stop_machine_change_mapping(void *data) { struct change_mapping_params *params = (struct change_mapping_params *)data; @@ -705,8 +764,8 @@ static int stop_machine_change_mapping(void *data) spin_unlock(&init_mm.page_table_lock); pte_clear(&init_mm, params->aligned_start, params->pte); - create_physical_mapping(params->aligned_start, params->start); - create_physical_mapping(params->end, params->aligned_end); + create_physical_mapping(params->aligned_start, params->start, -1); + create_physical_mapping(params->end, params->aligned_end, -1); spin_lock(&init_mm.page_table_lock); return 0; } @@ -742,7 +801,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, /* * clear the pte and potentially split the mapping helper */ -static void split_kernel_mapping(unsigned long addr, unsigned long end, +static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end, unsigned long size, pte_t *pte) { unsigned long mask = ~(size - 1); @@ -835,7 +894,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, } } -static void remove_pagetable(unsigned long start, unsigned long end) +static void __meminit remove_pagetable(unsigned long start, unsigned long end) { unsigned long addr, next; pud_t *pud_base; @@ -863,12 +922,12 @@ static void remove_pagetable(unsigned long start, unsigned long end) radix__flush_tlb_kernel_range(start, end); } -int __ref radix__create_section_mapping(unsigned long start, unsigned long end) +int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid) { - return create_physical_mapping(start, end); + return create_physical_mapping(start, end, nid); } -int radix__remove_section_mapping(unsigned long start, unsigned long end) +int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) { remove_pagetable(start, end); return 0; @@ -876,19 +935,30 @@ int radix__remove_section_mapping(unsigned long start, unsigned long end) #endif /* CONFIG_MEMORY_HOTPLUG */ #ifdef CONFIG_SPARSEMEM_VMEMMAP +static int __map_kernel_page_nid(unsigned long ea, unsigned long pa, + pgprot_t flags, unsigned int map_page_size, + int nid) +{ + return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0); +} + int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys) { /* Create a PTE encoding */ unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW; + int nid = early_pfn_to_nid(phys >> PAGE_SHIFT); + int ret; + + ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid); + BUG_ON(ret); - BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size)); return 0; } #ifdef CONFIG_MEMORY_HOTPLUG -void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) +void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) { remove_pagetable(start, start + page_size); } diff --git a/arch/powerpc/mm/pgtable_32.c 
b/arch/powerpc/mm/pgtable_32.c index d35d9ad3c1cd..120a49bfb9c6 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -148,7 +148,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, * mem_init() sets high_memory so only do the check after that. */ if (slab_is_available() && (p < virt_to_phys(high_memory)) && - !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) { + page_is_ram(__phys_to_pfn(p))) { printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n", (unsigned long long)p, __builtin_return_address(0)); return NULL; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index adf469f312f2..9bf659d5078c 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -57,11 +57,6 @@ #include "mmu_decl.h" -#ifdef CONFIG_PPC_BOOK3S_64 -#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) -#error TASK_SIZE_USER64 exceeds user VSID range -#endif -#endif #ifdef CONFIG_PPC_BOOK3S_64 /* diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index ba71c5481f42..0eafdf01edc7 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -119,18 +119,15 @@ int pkey_initialize(void) #else os_reserved = 0; #endif + initial_allocation_mask = ~0x0; + pkey_amr_uamor_mask = ~0x0ul; + pkey_iamr_mask = ~0x0ul; /* - * Bits are in LE format. NOTE: 1, 0 are reserved. + * key 0, 1 are reserved. * key 0 is the default key, which allows read/write/execute. * key 1 is recommended not to be used. PowerISA(3.0) page 1015, * programming note. */ - initial_allocation_mask = ~0x0; - - /* register mask is in BE format */ - pkey_amr_uamor_mask = ~0x0ul; - pkey_iamr_mask = ~0x0ul; - for (i = 2; i < (pkeys_total - os_reserved); i++) { initial_allocation_mask &= ~(0x1 << i); pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i)); @@ -308,9 +305,9 @@ void thread_pkey_regs_init(struct thread_struct *thread) if (static_branch_likely(&pkey_disabled)) return; - write_amr(read_amr() & pkey_amr_uamor_mask); - write_iamr(read_iamr() & pkey_iamr_mask); - write_uamor(read_uamor() & pkey_amr_uamor_mask); + thread->amr = read_amr() & pkey_amr_uamor_mask; + thread->iamr = read_iamr() & pkey_iamr_mask; + thread->uamor = read_uamor() & pkey_amr_uamor_mask; } static inline bool pkey_allows_readwrite(int pkey) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 13cfe413b40d..66577cc66dc9 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -22,6 +22,7 @@ #include <asm/cacheflush.h> #include <asm/smp.h> #include <linux/compiler.h> +#include <linux/context_tracking.h> #include <linux/mm_types.h> #include <asm/udbg.h> @@ -340,3 +341,110 @@ void slb_initialize(void) asm volatile("isync":::"memory"); } + +static void insert_slb_entry(unsigned long vsid, unsigned long ea, + int bpsize, int ssize) +{ + unsigned long flags, vsid_data, esid_data; + enum slb_index index; + int slb_cache_index; + + /* + * We are irq disabled, hence should be safe to access PACA. + */ + index = get_paca()->stab_rr; + + /* + * simple round-robin replacement of slb starting at SLB_NUM_BOLTED. 
+ */ + if (index < (mmu_slb_size - 1)) + index++; + else + index = SLB_NUM_BOLTED; + + get_paca()->stab_rr = index; + + flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; + vsid_data = (vsid << slb_vsid_shift(ssize)) | flags | + ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); + esid_data = mk_esid_data(ea, ssize, index); + + asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data) + : "memory"); + + /* + * Now update slb cache entries + */ + slb_cache_index = get_paca()->slb_cache_ptr; + if (slb_cache_index < SLB_CACHE_ENTRIES) { + /* + * We have space in slb cache for optimized switch_slb(). + * Top 36 bits from esid_data as per ISA + */ + get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28; + get_paca()->slb_cache_ptr++; + } else { + /* + * Our cache is full and the current cache content strictly + * doesn't indicate the active SLB conents. Bump the ptr + * so that switch_slb() will ignore the cache. + */ + get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1; + } +} + +static void handle_multi_context_slb_miss(int context_id, unsigned long ea) +{ + struct mm_struct *mm = current->mm; + unsigned long vsid; + int bpsize; + + /* + * We are always above 1TB, hence use high user segment size. + */ + vsid = get_vsid(context_id, ea, mmu_highuser_ssize); + bpsize = get_slice_psize(mm, ea); + insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize); +} + +void slb_miss_large_addr(struct pt_regs *regs) +{ + enum ctx_state prev_state = exception_enter(); + unsigned long ea = regs->dar; + int context; + + if (REGION_ID(ea) != USER_REGION_ID) + goto slb_bad_addr; + + /* + * Are we beyound what the page table layout supports ? + */ + if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) + goto slb_bad_addr; + + /* Lower address should have been handled by asm code */ + if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT)) + goto slb_bad_addr; + + /* + * consider this as bad access if we take a SLB miss + * on an address above addr limit. + */ + if (ea >= current->mm->context.slb_addr_limit) + goto slb_bad_addr; + + context = get_ea_context(¤t->mm->context, ea); + if (!context) + goto slb_bad_addr; + + handle_multi_context_slb_miss(context, ea); + exception_exit(prev_state); + return; + +slb_bad_addr: + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_BNDERR, ea); + else + bad_page_fault(regs, ea, SIGSEGV); + exception_exit(prev_state); +} diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 2cf5ef3fc50d..a83fbd2a4a24 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -75,10 +75,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) */ _GLOBAL(slb_allocate) /* - * check for bad kernel/user address - * (ea & ~REGION_MASK) >= PGTABLE_RANGE + * Check if the address falls within the range of the first context, or + * if we may need to handle multi context. For the first context we + * allocate the slb entry via the fast path below. For large address we + * branch out to C-code and see if additional contexts have been + * allocated. + * The test here is: + * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT) */ - rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4) + rldicr. 
r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4) bne- 8f srdi r9,r3,60 /* get region */ @@ -200,10 +205,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) 5: /* * Handle lpsizes - * r9 is get_paca()->context.low_slices_psize, r11 is index + * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index */ - ld r9,PACALOWSLICESPSIZE(r13) - mr r11,r10 + srdi r11,r10,1 /* index */ + addi r9,r11,PACALOWSLICESPSIZE + lbzx r9,r13,r9 /* r9 is lpsizes[r11] */ + rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */ 6: sldi r11,r11,2 /* index * 4 */ /* Extract the psize and multiply to get an array offset */ diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 23ec2c5e3b78..9cd87d11fe4e 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -37,32 +37,25 @@ #include <asm/hugetlb.h> static DEFINE_SPINLOCK(slice_convert_lock); -/* - * One bit per slice. We have lower slices which cover 256MB segments - * upto 4G range. That gets us 16 low slices. For the rest we track slices - * in 1TB size. - */ -struct slice_mask { - u64 low_slices; - DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); -}; #ifdef DEBUG int _slice_debug = 1; -static void slice_print_mask(const char *label, struct slice_mask mask) +static void slice_print_mask(const char *label, const struct slice_mask *mask) { if (!_slice_debug) return; - pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices); - pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices); + pr_devel("%s low_slice: %*pbl\n", label, + (int)SLICE_NUM_LOW, &mask->low_slices); + pr_devel("%s high_slice: %*pbl\n", label, + (int)SLICE_NUM_HIGH, mask->high_slices); } #define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0) #else -static void slice_print_mask(const char *label, struct slice_mask mask) {} +static void slice_print_mask(const char *label, const struct slice_mask *mask) {} #define slice_dbg(fmt...) 
#endif @@ -73,10 +66,12 @@ static void slice_range_to_mask(unsigned long start, unsigned long len, unsigned long end = start + len - 1; ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + if (SLICE_NUM_HIGH) + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); if (start < SLICE_LOW_TOP) { - unsigned long mend = min(end, (SLICE_LOW_TOP - 1)); + unsigned long mend = min(end, + (unsigned long)(SLICE_LOW_TOP - 1)); ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) - (1u << GET_LOW_SLICE_INDEX(start)); @@ -113,11 +108,13 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) unsigned long start = slice << SLICE_HIGH_SHIFT; unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); +#ifdef CONFIG_PPC64 /* Hack, so that each addresses is controlled by exactly one * of the high or low area bitmaps, the first high area starts * at 4GB, not 0 */ if (start == 0) start = SLICE_LOW_TOP; +#endif return !slice_area_is_free(mm, start, end - start); } @@ -128,7 +125,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret, unsigned long i; ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + if (SLICE_NUM_HIGH) + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); for (i = 0; i < SLICE_NUM_LOW; i++) if (!slice_low_has_vma(mm, i)) @@ -142,53 +140,75 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret, __set_bit(i, ret->high_slices); } -static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret, - unsigned long high_limit) +#ifdef CONFIG_PPC_BOOK3S_64 +static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) { - unsigned char *hpsizes; - int index, mask_index; - unsigned long i; - u64 lpsizes; - - ret->low_slices = 0; - bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); +#ifdef CONFIG_PPC_64K_PAGES + if (psize == MMU_PAGE_64K) + return &mm->context.mask_64k; +#endif + if (psize == MMU_PAGE_4K) + return &mm->context.mask_4k; +#ifdef CONFIG_HUGETLB_PAGE + if (psize == MMU_PAGE_16M) + return &mm->context.mask_16m; + if (psize == MMU_PAGE_16G) + return &mm->context.mask_16g; +#endif + BUG(); +} +#elif defined(CONFIG_PPC_8xx) +static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) +{ + if (psize == mmu_virtual_psize) + return &mm->context.mask_base_psize; +#ifdef CONFIG_HUGETLB_PAGE + if (psize == MMU_PAGE_512K) + return &mm->context.mask_512k; + if (psize == MMU_PAGE_8M) + return &mm->context.mask_8m; +#endif + BUG(); +} +#else +#error "Must define the slice masks for page sizes supported by the platform" +#endif - lpsizes = mm->context.low_slices_psize; - for (i = 0; i < SLICE_NUM_LOW; i++) - if (((lpsizes >> (i * 4)) & 0xf) == psize) - ret->low_slices |= 1u << i; +static bool slice_check_range_fits(struct mm_struct *mm, + const struct slice_mask *available, + unsigned long start, unsigned long len) +{ + unsigned long end = start + len - 1; + u64 low_slices = 0; - if (high_limit <= SLICE_LOW_TOP) - return; + if (start < SLICE_LOW_TOP) { + unsigned long mend = min(end, + (unsigned long)(SLICE_LOW_TOP - 1)); - hpsizes = mm->context.high_slices_psize; - for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) { - mask_index = i & 0x1; - index = i >> 1; - if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize) - __set_bit(i, ret->high_slices); + low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) + - (1u << GET_LOW_SLICE_INDEX(start)); } -} + if ((low_slices & available->low_slices) != low_slices) + return false; -static int 
slice_check_fit(struct mm_struct *mm, - struct slice_mask mask, struct slice_mask available) -{ - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - /* - * Make sure we just do bit compare only to the max - * addr limit and not the full bit map size. - */ - unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); + if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) { + unsigned long start_index = GET_HIGH_SLICE_INDEX(start); + unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); + unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; + unsigned long i; - bitmap_and(result, mask.high_slices, - available.high_slices, slice_count); + for (i = start_index; i < start_index + count; i++) { + if (!test_bit(i, available->high_slices)) + return false; + } + } - return (mask.low_slices & available.low_slices) == mask.low_slices && - bitmap_equal(result, mask.high_slices, slice_count); + return true; } static void slice_flush_segments(void *parm) { +#ifdef CONFIG_PPC64 struct mm_struct *mm = parm; unsigned long flags; @@ -200,40 +220,64 @@ static void slice_flush_segments(void *parm) local_irq_save(flags); slb_flush_and_rebolt(); local_irq_restore(flags); +#endif } -static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) +static void slice_convert(struct mm_struct *mm, + const struct slice_mask *mask, int psize) { int index, mask_index; /* Write the new slice psize bits */ - unsigned char *hpsizes; - u64 lpsizes; + unsigned char *hpsizes, *lpsizes; + struct slice_mask *psize_mask, *old_mask; unsigned long i, flags; + int old_psize; slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); slice_print_mask(" mask", mask); + psize_mask = slice_mask_for_size(mm, psize); + /* We need to use a spinlock here to protect against * concurrent 64k -> 4k demotion ... */ spin_lock_irqsave(&slice_convert_lock, flags); lpsizes = mm->context.low_slices_psize; - for (i = 0; i < SLICE_NUM_LOW; i++) - if (mask.low_slices & (1u << i)) - lpsizes = (lpsizes & ~(0xful << (i * 4))) | - (((unsigned long)psize) << (i * 4)); + for (i = 0; i < SLICE_NUM_LOW; i++) { + if (!(mask->low_slices & (1u << i))) + continue; + + mask_index = i & 0x1; + index = i >> 1; - /* Assign the value back */ - mm->context.low_slices_psize = lpsizes; + /* Update the slice_mask */ + old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf; + old_mask = slice_mask_for_size(mm, old_psize); + old_mask->low_slices &= ~(1u << i); + psize_mask->low_slices |= 1u << i; + + /* Update the sizes array */ + lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) | + (((unsigned long)psize) << (mask_index * 4)); + } hpsizes = mm->context.high_slices_psize; for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) { + if (!test_bit(i, mask->high_slices)) + continue; + mask_index = i & 0x1; index = i >> 1; - if (test_bit(i, mask.high_slices)) - hpsizes[index] = (hpsizes[index] & - ~(0xf << (mask_index * 4))) | + + /* Update the slice_mask */ + old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf; + old_mask = slice_mask_for_size(mm, old_psize); + __clear_bit(i, old_mask->high_slices); + __set_bit(i, psize_mask->high_slices); + + /* Update the sizes array */ + hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) | (((unsigned long)psize) << (mask_index * 4)); } @@ -254,26 +298,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz * 'available' slice_mark. 
*/ static bool slice_scan_available(unsigned long addr, - struct slice_mask available, - int end, - unsigned long *boundary_addr) + const struct slice_mask *available, + int end, unsigned long *boundary_addr) { unsigned long slice; if (addr < SLICE_LOW_TOP) { slice = GET_LOW_SLICE_INDEX(addr); *boundary_addr = (slice + end) << SLICE_LOW_SHIFT; - return !!(available.low_slices & (1u << slice)); + return !!(available->low_slices & (1u << slice)); } else { slice = GET_HIGH_SLICE_INDEX(addr); *boundary_addr = (slice + end) ? ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP; - return !!test_bit(slice, available.high_slices); + return !!test_bit(slice, available->high_slices); } } static unsigned long slice_find_area_bottomup(struct mm_struct *mm, unsigned long len, - struct slice_mask available, + const struct slice_mask *available, int psize, unsigned long high_limit) { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); @@ -319,7 +362,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm, static unsigned long slice_find_area_topdown(struct mm_struct *mm, unsigned long len, - struct slice_mask available, + const struct slice_mask *available, int psize, unsigned long high_limit) { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); @@ -377,7 +420,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, - struct slice_mask mask, int psize, + const struct slice_mask *mask, int psize, int topdown, unsigned long high_limit) { if (topdown) @@ -386,23 +429,33 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, return slice_find_area_bottomup(mm, len, mask, psize, high_limit); } -static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src) +static inline void slice_copy_mask(struct slice_mask *dst, + const struct slice_mask *src) { - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - - dst->low_slices |= src->low_slices; - bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); + dst->low_slices = src->low_slices; + if (!SLICE_NUM_HIGH) + return; + bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH); } -static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src) +static inline void slice_or_mask(struct slice_mask *dst, + const struct slice_mask *src1, + const struct slice_mask *src2) { - DECLARE_BITMAP(result, SLICE_NUM_HIGH); - - dst->low_slices &= ~src->low_slices; + dst->low_slices = src1->low_slices | src2->low_slices; + if (!SLICE_NUM_HIGH) + return; + bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH); +} - bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); - bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); +static inline void slice_andnot_mask(struct slice_mask *dst, + const struct slice_mask *src1, + const struct slice_mask *src2) +{ + dst->low_slices = src1->low_slices & ~src2->low_slices; + if (!SLICE_NUM_HIGH) + return; + bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH); } #ifdef CONFIG_PPC_64K_PAGES @@ -415,10 +468,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, unsigned long flags, unsigned int psize, int topdown) { - struct slice_mask mask; struct slice_mask good_mask; struct slice_mask potential_mask; - struct slice_mask compat_mask; + const struct slice_mask *maskp; + 
const struct slice_mask *compat_maskp = NULL; int fixed = (flags & MAP_FIXED); int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); unsigned long page_size = 1UL << pshift; @@ -442,23 +495,16 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, } if (high_limit > mm->context.slb_addr_limit) { + /* + * Increasing the slb_addr_limit does not require + * slice mask cache to be recalculated because it should + * be already initialised beyond the old address limit. + */ mm->context.slb_addr_limit = high_limit; + on_each_cpu(slice_flush_segments, mm, 1); } - /* - * init different masks - */ - mask.low_slices = 0; - bitmap_zero(mask.high_slices, SLICE_NUM_HIGH); - - /* silence stupid warning */; - potential_mask.low_slices = 0; - bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH); - - compat_mask.low_slices = 0; - bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH); - /* Sanity checks */ BUG_ON(mm->task_size == 0); BUG_ON(mm->context.slb_addr_limit == 0); @@ -481,8 +527,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* First make up a "good" mask of slices that have the right size * already */ - slice_mask_for_size(mm, psize, &good_mask, high_limit); - slice_print_mask(" good_mask", good_mask); + maskp = slice_mask_for_size(mm, psize); /* * Here "good" means slices that are already the right page size, @@ -503,40 +548,47 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * search in good | compat | free, found => convert free. */ -#ifdef CONFIG_PPC_64K_PAGES - /* If we support combo pages, we can allow 64k pages in 4k slices */ - if (psize == MMU_PAGE_64K) { - slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit); + /* + * If we support combo pages, we can allow 64k pages in 4k slices + * The mask copies could be avoided in most cases here if we had + * a pointer to good mask for the next code to use. + */ + if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) { + compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); if (fixed) - slice_or_mask(&good_mask, &compat_mask); + slice_or_mask(&good_mask, maskp, compat_maskp); + else + slice_copy_mask(&good_mask, maskp); + } else { + slice_copy_mask(&good_mask, maskp); } -#endif + + slice_print_mask(" good_mask", &good_mask); + if (compat_maskp) + slice_print_mask(" compat_mask", compat_maskp); /* First check hint if it's valid or if we have MAP_FIXED */ if (addr != 0 || fixed) { - /* Build a mask for the requested range */ - slice_range_to_mask(addr, len, &mask); - slice_print_mask(" mask", mask); - /* Check if we fit in the good mask. 
If we do, we just return,
		 * nothing else to do
		 */
-		if (slice_check_fit(mm, mask, good_mask)) {
+		if (slice_check_range_fits(mm, &good_mask, addr, len)) {
 			slice_dbg(" fits good !\n");
-			return addr;
+			newaddr = addr;
+			goto return_addr;
 		}
 	} else {
 		/* Now let's see if we can find something in the existing
 		 * slices for that size
 		 */
-		newaddr = slice_find_area(mm, len, good_mask,
+		newaddr = slice_find_area(mm, len, &good_mask,
 					  psize, topdown, high_limit);
 		if (newaddr != -ENOMEM) {
 			/* Found within the good mask, we don't have to setup,
 			 * we thus return directly
 			 */
 			slice_dbg(" found area at 0x%lx\n", newaddr);
-			return newaddr;
+			goto return_addr;
 		}
 	}
 
 	/*
@@ -544,12 +596,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * empty and thus can be converted
 	 */
 	slice_mask_for_free(mm, &potential_mask, high_limit);
-	slice_or_mask(&potential_mask, &good_mask);
-	slice_print_mask(" potential", potential_mask);
+	slice_or_mask(&potential_mask, &potential_mask, &good_mask);
+	slice_print_mask(" potential", &potential_mask);
 
-	if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
-		slice_dbg(" fits potential !\n");
-		goto convert;
+	if (addr != 0 || fixed) {
+		if (slice_check_range_fits(mm, &potential_mask, addr, len)) {
+			slice_dbg(" fits potential !\n");
+			newaddr = addr;
+			goto convert;
+		}
 	}
 
 	/* If we have MAP_FIXED and failed the above steps, then error out */
@@ -562,46 +617,64 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * anywhere in the good area.
 	 */
 	if (addr) {
-		addr = slice_find_area(mm, len, good_mask,
-				       psize, topdown, high_limit);
-		if (addr != -ENOMEM) {
-			slice_dbg(" found area at 0x%lx\n", addr);
-			return addr;
+		newaddr = slice_find_area(mm, len, &good_mask,
+					  psize, topdown, high_limit);
+		if (newaddr != -ENOMEM) {
+			slice_dbg(" found area at 0x%lx\n", newaddr);
+			goto return_addr;
 		}
 	}
 
 	/* Now let's see if we can find something in the existing slices
 	 * for that size plus free slices
 	 */
-	addr = slice_find_area(mm, len, potential_mask,
-			       psize, topdown, high_limit);
+	newaddr = slice_find_area(mm, len, &potential_mask,
+				  psize, topdown, high_limit);
 
 #ifdef CONFIG_PPC_64K_PAGES
-	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
+	if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) {
 		/* retry the search with 4k-page slices included */
-		slice_or_mask(&potential_mask, &compat_mask);
-		addr = slice_find_area(mm, len, potential_mask,
-				       psize, topdown, high_limit);
+		slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
+		newaddr = slice_find_area(mm, len, &potential_mask,
+					  psize, topdown, high_limit);
 	}
 #endif
 
-	if (addr == -ENOMEM)
+	if (newaddr == -ENOMEM)
 		return -ENOMEM;
 
-	slice_range_to_mask(addr, len, &mask);
-	slice_dbg(" found potential area at 0x%lx\n", addr);
-	slice_print_mask(" mask", mask);
+	slice_range_to_mask(newaddr, len, &potential_mask);
+	slice_dbg(" found potential area at 0x%lx\n", newaddr);
+	slice_print_mask(" mask", &potential_mask);
 
 convert:
-	slice_andnot_mask(&mask, &good_mask);
-	slice_andnot_mask(&mask, &compat_mask);
-	if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
-		slice_convert(mm, mask, psize);
+	/*
+	 * Try to allocate the context before we do slice convert
+	 * so that we handle the context allocation failure gracefully.
+	 */
+	if (need_extra_context(mm, newaddr)) {
+		if (alloc_extended_context(mm, newaddr) < 0)
+			return -ENOMEM;
+	}
+
+	slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
+	if (compat_maskp && !fixed)
+		slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
+	if (potential_mask.low_slices ||
+	    (SLICE_NUM_HIGH &&
+	     !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
+		slice_convert(mm, &potential_mask, psize);
 		if (psize > MMU_PAGE_BASE)
 			on_each_cpu(slice_flush_segments, mm, 1);
 	}
-	return addr;
+	return newaddr;
 
+return_addr:
+	if (need_extra_context(mm, newaddr)) {
+		if (alloc_extended_context(mm, newaddr) < 0)
+			return -ENOMEM;
+	}
+	return newaddr;
 }
 EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
@@ -627,94 +700,60 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
-	unsigned char *hpsizes;
+	unsigned char *psizes;
 	int index, mask_index;
 
-	/*
-	 * Radix doesn't use slice, but can get enabled along with MMU_SLICE
-	 */
-	if (radix_enabled()) {
-#ifdef CONFIG_PPC_64K_PAGES
-		return MMU_PAGE_64K;
-#else
-		return MMU_PAGE_4K;
-#endif
-	}
+	VM_BUG_ON(radix_enabled());
+
 	if (addr < SLICE_LOW_TOP) {
-		u64 lpsizes;
-		lpsizes = mm->context.low_slices_psize;
+		psizes = mm->context.low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
-		return (lpsizes >> (index * 4)) & 0xf;
+	} else {
+		psizes = mm->context.high_slices_psize;
+		index = GET_HIGH_SLICE_INDEX(addr);
 	}
-	hpsizes = mm->context.high_slices_psize;
-	index = GET_HIGH_SLICE_INDEX(addr);
 	mask_index = index & 0x1;
-	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
+	return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
 }
 EXPORT_SYMBOL_GPL(get_slice_psize);
 
-/*
- * This is called by hash_page when it needs to do a lazy conversion of
- * an address space from real 64K pages to combo 4K pages (typically
- * when hitting a non cacheable mapping on a processor or hypervisor
- * that won't allow them for 64K pages).
- *
- * This is also called in init_new_context() to change back the user
- * psize from whatever the parent context had it set to
- * N.B. This may be called before mm->context.id has been set.
- *
- * This function will only change the content of the {low,high)_slice_psize
- * masks, it will not flush SLBs as this shall be handled lazily by the
- * caller.
- */
-void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
+void slice_init_new_context_exec(struct mm_struct *mm)
 {
-	int index, mask_index;
-	unsigned char *hpsizes;
-	unsigned long flags, lpsizes;
-	unsigned int old_psize;
-	int i;
+	unsigned char *hpsizes, *lpsizes;
+	struct slice_mask *mask;
+	unsigned int psize = mmu_virtual_psize;
 
-	slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
+	slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm);
 
-	VM_BUG_ON(radix_enabled());
-	spin_lock_irqsave(&slice_convert_lock, flags);
-
-	old_psize = mm->context.user_psize;
-	slice_dbg(" old_psize=%d\n", old_psize);
-	if (old_psize == psize)
-		goto bail;
+	/*
	 * In the case of exec, use the default limit. In the
+	 * case of fork it is just inherited from the mm being
+	 * duplicated.
+	 */
+#ifdef CONFIG_PPC64
+	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#else
+	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
 
 	mm->context.user_psize = psize;
-	wmb();
 
+	/*
+	 * Set all slice psizes to the default.
+	 */
 	lpsizes = mm->context.low_slices_psize;
-	for (i = 0; i < SLICE_NUM_LOW; i++)
-		if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
-			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
-				(((unsigned long)psize) << (i * 4));
-	/* Assign the value back */
-	mm->context.low_slices_psize = lpsizes;
+	memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
 
 	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++) {
-		mask_index = i & 0x1;
-		index = i >> 1;
-		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
-			hpsizes[index] = (hpsizes[index] &
-					  ~(0xf << (mask_index * 4))) |
-				(((unsigned long)psize) << (mask_index * 4));
-	}
-
-
-
-
-	slice_dbg(" lsps=%lx, hsps=%lx\n",
-		  (unsigned long)mm->context.low_slices_psize,
-		  (unsigned long)mm->context.high_slices_psize);
+	memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
 
- bail:
-	spin_unlock_irqrestore(&slice_convert_lock, flags);
+	/*
+	 * Slice mask cache starts zeroed, fill the default size cache.
+	 */
+	mask = slice_mask_for_size(mm, psize);
+	mask->low_slices = ~0UL;
+	if (SLICE_NUM_HIGH)
+		bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
 }
 
 void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
@@ -725,7 +764,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 	VM_BUG_ON(radix_enabled());
 
 	slice_range_to_mask(start, len, &mask);
-	slice_convert(mm, mask, psize);
+	slice_convert(mm, &mask, psize);
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -748,33 +787,27 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
  * for now as we only use slices with hugetlbfs enabled. This should
  * be fixed as the generic code gets fixed.
  */
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 			   unsigned long len)
 {
-	struct slice_mask mask, available;
+	const struct slice_mask *maskp;
 	unsigned int psize = mm->context.user_psize;
-	unsigned long high_limit = mm->context.slb_addr_limit;
 
-	if (radix_enabled())
-		return 0;
+	VM_BUG_ON(radix_enabled());
 
-	slice_range_to_mask(addr, len, &mask);
-	slice_mask_for_size(mm, psize, &available, high_limit);
+	maskp = slice_mask_for_size(mm, psize);
 #ifdef CONFIG_PPC_64K_PAGES
 	/* We need to account for 4k slices too */
 	if (psize == MMU_PAGE_64K) {
-		struct slice_mask compat_mask;
-		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
-		slice_or_mask(&available, &compat_mask);
+		const struct slice_mask *compat_maskp;
+		struct slice_mask available;
+
+		compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+		slice_or_mask(&available, maskp, compat_maskp);
+		return !slice_check_range_fits(mm, &available, addr, len);
 	}
 #endif
-#if 0 /* too verbose */
-	slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
-		 mm, addr, len);
-	slice_print_mask(" mask", mask);
-	slice_print_mask(" available", available);
-#endif
-	return !slice_check_fit(mm, mask, available);
+	return !slice_check_range_fits(mm, maskp, addr, len);
 }
 #endif
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index a07f5372a4bf..2fba6170ab3f 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -98,7 +98,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
 	rb |= set << PPC_BITLSHIFT(51);
 	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
-	r = 1; /* raidx format */
+	r = 1; /* radix format */
 
 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -112,7 +112,7 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
 	rb = PPC_BIT(53); /* IS = 1 */
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
-	r = 1; /* raidx format */
+	r = 1; /* radix format */
 
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -128,7 +128,7 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
 	rb |= ap << PPC_BITLSHIFT(58);
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
-	r = 1; /* raidx format */
+	r = 1; /* radix format */
 
 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -144,7 +144,7 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
 	rb |= ap << PPC_BITLSHIFT(58);
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
-	r = 1; /* raidx format */
+	r = 1; /* radix format */
 
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -668,7 +668,7 @@ void radix__flush_tlb_all(void)
 
 	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
 	prs = 0; /* partition scoped */
-	r = 1; /* raidx format */
+	r = 1; /* radix format */
 	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
 
 	asm volatile("ptesync": : :"memory");
@@ -706,7 +706,7 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 {
-	unsigned int pid = mm->context.id;
+	unsigned long pid = mm->context.id;
 
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		return;
@@ -734,7 +734,7 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 	for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
 		if (sib == cpu)
 			continue;
-		if (paca[sib].kvm_hstate.kvm_vcpu)
+		if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
 			flush = true;
 	}
 	if (flush)
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 9b23f12e863c..87d71dd25441 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -89,7 +89,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	/* Build full vaddr */
 	if (!is_kernel_addr(addr)) {
 		ssize = user_segment_size(addr);
-		vsid = get_vsid(mm->context.id, addr, ssize);
+		vsid = get_user_vsid(&mm->context, addr, ssize);
 	} else {
 		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
 		ssize = mmu_kernel_ssize;
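
A note on the psize encoding the slice.c changes rely on: low_slices_psize and high_slices_psize store one 4-bit MMU page-size index per slice, two slices per byte, which is why slice_init_new_context_exec() can fill them with a single memset of (psize << 4) | psize and get_slice_psize() extracts with (psizes[index >> 1] >> (mask_index * 4)) & 0xf. The following is a minimal standalone sketch of that packing outside the kernel; the slice count, helper names, and psize values here are illustrative, not kernel symbols.

#include <stdio.h>
#include <string.h>

/* Illustrative slice count; the kernel derives its own from SLICE_NUM_LOW/HIGH. */
#define NUM_SLICES	64

/* One 4-bit page-size index per slice, two slices packed per byte. */
static unsigned char psizes[NUM_SLICES >> 1];

static void init_psizes(unsigned char psize)
{
	/* Replicate the nibble into both halves of every byte, as the
	 * memset() in slice_init_new_context_exec() does. */
	memset(psizes, (psize << 4) | psize, sizeof(psizes));
}

static unsigned int get_psize(unsigned int index)
{
	unsigned int mask_index = index & 0x1;

	/* Same extraction as get_slice_psize(). */
	return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
}

static void set_psize(unsigned int index, unsigned int psize)
{
	unsigned int mask_index = index & 0x1;

	psizes[index >> 1] &= ~(0xf << (mask_index * 4));
	psizes[index >> 1] |= psize << (mask_index * 4);
}

int main(void)
{
	init_psizes(3);		/* pretend 3 is the default psize index */
	set_psize(5, 0);	/* demote a single slice */
	printf("slice 4 = %u, slice 5 = %u\n", get_psize(4), get_psize(5));
	return 0;
}

The packing is what makes the exec-time reset cheap: one memset per array replaces the old per-slice read-modify-write loops that slice_set_user_psize() needed.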
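The slice mask helpers also changed shape: slice_or_mask() and slice_andnot_mask() now take an explicit destination plus two sources (so they can combine the cached, read-only masks returned by slice_mask_for_size()), and slice_check_fit() became slice_check_range_fits(), which tests only the slices a given [addr, addr + len) range covers. Below is a toy model of that logic reduced to the 64-bit low-slice word; the kernel's slice_mask also carries a high_slices bitmap, and the function names here mirror the patch but are otherwise standalone.

#include <stdint.h>
#include <stdio.h>

/* Toy slice_mask: only the 64-bit low-slice word, no high_slices bitmap. */
struct slice_mask {
	uint64_t low_slices;
};

/* Three-operand form: dst may alias either source, as in the patch. */
static void slice_or_mask(struct slice_mask *dst,
			  const struct slice_mask *src1,
			  const struct slice_mask *src2)
{
	dst->low_slices = src1->low_slices | src2->low_slices;
}

/*
 * A range "fits" if every slice it touches is set in the mask.
 * Assumes first + nslices <= 64 so the shifts stay well defined.
 */
static int check_range_fits(const struct slice_mask *mask,
			    unsigned int first, unsigned int nslices)
{
	uint64_t need;

	need = (nslices == 64) ? ~0ULL : (((1ULL << nslices) - 1) << first);
	return (mask->low_slices & need) == need;
}

int main(void)
{
	struct slice_mask good = { 0x0f }, compat = { 0xf0 }, available;

	slice_or_mask(&available, &good, &compat);
	/* Slices 2..5 only become available once the compat mask is OR-ed in. */
	printf("fits = %d\n", check_range_fits(&available, 2, 4));
	return 0;
}

This is the same shape as the 64K-page fallback in slice_is_hugepage_only_range(): OR the MMU_PAGE_4K compat mask into the MMU_PAGE_64K mask, then test the range against the combined result.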
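Finally, note the ordering in slice_get_unmapped_area(): both the convert and return_addr paths call need_extra_context()/alloc_extended_context() before slice_convert() mutates any slice state, so a failed context allocation can simply return -ENOMEM with nothing to unwind. A minimal sketch of that pattern follows; the struct, the address bound, and the stub helpers are all invented for illustration and stand in for the kernel's extended-context machinery.

#include <errno.h>
#include <stdio.h>

/* Stand-ins for mm_struct and the kernel helpers; purely illustrative. */
struct toy_mm {
	int have_extended_id;
	unsigned long converted;
};

static int need_extra_context(struct toy_mm *mm, unsigned long addr)
{
	/* Illustrative boundary only; the kernel's test concerns addresses
	 * that need an extra context id, not this constant. */
	return addr >= (1UL << 47) && !mm->have_extended_id;
}

static int alloc_extended_context(struct toy_mm *mm, unsigned long addr)
{
	(void)addr;
	mm->have_extended_id = 1;	/* pretend the allocation succeeded */
	return 0;
}

static void convert_slices(struct toy_mm *mm)
{
	mm->converted = 1;	/* the irreversible step we must not reach on failure */
}

static long toy_get_unmapped_area(struct toy_mm *mm, unsigned long newaddr)
{
	/* Allocate anything that can fail *before* mutating slice state,
	 * so an error needs no unwinding. */
	if (need_extra_context(mm, newaddr)) {
		if (alloc_extended_context(mm, newaddr) < 0)
			return -ENOMEM;
	}
	convert_slices(mm);	/* only reached once allocation is safe */
	return newaddr;
}

int main(void)
{
	struct toy_mm mm = { 0, 0 };

	printf("got 0x%lx\n", toy_get_unmapped_area(&mm, 1UL << 48));
	return 0;
}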