Diffstat (limited to 'arch/powerpc/mm')
 arch/powerpc/mm/40x_mmu.c            |   2
 arch/powerpc/mm/fsl_booke_mmu.c      |   7
 arch/powerpc/mm/hash_low_64.S        |   9
 arch/powerpc/mm/hash_utils_64.c      |  79
 arch/powerpc/mm/highmem.c            |   4
 arch/powerpc/mm/hugetlbpage-hash64.c |  40
 arch/powerpc/mm/init_32.c            |  16
 arch/powerpc/mm/init_64.c            |   2
 arch/powerpc/mm/mem.c                |  78
 arch/powerpc/mm/numa.c               | 182
 arch/powerpc/mm/pgtable.c            |   1
 arch/powerpc/mm/pgtable_32.c         |  10
 arch/powerpc/mm/pgtable_64.c         |   4
 arch/powerpc/mm/ppc_mmu_32.c         |   4
 arch/powerpc/mm/stab.c               |   4
 arch/powerpc/mm/tlb_hash32.c         |  15
 arch/powerpc/mm/tlb_nohash.c         | 133
 17 files changed, 363 insertions(+), 227 deletions(-)
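
Most of this diff is the mechanical LMB -> MEMBLOCK rename: <linux/lmb.h>
becomes <linux/memblock.h>, the global "lmb" structure becomes "memblock", and
each lmb_*() accessor gains a memblock_ prefix with unchanged arguments. The
remaining hunks are functional changes: hash-insertion failure reporting via
hash_failure_debug() in hash_utils_64.c and hugetlbpage-hash64.c, the
kunmap_atomic() -> kunmap_atomic_notypecheck() rename in highmem.c, Book3E
page-size and hardware-tablewalk probing split out into setup_page_sizes()
and setup_mmu_htw() in tlb_nohash.c, and removal of the unused 750-SMP
FINISH_FLUSH glue in tlb_hash32.c.

The sketch below is illustrative only and is not part of the diff; the
function and variable names are made up. It shows the before/after shape of a
typical boot-time caller, using the renamed allocator entry points that appear
in the hunks (memblock_alloc_base, memblock_end_of_DRAM):

	#include <linux/memblock.h>	/* previously <linux/lmb.h> */

	/* Hypothetical early allocation helper: only the identifiers change
	 * across the rename; the size/align/limit semantics are identical.
	 */
	static void __init example_early_alloc(unsigned long size,
					       unsigned long limit)
	{
		unsigned long table;

		/* Before the rename this read:
		 *   table = lmb_alloc_base(size, size, limit);
		 *   ...   = lmb_end_of_DRAM();
		 */
		table = memblock_alloc_base(size, size, limit);
		memset(__va(table), 0, size);

		pr_info("allocated %lx bytes at %lx (RAM top %llx)\n",
			size, table,
			(unsigned long long)memblock_end_of_DRAM());
	}
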
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 65abfcfaaa9e..1dc2fa5ce1bd 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -135,7 +135,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top) /* If the size of RAM is not an exact power of two, we may not * have covered RAM in its entirety with 16 and 4 MiB * pages. Consequently, restrict the top end of RAM currently - * allocable so that calls to the LMB to allocate PTEs for "tail" + * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail" * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index cdc7526e9c93..4b66a1ece6d8 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -104,9 +104,10 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa) } /* - * Set up one of the I/D BAT (block address translation) register pairs. - * The parameters are not checked; in particular size must be a power - * of 4 between 4k and 256M. + * Set up a variable-size TLB entry (tlbcam). The parameters are not checked; + * in particular size must be a power of 4 between 4k and 256M (or 1G, for cpus + * that support extended page sizes). Note that while some cpus support a + * page size of 4G, we don't allow its use here. */ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, unsigned long size, unsigned long flags, unsigned int pid) diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index a719f53921a5..3079f6b44cf5 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -68,9 +68,6 @@ _GLOBAL(__hash_page_4K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" @@ -347,9 +344,6 @@ _GLOBAL(__hash_page_4K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" @@ -687,9 +681,6 @@ _GLOBAL(__hash_page_64K) std r8,STK_PARM(r8)(r1) std r9,STK_PARM(r9)(r1) - /* Add _PAGE_PRESENT to access */ - ori r4,r4,_PAGE_PRESENT - /* Save non-volatile registers. 
* r31 will hold "old PTE" * r30 is "new PTE" diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 3ecdcec0a39e..09dffe6efa46 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -31,7 +31,7 @@ #include <linux/cache.h> #include <linux/init.h> #include <linux/signal.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/processor.h> #include <asm/pgtable.h> @@ -384,8 +384,8 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, printk(KERN_INFO "Huge page(16GB) memory: " "addr = 0x%lX size = 0x%lX pages = %d\n", phys_addr, block_size, expected_pages); - if (phys_addr + (16 * GB) <= lmb_end_of_DRAM()) { - lmb_reserve(phys_addr, block_size * expected_pages); + if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) { + memblock_reserve(phys_addr, block_size * expected_pages); add_gpage(phys_addr, block_size, expected_pages); } return 0; @@ -458,7 +458,7 @@ static void __init htab_init_page_sizes(void) * and we have at least 1G of RAM at boot */ if (mmu_psize_defs[MMU_PAGE_16M].shift && - lmb_phys_mem_size() >= 0x40000000) + memblock_phys_mem_size() >= 0x40000000) mmu_vmemmap_psize = MMU_PAGE_16M; else if (mmu_psize_defs[MMU_PAGE_64K].shift) mmu_vmemmap_psize = MMU_PAGE_64K; @@ -520,7 +520,7 @@ static unsigned long __init htab_get_table_size(void) return 1UL << ppc64_pft_size; /* round mem_size up to next power of 2 */ - mem_size = lmb_phys_mem_size(); + mem_size = memblock_phys_mem_size(); rnd_mem_size = 1UL << __ilog2(mem_size); if (rnd_mem_size < mem_size) rnd_mem_size <<= 1; @@ -627,7 +627,7 @@ static void __init htab_initialize(void) else limit = 0; - table = lmb_alloc_base(htab_size_bytes, htab_size_bytes, limit); + table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); @@ -647,9 +647,9 @@ static void __init htab_initialize(void) prot = pgprot_val(PAGE_KERNEL); #ifdef CONFIG_DEBUG_PAGEALLOC - linear_map_hash_count = lmb_end_of_DRAM() >> PAGE_SHIFT; - linear_map_hash_slots = __va(lmb_alloc_base(linear_map_hash_count, - 1, lmb.rmo_size)); + linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; + linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, + 1, memblock.rmo_size)); memset(linear_map_hash_slots, 0, linear_map_hash_count); #endif /* CONFIG_DEBUG_PAGEALLOC */ @@ -659,16 +659,16 @@ static void __init htab_initialize(void) */ /* create bolted the linear mapping in the hash table */ - for (i=0; i < lmb.memory.cnt; i++) { - base = (unsigned long)__va(lmb.memory.region[i].base); - size = lmb.memory.region[i].size; + for (i=0; i < memblock.memory.cnt; i++) { + base = (unsigned long)__va(memblock.memory.region[i].base); + size = memblock.memory.region[i].size; DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); #ifdef CONFIG_U3_DART /* Do not map the DART space. Fortunately, it will be aligned - * in such a way that it will not cross two lmb regions and + * in such a way that it will not cross two memblock regions and * will fit within a single 16Mb page. * The DART space is assumed to be a full 16Mb region even if * we only use 2Mb of that space. 
We will use more of it later @@ -871,6 +871,18 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea) } #endif +void hash_failure_debug(unsigned long ea, unsigned long access, + unsigned long vsid, unsigned long trap, + int ssize, int psize, unsigned long pte) +{ + if (!printk_ratelimit()) + return; + pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n", + ea, access, current->comm); + pr_info(" trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n", + trap, vsid, ssize, psize, pte); +} + /* Result code is: * 0 - handled * 1 - normal page fault @@ -955,6 +967,17 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) return 1; } + /* Add _PAGE_PRESENT to the required access perm */ + access |= _PAGE_PRESENT; + + /* Pre-check access permissions (will be re-checked atomically + * in __hash_page_XX but this pre-check is a fast path + */ + if (access & ~pte_val(*ptep)) { + DBG_LOW(" no access !\n"); + return 1; + } + #ifdef CONFIG_HUGETLB_PAGE if (hugeshift) return __hash_page_huge(ea, access, vsid, ptep, trap, local, @@ -967,14 +990,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep), pte_val(*(ptep + PTRS_PER_PTE))); #endif - /* Pre-check access permissions (will be re-checked atomically - * in __hash_page_XX but this pre-check is a fast path - */ - if (access & ~pte_val(*ptep)) { - DBG_LOW(" no access !\n"); - return 1; - } - /* Do actual hashing */ #ifdef CONFIG_PPC_64K_PAGES /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ @@ -1033,6 +1048,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) local, ssize, spp); } + /* Dump some info in case of hash insertion failure, they should + * never happen so it is really useful to know if/when they do + */ + if (rc == -1) + hash_failure_debug(ea, access, vsid, trap, ssize, psize, + pte_val(*ptep)); #ifndef CONFIG_PPC_64K_PAGES DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); #else @@ -1051,8 +1072,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, void *pgdir; pte_t *ptep; unsigned long flags; - int local = 0; - int ssize; + int rc, ssize, local = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); @@ -1098,11 +1118,18 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Hash it in */ #ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) - __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(pgdir, ea)); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, + subpage_protection(pgdir, ea)); + + /* Dump some info in case of hash insertion failure, they should + * never happen so it is really useful to know if/when they do + */ + if (rc == -1) + hash_failure_debug(ea, access, vsid, trap, ssize, + mm->context.user_psize, pte_val(*ptep)); local_irq_restore(flags); } diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index c2186c74c85a..857d4173f9c6 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -52,7 +52,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) } EXPORT_SYMBOL(kmap_atomic_prot); -void kunmap_atomic(void *kvaddr, enum km_type type) +void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type) { #ifdef CONFIG_DEBUG_HIGHMEM unsigned long vaddr = (unsigned long) 
kvaddr & PAGE_MASK; @@ -74,4 +74,4 @@ void kunmap_atomic(void *kvaddr, enum km_type type) #endif pagefault_enable(); } -EXPORT_SYMBOL(kunmap_atomic); +EXPORT_SYMBOL(kunmap_atomic_notypecheck); diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 199539882f92..cc5c273086cf 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -21,21 +21,13 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, unsigned long old_pte, new_pte; unsigned long va, rflags, pa, sz; long slot; - int err = 1; BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); /* Search the Linux page table for a match with va */ va = hpt_va(ea, vsid, ssize); - /* - * Check the user's access rights to the page. If access should be - * prevented then send the problem up to do_page_fault. - */ - if (unlikely(access & ~pte_val(*ptep))) - goto out; - /* - * At this point, we have a pte (old_pte) which can be used to build + /* At this point, we have a pte (old_pte) which can be used to build * or update an HPTE. There are 2 cases: * * 1. There is a valid (present) pte with no associated HPTE (this is @@ -49,9 +41,17 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, do { old_pte = pte_val(*ptep); - if (old_pte & _PAGE_BUSY) - goto out; + /* If PTE busy, retry the access */ + if (unlikely(old_pte & _PAGE_BUSY)) + return 0; + /* If PTE permissions don't match, take page fault */ + if (unlikely(access & ~old_pte)) + return 1; + /* Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access */ new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; + if (access & _PAGE_RW) + new_pte |= _PAGE_DIRTY; } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, old_pte, new_pte)); @@ -121,8 +121,16 @@ repeat: } } - if (unlikely(slot == -2)) - panic("hash_huge_page: pte_insert failed\n"); + /* + * Hypervisor failure. 
Restore old pte and return -1 + * similar to __hash_page_* + */ + if (unlikely(slot == -2)) { + *ptep = __pte(old_pte); + hash_failure_debug(ea, access, vsid, trap, ssize, + mmu_psize, old_pte); + return -1; + } new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); } @@ -131,9 +139,5 @@ repeat: * No need to use ldarx/stdcx here */ *ptep = __pte(new_pte & ~_PAGE_BUSY); - - err = 0; - - out: - return err; + return 0; } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 767333005eb4..6a6975dc2654 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -30,7 +30,7 @@ #include <linux/highmem.h> #include <linux/initrd.h> #include <linux/pagemap.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/gfp.h> #include <asm/pgalloc.h> @@ -136,17 +136,17 @@ void __init MMU_init(void) /* parse args from command line */ MMU_setup(); - if (lmb.memory.cnt > 1) { + if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII - lmb.memory.cnt = 1; - lmb_analyze(); + memblock.memory.cnt = 1; + memblock_analyze(); printk(KERN_WARNING "Only using first contiguous memory region"); #else wii_memory_fixups(); #endif } - total_lowmem = total_memory = lmb_end_of_DRAM() - memstart_addr; + total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr; lowmem_end_addr = memstart_addr + total_lowmem; #ifdef CONFIG_FSL_BOOKE @@ -161,8 +161,8 @@ void __init MMU_init(void) lowmem_end_addr = memstart_addr + total_lowmem; #ifndef CONFIG_HIGHMEM total_memory = total_lowmem; - lmb_enforce_memory_limit(lowmem_end_addr); - lmb_analyze(); + memblock_enforce_memory_limit(lowmem_end_addr); + memblock_analyze(); #endif /* CONFIG_HIGHMEM */ } @@ -200,7 +200,7 @@ void __init *early_get_page(void) if (init_bootmem_done) { p = alloc_bootmem_pages(PAGE_SIZE); } else { - p = __va(lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, + p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, __initial_memory_limit_addr)); } return p; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index e267f223fdff..71f1415e2472 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -40,7 +40,7 @@ #include <linux/nodemask.h> #include <linux/module.h> #include <linux/poison.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/hugetlb.h> #include <linux/slab.h> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 0f594d774bf7..1a84a8d00005 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -32,7 +32,7 @@ #include <linux/initrd.h> #include <linux/pagemap.h> #include <linux/suspend.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/hugetlb.h> #include <asm/pgalloc.h> @@ -83,13 +83,13 @@ int page_is_ram(unsigned long pfn) #else unsigned long paddr = (pfn << PAGE_SHIFT); int i; - for (i=0; i < lmb.memory.cnt; i++) { + for (i=0; i < memblock.memory.cnt; i++) { unsigned long base; - base = lmb.memory.region[i].base; + base = memblock.memory.region[i].base; if ((paddr >= base) && - (paddr < (base + lmb.memory.region[i].size))) { + (paddr < (base + memblock.memory.region[i].size))) { return 1; } } @@ -142,14 +142,14 @@ int arch_add_memory(int nid, u64 start, u64 size) /* * walk_memory_resource() needs to make sure there is no holes in a given * memory range. PPC64 does not maintain the memory layout in /proc/iomem. - * Instead it maintains it in lmb.memory structures. Walk through the + * Instead it maintains it in memblock.memory structures. Walk through the * memory regions, find holes and callback for contiguous regions. 
*/ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - struct lmb_property res; + struct memblock_property res; unsigned long pfn, len; u64 end; int ret = -1; @@ -158,7 +158,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, res.size = (u64) nr_pages << PAGE_SHIFT; end = res.base + res.size - 1; - while ((res.base < end) && (lmb_find(&res) >= 0)) { + while ((res.base < end) && (memblock_find(&res) >= 0)) { pfn = (unsigned long)(res.base >> PAGE_SHIFT); len = (unsigned long)(res.size >> PAGE_SHIFT); ret = (*func)(pfn, len, arg); @@ -184,8 +184,8 @@ void __init do_init_bootmem(void) unsigned long total_pages; int boot_mapsize; - max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; - total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; + max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; + total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; #ifdef CONFIG_HIGHMEM total_pages = total_lowmem >> PAGE_SHIFT; max_low_pfn = lowmem_end_addr >> PAGE_SHIFT; @@ -198,16 +198,16 @@ void __init do_init_bootmem(void) */ bootmap_pages = bootmem_bootmap_pages(total_pages); - start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); + start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); min_low_pfn = MEMORY_START >> PAGE_SHIFT; boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); /* Add active regions with valid PFNs */ - for (i = 0; i < lmb.memory.cnt; i++) { + for (i = 0; i < memblock.memory.cnt; i++) { unsigned long start_pfn, end_pfn; - start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); + start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -218,17 +218,17 @@ void __init do_init_bootmem(void) free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); /* reserve the sections we're already using */ - for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long addr = lmb.reserved.region[i].base + - lmb_size_bytes(&lmb.reserved, i) - 1; + for (i = 0; i < memblock.reserved.cnt; i++) { + unsigned long addr = memblock.reserved.region[i].base + + memblock_size_bytes(&memblock.reserved, i) - 1; if (addr < lowmem_end_addr) - reserve_bootmem(lmb.reserved.region[i].base, - lmb_size_bytes(&lmb.reserved, i), + reserve_bootmem(memblock.reserved.region[i].base, + memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); - else if (lmb.reserved.region[i].base < lowmem_end_addr) { + else if (memblock.reserved.region[i].base < lowmem_end_addr) { unsigned long adjusted_size = lowmem_end_addr - - lmb.reserved.region[i].base; - reserve_bootmem(lmb.reserved.region[i].base, + memblock.reserved.region[i].base; + reserve_bootmem(memblock.reserved.region[i].base, adjusted_size, BOOTMEM_DEFAULT); } } @@ -236,9 +236,9 @@ void __init do_init_bootmem(void) free_bootmem_with_active_regions(0, max_pfn); /* reserve the sections we're already using */ - for (i = 0; i < lmb.reserved.cnt; i++) - reserve_bootmem(lmb.reserved.region[i].base, - lmb_size_bytes(&lmb.reserved, i), + for (i = 0; i < memblock.reserved.cnt; i++) + reserve_bootmem(memblock.reserved.region[i].base, + memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); #endif @@ -251,20 +251,20 @@ void __init do_init_bootmem(void) /* mark pages that don't exist as nosave */ static int 
__init mark_nonram_nosave(void) { - unsigned long lmb_next_region_start_pfn, - lmb_region_max_pfn; + unsigned long memblock_next_region_start_pfn, + memblock_region_max_pfn; int i; - for (i = 0; i < lmb.memory.cnt - 1; i++) { - lmb_region_max_pfn = - (lmb.memory.region[i].base >> PAGE_SHIFT) + - (lmb.memory.region[i].size >> PAGE_SHIFT); - lmb_next_region_start_pfn = - lmb.memory.region[i+1].base >> PAGE_SHIFT; + for (i = 0; i < memblock.memory.cnt - 1; i++) { + memblock_region_max_pfn = + (memblock.memory.region[i].base >> PAGE_SHIFT) + + (memblock.memory.region[i].size >> PAGE_SHIFT); + memblock_next_region_start_pfn = + memblock.memory.region[i+1].base >> PAGE_SHIFT; - if (lmb_region_max_pfn < lmb_next_region_start_pfn) - register_nosave_region(lmb_region_max_pfn, - lmb_next_region_start_pfn); + if (memblock_region_max_pfn < memblock_next_region_start_pfn) + register_nosave_region(memblock_region_max_pfn, + memblock_next_region_start_pfn); } return 0; @@ -275,8 +275,8 @@ static int __init mark_nonram_nosave(void) */ void __init paging_init(void) { - unsigned long total_ram = lmb_phys_mem_size(); - phys_addr_t top_of_ram = lmb_end_of_DRAM(); + unsigned long total_ram = memblock_phys_mem_size(); + phys_addr_t top_of_ram = memblock_end_of_DRAM(); unsigned long max_zone_pfns[MAX_NR_ZONES]; #ifdef CONFIG_PPC32 @@ -327,7 +327,7 @@ void __init mem_init(void) swiotlb_init(1); #endif - num_physpages = lmb.memory.size >> PAGE_SHIFT; + num_physpages = memblock.memory.size >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -364,7 +364,7 @@ void __init mem_init(void) highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT; for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { struct page *page = pfn_to_page(pfn); - if (lmb_is_reserved(pfn << PAGE_SHIFT)) + if (memblock_is_reserved(pfn << PAGE_SHIFT)) continue; ClearPageReserved(page); init_page_count(page); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 80d110635d24..002878ccf90b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -17,7 +17,7 @@ #include <linux/nodemask.h> #include <linux/cpu.h> #include <linux/notifier.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/of.h> #include <linux/pfn.h> #include <asm/sparsemem.h> @@ -42,6 +42,12 @@ EXPORT_SYMBOL(node_data); static int min_common_depth; static int n_mem_addr_cells, n_mem_size_cells; +static int form1_affinity; + +#define MAX_DISTANCE_REF_POINTS 4 +static int distance_ref_points_depth; +static const unsigned int *distance_ref_points; +static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS]; /* * Allocate node_to_cpumask_map based on number of available nodes @@ -204,6 +210,39 @@ static const u32 *of_get_usable_memory(struct device_node *memory) return prop; } +int __node_distance(int a, int b) +{ + int i; + int distance = LOCAL_DISTANCE; + + if (!form1_affinity) + return distance; + + for (i = 0; i < distance_ref_points_depth; i++) { + if (distance_lookup_table[a][i] == distance_lookup_table[b][i]) + break; + + /* Double the distance for each NUMA level */ + distance *= 2; + } + + return distance; +} + +static void initialize_distance_lookup_table(int nid, + const unsigned int *associativity) +{ + int i; + + if (!form1_affinity) + return; + + for (i = 0; i < distance_ref_points_depth; i++) { + distance_lookup_table[nid][i] = + associativity[distance_ref_points[i]]; + } +} + /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa * info is found. 
*/ @@ -225,6 +264,10 @@ static int of_node_to_nid_single(struct device_node *device) /* POWER4 LPAR uses 0xffff as invalid node */ if (nid == 0xffff || nid >= MAX_NUMNODES) nid = -1; + + if (nid > 0 && tmp[0] >= distance_ref_points_depth) + initialize_distance_lookup_table(nid, tmp); + out: return nid; } @@ -251,26 +294,10 @@ int of_node_to_nid(struct device_node *device) } EXPORT_SYMBOL_GPL(of_node_to_nid); -/* - * In theory, the "ibm,associativity" property may contain multiple - * associativity lists because a resource may be multiply connected - * into the machine. This resource then has different associativity - * characteristics relative to its multiple connections. We ignore - * this for now. We also assume that all cpu and memory sets have - * their distances represented at a common level. This won't be - * true for hierarchical NUMA. - * - * In any case the ibm,associativity-reference-points should give - * the correct depth for a normal NUMA system. - * - * - Dave Hansen <haveblue@us.ibm.com> - */ static int __init find_min_common_depth(void) { - int depth, index; - const unsigned int *ref_points; + int depth; struct device_node *rtas_root; - unsigned int len; struct device_node *chosen; const char *vec5; @@ -280,18 +307,28 @@ static int __init find_min_common_depth(void) return -1; /* - * this property is 2 32-bit integers, each representing a level of - * depth in the associativity nodes. The first is for an SMP - * configuration (should be all 0's) and the second is for a normal - * NUMA configuration. + * This property is a set of 32-bit integers, each representing + * an index into the ibm,associativity nodes. + * + * With form 0 affinity the first integer is for an SMP configuration + * (should be all 0's) and the second is for a normal NUMA + * configuration. We have only one level of NUMA. + * + * With form 1 affinity the first integer is the most significant + * NUMA boundary and the following are progressively less significant + * boundaries. There can be more than one level of NUMA. */ - index = 1; - ref_points = of_get_property(rtas_root, - "ibm,associativity-reference-points", &len); + distance_ref_points = of_get_property(rtas_root, + "ibm,associativity-reference-points", + &distance_ref_points_depth); + + if (!distance_ref_points) { + dbg("NUMA: ibm,associativity-reference-points not found.\n"); + goto err; + } + + distance_ref_points_depth /= sizeof(int); - /* - * For form 1 affinity information we want the first field - */ #define VEC5_AFFINITY_BYTE 5 #define VEC5_AFFINITY 0x80 chosen = of_find_node_by_path("/chosen"); @@ -299,19 +336,38 @@ static int __init find_min_common_depth(void) vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL); if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) { dbg("Using form 1 affinity\n"); - index = 0; + form1_affinity = 1; } } - if ((len >= 2 * sizeof(unsigned int)) && ref_points) { - depth = ref_points[index]; + if (form1_affinity) { + depth = distance_ref_points[0]; } else { - dbg("NUMA: ibm,associativity-reference-points not found.\n"); - depth = -1; + if (distance_ref_points_depth < 2) { + printk(KERN_WARNING "NUMA: " + "short ibm,associativity-reference-points\n"); + goto err; + } + + depth = distance_ref_points[1]; } - of_node_put(rtas_root); + /* + * Warn and cap if the hardware supports more than + * MAX_DISTANCE_REF_POINTS domains. 
+ */ + if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) { + printk(KERN_WARNING "NUMA: distance array capped at " + "%d entries\n", MAX_DISTANCE_REF_POINTS); + distance_ref_points_depth = MAX_DISTANCE_REF_POINTS; + } + + of_node_put(rtas_root); return depth; + +err: + of_node_put(rtas_root); + return -1; } static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells) @@ -351,7 +407,7 @@ struct of_drconf_cell { #define DRCONF_MEM_RESERVED 0x00000080 /* - * Read the next lmb list entry from the ibm,dynamic-memory property + * Read the next memblock list entry from the ibm,dynamic-memory property * and return the information in the provided of_drconf_cell structure. */ static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) @@ -372,8 +428,8 @@ static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) /* * Retreive and validate the ibm,dynamic-memory property of the device tree. * - * The layout of the ibm,dynamic-memory property is a number N of lmb - * list entries followed by N lmb list entries. Each lmb list entry + * The layout of the ibm,dynamic-memory property is a number N of memblock + * list entries followed by N memblock list entries. Each memblock list entry * contains information as layed out in the of_drconf_cell struct above. */ static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) @@ -540,19 +596,19 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size) { /* - * We use lmb_end_of_DRAM() in here instead of memory_limit because + * We use memblock_end_of_DRAM() in here instead of memory_limit because * we've already adjusted it for the limit and it takes care of * having memory holes below the limit. Also, in the case of * iommu_is_off, memory_limit is not set but is implicitly enforced. */ - if (start + size <= lmb_end_of_DRAM()) + if (start + size <= memblock_end_of_DRAM()) return size; - if (start >= lmb_end_of_DRAM()) + if (start >= memblock_end_of_DRAM()) return 0; - return lmb_end_of_DRAM() - start; + return memblock_end_of_DRAM() - start; } /* @@ -731,7 +787,7 @@ new_range: } /* - * Now do the same thing for each LMB listed in the ibm,dynamic-memory + * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory * property in the ibm,dynamic-reconfiguration-memory node. 
*/ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); @@ -743,8 +799,8 @@ new_range: static void __init setup_nonnuma(void) { - unsigned long top_of_ram = lmb_end_of_DRAM(); - unsigned long total_ram = lmb_phys_mem_size(); + unsigned long top_of_ram = memblock_end_of_DRAM(); + unsigned long total_ram = memblock_phys_mem_size(); unsigned long start_pfn, end_pfn; unsigned int i, nid = 0; @@ -753,9 +809,9 @@ static void __init setup_nonnuma(void) printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); - for (i = 0; i < lmb.memory.cnt; ++i) { - start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); + for (i = 0; i < memblock.memory.cnt; ++i) { + start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); fake_numa_create_new_node(end_pfn, &nid); add_active_range(nid, start_pfn, end_pfn); @@ -813,7 +869,7 @@ static void __init dump_numa_memory_topology(void) count = 0; - for (i = 0; i < lmb_end_of_DRAM(); + for (i = 0; i < memblock_end_of_DRAM(); i += (1 << SECTION_SIZE_BITS)) { if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { if (count == 0) @@ -833,7 +889,7 @@ static void __init dump_numa_memory_topology(void) } /* - * Allocate some memory, satisfying the lmb or bootmem allocator where + * Allocate some memory, satisfying the memblock or bootmem allocator where * required. nid is the preferred node and end is the physical address of * the highest address in the node. * @@ -847,11 +903,11 @@ static void __init *careful_zallocation(int nid, unsigned long size, int new_nid; unsigned long ret_paddr; - ret_paddr = __lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT); + ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT); /* retry over all memory */ if (!ret_paddr) - ret_paddr = __lmb_alloc_base(size, align, lmb_end_of_DRAM()); + ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM()); if (!ret_paddr) panic("numa.c: cannot allocate %lu bytes for node %d", @@ -861,14 +917,14 @@ static void __init *careful_zallocation(int nid, unsigned long size, /* * We initialize the nodes in numeric order: 0, 1, 2... - * and hand over control from the LMB allocator to the + * and hand over control from the MEMBLOCK allocator to the * bootmem allocator. If this function is called for * node 5, then we know that all nodes <5 are using the - * bootmem allocator instead of the LMB allocator. + * bootmem allocator instead of the MEMBLOCK allocator. * * So, check the nid from which this allocation came * and double check to see if we need to use bootmem - * instead of the LMB. We don't free the LMB memory + * instead of the MEMBLOCK. We don't free the MEMBLOCK memory * since it would be useless. 
*/ new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT); @@ -893,9 +949,9 @@ static void mark_reserved_regions_for_nid(int nid) struct pglist_data *node = NODE_DATA(nid); int i; - for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long physbase = lmb.reserved.region[i].base; - unsigned long size = lmb.reserved.region[i].size; + for (i = 0; i < memblock.reserved.cnt; i++) { + unsigned long physbase = memblock.reserved.region[i].base; + unsigned long size = memblock.reserved.region[i].size; unsigned long start_pfn = physbase >> PAGE_SHIFT; unsigned long end_pfn = PFN_UP(physbase + size); struct node_active_region node_ar; @@ -903,7 +959,7 @@ static void mark_reserved_regions_for_nid(int nid) node->node_spanned_pages; /* - * Check to make sure that this lmb.reserved area is + * Check to make sure that this memblock.reserved area is * within the bounds of the node that we care about. * Checking the nid of the start and end points is not * sufficient because the reserved area could span the @@ -961,7 +1017,7 @@ void __init do_init_bootmem(void) int nid; min_low_pfn = 0; - max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; max_pfn = max_low_pfn; if (parse_numa_properties()) @@ -1038,7 +1094,7 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT; free_area_init_nodes(max_zone_pfns); } @@ -1113,7 +1169,7 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, /* * Find the node associated with a hot added memory section for memory * represented in the device tree as a node (i.e. memory@XXXX) for - * each lmb. + * each memblock. */ int hot_add_node_scn_to_nid(unsigned long scn_addr) { @@ -1154,8 +1210,8 @@ int hot_add_node_scn_to_nid(unsigned long scn_addr) /* * Find the node associated with a hot added memory section. Section - * corresponds to a SPARSEMEM section, not an LMB. It is assumed that - * sections are fully contained within a single LMB. + * corresponds to a SPARSEMEM section, not an MEMBLOCK. It is assumed that + * sections are fully contained within a single MEMBLOCK. 
*/ int hot_add_scn_to_nid(unsigned long scn_addr) { diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index ebc2f38eb381..2c7e801ab20b 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -92,7 +92,6 @@ static void pte_free_rcu_callback(struct rcu_head *head) static void pte_free_submit(struct pte_freelist_batch *batch) { - INIT_RCU_HEAD(&batch->rcu); call_rcu(&batch->rcu, pte_free_rcu_callback); } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 9fc02dc72ce9..a87ead0138b4 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -26,7 +26,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/highmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/slab.h> #include <asm/pgtable.h> @@ -115,11 +115,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *ptepage; -#ifdef CONFIG_HIGHPTE - gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT | __GFP_ZERO; -#else gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; -#endif ptepage = alloc_pages(flags, 0); if (!ptepage) @@ -202,7 +198,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, * mem_init() sets high_memory so only do the check after that. */ if (mem_init_done && (p < virt_to_phys(high_memory)) && - !(__allow_ioremap_reserved && lmb_is_region_reserved(p, size))) { + !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) { printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n", (unsigned long long)p, __builtin_return_address(0)); return NULL; @@ -335,7 +331,7 @@ void __init mapin_ram(void) s = mmu_mapin_ram(top); __mapin_ram_chunk(s, top); - top = lmb_end_of_DRAM(); + top = memblock_end_of_DRAM(); s = wii_mmu_mapin_mem2(top); __mapin_ram_chunk(s, top); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index d050fc8d9714..21d6dfab7942 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -34,7 +34,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/bootmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/slab.h> #include <asm/pgalloc.h> @@ -67,7 +67,7 @@ static void *early_alloc_pgtable(unsigned long size) if (init_bootmem_done) pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS)); else - pt = __va(lmb_alloc_base(size, size, + pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS))); memset(pt, 0, size); diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f11c2cdcb0fe..f8a01829d64f 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -26,7 +26,7 @@ #include <linux/mm.h> #include <linux/init.h> #include <linux/highmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/prom.h> #include <asm/mmu.h> @@ -223,7 +223,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(lmb_alloc_base(Hash_size, Hash_size, + Hash = __va(memblock_alloc_base(Hash_size, Hash_size, __initial_memory_limit_addr)); cacheable_memzero(Hash, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 687fddaa24c5..446a01842a73 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -12,7 +12,7 @@ * 2 of the License, or (at your option) any later version. 
*/ -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/pgtable.h> #include <asm/mmu.h> @@ -252,7 +252,7 @@ void __init stabs_alloc(void) if (cpu == 0) continue; /* stab for CPU 0 is statically allocated */ - newstab = lmb_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, + newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, 1<<SID_SHIFT); newstab = (unsigned long)__va(newstab); diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index 8aaa8b7eb324..690566b66e8e 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -89,17 +89,6 @@ void tlb_flush(struct mmu_gather *tlb) * -- Cort */ -/* - * 750 SMP is a Bad Idea because the 750 doesn't broadcast all - * the cache operations on the bus. Hence we need to use an IPI - * to get the other CPU(s) to invalidate their TLBs. - */ -#ifdef CONFIG_SMP_750 -#define FINISH_FLUSH smp_send_tlb_invalidate(0) -#else -#define FINISH_FLUSH do { } while (0) -#endif - static void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end) { @@ -138,7 +127,6 @@ static void flush_range(struct mm_struct *mm, unsigned long start, void flush_tlb_kernel_range(unsigned long start, unsigned long end) { flush_range(&init_mm, start, end); - FINISH_FLUSH; } EXPORT_SYMBOL(flush_tlb_kernel_range); @@ -162,7 +150,6 @@ void flush_tlb_mm(struct mm_struct *mm) */ for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); - FINISH_FLUSH; } EXPORT_SYMBOL(flush_tlb_mm); @@ -179,7 +166,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); - FINISH_FLUSH; } EXPORT_SYMBOL(flush_tlb_page); @@ -192,6 +178,5 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { flush_range(vma->vm_mm, start, end); - FINISH_FLUSH; } EXPORT_SYMBOL(flush_tlb_range); diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index e81d5d67f834..fe391e942521 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -34,7 +34,7 @@ #include <linux/pagemap.h> #include <linux/preempt.h> #include <linux/spinlock.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/tlbflush.h> #include <asm/tlb.h> @@ -46,6 +46,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { .shift = 12, + .ind = 20, .enc = BOOK3E_PAGESZ_4K, }, [MMU_PAGE_16K] = { @@ -54,6 +55,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { }, [MMU_PAGE_64K] = { .shift = 16, + .ind = 28, .enc = BOOK3E_PAGESZ_64K, }, [MMU_PAGE_1M] = { @@ -62,6 +64,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { }, [MMU_PAGE_16M] = { .shift = 24, + .ind = 36, .enc = BOOK3E_PAGESZ_16M, }, [MMU_PAGE_256M] = { @@ -344,16 +347,108 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) } } -/* - * Early initialization of the MMU TLB code - */ -static void __early_init_mmu(int boot_cpu) +static void setup_page_sizes(void) +{ + unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); + unsigned int tlb0ps = mfspr(SPRN_TLB0PS); + unsigned int eptcfg = mfspr(SPRN_EPTCFG); + int i, psize; + + /* Look for supported direct sizes */ + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def = &mmu_psize_defs[psize]; + + if (tlb0ps & (1U << (def->shift - 10))) + def->flags |= MMU_PAGE_SIZE_DIRECT; + } + + /* Indirect page sizes supported ? 
*/ + if ((tlb0cfg & TLBnCFG_IND) == 0) + goto no_indirect; + + /* Now, we only deal with one IND page size for each + * direct size. Hopefully all implementations today are + * unambiguous, but we might want to be careful in the + * future. + */ + for (i = 0; i < 3; i++) { + unsigned int ps, sps; + + sps = eptcfg & 0x1f; + eptcfg >>= 5; + ps = eptcfg & 0x1f; + eptcfg >>= 5; + if (!ps || !sps) + continue; + for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { + struct mmu_psize_def *def = &mmu_psize_defs[psize]; + + if (ps == (def->shift - 10)) + def->flags |= MMU_PAGE_SIZE_INDIRECT; + if (sps == (def->shift - 10)) + def->ind = ps + 10; + } + } + no_indirect: + + /* Cleanup array and print summary */ + pr_info("MMU: Supported page sizes\n"); + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def = &mmu_psize_defs[psize]; + const char *__page_type_names[] = { + "unsupported", + "direct", + "indirect", + "direct & indirect" + }; + if (def->flags == 0) { + def->shift = 0; + continue; + } + pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10), + __page_type_names[def->flags & 0x3]); + } +} + +static void setup_mmu_htw(void) { extern unsigned int interrupt_base_book3e; extern unsigned int exc_data_tlb_miss_htw_book3e; extern unsigned int exc_instruction_tlb_miss_htw_book3e; unsigned int *ibase = &interrupt_base_book3e; + + /* Check if HW tablewalk is present, and if yes, enable it by: + * + * - patching the TLB miss handlers to branch to the + * one dedicates to it + * + * - setting the global book3e_htw_enabled + */ + unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); + + if ((tlb0cfg & TLBnCFG_IND) && + (tlb0cfg & TLBnCFG_PT)) { + /* Our exceptions vectors start with a NOP and -then- a branch + * to deal with single stepping from userspace which stops on + * the second instruction. Thus we need to patch the second + * instruction of the exception, not the first one + */ + patch_branch(ibase + (0x1c0 / 4) + 1, + (unsigned long)&exc_data_tlb_miss_htw_book3e, 0); + patch_branch(ibase + (0x1e0 / 4) + 1, + (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0); + book3e_htw_enabled = 1; + } + pr_info("MMU: Book3E Page Tables %s\n", + book3e_htw_enabled ? "Enabled" : "Disabled"); +} + +/* + * Early initialization of the MMU TLB code + */ +static void __early_init_mmu(int boot_cpu) +{ unsigned int mas4; /* XXX This will have to be decided at runtime, but right @@ -370,35 +465,17 @@ static void __early_init_mmu(int boot_cpu) */ mmu_vmemmap_psize = MMU_PAGE_16M; - /* Check if HW tablewalk is present, and if yes, enable it by: - * - * - patching the TLB miss handlers to branch to the - * one dedicates to it - * - * - setting the global book3e_htw_enabled - * - * - Set MAS4:INDD and default page size - */ - /* XXX This code only checks for TLB 0 capabilities and doesn't * check what page size combos are supported by the HW. It * also doesn't handle the case where a separate array holds * the IND entries from the array loaded by the PT. */ if (boot_cpu) { - unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); + /* Look for supported page sizes */ + setup_page_sizes(); - /* Check if HW loader is supported */ - if ((tlb0cfg & TLBnCFG_IND) && - (tlb0cfg & TLBnCFG_PT)) { - patch_branch(ibase + (0x1c0 / 4), - (unsigned long)&exc_data_tlb_miss_htw_book3e, 0); - patch_branch(ibase + (0x1e0 / 4), - (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0); - book3e_htw_enabled = 1; - } - pr_info("MMU: Book3E Page Tables %s\n", - book3e_htw_enabled ? 
"Enabled" : "Disabled"); + /* Look for HW tablewalk support */ + setup_mmu_htw(); } /* Set MAS4 based on page table setting */ @@ -426,7 +503,7 @@ static void __early_init_mmu(int boot_cpu) /* Set the global containing the top of the linear mapping * for use by the TLB miss code */ - linear_map_top = lmb_end_of_DRAM(); + linear_map_top = memblock_end_of_DRAM(); /* A sync won't hurt us after mucking around with * the MMU configuration |