From 32d206eb5637d8cf73d9c70f7680de2a7193ce8b Mon Sep 17 00:00:00 2001
From: Kumar Gala
Date: Thu, 19 May 2011 20:09:28 +0000
Subject: powerpc/book3e: Clarify HW table walk enable/disable message

Before, if we didn't support or enable HW table walk we'd get a message
like:

    MMU: Book3E Page Tables Disabled

Which is a bit misleading. Now it will say:

    MMU: Book3E HW tablewalk not supported

Signed-off-by: Kumar Gala
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/tlb_nohash.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 0bdad3aecc67..569349916471 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -473,8 +473,8 @@ static void setup_mmu_htw(void)
 			 (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
 		book3e_htw_enabled = 1;
 	}
-	pr_info("MMU: Book3E Page Tables %s\n",
-		book3e_htw_enabled ? "Enabled" : "Disabled");
+	pr_info("MMU: Book3E HW tablewalk %s\n",
+		book3e_htw_enabled ? "enabled" : "not supported");
 }
 
 /*
-- 
cgit v1.2.3


From f67f4ef5fcdfdeeddcb0ed4ab2c85d9bb4185d5f Mon Sep 17 00:00:00 2001
From: Scott Wood
Date: Wed, 22 Jun 2011 11:25:42 +0000
Subject: powerpc/book3e-64: use a separate TLB handler when linear map is bolted

On MMUs such as FSL where we can guarantee the entire linear mapping is
bolted, we don't need to worry about linear TLB misses. If on top of
that we do a full table walk, we get rid of all recursive TLB faults,
and can dispense with some state saving. This gains a few percent on
TLB-miss-heavy workloads, and around 50% on a benchmark that had a high
rate of virtual page table faults under the normal handler.

While touching the EX_TLB layout, remove EX_TLB_MMUCR0, EX_TLB_SRR0,
and EX_TLB_SRR1 as they're not used.

[BenH: Fixed build with 64K pages (wsp config)]

Signed-off-by: Scott Wood
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/include/asm/exception-64e.h |  52 +++++---
 arch/powerpc/include/asm/paca.h          |   7 +-
 arch/powerpc/mm/tlb_low_64e.S            | 206 +++++++++++++++++++++++++++++++
 arch/powerpc/mm/tlb_nohash.c             |  35 ++++--
 4 files changed, 266 insertions(+), 34 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index 6d53f311d942..ac13addb8495 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -48,30 +48,33 @@
 #define EX_R14		(4 * 8)
 #define EX_R15		(5 * 8)
 
-/* The TLB miss exception uses different slots */
+/*
+ * The TLB miss exception uses different slots.
+ *
+ * The bolted variant uses only the first six fields,
+ * which in combination with pgd and kernel_pgd fits in
+ * one 64-byte cache line.
+ */
 #define EX_TLB_R10	( 0 * 8)
 #define EX_TLB_R11	( 1 * 8)
-#define EX_TLB_R12	( 2 * 8)
-#define EX_TLB_R13	( 3 * 8)
-#define EX_TLB_R14	( 4 * 8)
-#define EX_TLB_R15	( 5 * 8)
-#define EX_TLB_R16	( 6 * 8)
-#define EX_TLB_CR	( 7 * 8)
+#define EX_TLB_R14	( 2 * 8)
+#define EX_TLB_R15	( 3 * 8)
+#define EX_TLB_R16	( 4 * 8)
+#define EX_TLB_CR	( 5 * 8)
+#define EX_TLB_R12	( 6 * 8)
+#define EX_TLB_R13	( 7 * 8)
 #define EX_TLB_DEAR	( 8 * 8) /* Level 0 and 2 only */
 #define EX_TLB_ESR	( 9 * 8) /* Level 0 and 2 only */
 #define EX_TLB_SRR0	(10 * 8)
 #define EX_TLB_SRR1	(11 * 8)
-#define EX_TLB_MMUCR0	(12 * 8) /* Level 0 */
-#define EX_TLB_MAS1	(12 * 8) /* Level 0 */
-#define EX_TLB_MAS2	(13 * 8) /* Level 0 */
 #ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-#define EX_TLB_R8	(14 * 8)
-#define EX_TLB_R9	(15 * 8)
-#define EX_TLB_LR	(16 * 8)
-#define EX_TLB_SIZE	(17 * 8)
+#define EX_TLB_R8	(12 * 8)
+#define EX_TLB_R9	(13 * 8)
+#define EX_TLB_LR	(14 * 8)
+#define EX_TLB_SIZE	(15 * 8)
 #else
-#define EX_TLB_SIZE	(14 * 8)
+#define EX_TLB_SIZE	(12 * 8)
 #endif
 
 #define START_EXCEPTION(label) \
@@ -168,6 +171,16 @@ exc_##label##_book3e:
 	ld	r9,EX_TLB_R9(r12); \
 	ld	r8,EX_TLB_R8(r12); \
 	mtlr	r16;
+#define TLB_MISS_PROLOG_STATS_BOLTED \
+	mflr	r10; \
+	std	r8,PACA_EXTLB+EX_TLB_R8(r13); \
+	std	r9,PACA_EXTLB+EX_TLB_R9(r13); \
+	std	r10,PACA_EXTLB+EX_TLB_LR(r13);
+#define TLB_MISS_RESTORE_STATS_BOLTED \
+	ld	r16,PACA_EXTLB+EX_TLB_LR(r13); \
+	ld	r9,PACA_EXTLB+EX_TLB_R9(r13); \
+	ld	r8,PACA_EXTLB+EX_TLB_R8(r13); \
+	mtlr	r16;
 #define TLB_MISS_STATS_D(name) \
 	addi	r9,r13,MMSTAT_DSTATS+name; \
 	bl	.tlb_stat_inc;
@@ -183,17 +196,20 @@ exc_##label##_book3e:
 61:	addi	r9,r13,MMSTAT_ISTATS+name; \
 62:	bl	.tlb_stat_inc;
 #define TLB_MISS_STATS_SAVE_INFO \
-	std	r14,EX_TLB_ESR(r12);	/* save ESR */ \
-
-
+	std	r14,EX_TLB_ESR(r12);	/* save ESR */
+#define TLB_MISS_STATS_SAVE_INFO_BOLTED \
+	std	r14,PACA_EXTLB+EX_TLB_ESR(r13);	/* save ESR */
 #else
 #define TLB_MISS_PROLOG_STATS
 #define TLB_MISS_RESTORE_STATS
+#define TLB_MISS_PROLOG_STATS_BOLTED
+#define TLB_MISS_RESTORE_STATS_BOLTED
 #define TLB_MISS_STATS_D(name)
 #define TLB_MISS_STATS_I(name)
 #define TLB_MISS_STATS_X(name)
 #define TLB_MISS_STATS_Y(name)
 #define TLB_MISS_STATS_SAVE_INFO
+#define TLB_MISS_STATS_SAVE_INFO_BOLTED
 #endif
 
 #define SET_IVOR(vector_number, vector_offset) \
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 74126765106a..c1f65f597920 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -103,11 +103,12 @@ struct paca_struct {
 #endif /* CONFIG_PPC_STD_MMU_64 */
 
 #ifdef CONFIG_PPC_BOOK3E
-	pgd_t *pgd;			/* Current PGD */
-	pgd_t *kernel_pgd;		/* Kernel PGD */
 	u64 exgen[8] __attribute__((aligned(0x80)));
+	/* Keep pgd in the same cacheline as the start of extlb */
+	pgd_t *pgd __attribute__((aligned(0x80))); /* Current PGD */
+	pgd_t *kernel_pgd;		/* Kernel PGD */
 	/* We can have up to 3 levels of reentrancy in the TLB miss handler */
-	u64 extlb[3][EX_TLB_SIZE / sizeof(u64)] __attribute__((aligned(0x80)));
+	u64 extlb[3][EX_TLB_SIZE / sizeof(u64)];
 	u64 exmc[8];		/* used for machine checks */
 	u64 excrit[8];		/* used for crit interrupts */
 	u64 exdbg[8];		/* used for debug interrupts */
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index af0892209417..4ebb34bc01d6 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -30,6 +30,212 @@
 #define VPTE_PGD_SHIFT	(VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
 #define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
 
+/**********************************************************************
+ *                                                                    *
+ * TLB miss handling for Book3E with a bolted linear mapping          *
+ * No virtual page table, no nested TLB misses                        *
+ *                                                                    *
+ **********************************************************************/
+
+.macro tlb_prolog_bolted addr
+	mtspr	SPRN_SPRG_TLB_SCRATCH,r13
+	mfspr	r13,SPRN_SPRG_PACA
+	std	r10,PACA_EXTLB+EX_TLB_R10(r13)
+	mfcr	r10
+	std	r11,PACA_EXTLB+EX_TLB_R11(r13)
+	std	r16,PACA_EXTLB+EX_TLB_R16(r13)
+	mfspr	r16,\addr		/* get faulting address */
+	std	r14,PACA_EXTLB+EX_TLB_R14(r13)
+	ld	r14,PACAPGD(r13)
+	std	r15,PACA_EXTLB+EX_TLB_R15(r13)
+	std	r10,PACA_EXTLB+EX_TLB_CR(r13)
+	TLB_MISS_PROLOG_STATS_BOLTED
+.endm
+
+.macro tlb_epilog_bolted
+	ld	r14,PACA_EXTLB+EX_TLB_CR(r13)
+	ld	r10,PACA_EXTLB+EX_TLB_R10(r13)
+	ld	r11,PACA_EXTLB+EX_TLB_R11(r13)
+	mtcr	r14
+	ld	r14,PACA_EXTLB+EX_TLB_R14(r13)
+	ld	r15,PACA_EXTLB+EX_TLB_R15(r13)
+	TLB_MISS_RESTORE_STATS_BOLTED
+	ld	r16,PACA_EXTLB+EX_TLB_R16(r13)
+	mfspr	r13,SPRN_SPRG_TLB_SCRATCH
+.endm
+
+/* Data TLB miss */
+	START_EXCEPTION(data_tlb_miss_bolted)
+	tlb_prolog_bolted SPRN_DEAR
+
+	/* We need _PAGE_PRESENT and _PAGE_ACCESSED set */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+	/* We pre-test some combination of permissions to avoid double
+	 * faults:
+	 *
+	 * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
+	 * ESR_ST is 0x00800000
+	 * _PAGE_BAP_SW is 0x00000010
+	 * So the shift is >> 19. This tests for supervisor writeability.
+	 * If the page happens to be supervisor writeable and not user
+	 * writeable, we will take a new fault later, but that should be
+	 * a rare enough case.
+	 *
+	 * We also move ESR_ST in _PAGE_DIRTY position
+	 * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
+	 *
+	 * MAS1 is preset for all we need except for TID that needs to
+	 * be cleared for kernel translations
+	 */
+
+	mfspr	r11,SPRN_ESR
+
+	srdi	r15,r16,60		/* get region */
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	bne-	dtlb_miss_fault_bolted
+
+	rlwinm	r10,r11,32-19,27,27
+	rlwimi	r10,r11,32-16,19,19
+	cmpwi	r15,0
+	ori	r10,r10,_PAGE_PRESENT
+	oris	r11,r10,_PAGE_ACCESSED@h
+
+	TLB_MISS_STATS_SAVE_INFO_BOLTED
+	bne	tlb_miss_kernel_bolted
+
+tlb_miss_common_bolted:
+/*
+ * This is the guts of the TLB miss handler for bolted-linear.
+ * We are entered with:
+ *
+ * r16 = faulting address
+ * r15 = crap (free to use)
+ * r14 = page table base
+ * r13 = PACA
+ * r11 = PTE permission mask
+ * r10 = crap (free to use)
+ */
+	rldicl	r15,r16,64-PGDIR_SHIFT+3,64-PGD_INDEX_SIZE-3
+	cmpldi	cr0,r14,0
+	clrrdi	r15,r15,3
+	beq	tlb_miss_fault_bolted
+
+BEGIN_MMU_FTR_SECTION
+	/* Set the TLB reservation and search for existing entry. Then load
+	 * the entry.
+	 */
+	PPC_TLBSRX_DOT(0,r16)
+	ldx	r14,r14,r15
+	beq	normal_tlb_miss_done
+MMU_FTR_SECTION_ELSE
+	ldx	r14,r14,r15
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
+
+#ifndef CONFIG_PPC_64K_PAGES
+	rldicl	r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+
+	cmpldi	cr0,r14,0
+	beq	tlb_miss_fault_bolted
+
+	ldx	r14,r14,r15
+#endif /* CONFIG_PPC_64K_PAGES */
+
+	rldicl	r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+
+	cmpldi	cr0,r14,0
+	beq	tlb_miss_fault_bolted
+
+	ldx	r14,r14,r15
+
+	rldicl	r15,r16,64-PAGE_SHIFT+3,64-PTE_INDEX_SIZE-3
+	clrrdi	r15,r15,3
+
+	cmpldi	cr0,r14,0
+	beq	tlb_miss_fault_bolted
+
+	ldx	r14,r14,r15
+
+	/* Check if required permissions are met */
+	andc.	r15,r11,r14
+	rldicr	r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+	bne-	tlb_miss_fault_bolted
+
+	/* Now we build the MAS:
+	 *
+	 * MAS 0   : Fully setup with defaults in MAS4 and TLBnCFG
+	 * MAS 1   : Almost fully setup
+	 *           - PID already updated by caller if necessary
+	 *           - TSIZE need change if !base page size, not
+	 *             yet implemented for now
+	 * MAS 2   : Defaults not useful, need to be redone
+	 * MAS 3+7 : Needs to be done
+	 */
+	clrrdi	r11,r16,12		/* Clear low crap in EA */
+	clrldi	r15,r15,12		/* Clear crap at the top */
+	rlwimi	r11,r14,32-19,27,31	/* Insert WIMGE */
+	rlwimi	r15,r14,32-8,22,25	/* Move in U bits */
+	mtspr	SPRN_MAS2,r11
+	andi.	r11,r14,_PAGE_DIRTY
+	rlwimi	r15,r14,32-2,26,31	/* Move in BAP bits */
+
+	/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+	bne	1f
+	li	r11,MAS3_SW|MAS3_UW
+	andc	r15,r15,r11
+1:
+	mtspr	SPRN_MAS7_MAS3,r15
+	tlbwe
+
+	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
+	tlb_epilog_bolted
+	rfi
+
+itlb_miss_kernel_bolted:
+	li	r11,_PAGE_PRESENT|_PAGE_BAP_SX	/* Base perm */
+	oris	r11,r11,_PAGE_ACCESSED@h
+tlb_miss_kernel_bolted:
+	mfspr	r10,SPRN_MAS1
+	ld	r14,PACA_KERNELPGD(r13)
+	cmpldi	cr0,r15,8		/* Check for vmalloc region */
+	rlwinm	r10,r10,0,16,1		/* Clear TID */
+	mtspr	SPRN_MAS1,r10
+	beq+	tlb_miss_common_bolted
+
+tlb_miss_fault_bolted:
+	/* We need to check if it was an instruction miss */
+	andi.	r10,r11,_PAGE_EXEC|_PAGE_BAP_SX
+	bne	itlb_miss_fault_bolted
+dtlb_miss_fault_bolted:
+	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
+	tlb_epilog_bolted
+	b	exc_data_storage_book3e
+itlb_miss_fault_bolted:
+	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
+	tlb_epilog_bolted
+	b	exc_instruction_storage_book3e
+
+/* Instruction TLB miss */
+	START_EXCEPTION(instruction_tlb_miss_bolted)
+	tlb_prolog_bolted SPRN_SRR0
+
+	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
+	srdi	r15,r16,60		/* get region */
+	TLB_MISS_STATS_SAVE_INFO_BOLTED
+	bne-	itlb_miss_fault_bolted
+
+	li	r11,_PAGE_PRESENT|_PAGE_EXEC	/* Base perm */
+
+	/* We do the user/kernel test for the PID here along with the RW test
+	 */
+
+	cmpldi	cr0,r15,0		/* Check for user region */
+	oris	r11,r11,_PAGE_ACCESSED@h
+	beq	tlb_miss_common_bolted
+	b	itlb_miss_kernel_bolted
 
 /**********************************************************************
  *                                                                    *
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 569349916471..3722185d1865 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -443,14 +443,27 @@ static void setup_page_sizes(void)
 	}
 }
 
-static void setup_mmu_htw(void)
+static void __patch_exception(int exc, unsigned long addr)
 {
 	extern unsigned int interrupt_base_book3e;
-	extern unsigned int exc_data_tlb_miss_htw_book3e;
-	extern unsigned int exc_instruction_tlb_miss_htw_book3e;
+	unsigned int *ibase = &interrupt_base_book3e;
+
+	/* Our exceptions vectors start with a NOP and -then- a branch
+	 * to deal with single stepping from userspace which stops on
+	 * the second instruction. Thus we need to patch the second
+	 * instruction of the exception, not the first one
+	 */
+
+	patch_branch(ibase + (exc / 4) + 1, addr, 0);
+}
 
-	unsigned int *ibase = &interrupt_base_book3e;
+#define patch_exception(exc, name) do { \
+	extern unsigned int name; \
+	__patch_exception((exc), (unsigned long)&name); \
+} while (0)
 
+static void setup_mmu_htw(void)
+{
 	/* Check if HW tablewalk is present, and if yes, enable it by:
 	 *
 	 * - patching the TLB miss handlers to branch to the
@@ -462,15 +475,8 @@ static void setup_mmu_htw(void)
 
 	if ((tlb0cfg & TLBnCFG_IND) &&
 	    (tlb0cfg & TLBnCFG_PT)) {
-		/* Our exceptions vectors start with a NOP and -then- a branch
-		 * to deal with single stepping from userspace which stops on
-		 * the second instruction. Thus we need to patch the second
-		 * instruction of the exception, not the first one
-		 */
-		patch_branch(ibase + (0x1c0 / 4) + 1,
-			     (unsigned long)&exc_data_tlb_miss_htw_book3e, 0);
-		patch_branch(ibase + (0x1e0 / 4) + 1,
-			     (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0);
+		patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
+		patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
 		book3e_htw_enabled = 1;
 	}
 	pr_info("MMU: Book3E HW tablewalk %s\n",
@@ -549,6 +555,9 @@ static void __early_init_mmu(int boot_cpu)
 		/* limit memory so we dont have linear faults */
 		memblock_enforce_memory_limit(linear_map_top);
 		memblock_analyze();
+
+		patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
+		patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
 	}
 #endif
-- 
cgit v1.2.3
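
The double-fault-avoidance trick in data_tlb_miss_bolted relies on two
bit-position coincidences that the handler comment spells out: ESR[ST]
shifted right by 19 lands on the supervisor-write PTE bit, and shifted
right by 11 lands on the dirty bit. The following standalone C check is a
sketch using the constants quoted in that comment (the kernel's names are
ESR_ST, _PAGE_BAP_SW and _PAGE_DIRTY; they are shortened here), plus the
64-byte cache line arithmetic claimed in the exception-64e.h comment:

    #include <assert.h>

    /* Constants as quoted in the handler comment above. */
    #define ESR_ST       0x00800000u  /* ESR "store" bit */
    #define PAGE_BAP_SW  0x00000010u  /* supervisor-write PTE bit */
    #define PAGE_DIRTY   0x00001000u  /* dirty PTE bit */

    int main(void)
    {
        /* ESR:ST lands on the supervisor-write bit after a 19-bit shift */
        assert((ESR_ST >> 19) == PAGE_BAP_SW);
        /* ...and on the dirty bit after an 11-bit shift */
        assert((ESR_ST >> 11) == PAGE_DIRTY);
        /* exception-64e.h claim: pgd + kernel_pgd (2 x 8 bytes) plus the
         * six bolted EX_TLB slots (R10, R11, R14, R15, R16, CR) fill
         * exactly one 64-byte cache line */
        assert(2 * 8 + 6 * 8 == 64);
        return 0;
    }
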
From 3d41e0f6d91cedcab08dff2f5e148b20c87a7432 Mon Sep 17 00:00:00 2001
From: Becky Bruce
Date: Tue, 28 Jun 2011 09:54:46 +0000
Subject: powerpc: mem_init should call memblock_is_reserved with phys_addr_t

This has been broken for a while but hasn't been an issue until now
because nobody was reserving regions at high addresses.

Signed-off-by: Becky Bruce
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/mem.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 57e545b84bf1..097b288779e2 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -337,8 +337,9 @@ void __init mem_init(void)
 		highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT;
 		for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
+			phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
 			struct page *page = pfn_to_page(pfn);
-			if (memblock_is_reserved(pfn << PAGE_SHIFT))
+			if (memblock_is_reserved(paddr))
 				continue;
 			ClearPageReserved(page);
 			init_page_count(page);
-- 
cgit v1.2.3
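
The bug being fixed is a shift done in 32-bit arithmetic before the
implicit widening to phys_addr_t: on a 32-bit platform with more than 32
bits of physical address (the case this commit cares about), pfn <<
PAGE_SHIFT drops the high bits for any page at or above 4GB. A minimal
sketch of the failure mode, assuming a 32-bit unsigned long and 4K pages:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    typedef uint64_t phys_addr_t;

    int main(void)
    {
        uint32_t pfn = 0x100000;	/* first page at 4GB */

        /* Old code: the shift happens in 32-bit arithmetic, so the
         * physical address wraps to 0 before being widened. */
        phys_addr_t wrong = (uint32_t)(pfn << PAGE_SHIFT);

        /* Fixed code: widen first, then shift. */
        phys_addr_t right = (phys_addr_t)pfn << PAGE_SHIFT;

        printf("wrong = 0x%llx, right = 0x%llx\n",
               (unsigned long long)wrong, (unsigned long long)right);
        return 0;	/* prints wrong = 0x0, right = 0x100000000 */
    }
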
From 2773fcc8c48b947c997ff345f3e453917883cdb5 Mon Sep 17 00:00:00 2001
From: Dave Carroll
Date: Sat, 18 Jun 2011 07:36:39 +0000
Subject: powerpc: Move free_initmem to common code

The free_initmem function is basically duplicated in mm/init_32 and
init_64, and is moved to the common 32/64-bit mm/mem.c.

All other sections except init were removed in v2.6.15 by
6c45ab992e4299c869fb26427944a8f8ea177024 (powerpc: Remove section free()
and linker script bits), and therefore the bulk of the executed code is
identical.

This patch also removes updating ppc_md.progress to NULL in the powermac
late_initcall.

Suggested-by: Milton Miller
Suggested-by: Benjamin Herrenschmidt
Signed-off-by: Dave Carroll
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/init_32.c               | 32 --------------------------------
 arch/powerpc/mm/init_64.c               | 16 ----------------
 arch/powerpc/mm/mem.c                   | 19 +++++++++++++++++++
 arch/powerpc/platforms/powermac/setup.c |  3 ---
 4 files changed, 19 insertions(+), 51 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 5de0f254dbb5..c77fef56dad6 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -191,38 +191,6 @@ void __init *early_get_page(void)
 	return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
 }
 
-/* Free up now-unused memory */
-static void free_sec(unsigned long start, unsigned long end, const char *name)
-{
-	unsigned long cnt = 0;
-
-	while (start < end) {
-		ClearPageReserved(virt_to_page(start));
-		init_page_count(virt_to_page(start));
-		free_page(start);
-		cnt++;
-		start += PAGE_SIZE;
-	}
-	if (cnt) {
-		printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name);
-		totalram_pages += cnt;
-	}
-}
-
-void free_initmem(void)
-{
-#define FREESEC(TYPE) \
-	free_sec((unsigned long)(&__ ## TYPE ## _begin), \
-		 (unsigned long)(&__ ## TYPE ## _end), \
-		 #TYPE);
-
-	printk ("Freeing unused kernel memory:");
-	FREESEC(init);
-	printk("\n");
-	ppc_md.progress = NULL;
-#undef FREESEC
-}
-
 #ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */
 void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 				phys_addr_t first_memblock_size)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index f6dbb4c20e64..e94b57fb79a0 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -83,22 +83,6 @@ EXPORT_SYMBOL_GPL(memstart_addr);
 phys_addr_t kernstart_addr;
 EXPORT_SYMBOL_GPL(kernstart_addr);
 
-void free_initmem(void)
-{
-	unsigned long addr;
-
-	addr = (unsigned long)__init_begin;
-	for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
-		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
-		ClearPageReserved(virt_to_page(addr));
-		init_page_count(virt_to_page(addr));
-		free_page(addr);
-		totalram_pages++;
-	}
-	printk ("Freeing unused kernel memory: %luk freed\n",
-		((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
-}
-
 static void pgd_ctor(void *addr)
 {
 	memset(addr, 0, PGD_TABLE_SIZE);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 89cbfef5a7dd..e9379ce6f3be 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -383,6 +383,25 @@ void __init mem_init(void)
 	mem_init_done = 1;
 }
 
+void free_initmem(void)
+{
+	unsigned long addr;
+
+	ppc_md.progress = NULL;
+
+	addr = (unsigned long)__init_begin;
+	for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
+		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		free_page(addr);
+		totalram_pages++;
+	}
+	pr_info("Freeing unused kernel memory: %luk freed\n",
+		((unsigned long)__init_end -
+		(unsigned long)__init_begin) >> 10);
+}
+
 #ifdef CONFIG_BLK_DEV_INITRD
 void __init free_initrd_mem(unsigned long start, unsigned long end)
 {
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index aa45281bd296..a028f08309d6 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -355,9 +355,6 @@ static int initializing = 1;
 static int pmac_late_init(void)
 {
 	initializing = 0;
-	/* this is udbg (which is __init) and we can later use it during
-	 * cpu hotplug (in smp_core99_kick_cpu) */
-	ppc_md.progress = NULL;
 	return 0;
 }
 machine_late_initcall(powermac, pmac_late_init);
-- 
cgit v1.2.3
From a9c0f41b3a64955fd6f4e9d66ae1df1cbdee0cd0 Mon Sep 17 00:00:00 2001
From: Dave Carroll
Date: Sat, 18 Jun 2011 07:36:40 +0000
Subject: powerpc: Add printk companion for ppc_md.progress

This patch adds a printk companion to replace the udbg progress function
when initmem is freed.

Suggested-by: Milton Miller
Suggested-by: Benjamin Herrenschmidt
Signed-off-by: Dave Carroll
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/include/asm/setup.h   | 2 ++
 arch/powerpc/kernel/setup-common.c | 5 +++++
 arch/powerpc/mm/mem.c              | 2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index dae19342f0b9..c77cb7a7f4dc 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -3,4 +3,6 @@
 
 #include
 
+extern void ppc_printk_progress(char *s, unsigned short hex);
+
 #endif /* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 79fca2651b65..e053b1641de3 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -704,6 +704,11 @@ static int powerpc_debugfs_init(void)
 arch_initcall(powerpc_debugfs_init);
 #endif
 
+void ppc_printk_progress(char *s, unsigned short hex)
+{
+	pr_info("%s\n", s);
+}
+
 static int ppc_dflt_bus_notify(struct notifier_block *nb,
 				unsigned long action, void *data)
 {
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e9379ce6f3be..a3841bb75cbb 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -387,7 +387,7 @@ void free_initmem(void)
 {
 	unsigned long addr;
 
-	ppc_md.progress = NULL;
+	ppc_md.progress = ppc_printk_progress;
 
 	addr = (unsigned long)__init_begin;
 	for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
-- 
cgit v1.2.3
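
For context, ppc_md.progress is a hook that boot and platform code calls
at milestones, passing a short string and a 16-bit code. The early
implementations live in __init memory, which is why free_initmem()
previously had to NULL the pointer; with the two patches above it can
point at ppc_printk_progress instead, so later callers keep producing
output rather than calling freed code. A hypothetical call site, for
illustration only (the guard-and-call pattern is the kernel's; the
specific string and hex code here are illustrative):

    /* platform code, possibly running after free_initmem(): */
    if (ppc_md.progress)
        ppc_md.progress("smp_core99_kick_cpu", 0x346);
    /* After this series the call resolves to ppc_printk_progress(),
     * which simply does pr_info("%s\n", s) and ignores the hex code. */
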
From a8b0ca17b80e92faab46ee7179ba9e99ccb61233 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 27 Jun 2011 14:41:57 +0200
Subject: perf: Remove the nmi parameter from the swevent and overflow interface

The nmi parameter indicated if we could do wakeups from the current
context; if not, we would set some state and self-IPI and let the
resulting interrupt do the wakeup.

For the various event classes:

  - hardware: nmi=0; PMI is in fact an NMI or we run irq_work_run from
    the PMI-tail (ARM etc.)
  - tracepoint: nmi=0; since tracepoint could be from NMI context.
  - software: nmi=[0,1]; some, like the schedule thing cannot
    perform wakeups, and hence need 0.

As one can see, there is very little nmi=1 usage, and the down-side of
not using it is that on some platforms some software events can have a
jiffy delay in wakeup (when arch_irq_work_raise isn't implemented).

The up-side however is that we can remove the nmi parameter and save a
bunch of conditionals in fast paths.

Signed-off-by: Peter Zijlstra
Cc: Michael Cree
Cc: Will Deacon
Cc: Deng-Cheng Zhu
Cc: Anton Blanchard
Cc: Eric B Munson
Cc: Heiko Carstens
Cc: Paul Mundt
Cc: David S. Miller
Cc: Frederic Weisbecker
Cc: Jason Wessel
Cc: Don Zickus
Link: http://lkml.kernel.org/n/tip-agjev8eu666tvknpb3iaj0fg@git.kernel.org
Signed-off-by: Ingo Molnar
---
 arch/alpha/kernel/perf_event.c            |  2 +-
 arch/arm/kernel/perf_event_v6.c           |  2 +-
 arch/arm/kernel/perf_event_v7.c           |  2 +-
 arch/arm/kernel/perf_event_xscale.c       |  4 +-
 arch/arm/kernel/ptrace.c                  |  2 +-
 arch/arm/kernel/swp_emulate.c             |  2 +-
 arch/arm/mm/fault.c                       |  6 +--
 arch/mips/kernel/perf_event.c             |  2 +-
 arch/mips/kernel/traps.c                  |  8 ++--
 arch/mips/kernel/unaligned.c              |  5 +--
 arch/mips/math-emu/cp1emu.c               |  3 +-
 arch/mips/mm/fault.c                      |  8 ++--
 arch/powerpc/include/asm/emulated_ops.h   |  4 +-
 arch/powerpc/kernel/perf_event.c          |  6 +--
 arch/powerpc/kernel/perf_event_fsl_emb.c  |  6 +--
 arch/powerpc/kernel/ptrace.c              |  2 +-
 arch/powerpc/mm/fault.c                   |  6 +--
 arch/s390/mm/fault.c                      |  6 +--
 arch/sh/kernel/ptrace_32.c                |  2 +-
 arch/sh/kernel/traps_32.c                 |  2 +-
 arch/sh/kernel/traps_64.c                 |  8 ++--
 arch/sh/math-emu/math.c                   |  2 +-
 arch/sh/mm/fault_32.c                     |  6 +--
 arch/sh/mm/tlbflush_64.c                  |  6 +--
 arch/sparc/kernel/perf_event.c            |  2 +-
 arch/sparc/kernel/unaligned_32.c          |  4 +-
 arch/sparc/kernel/unaligned_64.c          | 12 +++---
 arch/sparc/kernel/visemul.c               |  2 +-
 arch/sparc/math-emu/math_32.c             |  2 +-
 arch/sparc/math-emu/math_64.c             |  2 +-
 arch/sparc/mm/fault_32.c                  |  8 ++--
 arch/sparc/mm/fault_64.c                  |  8 ++--
 arch/x86/kernel/cpu/perf_event.c          |  2 +-
 arch/x86/kernel/cpu/perf_event_intel.c    |  2 +-
 arch/x86/kernel/cpu/perf_event_intel_ds.c |  4 +-
 arch/x86/kernel/cpu/perf_event_p4.c       |  2 +-
 arch/x86/kernel/kgdb.c                    |  2 +-
 arch/x86/kernel/ptrace.c                  |  2 +-
 arch/x86/mm/fault.c                       |  6 +--
 include/linux/perf_event.h                | 18 ++++-----
 kernel/events/core.c                      | 63 ++++++++++++++-----------------
 kernel/events/internal.h                  |  1 -
 kernel/events/ring_buffer.c               | 10 ++---
 kernel/sched.c                            |  2 +-
 kernel/watchdog.c                         |  2 +-
 samples/hw_breakpoint/data_breakpoint.c   |  2 +-
 46 files changed, 119 insertions(+), 141 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 90561c45e7d8..8e47709160f8 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 			data.period = event->hw.last_period;
 
 			if (alpha_perf_event_set_period(event, hwc, idx)) {
-				if (perf_event_overflow(event, 1, &data, regs)) {
+				if (perf_event_overflow(event, &data, regs)) {
 					/* Interrupts coming too quickly; "throttle" the
 					 * counter, i.e., disable it for a little while.
					 */
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index f1e8dd94afe8..38dc4da1d612 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -479,7 +479,7 @@ armv6pmu_handle_irq(int irq_num,
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 4960686afb58..6e5f8752303b 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -787,7 +787,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 39affbe4fdb2..99b6b85c7e49 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -251,7 +251,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
@@ -583,7 +583,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 97260060bf26..0c9b1054f790 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -396,7 +396,7 @@ static long ptrace_hbp_idx_to_num(int idx)
 /*
  * Handle hitting a HW-breakpoint.
 */
-static void ptrace_hbptriggered(struct perf_event *bp, int unused,
+static void ptrace_hbptriggered(struct perf_event *bp,
 				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index 40ee7e5045e4..5f452f8fde05 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -183,7 +183,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)
 	unsigned int address, destreg, data, type;
 	unsigned int res = 0;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
 
 	if (current->pid != previous_pid) {
 		pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index bc0e1d88fd3b..9ea4f7ddd665 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -318,11 +318,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	fault = __do_page_fault(mm, addr, fsr, tsk);
 	up_read(&mm->mmap_sem);
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 	if (fault & VM_FAULT_MAJOR)
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, addr);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr);
 	else if (fault & VM_FAULT_MINOR)
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, addr);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr);
 
 	/*
 	 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index a8244854d3dc..d0deaab9ace2 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -527,7 +527,7 @@ handle_associated_event(struct cpu_hw_events *cpuc,
 	if (!mipspmu_event_set_period(event, hwc, idx))
 		return;
 
-	if (perf_event_overflow(event, 0, data, regs))
+	if (perf_event_overflow(event, data, regs))
 		mipspmu->disable_event(idx);
 }
 
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index e9b3af27d844..b7517e3abc85 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -578,12 +578,12 @@ static int simulate_llsc(struct pt_regs *regs, unsigned int opcode)
 {
 	if ((opcode & OPCODE) == LL) {
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-				1, 0, regs, 0);
+				1, regs, 0);
 		return simulate_ll(regs, opcode);
 	}
 	if ((opcode & OPCODE) == SC) {
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-				1, 0, regs, 0);
+				1, regs, 0);
 		return simulate_sc(regs, opcode);
 	}
 
@@ -602,7 +602,7 @@ static int simulate_rdhwr(struct pt_regs *regs, unsigned int opcode)
 		int rd = (opcode & RD) >> 11;
 		int rt = (opcode & RT) >> 16;
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-				1, 0, regs, 0);
+				1, regs, 0);
 		switch (rd) {
 		case 0:		/* CPU number */
 			regs->regs[rt] = smp_processor_id();
@@ -640,7 +640,7 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
 {
 	if ((opcode & OPCODE) == SPEC0 && (opcode & FUNC) == SYNC) {
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-				1, 0, regs, 0);
+				1, regs, 0);
 		return 0;
 	}
 
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index cfea1adfa153..eb319b580353 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -111,8 +111,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 	unsigned long value;
 	unsigned int res;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-			1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	/*
	 * This load never faults.
@@ -517,7 +516,7 @@ asmlinkage void do_ade(struct pt_regs *regs)
 	mm_segment_t seg;
 
 	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,
-			1, 0, regs, regs->cp0_badvaddr);
+			1, regs, regs->cp0_badvaddr);
 	/*
 	 * Did we catch a fault trying to load an instruction?
 	 * Or are we running in MIPS16 mode?
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index d32cb0503110..dbf2f93a5091 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -272,8 +272,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 	}
 
       emul:
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-			1, 0, xcp, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, xcp, 0);
 	MIPS_FPU_EMU_INC_STATS(emulated);
 	switch (MIPSInst_OPCODE(ir)) {
 	case ldc1_op:{
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 137ee76a0045..937cf3368164 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -145,7 +145,7 @@ good_area:
 	 * the fault.
 	 */
 	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
@@ -154,12 +154,10 @@ good_area:
 		BUG();
 	}
 	if (fault & VM_FAULT_MAJOR) {
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
-				1, 0, regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
 		tsk->maj_flt++;
 	} else {
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
-				1, 0, regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
 		tsk->min_flt++;
 	}
 
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index 45921672b97a..2cc41c715d2b 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -78,14 +78,14 @@ extern void ppc_warn_emulated_print(const char *type);
 #define PPC_WARN_EMULATED(type, regs) \
 	do { \
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \
-			1, 0, regs, 0); \
+			1, regs, 0); \
 		__PPC_WARN_EMULATED(type); \
 	} while (0)
 
 #define PPC_WARN_ALIGNMENT(type, regs) \
 	do { \
 		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \
-			1, 0, regs, regs->dar); \
+			1, regs, regs->dar); \
 		__PPC_WARN_EMULATED(type); \
 	} while (0)
 
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 822f63008ae1..14967de98876 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1207,7 +1207,7 @@ struct pmu power_pmu = {
  * here so there is no possibility of being interrupted.
 */
 static void record_and_restart(struct perf_event *event, unsigned long val,
-			       struct pt_regs *regs, int nmi)
+			       struct pt_regs *regs)
 {
 	u64 period = event->hw.sample_period;
 	s64 prev, delta, left;
@@ -1258,7 +1258,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
 			perf_get_data_addr(regs, &data.addr);
 
-		if (perf_event_overflow(event, nmi, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			power_pmu_stop(event, 0);
 	}
 }
@@ -1346,7 +1346,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 		if ((int)val < 0) {
 			/* event has overflowed */
 			found = 1;
-			record_and_restart(event, val, regs, nmi);
+			record_and_restart(event, val, regs);
 		}
 	}
 
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index b0dc8f7069cd..0a6d2a9d569c 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -568,7 +568,7 @@ static struct pmu fsl_emb_pmu = {
  * here so there is no possibility of being interrupted.
 */
 static void record_and_restart(struct perf_event *event, unsigned long val,
-			       struct pt_regs *regs, int nmi)
+			       struct pt_regs *regs)
 {
 	u64 period = event->hw.sample_period;
 	s64 prev, delta, left;
@@ -616,7 +616,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		perf_sample_data_init(&data, 0);
 		data.period = event->hw.last_period;
 
-		if (perf_event_overflow(event, nmi, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			fsl_emb_pmu_stop(event, 0);
 	}
 }
@@ -644,7 +644,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 		if (event) {
 			/* event has overflowed */
 			found = 1;
-			record_and_restart(event, val, regs, nmi);
+			record_and_restart(event, val, regs);
 		} else {
 			/*
 			 * Disabled counter is negative,
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index cb22024f2b42..3177617af2ef 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -882,7 +882,7 @@ void user_disable_single_step(struct task_struct *task)
 }
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-void ptrace_triggered(struct perf_event *bp, int nmi,
+void ptrace_triggered(struct perf_event *bp,
 		      struct perf_sample_data *data, struct pt_regs *regs)
 {
 	struct perf_event_attr attr;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 54f4fb994e99..dbc48254c6cc 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -173,7 +173,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space. All other faults represent errors in the
@@ -319,7 +319,7 @@ good_area:
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 			      regs, address);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
@@ -330,7 +330,7 @@ good_area:
 #endif
 	} else {
 		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 			      regs, address);
 	}
 	up_read(&mm->mmap_sem);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index fe103e891e7a..095f782a5512 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -299,7 +299,7 @@ static inline int do_exception(struct pt_regs *regs, int access,
 		goto out;
 
 	address = trans_exc_code & __FAIL_ADDR_MASK;
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	flags = FAULT_FLAG_ALLOW_RETRY;
 	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
 		flags |= FAULT_FLAG_WRITE;
@@ -345,11 +345,11 @@ retry:
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
 		if (fault & VM_FAULT_MAJOR) {
 			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 				      regs, address);
 		} else {
 			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 				      regs, address);
 		}
 		if (fault & VM_FAULT_RETRY) {
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 3d7b209b2178..8051976100a6 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -63,7 +63,7 @@ static inline int put_stack_long(struct task_struct *task, int offset,
 	return 0;
 }
 
-void ptrace_triggered(struct perf_event *bp, int nmi,
+void ptrace_triggered(struct perf_event *bp,
 		      struct perf_sample_data *data, struct pt_regs *regs)
 {
 	struct perf_event_attr attr;
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index b51a17104b5f..d9006f8ffc14 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -393,7 +393,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 	 */
 	if (!expected) {
 		unaligned_fixups_notify(current, instruction, regs);
-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1,
 			      regs, address);
 	}
 
diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c
index 6713ca97e553..67110be83fd7 100644
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -434,7 +434,7 @@ static int misaligned_load(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
 
 	destreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
@@ -512,7 +512,7 @@ static int misaligned_store(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
 
 	srcreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
@@ -588,7 +588,7 @@ static int misaligned_fpu_load(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);
 
 	destreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
@@ -665,7 +665,7 @@ static int misaligned_fpu_store(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);
 
 	srcreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index f76a5090d5d1..977195210653 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -620,7 +620,7 @@ int do_fpu_inst(unsigned short inst, struct pt_regs *regs)
 	struct task_struct *tsk = current;
 	struct sh_fpu_soft_struct *fpu = &(tsk->thread.xstate->softfpu);
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	if (!(task_thread_info(tsk)->status & TS_USEDFPU)) {
 		/* initialize once. */
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index d4c34d757f0d..7bebd044f2a1 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -160,7 +160,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	if ((regs->sr & SR_IMASK) != SR_IMASK)
 		local_irq_enable();
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -210,11 +210,11 @@ good_area:
 	}
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 			      regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 			      regs, address);
 	}
 
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index 7f5810f5dfdc..e3430e093d43 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 	/* Not an IO address, so reenable interrupts */
 	local_irq_enable();
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/*
 	 * If we're in an interrupt or have no user
@@ -200,11 +200,11 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 			      regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 			      regs, address);
 	}
 
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 2cb0e1c001e2..0b32f2e9e08d 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1277,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 		if (!sparc_perf_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 1, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			sparc_pmu_stop(event, 0);
 	}
 
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index 4491f4cb2695..7efbb2f9e77f 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -247,7 +247,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 		unsigned long addr = compute_effective_address(regs, insn);
 		int err;
 
-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
 		switch (dir) {
 		case load:
 			err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
@@ -338,7 +338,7 @@ asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 		}
 
 		addr = compute_effective_address(regs, insn);
-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
 		switch(dir) {
 		case load:
 			err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index b2b019ea8caa..35cff1673aa4 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -317,7 +317,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 
 		addr = compute_effective_address(regs, insn,
 						 ((insn >> 25) & 0x1f));
-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
 		switch (asi) {
 		case ASI_NL:
 		case ASI_AIUPL:
@@ -384,7 +384,7 @@ int handle_popc(u32 insn, struct pt_regs *regs)
 	int ret, i, rd = ((insn >> 25) & 0x1f);
 	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 	if (insn & 0x2000) {
 		maybe_flush_windows(0, 0, rd, from_kernel);
 		value = sign_extend_imm13(insn);
@@ -431,7 +431,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
 	int asi = decode_asi(insn, regs);
 	int flag = (freg < 32) ? FPRS_DL : FPRS_DU;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	save_and_clear_fpu();
 	current_thread_info()->xfsr[0] &= ~0x1c000;
@@ -554,7 +554,7 @@ void handle_ld_nf(u32 insn, struct pt_regs *regs)
 	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
 	unsigned long *reg;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	maybe_flush_windows(0, 0, rd, from_kernel);
 	reg = fetch_reg_addr(rd, regs);
@@ -586,7 +586,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
 	if (tstate & TSTATE_PRIV)
 		die_if_kernel("lddfmna from kernel", regs);
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
 	if (test_thread_flag(TIF_32BIT))
 		pc = (u32)pc;
 	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
@@ -647,7 +647,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
 	if (tstate & TSTATE_PRIV)
 		die_if_kernel("stdfmna from kernel", regs);
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
 	if (test_thread_flag(TIF_32BIT))
 		pc = (u32)pc;
 	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c
index 36357717d691..32b626c9d815 100644
--- a/arch/sparc/kernel/visemul.c
+++ b/arch/sparc/kernel/visemul.c
@@ -802,7 +802,7 @@ int vis_emul(struct pt_regs *regs, unsigned int insn)
 
 	BUG_ON(regs->tstate & TSTATE_PRIV);
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	if (test_thread_flag(TIF_32BIT))
 		pc = (u32)pc;
diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c
index a3fccde894ec..aa4d55b0bdf0 100644
--- a/arch/sparc/math-emu/math_32.c
+++ b/arch/sparc/math-emu/math_32.c
@@ -164,7 +164,7 @@ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt)
 	int retcode = 0;                               /* assume all succeed */
 	unsigned long insn;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
#ifdef DEBUG_MATHEMU
 	printk("In do_mathemu()... pc is %08lx\n", regs->pc);
diff --git a/arch/sparc/math-emu/math_64.c b/arch/sparc/math-emu/math_64.c
index 56d2c44747b8..e575bd2fe381 100644
--- a/arch/sparc/math-emu/math_64.c
+++ b/arch/sparc/math-emu/math_64.c
@@ -184,7 +184,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f)
 	if (tstate & TSTATE_PRIV)
 		die_if_kernel("unfinished/unimplemented FPop from kernel", regs);
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 	if (test_thread_flag(TIF_32BIT))
 		pc = (u32)pc;
 	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 7543ddbdadb2..aa1c1b1ce5cc 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -251,7 +251,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
 	if (in_atomic() || !mm)
 		goto no_context;
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	down_read(&mm->mmap_sem);
 
@@ -301,12 +301,10 @@ good_area:
 	}
 	if (fault & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
-			      regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
 	} else {
 		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
-			      regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
 	}
 	up_read(&mm->mmap_sem);
 	return;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index f92ce56a8b22..504c0622f729 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -325,7 +325,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 	if (in_atomic() || !mm)
 		goto intr_or_no_mm;
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		if ((regs->tstate & TSTATE_PRIV) &&
@@ -433,12 +433,10 @@ good_area:
 	}
 	if (fault & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
-			      regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
 	} else {
 		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
-			      regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
 	}
 	up_read(&mm->mmap_sem);
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8a57f9aa8e36..5b86ec51534c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1339,7 +1339,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_event_set_period(event))
 			continue;
 
-		if (perf_event_overflow(event, 1, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 41178c826c48..d38b0020f775 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1003,7 +1003,7 @@ again:
 
 		data.period = event->hw.last_period;
 
-		if (perf_event_overflow(event, 1, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index bab491b8ee25..0941f93f2940 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -340,7 +340,7 @@ static int intel_pmu_drain_bts_buffer(void)
 	 */
 	perf_prepare_sample(&header, &data, event, &regs);
 
-	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
+	if (perf_output_begin(&handle, event, header.size * (top - at), 1))
 		return 1;
 
 	for (; at < top; at++) {
@@ -616,7 +616,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
-	if (perf_event_overflow(event, 1, &data, &regs))
+	if (perf_event_overflow(event, &data, &regs))
 		x86_pmu_stop(event, 0);
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index f76fddf63381..d6e6a67b9608 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -970,7 +970,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_event_set_period(event))
 			continue;
 
-		if (perf_event_overflow(event, 1, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
 
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 5f9ecff328b5..98da6a7b5e82 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -608,7 +608,7 @@ int kgdb_arch_init(void)
 	return register_die_notifier(&kgdb_notifier);
 }
 
-static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi,
+static void kgdb_hw_overflow_handler(struct perf_event *event,
 		struct perf_sample_data *data, struct pt_regs *regs)
 {
 	struct task_struct *tsk = current;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 807c2a2b80f1..11db2e9b860a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -528,7 +528,7 @@ static int genregs_set(struct task_struct *target,
 	return ret;
 }
 
-static void ptrace_triggered(struct perf_event *bp, int nmi,
+static void ptrace_triggered(struct perf_event *bp,
 			     struct perf_sample_data *data,
 			     struct pt_regs *regs)
 {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2dbf6bf4c7e5..4d09df054e39 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1059,7 +1059,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1161,11 +1161,11 @@ good_area:
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
 		if (fault & VM_FAULT_MAJOR) {
 			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 				      regs, address);
 		} else {
 			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 				      regs, address);
 		}
 		if (fault & VM_FAULT_RETRY) {
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2f7b5d42ab41..0946a8bc098d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -682,7 +682,7 @@ enum perf_event_active_state {
 struct file;
 struct perf_sample_data;
 
-typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
+typedef void (*perf_overflow_handler_t)(struct perf_event *,
 					struct perf_sample_data *,
 					struct pt_regs *regs);
 
@@ -925,7 +925,6 @@ struct perf_output_handle {
 	unsigned long			size;
 	void				*addr;
 	int				page;
-	int				nmi;
 	int				sample;
 };
 
@@ -993,7 +992,7 @@ extern void perf_prepare_sample(struct perf_event_header *header,
 				struct perf_event *event,
 				struct pt_regs *regs);
 
-extern int perf_event_overflow(struct perf_event *event, int nmi,
+extern int perf_event_overflow(struct perf_event *event,
 			       struct perf_sample_data *data,
 			       struct pt_regs *regs);
 
@@ -1012,7 +1011,7 @@ static inline int is_software_event(struct perf_event *event)
 
 extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
-extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
+extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
 
 #ifndef perf_arch_fetch_caller_regs
 static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
@@ -1034,7 +1033,7 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs)
 }
 
 static __always_inline void
-perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
+perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 {
 	struct pt_regs hot_regs;
 
@@ -1043,7 +1042,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 			perf_fetch_caller_regs(&hot_regs);
 			regs = &hot_regs;
 		}
-		__perf_sw_event(event_id, nr, nmi, regs, addr);
+		__perf_sw_event(event_id, nr, regs, addr);
 	}
 }
 
@@ -1057,7 +1056,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task)
 static inline
 void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
 {
-	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
+	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
 
 	__perf_event_task_sched_out(task, next);
 }
@@ -1119,7 +1118,7 @@ extern void perf_bp_event(struct perf_event *event, void *data);
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_event *event, unsigned int size,
-			     int nmi, int sample);
+			     int sample);
 extern void perf_output_end(struct perf_output_handle *handle);
 extern void perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
@@ -1143,8 +1142,7 @@ static inline int perf_event_task_disable(void)	{ return -EINVAL; }
 static inline int perf_event_task_enable(void)	{ return -EINVAL; }
 
 static inline void
-perf_sw_event(u32 event_id, u64 nr, int nmi,
-	      struct pt_regs *regs, u64 addr)	{ }
+perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
 
 static inline void
 perf_bp_event(struct perf_event *event, void *data)	{ }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 270e32f9fc06..dbd1ca75bd3c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3972,7 +3972,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 	}
 }
 
-static void perf_event_output(struct perf_event *event, int nmi,
+static void perf_event_output(struct perf_event *event,
 				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
@@ -3984,7 +3984,7 @@ static void perf_event_output(struct perf_event *event, int nmi,
 
 	perf_prepare_sample(&header, data, event, regs);
 
-	if (perf_output_begin(&handle, event, header.size, nmi, 1))
+	if (perf_output_begin(&handle, event, header.size, 1))
 		goto exit;
 
 	perf_output_sample(&handle, &header, data, event);
@@ -4024,7 +4024,7 @@ perf_event_read_event(struct perf_event *event,
 	int ret;
 
 	perf_event_header__init_id(&read_event.header, &sample, event);
-	ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0);
+	ret = perf_output_begin(&handle, event, read_event.header.size, 0);
 	if (ret)
 		return;
 
@@ -4067,7 +4067,7 @@ static void perf_event_task_output(struct perf_event *event,
 
 	perf_event_header__init_id(&task_event->event_id.header, &sample, event);
 
 	ret = perf_output_begin(&handle, event,
-				task_event->event_id.header.size, 0, 0);
+				task_event->event_id.header.size, 0);
 	if (ret)
out; @@ -4204,7 +4204,7 @@ static void perf_event_comm_output(struct perf_event *event, perf_event_header__init_id(&comm_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, event, - comm_event->event_id.header.size, 0, 0); + comm_event->event_id.header.size, 0); if (ret) goto out; @@ -4351,7 +4351,7 @@ static void perf_event_mmap_output(struct perf_event *event, perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); ret = perf_output_begin(&handle, event, - mmap_event->event_id.header.size, 0, 0); + mmap_event->event_id.header.size, 0); if (ret) goto out; @@ -4546,7 +4546,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_event_header__init_id(&throttle_event.header, &sample, event); ret = perf_output_begin(&handle, event, - throttle_event.header.size, 1, 0); + throttle_event.header.size, 0); if (ret) return; @@ -4559,7 +4559,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) * Generic event overflow handling, sampling. */ -static int __perf_event_overflow(struct perf_event *event, int nmi, +static int __perf_event_overflow(struct perf_event *event, int throttle, struct perf_sample_data *data, struct pt_regs *regs) { @@ -4602,34 +4602,28 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, if (events && atomic_dec_and_test(&event->event_limit)) { ret = 1; event->pending_kill = POLL_HUP; - if (nmi) { - event->pending_disable = 1; - irq_work_queue(&event->pending); - } else - perf_event_disable(event); + event->pending_disable = 1; + irq_work_queue(&event->pending); } if (event->overflow_handler) - event->overflow_handler(event, nmi, data, regs); + event->overflow_handler(event, data, regs); else - perf_event_output(event, nmi, data, regs); + perf_event_output(event, data, regs); if (event->fasync && event->pending_kill) { - if (nmi) { - event->pending_wakeup = 1; - irq_work_queue(&event->pending); - } else - perf_event_wakeup(event); + event->pending_wakeup = 1; + irq_work_queue(&event->pending); } return ret; } -int perf_event_overflow(struct perf_event *event, int nmi, +int perf_event_overflow(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { - return __perf_event_overflow(event, nmi, 1, data, regs); + return __perf_event_overflow(event, 1, data, regs); } /* @@ -4678,7 +4672,7 @@ again: } static void perf_swevent_overflow(struct perf_event *event, u64 overflow, - int nmi, struct perf_sample_data *data, + struct perf_sample_data *data, struct pt_regs *regs) { struct hw_perf_event *hwc = &event->hw; @@ -4692,7 +4686,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, return; for (; overflow; overflow--) { - if (__perf_event_overflow(event, nmi, throttle, + if (__perf_event_overflow(event, throttle, data, regs)) { /* * We inhibit the overflow from happening when @@ -4705,7 +4699,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, } static void perf_swevent_event(struct perf_event *event, u64 nr, - int nmi, struct perf_sample_data *data, + struct perf_sample_data *data, struct pt_regs *regs) { struct hw_perf_event *hwc = &event->hw; @@ -4719,12 +4713,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, return; if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) - return perf_swevent_overflow(event, 1, nmi, data, regs); + return perf_swevent_overflow(event, 1, data, regs); if (local64_add_negative(nr, &hwc->period_left)) return; - perf_swevent_overflow(event, 0, nmi, data, 
regs); + perf_swevent_overflow(event, 0, data, regs); } static int perf_exclude_event(struct perf_event *event, @@ -4812,7 +4806,7 @@ find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) } static void do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, + u64 nr, struct perf_sample_data *data, struct pt_regs *regs) { @@ -4828,7 +4822,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) - perf_swevent_event(event, nr, nmi, data, regs); + perf_swevent_event(event, nr, data, regs); } end: rcu_read_unlock(); @@ -4849,8 +4843,7 @@ inline void perf_swevent_put_recursion_context(int rctx) put_recursion_context(swhash->recursion, rctx); } -void __perf_sw_event(u32 event_id, u64 nr, int nmi, - struct pt_regs *regs, u64 addr) +void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { struct perf_sample_data data; int rctx; @@ -4862,7 +4855,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi, perf_sample_data_init(&data, addr); - do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); + do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); perf_swevent_put_recursion_context(rctx); preempt_enable_notrace(); @@ -5110,7 +5103,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) - perf_swevent_event(event, count, 1, &data, regs); + perf_swevent_event(event, count, &data, regs); } perf_swevent_put_recursion_context(rctx); @@ -5203,7 +5196,7 @@ void perf_bp_event(struct perf_event *bp, void *data) perf_sample_data_init(&sample, bp->attr.bp_addr); if (!bp->hw.state && !perf_exclude_event(bp, regs)) - perf_swevent_event(bp, 1, 1, &sample, regs); + perf_swevent_event(bp, 1, &sample, regs); } #endif @@ -5232,7 +5225,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) if (regs && !perf_exclude_event(event, regs)) { if (!(event->attr.exclude_idle && current->pid == 0)) - if (perf_event_overflow(event, 0, &data, regs)) + if (perf_event_overflow(event, &data, regs)) ret = HRTIMER_NORESTART; } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 114f27f3a624..09097dd8116c 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -27,7 +27,6 @@ struct ring_buffer { void *data_pages[0]; }; - extern void rb_free(struct ring_buffer *rb); extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index fc2701c99207..8b3b73630fa4 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -38,11 +38,8 @@ static void perf_output_wakeup(struct perf_output_handle *handle) { atomic_set(&handle->rb->poll, POLL_IN); - if (handle->nmi) { - handle->event->pending_wakeup = 1; - irq_work_queue(&handle->event->pending); - } else - perf_event_wakeup(handle->event); + handle->event->pending_wakeup = 1; + irq_work_queue(&handle->event->pending); } /* @@ -102,7 +99,7 @@ out: int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size, - int nmi, int sample) + int sample) { struct ring_buffer *rb; unsigned long tail, offset, head; @@ -127,7 +124,6 @@ int perf_output_begin(struct perf_output_handle *handle, handle->rb = rb; handle->event = event; - handle->nmi = nmi; 
handle->sample = sample; if (!rb->nr_pages) diff --git a/kernel/sched.c b/kernel/sched.c index 3f2e502d609b..d08d110b8976 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2220,7 +2220,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) if (task_cpu(p) != new_cpu) { p->se.nr_migrations++; - perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); + perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); } __set_task_cpu(p, new_cpu); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 752b75ba662b..a6708e677a0a 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -211,7 +211,7 @@ static struct perf_event_attr wd_hw_attr = { }; /* Callback function for perf event subsystem */ -static void watchdog_overflow_callback(struct perf_event *event, int nmi, +static void watchdog_overflow_callback(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index 063653955f9f..7b164d3200ff 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -41,7 +41,7 @@ module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" " write operations on the kernel symbol"); -static void sample_hbp_handler(struct perf_event *bp, int nmi, +static void sample_hbp_handler(struct perf_event *bp, struct perf_sample_data *data, struct pt_regs *regs) { -- cgit v1.2.3 From 3160b09796129abc9523ea3cd1633b0faba64a02 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Tue, 28 Jun 2011 14:54:47 -0500 Subject: powerpc: Create next_tlbcam_idx percpu variable for FSL_BOOKE This is used to round-robin TLBCAM entries. Signed-off-by: Becky Bruce Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/mmu.h | 5 +++++ arch/powerpc/kernel/smp.c | 4 ++++ arch/powerpc/mm/mem.c | 9 +++++++++ arch/powerpc/mm/tlb_nohash.c | 6 ++++++ 4 files changed, 24 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 4138b21ae80a..b427a55ce2ab 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -115,6 +115,11 @@ #ifndef __ASSEMBLY__ #include +#ifdef CONFIG_PPC_FSL_BOOK3E +#include +DECLARE_PER_CPU(int, next_tlbcam_idx); +#endif + static inline int mmu_has_feature(unsigned long feature) { return (cur_cpu_spec->mmu_features & feature); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6c8e739a12da..567a1746ed74 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -305,6 +305,10 @@ struct thread_info *current_set[NR_CPUS]; static void __devinit smp_store_cpu_info(int id) { per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR); +#ifdef CONFIG_PPC_FSL_BOOK3E + per_cpu(next_tlbcam_idx, id) + = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; +#endif } void __init smp_prepare_cpus(unsigned int max_cpus) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index a3841bb75cbb..457dc84154ba 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -353,6 +353,15 @@ void __init mem_init(void) } #endif /* CONFIG_HIGHMEM */ +#if defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_SMP) + /* + * If smp is enabled, next_tlbcam_idx is initialized in the cpu up + * functions.... do it here for the non-smp case. 
+ */ + per_cpu(next_tlbcam_idx, smp_processor_id()) = + (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; +#endif + printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " "%luk reserved, %luk data, %luk bss, %luk init)\n", nr_free_pages() << (PAGE_SHIFT-10), diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 3722185d1865..e80ed1093e2f 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -102,6 +102,12 @@ unsigned long linear_map_top; /* Top of linear mapping */ #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_FSL_BOOK3E +/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */ +DEFINE_PER_CPU(int, next_tlbcam_idx); +EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx); +#endif + /* * Base TLB flushing operations: * -- cgit v1.2.3 From 969391c58a4efb8411d6881179945f425ad9cbb5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:26:11 +0000 Subject: powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to indicate that we have a usable hypervisor mode, and another to indicate that the processor conforms to PowerISA version 2.06. We also add another bit to indicate that the processor conforms to ISA version 2.01 and set that for PPC970 and derivatives. Some PPC970 chips (specifically those in Apple machines) have a hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode is not useful in the sense that there is no way to run any code in supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1 bits in HID4 are always 0, and we use that as a way of detecting that hypervisor mode is not useful. Where we have a feature section in assembly code around code that only applies on POWER7 in hypervisor mode, we use a construct like END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) The definition of END_FTR_SECTION_IFSET is such that the code will be enabled (not overwritten with nops) only if all bits in the provided mask are set. Note that the CPU feature check in __tlbie() only needs to check the ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called if we are running bare-metal, i.e. in hypervisor mode. 
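In C code the same gate is just a conjunction of feature tests. As an illustrative sketch (this mirrors the kvmppc_mmu_hv_init() change in this patch; it is not a new interface):

        /*
         * Both bits must be set -- a usable hypervisor mode and an
         * ISA 2.06 (POWER7) core -- matching the all-bits-set
         * semantics of END_FTR_SECTION_IFSET(CPU_FTR_HVMODE |
         * CPU_FTR_ARCH_206).
         */
        if (!cpu_has_feature(CPU_FTR_HVMODE) ||
            !cpu_has_feature(CPU_FTR_ARCH_206))
                return -EINVAL;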
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/cputable.h | 14 ++++++++------ arch/powerpc/include/asm/reg.h | 16 ++++++++++++---- arch/powerpc/kernel/cpu_setup_power7.S | 4 ++-- arch/powerpc/kernel/cpu_setup_ppc970.S | 26 ++++++++++++++++++++++---- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 3 ++- arch/powerpc/kvm/book3s_hv.c | 3 ++- arch/powerpc/kvm/book3s_hv_builtin.c | 4 ++-- arch/powerpc/kvm/book3s_segment.S | 2 +- arch/powerpc/mm/hash_native_64.c | 4 ++-- 11 files changed, 56 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index c0d842cfd012..e30442c539ce 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -179,8 +179,9 @@ extern const char *powerpc_base_platform; #define LONG_ASM_CONST(x) 0 #endif - -#define CPU_FTR_HVMODE_206 LONG_ASM_CONST(0x0000000800000000) +#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000200000000) +#define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000400000000) +#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000800000000) #define CPU_FTR_CFAR LONG_ASM_CONST(0x0000001000000000) #define CPU_FTR_IABR LONG_ASM_CONST(0x0000002000000000) #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000004000000000) @@ -401,9 +402,10 @@ extern const char *powerpc_base_platform; CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \ CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \ - CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS) + CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \ + CPU_FTR_HVMODE) #define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -417,13 +419,13 @@ extern const char *powerpc_base_platform; CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR) #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_HVMODE_206 |\ + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ - CPU_FTR_ICSWX | CPU_FTR_CFAR) + CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 20a053c14270..ddbe57ae8584 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -307,6 +307,7 @@ #define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */ #define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */ #define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */ +#define HID0_HDICE_SH (63 - 23) /* 970 HDEC interrupt enable */ #define HID0_EMCP (1<<31) /* Enable Machine Check pin */ #define HID0_EBA (1<<29) /* Enable Bus Address Parity */ #define HID0_EBD (1<<28) /* Enable Bus Data Parity */ @@ -362,6 +363,13 @@ #define SPRN_IABR2 0x3FA /* 83xx */ #define SPRN_IBCR 0x135 /* 83xx 
Insn Breakpoint Control Reg */ #define SPRN_HID4 0x3F4 /* 970 HID4 */ +#define HID4_LPES0 (1ul << (63-0)) /* LPAR env. sel. bit 0 */ +#define HID4_RMLS2_SH (63 - 2) /* Real mode limit bottom 2 bits */ +#define HID4_LPID5_SH (63 - 6) /* partition ID bottom 4 bits */ +#define HID4_RMOR_SH (63 - 22) /* real mode offset (16 bits) */ +#define HID4_LPES1 (1 << (63-57)) /* LPAR env. sel. bit 1 */ +#define HID4_RMLS0_SH (63 - 58) /* Real mode limit top bit */ +#define HID4_LPID1_SH 0 /* partition ID top 2 bits */ #define SPRN_HID4_GEKKO 0x3F3 /* Gekko HID4 */ #define SPRN_HID5 0x3F6 /* 970 HID5 */ #define SPRN_HID6 0x3F9 /* BE HID 6 */ @@ -811,28 +819,28 @@ mfspr rX,SPRN_SPRG_PACA; \ FTR_SECTION_ELSE_NESTED(66); \ mfspr rX,SPRN_SPRG_HPACA; \ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66) #define SET_PACA(rX) \ BEGIN_FTR_SECTION_NESTED(66); \ mtspr SPRN_SPRG_PACA,rX; \ FTR_SECTION_ELSE_NESTED(66); \ mtspr SPRN_SPRG_HPACA,rX; \ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66) #define GET_SCRATCH0(rX) \ BEGIN_FTR_SECTION_NESTED(66); \ mfspr rX,SPRN_SPRG_SCRATCH0; \ FTR_SECTION_ELSE_NESTED(66); \ mfspr rX,SPRN_SPRG_HSCRATCH0; \ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66) #define SET_SCRATCH0(rX) \ BEGIN_FTR_SECTION_NESTED(66); \ mtspr SPRN_SPRG_SCRATCH0,rX; \ FTR_SECTION_ELSE_NESTED(66); \ mtspr SPRN_SPRG_HSCRATCH0,rX; \ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE_206, 66) + ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_HVMODE, 66) #else /* CONFIG_PPC_BOOK3S_64 */ #define GET_SCRATCH0(rX) mfspr rX,SPRN_SPRG_SCRATCH0 diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S index 2ef6749688e9..76797c5105d6 100644 --- a/arch/powerpc/kernel/cpu_setup_power7.S +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -45,12 +45,12 @@ _GLOBAL(__restore_cpu_power7) blr __init_hvmode_206: - /* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */ + /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ mfmsr r3 rldicl. r0,r3,4,63 bnelr ld r5,CPU_SPEC_FEATURES(r4) - LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) xor r5,r5,r6 std r5,CPU_SPEC_FEATURES(r4) blr diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S index 27f2507279d8..12fac8df01c5 100644 --- a/arch/powerpc/kernel/cpu_setup_ppc970.S +++ b/arch/powerpc/kernel/cpu_setup_ppc970.S @@ -76,7 +76,7 @@ _GLOBAL(__setup_cpu_ppc970) /* Do nothing if not running in HV mode */ mfmsr r0 rldicl. r0,r0,4,63 - beqlr + beq no_hv_mode mfspr r0,SPRN_HID0 li r11,5 /* clear DOZE and SLEEP */ @@ -90,7 +90,7 @@ _GLOBAL(__setup_cpu_ppc970MP) /* Do nothing if not running in HV mode */ mfmsr r0 rldicl. r0,r0,4,63 - beqlr + beq no_hv_mode mfspr r0,SPRN_HID0 li r11,0x15 /* clear DOZE and SLEEP */ @@ -109,6 +109,14 @@ load_hids: sync isync + /* Try to set LPES = 01 in HID4 */ + mfspr r0,SPRN_HID4 + clrldi r0,r0,1 /* clear LPES0 */ + ori r0,r0,HID4_LPES1 /* set LPES1 */ + sync + mtspr SPRN_HID4,r0 + isync + /* Save away cpu state */ LOAD_REG_ADDR(r5,cpu_state_storage) @@ -117,11 +125,21 @@ load_hids: std r3,CS_HID0(r5) mfspr r3,SPRN_HID1 std r3,CS_HID1(r5) - mfspr r3,SPRN_HID4 - std r3,CS_HID4(r5) + mfspr r4,SPRN_HID4 + std r4,CS_HID4(r5) mfspr r3,SPRN_HID5 std r3,CS_HID5(r5) + /* See if we successfully set LPES1 to 1; if not we are in Apple mode */ + andi. 
r4,r4,HID4_LPES1 + bnelr + +no_hv_mode: + /* Disable CPU_FTR_HVMODE and exit, since we don't have HV mode */ + ld r5,CPU_SPEC_FEATURES(r4) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) + andc r5,r5,r6 + std r5,CPU_SPEC_FEATURES(r4) blr /* Called with no MMU context (typically MSR:IR/DR off) to diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5bc06fdfa6c0..a5345380bef3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -75,7 +75,7 @@ BEGIN_FTR_SECTION b .power7_wakeup_noloss 2: b .power7_wakeup_loss 9: -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif /* CONFIG_PPC_P7_NAP */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, NOTEST, 0x100) @@ -173,7 +173,7 @@ hardware_interrupt_hv: _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE_206) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index efeb88184182..0a5a899846bb 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -167,7 +167,7 @@ void setup_paca(struct paca_struct *new_paca) * if we do a GET_PACA() before the feature fixups have been * applied */ - if (cpu_has_feature(CPU_FTR_HVMODE_206)) + if (cpu_has_feature(CPU_FTR_HVMODE)) mtspr(SPRN_SPRG_HPACA, local_paca); #endif mtspr(SPRN_SPRG_PACA, local_paca); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 96ba96a16abf..212dcd8fc50b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -128,7 +128,8 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) int kvmppc_mmu_hv_init(void) { - if (!cpu_has_feature(CPU_FTR_HVMODE_206)) + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_206)) return -EINVAL; memset(lpid_inuse, 0, sizeof(lpid_inuse)); set_bit(mfspr(SPRN_LPID), lpid_inuse); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 04da135cae61..dc70e7745ab3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -443,7 +443,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvmppc_core_check_processor_compat(void) { - if (cpu_has_feature(CPU_FTR_HVMODE_206)) + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_206)) return 0; return -EIO; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 736df3cbbc55..7315ec6e8177 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -90,8 +90,8 @@ void kvm_rma_init(void) void *rma; struct page *pg; - /* Only do this on POWER7 in HV mode */ - if (!cpu_has_feature(CPU_FTR_HVMODE_206)) + /* Only do this in HV mode */ + if (!cpu_has_feature(CPU_FTR_HVMODE)) return; if (!kvm_rma_size || !kvm_rma_count) diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 134501691ad0..aed32e517212 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -170,7 +170,7 @@ BEGIN_FTR_SECTION mfspr r4,SPRN_HSRR1 andi. 
r12,r12,0x3ffd b 2f -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) #endif 1: mfsrr0 r3 mfsrr1 r4 diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index dfd764896db0..b44f5f803052 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -51,7 +51,7 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) va &= ~0xffful; va |= ssize << 8; asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) - : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206) + : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) : "memory"); break; default: @@ -61,7 +61,7 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) va |= ssize << 8; va |= 1; /* L */ asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2) - : : "r" (va), "r"(0), "i" (CPU_FTR_HVMODE_206) + : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) : "memory"); break; } -- cgit v1.2.3 From 9e368f2915601cd5bc7f5fd638b58435b018bbd7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:40:08 +0000 Subject: KVM: PPC: book3s_hv: Add support for PPC970-family processors This adds support for running KVM guests in supervisor mode on those PPC970 processors that have a usable hypervisor mode. Unfortunately, Apple G5 machines have supervisor mode disabled (MSR[HV] is forced to 1), but the YDL PowerStation does have a usable hypervisor mode. There are several differences between the PPC970 and POWER7 in how guests are managed. These differences are accommodated using the CPU_FTR_ARCH_201 (PPC970) and CPU_FTR_ARCH_206 (POWER7) CPU feature bits. Notably, on PPC970: * The LPCR, LPID or RMOR registers don't exist, and the functions of those registers are provided by bits in HID4 and one bit in HID0. * External interrupts can be directed to the hypervisor, but unlike POWER7 they are masked by MSR[EE] in non-hypervisor modes and use SRR0/1 not HSRR0/1. * There is no virtual RMA (VRMA) mode; the guest must use an RMO (real mode offset) area. * The TLB entries are not tagged with the LPID, so it is necessary to flush the whole TLB on partition switch. Furthermore, when switching partitions we have to ensure that no other CPU is executing the tlbie or tlbsync instructions in either the old or the new partition, otherwise undefined behaviour can occur. * The PMU has 8 counters (PMC registers) rather than 6. * The DSCR, PURR, SPURR, AMR, AMOR, UAMOR registers don't exist. * The SLB has 64 entries rather than 32. * There is no mediated external interrupt facility, so if we switch to a guest that has a virtual external interrupt pending but the guest has MSR[EE] = 0, we have to arrange to have an interrupt pending for it so that we can get control back once it re-enables interrupts. We do that by sending ourselves an IPI with smp_send_reschedule after hard-disabling interrupts. 
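In C terms, the check added to book3s_hv_interrupts.S amounts to roughly the following (an illustrative sketch only: the real code is assembly and passes the PACA index to smp_send_reschedule rather than smp_processor_id()):

        /*
         * If the PPC970 guest has an external interrupt queued while
         * MSR[EE] = 0, send ourselves an IPI so the guest is
         * interrupted again as soon as it re-enables interrupts.
         */
        if (vcpu->arch.pending_exceptions &
            ((1ul << BOOK3S_IRQPRIO_EXTERNAL) |
             (1ul << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)))
                smp_send_reschedule(smp_processor_id());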
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/exception-64s.h | 4 + arch/powerpc/include/asm/kvm_book3s_asm.h | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/exceptions-64s.S | 2 +- arch/powerpc/kvm/Kconfig | 13 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 30 ++-- arch/powerpc/kvm/book3s_hv.c | 60 ++++++-- arch/powerpc/kvm/book3s_hv_builtin.c | 11 +- arch/powerpc/kvm/book3s_hv_interrupts.S | 30 ++++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 230 +++++++++++++++++++++++++++++- arch/powerpc/kvm/powerpc.c | 3 + arch/powerpc/mm/hash_native_64.c | 2 +- 14 files changed, 354 insertions(+), 42 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 69435da8f2ba..8057f4f6980f 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -246,6 +246,10 @@ label##_hv: \ KVMTEST(vec); \ _SOFTEN_TEST(EXC_HV) +#define SOFTEN_TEST_HV_201(vec) \ + KVMTEST(vec); \ + _SOFTEN_TEST(EXC_STD) + #define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ HMT_MEDIUM; \ SET_SCRATCH0(r13); /* save r13 */ \ diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 9cfd5436782d..ef7b3688c3b6 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -82,7 +82,7 @@ struct kvmppc_host_state { unsigned long xics_phys; u64 dabr; u64 host_mmcr[3]; - u32 host_pmc[6]; + u32 host_pmc[8]; u64 host_purr; u64 host_spurr; u64 host_dscr; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f572d9cc31bd..cc22b282d755 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -353,7 +353,7 @@ struct kvm_vcpu_arch { u32 dbsr; u64 mmcr[3]; - u32 pmc[6]; + u32 pmc[8]; #ifdef CONFIG_KVM_EXIT_TIMING struct mutex exit_timing_lock; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f4aba938166b..54b935f2f5de 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -128,6 +128,7 @@ int main(void) DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); + DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a5345380bef3..41b02c792aa3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -171,7 +171,7 @@ hardware_interrupt_hv: KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, - EXC_STD, SOFTEN_TEST_PR) + EXC_STD, SOFTEN_TEST_HV_201) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 5d9b78ebbaa6..eeb42e06f2d7 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -67,23 +67,20 @@ config KVM_BOOK3S_64 If unsure, say N. 
config KVM_BOOK3S_64_HV - bool "KVM support for POWER7 using hypervisor mode in host" + bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 ---help--- Support running unmodified book3s_64 guest kernels in - virtual machines on POWER7 processors that have hypervisor - mode available to the host. + virtual machines on POWER7 and PPC970 processors that have + hypervisor mode available to the host. If you say Y here, KVM will use the hardware virtualization facilities of POWER7 (and later) processors, meaning that guest operating systems will run at full hardware speed using supervisor and user modes. However, this also means that KVM is not usable under PowerVM (pHyp), is only usable - on POWER7 (or later) processors, and can only emulate - POWER5+, POWER6 and POWER7 processors. - - This module provides access to the hardware capabilities through - a character device node named /dev/kvm. + on POWER7 (or later) processors and PPC970-family processors, + and cannot emulate a different processor from the host processor. If unsure, say N. diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 212dcd8fc50b..bc3a2ea94217 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -42,6 +42,8 @@ #define VRMA_PAGE_ORDER 24 #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ +/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ +#define MAX_LPID_970 63 #define NR_LPIDS (LPID_RSVD + 1) unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)]; @@ -69,9 +71,6 @@ long kvmppc_alloc_hpt(struct kvm *kvm) kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18); kvm->arch.lpid = lpid; - kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); - kvm->arch.host_lpid = mfspr(SPRN_LPID); - kvm->arch.host_lpcr = mfspr(SPRN_LPCR); pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid); return 0; @@ -128,12 +127,24 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem) int kvmppc_mmu_hv_init(void) { - if (!cpu_has_feature(CPU_FTR_HVMODE) || - !cpu_has_feature(CPU_FTR_ARCH_206)) + unsigned long host_lpid, rsvd_lpid; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) return -EINVAL; + memset(lpid_inuse, 0, sizeof(lpid_inuse)); - set_bit(mfspr(SPRN_LPID), lpid_inuse); - set_bit(LPID_RSVD, lpid_inuse); + + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + host_lpid = mfspr(SPRN_LPID); /* POWER7 */ + rsvd_lpid = LPID_RSVD; + } else { + host_lpid = 0; /* PPC970 */ + rsvd_lpid = MAX_LPID_970; + } + + set_bit(host_lpid, lpid_inuse); + /* rsvd_lpid is reserved for use in partition switching */ + set_bit(rsvd_lpid, lpid_inuse); return 0; } @@ -157,7 +168,10 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = &vcpu->arch.mmu; - vcpu->arch.slb_nr = 32; /* Assume POWER7 for now */ + if (cpu_has_feature(CPU_FTR_ARCH_206)) + vcpu->arch.slb_nr = 32; /* POWER7 */ + else + vcpu->arch.slb_nr = 64; mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index dc70e7745ab3..cc0d7f1b19ab 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -443,8 +443,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvmppc_core_check_processor_compat(void) { - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_206)) + if (cpu_has_feature(CPU_FTR_HVMODE)) return 0; return -EIO; } @@ -731,6 +730,10 @@ static int kvmppc_run_vcpu(struct kvm_run 
*kvm_run, struct kvm_vcpu *vcpu) return -EINTR; } + /* On PPC970, check that we have an RMA region */ + if (!vcpu->kvm->arch.rma && cpu_has_feature(CPU_FTR_ARCH_201)) + return -EPERM; + kvm_run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; @@ -920,12 +923,14 @@ fail: } /* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7. */ + Assumes POWER7 or PPC970. */ static inline int lpcr_rmls(unsigned long rma_size) { switch (rma_size) { case 32ul << 20: /* 32 MB */ - return 8; + if (cpu_has_feature(CPU_FTR_ARCH_206)) + return 8; /* only supported on POWER7 */ + return -1; case 64ul << 20: /* 64 MB */ return 3; case 128ul << 20: /* 128 MB */ @@ -1059,6 +1064,10 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, mem->userspace_addr == vma->vm_start) ri = vma->vm_file->private_data; up_read(&current->mm->mmap_sem); + if (!ri && cpu_has_feature(CPU_FTR_ARCH_201)) { + pr_err("CPU requires an RMO\n"); + return -EINVAL; + } } if (ri) { @@ -1077,10 +1086,25 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, atomic_inc(&ri->use_count); kvm->arch.rma = ri; kvm->arch.n_rma_pages = rma_size >> porder; - lpcr = kvm->arch.lpcr & ~(LPCR_VPM0 | LPCR_VRMA_L); - lpcr |= rmls << LPCR_RMLS_SH; + + /* Update LPCR and RMOR */ + lpcr = kvm->arch.lpcr; + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + /* PPC970; insert RMLS value (split field) in HID4 */ + lpcr &= ~((1ul << HID4_RMLS0_SH) | + (3ul << HID4_RMLS2_SH)); + lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) | + ((rmls & 3) << HID4_RMLS2_SH); + /* RMOR is also in HID4 */ + lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff) + << HID4_RMOR_SH; + } else { + /* POWER7 */ + lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L); + lpcr |= rmls << LPCR_RMLS_SH; + kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; + } kvm->arch.lpcr = lpcr; - kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT; pr_info("Using RMO at %lx size %lx (LPCR = %lx)\n", ri->base_pfn << PAGE_SHIFT, rma_size, lpcr); } @@ -1151,11 +1175,25 @@ int kvmppc_core_init_vm(struct kvm *kvm) kvm->arch.rma = NULL; kvm->arch.n_rma_pages = 0; - lpcr = kvm->arch.host_lpcr & (LPCR_PECE | LPCR_LPES); - lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | - LPCR_VPM0 | LPCR_VRMA_L; - kvm->arch.lpcr = lpcr; + kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); + if (cpu_has_feature(CPU_FTR_ARCH_201)) { + /* PPC970; HID4 is effectively the LPCR */ + unsigned long lpid = kvm->arch.lpid; + kvm->arch.host_lpid = 0; + kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); + lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); + lpcr |= ((lpid >> 4) << HID4_LPID1_SH) | + ((lpid & 0xf) << HID4_LPID5_SH); + } else { + /* POWER7; init LPCR for virtual RMA mode */ + kvm->arch.host_lpid = mfspr(SPRN_LPID); + kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); + lpcr &= LPCR_PECE | LPCR_LPES; + lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | + LPCR_VPM0 | LPCR_VRMA_L; + } + kvm->arch.lpcr = lpcr; return 0; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7315ec6e8177..d43120355eec 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -55,12 +55,14 @@ static LIST_HEAD(free_rmas); static DEFINE_SPINLOCK(rma_lock); /* Work out RMLS (real mode limit selector) field value for a given RMA size. - Assumes POWER7. */ + Assumes POWER7 or PPC970. 
*/ static inline int lpcr_rmls(unsigned long rma_size) { switch (rma_size) { case 32ul << 20: /* 32 MB */ - return 8; + if (cpu_has_feature(CPU_FTR_ARCH_206)) + return 8; /* only supported on POWER7 */ + return -1; case 64ul << 20: /* 64 MB */ return 3; case 128ul << 20: /* 128 MB */ @@ -90,8 +92,9 @@ void kvm_rma_init(void) void *rma; struct page *pg; - /* Only do this in HV mode */ - if (!cpu_has_feature(CPU_FTR_HVMODE)) + /* Only do this on PPC970 in HV mode */ + if (!cpu_has_feature(CPU_FTR_HVMODE) || + !cpu_has_feature(CPU_FTR_ARCH_201)) return; if (!kvm_rma_size || !kvm_rma_count) diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 532afaf19841..3f7b674dd4bf 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -50,8 +50,10 @@ _GLOBAL(__kvmppc_vcore_entry) SAVE_NVGPRS(r1) /* Save host DSCR */ +BEGIN_FTR_SECTION mfspr r3, SPRN_DSCR std r3, HSTATE_DSCR(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save host DABR */ mfspr r3, SPRN_DABR @@ -86,12 +88,20 @@ _GLOBAL(__kvmppc_vcore_entry) mfspr r7, SPRN_PMC4 mfspr r8, SPRN_PMC5 mfspr r9, SPRN_PMC6 +BEGIN_FTR_SECTION + mfspr r10, SPRN_PMC7 + mfspr r11, SPRN_PMC8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, HSTATE_PMC(r13) stw r5, HSTATE_PMC + 4(r13) stw r6, HSTATE_PMC + 8(r13) stw r7, HSTATE_PMC + 12(r13) stw r8, HSTATE_PMC + 16(r13) stw r9, HSTATE_PMC + 20(r13) +BEGIN_FTR_SECTION + stw r10, HSTATE_PMC + 24(r13) + stw r11, HSTATE_PMC + 28(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 31: /* @@ -105,6 +115,26 @@ _GLOBAL(__kvmppc_vcore_entry) add r8,r8,r7 std r8,HSTATE_DECEXP(r13) + /* + * On PPC970, if the guest vcpu has an external interrupt pending, + * send ourselves an IPI so as to interrupt the guest once it + * enables interrupts. (It must have interrupts disabled, + * otherwise we would already have delivered the interrupt.) + */ +BEGIN_FTR_SECTION + ld r0, VCPU_PENDING_EXC(r4) + li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL) + oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h + and. 
r0, r0, r7 + beq 32f + mr r31, r4 + lhz r3, PACAPACAINDEX(r13) + bl smp_send_reschedule + nop + mr r4, r31 +32: +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + /* Jump to partition switch code */ bl .kvmppc_hv_entry_trampoline nop diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index edb0aae901a3..fcfe6b055558 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -56,7 +56,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, /* only handle 4k, 64k and 16M pages for now */ porder = 12; if (pteh & HPTE_V_LARGE) { - if ((ptel & 0xf000) == 0x1000) { + if (cpu_has_feature(CPU_FTR_ARCH_206) && + (ptel & 0xf000) == 0x1000) { /* 64k page */ porder = 16; } else if ((ptel & 0xff000) == 0) { @@ -126,7 +127,8 @@ static unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, va_low &= 0x7ff; if (v & HPTE_V_LARGE) { rb |= 1; /* L field */ - if (r & 0xff000) { + if (cpu_has_feature(CPU_FTR_ARCH_206) && + (r & 0xff000)) { /* non-16MB large page, must be 64k */ /* (masks depend on page size) */ rb |= 0x1000; /* page encoding in LP field */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9ee223c35285..6dd33581a228 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -148,12 +148,20 @@ kvmppc_hv_entry: lwz r7, VCPU_PMC + 12(r4) lwz r8, VCPU_PMC + 16(r4) lwz r9, VCPU_PMC + 20(r4) +BEGIN_FTR_SECTION + lwz r10, VCPU_PMC + 24(r4) + lwz r11, VCPU_PMC + 28(r4) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r5 mtspr SPRN_PMC3, r6 mtspr SPRN_PMC4, r7 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 +BEGIN_FTR_SECTION + mtspr SPRN_PMC7, r10 + mtspr SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, VCPU_MMCR(r4) ld r5, VCPU_MMCR + 8(r4) ld r6, VCPU_MMCR + 16(r4) @@ -165,9 +173,11 @@ kvmppc_hv_entry: /* Load up FP, VMX and VSX registers */ bl kvmppc_load_fp +BEGIN_FTR_SECTION /* Switch DSCR to guest value */ ld r5, VCPU_DSCR(r4) mtspr SPRN_DSCR, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* * Set the decrementer to the guest decrementer. @@ -210,6 +220,7 @@ kvmppc_hv_entry: mtspr SPRN_DABRX,r5 mtspr SPRN_DABR,r6 +BEGIN_FTR_SECTION /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) ld r6,VCPU_UAMOR(r4) @@ -217,6 +228,7 @@ kvmppc_hv_entry: mtspr SPRN_AMR,r5 mtspr SPRN_UAMOR,r6 mtspr SPRN_AMOR,r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Clear out SLB */ li r6,0 @@ -224,6 +236,14 @@ kvmppc_hv_entry: slbia ptesync +BEGIN_FTR_SECTION + b 30f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + /* + * POWER7 host -> guest partition switch code. + * We don't have to lock against concurrent tlbies, + * but we do have to coordinate across hardware threads. + */ /* Increment entry count iff exit count is zero. */ ld r5,HSTATE_KVM_VCORE(r13) addi r9,r5,VCORE_ENTRY_EXIT @@ -315,9 +335,94 @@ kvmppc_hv_entry: ld r8,VCPU_SPURR(r4) mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 + b 31f + + /* + * PPC970 host -> guest partition switch code. + * We have to lock against concurrent tlbies, + * using native_tlbie_lock to lock against host tlbies + * and kvm->arch.tlbie_lock to lock against guest tlbies. + * We also have to invalidate the TLB since its + * entries aren't tagged with the LPID. 
+ */ +30: ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ + + /* first take native_tlbie_lock */ + .section ".toc","aw" +toc_tlbie_lock: + .tc native_tlbie_lock[TC],native_tlbie_lock + .previous + ld r3,toc_tlbie_lock@toc(2) + lwz r8,PACA_LOCK_TOKEN(r13) +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + + ld r7,KVM_LPCR(r9) /* use kvm->arch.lpcr to store HID4 */ + li r0,0x18f + rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ + or r0,r7,r0 + ptesync + sync + mtspr SPRN_HID4,r0 /* switch to reserved LPID */ + isync + li r0,0 + stw r0,0(r3) /* drop native_tlbie_lock */ + + /* invalidate the whole TLB */ + li r0,256 + mtctr r0 + li r6,0 +25: tlbiel r6 + addi r6,r6,0x1000 + bdnz 25b + ptesync + + /* Take the guest's tlbie_lock */ + addi r3,r9,KVM_TLBIE_LOCK +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + ld r6,KVM_SDR1(r9) + mtspr SPRN_SDR1,r6 /* switch to partition page table */ + + /* Set up HID4 with the guest's LPID etc. */ + sync + mtspr SPRN_HID4,r7 + isync + + /* drop the guest's tlbie_lock */ + li r0,0 + stw r0,0(r3) + + /* Check if HDEC expires soon */ + mfspr r3,SPRN_HDEC + cmpwi r3,10 + li r12,BOOK3S_INTERRUPT_HV_DECREMENTER + mr r9,r4 + blt hdec_soon + + /* Enable HDEC interrupts */ + mfspr r0,SPRN_HID0 + li r3,1 + rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 + sync + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 /* Load up guest SLB entries */ - lwz r5,VCPU_SLB_MAX(r4) +31: lwz r5,VCPU_SLB_MAX(r4) cmpwi r5,0 beq 9f mtctr r5 @@ -472,6 +577,7 @@ kvmppc_interrupt: hcall_real_cont: /* Check for mediated interrupts (could be done earlier really ...) */ +BEGIN_FTR_SECTION cmpwi r12,BOOK3S_INTERRUPT_EXTERNAL bne+ 1f ld r5,VCPU_KVM(r9) @@ -481,6 +587,7 @@ hcall_real_cont: andi. r0,r5,LPCR_MER bne bounce_ext_interrupt 1: +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save DEC */ mfspr r5,SPRN_DEC @@ -492,9 +599,11 @@ hcall_real_cont: /* Save HEIR (HV emulation assist reg) in last_inst if this is an HEI (HV emulation interrupt, e40) */ li r3,-1 +BEGIN_FTR_SECTION cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST bne 11f mfspr r3,SPRN_HEIR +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 11: stw r3,VCPU_LAST_INST(r9) /* Save more register state */ @@ -508,8 +617,10 @@ hcall_real_cont: stw r7, VCPU_DSISR(r9) std r8, VCPU_CTR(r9) /* grab HDAR & HDSISR if HV data storage interrupt (HDSI) */ +BEGIN_FTR_SECTION cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE beq 6f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 7: std r6, VCPU_FAULT_DAR(r9) stw r7, VCPU_FAULT_DSISR(r9) @@ -543,6 +654,7 @@ hcall_real_cont: /* * Save the guest PURR/SPURR */ +BEGIN_FTR_SECTION mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR ld r7,VCPU_PURR(r9) @@ -562,6 +674,7 @@ hcall_real_cont: add r4,r4,r6 mtspr SPRN_PURR,r3 mtspr SPRN_SPURR,r4 +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201) /* Clear out SLB */ li r5,0 @@ -570,6 +683,14 @@ hcall_real_cont: ptesync hdec_soon: +BEGIN_FTR_SECTION + b 32f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + /* + * POWER7 guest -> host partition switch code. + * We don't have to lock against tlbies but we do + * have to coordinate the hardware threads. + */ /* Increment the threads-exiting-guest count in the 0xff00 bits of vcore->entry_exit_count */ lwsync @@ -640,9 +761,82 @@ hdec_soon: 16: ld r8,KVM_HOST_LPCR(r4) mtspr SPRN_LPCR,r8 isync + b 33f + + /* + * PPC970 guest -> host partition switch code. 
+ * We have to lock against concurrent tlbies, and + * we have to flush the whole TLB. + */ +32: ld r4,VCPU_KVM(r9) /* pointer to struct kvm */ + + /* Take the guest's tlbie_lock */ + lwz r8,PACA_LOCK_TOKEN(r13) + addi r3,r4,KVM_TLBIE_LOCK +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + + ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */ + li r0,0x18f + rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */ + or r0,r7,r0 + ptesync + sync + mtspr SPRN_HID4,r0 /* switch to reserved LPID */ + isync + li r0,0 + stw r0,0(r3) /* drop guest tlbie_lock */ + + /* invalidate the whole TLB */ + li r0,256 + mtctr r0 + li r6,0 +25: tlbiel r6 + addi r6,r6,0x1000 + bdnz 25b + ptesync + + /* take native_tlbie_lock */ + ld r3,toc_tlbie_lock@toc(2) +24: lwarx r0,0,r3 + cmpwi r0,0 + bne 24b + stwcx. r8,0,r3 + bne 24b + isync + + ld r6,KVM_HOST_SDR1(r4) + mtspr SPRN_SDR1,r6 /* switch to host page table */ + + /* Set up host HID4 value */ + sync + mtspr SPRN_HID4,r7 + isync + li r0,0 + stw r0,0(r3) /* drop native_tlbie_lock */ + + lis r8,0x7fff /* MAX_INT@h */ + mtspr SPRN_HDEC,r8 + + /* Disable HDEC interrupts */ + mfspr r0,SPRN_HID0 + li r3,0 + rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1 + sync + mtspr SPRN_HID0,r0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 + mfspr r0,SPRN_HID0 /* load host SLB entries */ - ld r8,PACA_SLBSHADOWPTR(r13) +33: ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED ld r5,SLBSHADOW_SAVEAREA(r8) @@ -654,12 +848,14 @@ hdec_soon: .endr /* Save and reset AMR and UAMOR before turning on the MMU */ +BEGIN_FTR_SECTION mfspr r5,SPRN_AMR mfspr r6,SPRN_UAMOR std r5,VCPU_AMR(r9) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Restore host DABR and DABRX */ ld r5,HSTATE_DABR(r13) @@ -668,10 +864,12 @@ hdec_soon: mtspr SPRN_DABRX,r6 /* Switch DSCR back to host value */ +BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR ld r7, HSTATE_DSCR(r13) std r8, VCPU_DSCR(r7) mtspr SPRN_DSCR, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Save non-volatile GPRs */ std r14, VCPU_GPR(r14)(r9) @@ -735,21 +933,31 @@ hdec_soon: mfspr r6, SPRN_PMC4 mfspr r7, SPRN_PMC5 mfspr r8, SPRN_PMC6 +BEGIN_FTR_SECTION + mfspr r10, SPRN_PMC7 + mfspr r11, SPRN_PMC8 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) stw r3, VCPU_PMC(r9) stw r4, VCPU_PMC + 4(r9) stw r5, VCPU_PMC + 8(r9) stw r6, VCPU_PMC + 12(r9) stw r7, VCPU_PMC + 16(r9) stw r8, VCPU_PMC + 20(r9) +BEGIN_FTR_SECTION + stw r10, VCPU_PMC + 24(r9) + stw r11, VCPU_PMC + 28(r9) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 22: /* save FP state */ mr r3, r9 bl .kvmppc_save_fp - /* Secondary threads go off to take a nap */ + /* Secondary threads go off to take a nap on POWER7 */ +BEGIN_FTR_SECTION lwz r0,VCPU_PTID(r3) cmpwi r0,0 bne secondary_nap +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* * Reload DEC. 
HDEC interrupts were disabled when @@ -771,12 +979,20 @@ hdec_soon: lwz r6, HSTATE_PMC + 12(r13) lwz r8, HSTATE_PMC + 16(r13) lwz r9, HSTATE_PMC + 20(r13) +BEGIN_FTR_SECTION + lwz r10, HSTATE_PMC + 24(r13) + lwz r11, HSTATE_PMC + 28(r13) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r4 mtspr SPRN_PMC3, r5 mtspr SPRN_PMC4, r6 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 +BEGIN_FTR_SECTION + mtspr SPRN_PMC7, r10 + mtspr SPRN_PMC8, r11 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) ld r3, HSTATE_MMCR(r13) ld r4, HSTATE_MMCR + 8(r13) ld r5, HSTATE_MMCR + 16(r13) @@ -802,7 +1018,7 @@ hdec_soon: cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK /* RFI into the highmem handler, or branch to interrupt handler */ - mfmsr r6 +12: mfmsr r6 mtctr r12 li r0, MSR_RI andc r6, r6, r0 @@ -812,7 +1028,11 @@ hdec_soon: beqctr RFI -11: mtspr SPRN_HSRR0, r8 +11: +BEGIN_FTR_SECTION + b 12b +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) + mtspr SPRN_HSRR0, r8 mtspr SPRN_HSRR1, r7 ba 0x500 diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 72c506505fa4..a107c9be0fb1 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -213,6 +213,9 @@ int kvm_dev_ioctl_check_extension(long ext) break; case KVM_CAP_PPC_RMA: r = 1; + /* PPC970 requires an RMA */ + if (cpu_has_feature(CPU_FTR_ARCH_201)) + r = 2; break; #endif default: diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index b44f5f803052..90039bc64119 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -37,7 +37,7 @@ #define HPTE_LOCK_BIT 3 -static DEFINE_RAW_SPINLOCK(native_tlbie_lock); +DEFINE_RAW_SPINLOCK(native_tlbie_lock); static inline void __tlbie(unsigned long va, int psize, int ssize) { -- cgit v1.2.3 From 91b191c71eae79c0e652c52a968d06cd96b539c5 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 4 Jul 2011 18:38:03 +0000 Subject: powerpc/44x: don't use tlbivax on AMP systems Since other OS's may be running on the other cores, don't use tlbivax. Signed-off-by: Dave Kleikamp Signed-off-by: Tony Breeds Cc: Benjamin Herrenschmidt Cc: Josh Boyer Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Josh Boyer --- arch/powerpc/include/asm/mmu.h | 7 ++++++- arch/powerpc/kernel/setup_32.c | 2 ++ arch/powerpc/mm/tlb_hash32.c | 4 ++++ arch/powerpc/mm/tlb_nohash.c | 19 +++++++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 4138b21ae80a..bb5591f424b3 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -120,9 +120,14 @@ static inline int mmu_has_feature(unsigned long feature) return (cur_cpu_spec->mmu_features & feature); } +static inline void mmu_clear_feature(unsigned long feature) +{ + cur_cpu_spec->mmu_features &= ~feature; +} + extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup; -/* MMU initialization (64-bit only fo now) */ +/* MMU initialization */ extern void early_init_mmu(void); extern void early_init_mmu_secondary(void); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 1d2fbc905303..7544e39b4156 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -126,6 +126,8 @@ notrace void __init machine_init(unsigned long dt_ptr) /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); + early_init_mmu(); + probe_machine(); setup_kdump_trampoline(); diff --git 
a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index 27b863c14941..9a445f64accd 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -177,3 +177,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, flush_range(vma->vm_mm, start, end); } EXPORT_SYMBOL(flush_tlb_range); + +void __init early_init_mmu(void) +{ +} diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 3722185d1865..7355211b93b3 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -266,6 +267,17 @@ EXPORT_SYMBOL(flush_tlb_page); #endif /* CONFIG_SMP */ +#ifdef CONFIG_PPC_47x +void __init early_init_mmu_47x(void) +{ +#ifdef CONFIG_SMP + unsigned long root = of_get_flat_dt_root(); + if (of_get_flat_dt_prop(root, "cooperative-partition", NULL)) + mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST); +#endif /* CONFIG_SMP */ +} +#endif /* CONFIG_PPC_47x */ + /* * Flush kernel TLB entries in the given range */ @@ -593,4 +605,11 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, /* Finally limit subsequent allocations */ memblock_set_current_limit(first_memblock_base + ppc64_rma_size); } +#else /* ! CONFIG_PPC64 */ +void __init early_init_mmu(void) +{ +#ifdef CONFIG_PPC_47x + early_init_mmu_47x(); +#endif +} #endif /* CONFIG_PPC64 */ -- cgit v1.2.3 From 9661534d6ad75f881d8bbeb73dd7372f41cbad99 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 4 Jul 2011 18:36:57 +0000 Subject: powerpc/47x: allow kernel to be loaded in higher physical memory The 44x code (which is shared by 47x) assumes the available physical memory begins at 0x00000000. This is not necessarily the case in an AMP environment. Support CONFIG_RELOCATABLE for 476 in order to allow the kernel to be loaded into a higher memory range. 
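In effect, the new head_44x.S code computes the following (illustrative C; "memstart" here stands for the physical start of RAM recovered from the RPN/ERPN in r25):

        /*
         * The kernel's physical start address is the start of RAM
         * plus the offset of KERNELBASE above PAGE_OFFSET.
         */
        u64 kernstart_addr = memstart + (KERNELBASE - PAGE_OFFSET);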
Signed-off-by: Tony Breeds Signed-off-by: Dave Kleikamp Cc: Benjamin Herrenschmidt Cc: Josh Boyer Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Josh Boyer --- arch/powerpc/Kconfig | 2 +- arch/powerpc/configs/44x/iss476-smp_defconfig | 6 ++-- arch/powerpc/kernel/head_44x.S | 42 ++++++++++++++++++++++----- arch/powerpc/mm/44x_mmu.c | 13 +++++++-- 4 files changed, 48 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c15f2e61e4fd..f8e578be5806 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -842,7 +842,7 @@ config LOWMEM_CAM_NUM config RELOCATABLE bool "Build a relocatable kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL && ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE + depends on EXPERIMENTAL && ADVANCED_OPTIONS && FLATMEM && (FSL_BOOKE || PPC_47x) help This builds a kernel image that is capable of running at the location the kernel is loaded at (some alignment restrictions may diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index 92f863ac8443..a6eb6ad05b2d 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -3,8 +3,8 @@ CONFIG_SMP=y CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_SPARSE_IRQ=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y @@ -21,10 +21,11 @@ CONFIG_ISS4xx=y CONFIG_HZ_100=y CONFIG_MATH_EMULATION=y CONFIG_IRQ_ALL_CPUS=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="root=/dev/issblk0" # CONFIG_PCI is not set +CONFIG_ADVANCED_OPTIONS=y +CONFIG_RELOCATABLE=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -67,7 +68,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 5e12b741ba5f..f8e971ba94f5 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -93,6 +93,30 @@ _ENTRY(_start); bl early_init +#ifdef CONFIG_RELOCATABLE + /* + * r25 will contain RPN/ERPN for the start address of memory + * + * Add the difference between KERNELBASE and PAGE_OFFSET to the + * start of physical memory to get kernstart_addr. + */ + lis r3,kernstart_addr@ha + la r3,kernstart_addr@l(r3) + + lis r4,KERNELBASE@h + ori r4,r4,KERNELBASE@l + lis r5,PAGE_OFFSET@h + ori r5,r5,PAGE_OFFSET@l + subf r4,r5,r4 + + rlwinm r6,r25,0,28,31 /* ERPN */ + rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */ + add r7,r7,r4 + + stw r6,0(r3) + stw r7,4(r3) +#endif + /* * Decide what sort of machine this is and initialize the MMU. */ @@ -1001,9 +1025,6 @@ clear_utlb_entry: lis r3,PAGE_OFFSET@h ori r3,r3,PAGE_OFFSET@l - /* Kernel is at the base of RAM */ - li r4, 0 /* Load the kernel physical address */ - /* Load the kernel PID = 0 */ li r0,0 mtspr SPRN_PID,r0 @@ -1013,9 +1034,8 @@ clear_utlb_entry: clrrwi r3,r3,12 /* Mask off the effective page number */ ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_256M - /* Word 1 */ - clrrwi r4,r4,12 /* Mask off the real page number */ - /* ERPN is 0 for first 4GB page */ + /* Word 1 - use r25. 
RPN is the same as the original entry */ + /* Word 2 */ li r5,0 ori r5,r5,PPC47x_TLB2_S_RWX @@ -1026,7 +1046,7 @@ clear_utlb_entry: /* We write to way 0 and bolted 0 */ lis r0,0x8800 tlbwe r3,r0,0 - tlbwe r4,r0,1 + tlbwe r25,r0,1 tlbwe r5,r0,2 /* @@ -1124,7 +1144,13 @@ head_start_common: lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ mtspr SPRN_IVPR,r4 - addis r22,r22,KERNELBASE@h + /* + * If the kernel was loaded at a non-zero 256 MB page, we need to + * mask off the most significant 4 bits to get the relative address + * from the start of physical memory + */ + rlwinm r22,r22,0,4,31 + addis r22,r22,PAGE_OFFSET@h mtlr r22 isync blr diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 024acab588fd..f60e006d90c3 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -186,10 +186,11 @@ void __init MMU_init_hw(void) unsigned long __init mmu_mapin_ram(unsigned long top) { unsigned long addr; + unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); /* Pin in enough TLBs to cover any lowmem not covered by the * initial 256M mapping established in head_44x.S */ - for (addr = PPC_PIN_SIZE; addr < lowmem_end_addr; + for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr; addr += PPC_PIN_SIZE) { if (mmu_has_feature(MMU_FTR_TYPE_47x)) ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); @@ -218,19 +219,25 @@ unsigned long __init mmu_mapin_ram(unsigned long top) void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { + u64 size; + +#ifndef CONFIG_RELOCATABLE /* We don't currently support the first MEMBLOCK not mapping 0 * physical on those processors */ BUG_ON(first_memblock_base != 0); +#endif /* 44x has a 256M TLB entry pinned at boot */ - memblock_set_current_limit(min_t(u64, first_memblock_size, PPC_PIN_SIZE)); + size = (min_t(u64, first_memblock_size, PPC_PIN_SIZE)); + memblock_set_current_limit(first_memblock_base + size); } #ifdef CONFIG_SMP void __cpuinit mmu_init_secondary(int cpu) { unsigned long addr; + unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); /* Pin in enough TLBs to cover any lowmem not covered by the * initial 256M mapping established in head_44x.S @@ -241,7 +248,7 @@ void __cpuinit mmu_init_secondary(int cpu) * stack. current (r2) isn't initialized, smp_processor_id() * will not work, current thread info isn't accessible, ... */ - for (addr = PPC_PIN_SIZE; addr < lowmem_end_addr; + for (addr = memstart + PPC_PIN_SIZE; addr < lowmem_end_addr; addr += PPC_PIN_SIZE) { if (mmu_has_feature(MMU_FTR_TYPE_47x)) ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); -- cgit v1.2.3 From f7ba2991e9063fa8cf668ee57c08e2842ad04f11 Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Mon, 4 Jul 2011 18:44:19 +0000 Subject: powerpc/mm: Fix output of total_ram. On 32bit platforms that support >= 4GB memory total_ram was truncated. 
This creates a confusing printk: Top of RAM: 0x100000000, Total RAM: 0x0 Fix that: Top of RAM: 0x100000000, Total RAM: 0x100000000 Signed-off-by: Tony Breeds Acked-by: Josh Boyer Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 457dc84154ba..c781bbcf7338 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -249,7 +249,7 @@ static int __init mark_nonram_nosave(void) */ void __init paging_init(void) { - unsigned long total_ram = memblock_phys_mem_size(); + unsigned long long total_ram = memblock_phys_mem_size(); phys_addr_t top_of_ram = memblock_end_of_DRAM(); unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -269,7 +269,7 @@ void __init paging_init(void) kmap_prot = PAGE_KERNEL; #endif /* CONFIG_HIGHMEM */ - printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%lx\n", + printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%llx\n", (unsigned long long)top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); -- cgit v1.2.3 From 41151e77a4d96ea138cede6d84c955aa4769ce74 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Tue, 28 Jun 2011 09:54:48 +0000 Subject: powerpc: Hugetlb for BookE Enable hugepages on Freescale BookE processors. This allows the kernel to use huge TLB entries to map pages, which can greatly reduce the number of TLB misses and the amount of TLB thrashing experienced by applications with large memory footprints. Care should be taken when using this on FSL processors, as the number of large TLB entries supported by the core is low (16-64) on current processors. The supported set of hugepage sizes includes 4m, 16m, 64m, 256m, and 1g. Page sizes larger than the max zone size are called "gigantic" pages and must be allocated on the command line (and cannot be deallocated). This is currently only fully implemented for Freescale 32-bit BookE processors, but there is some infrastructure in the code for 64-bit BookE.
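As a usage sketch (values purely illustrative), a board with this support would request gigantic pages with interleaved boot options such as:

	hugepagesz=1g hugepages=2 hugepagesz=256m hugepages=4

These hugepagesz=/hugepages= pairs are parsed early by do_gpage_early_setup() in the patch below, so that reserve_hugetlb_gpages() can carve the memory out of highmem before the normal allocators come up.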
Signed-off-by: Becky Bruce Signed-off-by: David Gibson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 3 +- arch/powerpc/include/asm/hugetlb.h | 63 +++++- arch/powerpc/include/asm/mmu-book3e.h | 7 + arch/powerpc/include/asm/mmu-hash64.h | 3 +- arch/powerpc/include/asm/mmu.h | 18 +- arch/powerpc/include/asm/page.h | 31 ++- arch/powerpc/include/asm/page_64.h | 11 - arch/powerpc/include/asm/pte-book3e.h | 3 + arch/powerpc/kernel/head_fsl_booke.S | 133 ++++++++++-- arch/powerpc/mm/Makefile | 1 + arch/powerpc/mm/hash_utils_64.c | 3 - arch/powerpc/mm/hugetlbpage-book3e.c | 121 +++++++++++ arch/powerpc/mm/hugetlbpage.c | 379 +++++++++++++++++++++++++++++---- arch/powerpc/mm/init_32.c | 9 + arch/powerpc/mm/mem.c | 5 + arch/powerpc/mm/mmu_context_nohash.c | 5 + arch/powerpc/mm/pgtable.c | 3 +- arch/powerpc/mm/tlb_low_64e.S | 24 +-- arch/powerpc/mm/tlb_nohash.c | 46 +++- arch/powerpc/platforms/Kconfig.cputype | 4 +- 20 files changed, 766 insertions(+), 106 deletions(-) create mode 100644 arch/powerpc/mm/hugetlbpage-book3e.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 0a3d5560c9be..62711421cd64 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -429,8 +429,7 @@ config ARCH_POPULATES_NODE_MAP def_bool y config SYS_SUPPORTS_HUGETLBFS - def_bool y - depends on PPC_BOOK3S_64 + bool source "mm/Kconfig" diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 5856a66ab404..86004930a78e 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -1,15 +1,60 @@ #ifndef _ASM_POWERPC_HUGETLB_H #define _ASM_POWERPC_HUGETLB_H +#ifdef CONFIG_HUGETLB_PAGE #include +extern struct kmem_cache *hugepte_cache; +extern void __init reserve_hugetlb_gpages(void); + +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + BUG_ON(!hugepd_ok(hpd)); + return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE); +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return hpd.pd & HUGEPD_SHIFT_MASK; +} + +static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, + unsigned pdshift) +{ + /* + * On 32-bit, we have multiple higher-level table entries that point to + * the same hugepte. Just use the first one since they're all + * identical. So for that case, idx=0. 
+ */ + unsigned long idx = 0; + + pte_t *dir = hugepd_page(*hpdp); +#ifdef CONFIG_PPC64 + idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); +#endif + + return dir + idx; +} + pte_t *huge_pte_offset_and_shift(struct mm_struct *mm, unsigned long addr, unsigned *shift); void flush_dcache_icache_hugepage(struct page *page); +#if defined(CONFIG_PPC_MM_SLICES) || defined(CONFIG_PPC_SUBPAGE_PROT) int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); +#else +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) +{ + return 0; +} +#endif + +void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte); +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, @@ -50,8 +95,11 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1); - return __pte(old); +#ifdef CONFIG_PPC64 + return __pte(pte_update(mm, addr, ptep, ~0UL, 1)); +#else + return __pte(pte_update(ptep, ~0UL, 0)); +#endif } static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, @@ -93,4 +141,15 @@ static inline void arch_release_hugepage(struct page *page) { } +#else /* ! CONFIG_HUGETLB_PAGE */ +static inline void reserve_hugetlb_gpages(void) +{ + pr_err("Cannot reserve gpages without hugetlb enabled\n"); +} +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} +#endif + #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 3ea0f9a259d8..0260ea5ec3c2 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -66,6 +66,7 @@ #define MAS2_M 0x00000004 #define MAS2_G 0x00000002 #define MAS2_E 0x00000001 +#define MAS2_WIMGE_MASK 0x0000001f #define MAS2_EPN_MASK(size) (~0 << (size + 10)) #define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags)) @@ -80,6 +81,7 @@ #define MAS3_SW 0x00000004 #define MAS3_UR 0x00000002 #define MAS3_SR 0x00000001 +#define MAS3_BAP_MASK 0x0000003f #define MAS3_SPSIZE 0x0000003e #define MAS3_SPSIZE_SHIFT 1 @@ -212,6 +214,11 @@ typedef struct { unsigned int id; unsigned int active; unsigned long vdso_base; +#ifdef CONFIG_PPC_MM_SLICES + u64 low_slices_psize; /* SLB page size encodings */ + u64 high_slices_psize; /* 4 bits per slice for now */ + u16 user_psize; /* page size index */ +#endif } mm_context_t; /* Page size definitions, common between 32 and 64-bit diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index b445e0af4c2b..db645ec842bd 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -262,8 +262,7 @@ extern void hash_failure_debug(unsigned long ea, unsigned long access, extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long pstart, unsigned long prot, int psize, int ssize); -extern void add_gpage(unsigned long addr, unsigned long page_size, - unsigned long number_of_pages); +extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); extern void hpte_init_native(void); diff --git 
a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 698b30638681..f0145522cfba 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -175,14 +175,16 @@ extern u64 ppc64_rma_size; #define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */ #define MMU_PAGE_256K 4 #define MMU_PAGE_1M 5 -#define MMU_PAGE_8M 6 -#define MMU_PAGE_16M 7 -#define MMU_PAGE_256M 8 -#define MMU_PAGE_1G 9 -#define MMU_PAGE_16G 10 -#define MMU_PAGE_64G 11 -#define MMU_PAGE_COUNT 12 - +#define MMU_PAGE_4M 6 +#define MMU_PAGE_8M 7 +#define MMU_PAGE_16M 8 +#define MMU_PAGE_64M 9 +#define MMU_PAGE_256M 10 +#define MMU_PAGE_1G 11 +#define MMU_PAGE_16G 12 +#define MMU_PAGE_64G 13 + +#define MMU_PAGE_COUNT 14 #if defined(CONFIG_PPC_STD_MMU_64) /* 64-bit classic hash table MMU */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 2cd664ef0a5e..dd9c4fd038e0 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -36,6 +36,18 @@ #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#ifndef __ASSEMBLY__ +#ifdef CONFIG_HUGETLB_PAGE +extern unsigned int HPAGE_SHIFT; +#else +#define HPAGE_SHIFT PAGE_SHIFT +#endif +#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) +#endif + /* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ #define __HAVE_ARCH_GATE_AREA 1 @@ -158,6 +170,24 @@ extern phys_addr_t kernstart_addr; #define is_kernel_addr(x) ((x) >= PAGE_OFFSET) #endif +/* + * Use the top bit of the higher-level page table entries to indicate whether + * the entries we point to contain hugepages. This works because we know that + * the page tables live in kernel space. If we ever decide to support having + * page tables at arbitrary addresses, this breaks and will have to change. + */ +#ifdef CONFIG_PPC64 +#define PD_HUGE 0x8000000000000000 +#else +#define PD_HUGE 0x80000000 +#endif + +/* + * Some number of bits at the level of the page table that points to + * a hugepte are used to encode the size. This masks those bits. 
+ */ +#define HUGEPD_SHIFT_MASK 0x3f + #ifndef __ASSEMBLY__ #undef STRICT_MM_TYPECHECKS @@ -243,7 +273,6 @@ typedef unsigned long pgprot_t; #endif typedef struct { signed long pd; } hugepd_t; -#define HUGEPD_SHIFT_MASK 0x3f #ifdef CONFIG_HUGETLB_PAGE static inline int hugepd_ok(hugepd_t hpd) diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 9356262fd3cc..fb40ede6bc0d 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -64,17 +64,6 @@ extern void copy_page(void *to, void *from); /* Log 2 of page table size */ extern u64 ppc64_pft_size; -/* Large pages size */ -#ifdef CONFIG_HUGETLB_PAGE -extern unsigned int HPAGE_SHIFT; -#else -#define HPAGE_SHIFT PAGE_SHIFT -#endif -#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) -#define HPAGE_MASK (~(HPAGE_SIZE - 1)) -#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) - #endif /* __ASSEMBLY__ */ #ifdef CONFIG_PPC_MM_SLICES diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h index 082d515930a2..0156702ba24e 100644 --- a/arch/powerpc/include/asm/pte-book3e.h +++ b/arch/powerpc/include/asm/pte-book3e.h @@ -72,6 +72,9 @@ #define PTE_RPN_SHIFT (24) #endif +#define PTE_WIMGE_SHIFT (19) +#define PTE_BAP_SHIFT (2) + /* On 32-bit, we never clear the top part of the PTE */ #ifdef CONFIG_PPC32 #define _PTE_NONE_MASK 0xffffffff00000000ULL diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 50845924b7d9..4ea9bfbf67e9 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -236,8 +236,24 @@ _ENTRY(__early_start) * if we find the pte (fall through): * r11 is low pte word * r12 is pointer to the pte + * r10 is the pshift from the PGD, if we're a hugepage */ #ifdef CONFIG_PTE_64BIT +#ifdef CONFIG_HUGETLB_PAGE +#define FIND_PTE \ + rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ + lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ + blt 1000f; /* Normal non-huge page */ \ + beq 2f; /* Bail if no table */ \ + oris r11, r11, PD_HUGE@h; /* Put back address bit */ \ + andi. 
r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \ + xor r12, r10, r11; /* drop size bits from pointer */ \ + b 1001f; \ +1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + li r10, 0; /* clear r10 */ \ +1001: lwz r11, 4(r12); /* Get pte entry */ +#else #define FIND_PTE \ rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ @@ -245,7 +261,8 @@ _ENTRY(__early_start) beq 2f; /* Bail if no table */ \ rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ lwz r11, 4(r12); /* Get pte entry */ -#else +#endif /* HUGEPAGE */ +#else /* !PTE_64BIT */ #define FIND_PTE \ rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ lwz r11, 0(r11); /* Get L1 entry */ \ @@ -402,8 +419,8 @@ interrupt_base: #ifdef CONFIG_PTE_64BIT #ifdef CONFIG_SMP - subf r10,r11,r12 /* create false data dep */ - lwzx r13,r11,r10 /* Get upper pte bits */ + subf r13,r11,r12 /* create false data dep */ + lwzx r13,r11,r13 /* Get upper pte bits */ #else lwz r13,0(r12) /* Get upper pte bits */ #endif @@ -483,8 +500,8 @@ interrupt_base: #ifdef CONFIG_PTE_64BIT #ifdef CONFIG_SMP - subf r10,r11,r12 /* create false data dep */ - lwzx r13,r11,r10 /* Get upper pte bits */ + subf r13,r11,r12 /* create false data dep */ + lwzx r13,r11,r13 /* Get upper pte bits */ #else lwz r13,0(r12) /* Get upper pte bits */ #endif @@ -548,7 +565,7 @@ interrupt_base: /* * Both the instruction and data TLB miss get to this * point to load the TLB. - * r10 - available to use + * r10 - tsize encoding (if HUGETLB_PAGE) or available to use * r11 - TLB (info from Linux PTE) * r12 - available to use * r13 - upper bits of PTE (if PTE_64BIT) or available to use @@ -558,21 +575,73 @@ interrupt_base: * Upon exit, we reload everything and RFI. */ finish_tlb_load: +#ifdef CONFIG_HUGETLB_PAGE + cmpwi 6, r10, 0 /* check for huge page */ + beq 6, finish_tlb_load_cont /* !huge */ + + /* Alas, we need more scratch registers for hugepages */ + mfspr r12, SPRN_SPRG_THREAD + stw r14, THREAD_NORMSAVE(4)(r12) + stw r15, THREAD_NORMSAVE(5)(r12) + stw r16, THREAD_NORMSAVE(6)(r12) + stw r17, THREAD_NORMSAVE(7)(r12) + + /* Get the next_tlbcam_idx percpu var */ +#ifdef CONFIG_SMP + lwz r12, THREAD_INFO-THREAD(r12) + lwz r15, TI_CPU(r12) + lis r14, __per_cpu_offset@h + ori r14, r14, __per_cpu_offset@l + rlwinm r15, r15, 2, 0, 29 + lwzx r16, r14, r15 +#else + li r16, 0 +#endif + lis r17, next_tlbcam_idx@h + ori r17, r17, next_tlbcam_idx@l + add r17, r17, r16 /* r17 = *next_tlbcam_idx */ + lwz r15, 0(r17) /* r15 = next_tlbcam_idx */ + + lis r14, MAS0_TLBSEL(1)@h /* select TLB1 (TLBCAM) */ + rlwimi r14, r15, 16, 4, 15 /* next_tlbcam_idx entry */ + mtspr SPRN_MAS0, r14 + + /* Extract TLB1CFG(NENTRY) */ + mfspr r16, SPRN_TLB1CFG + andi. r16, r16, 0xfff + + /* Update next_tlbcam_idx, wrapping when necessary */ + addi r15, r15, 1 + cmpw r15, r16 + blt 100f + lis r14, tlbcam_index@h + ori r14, r14, tlbcam_index@l + lwz r15, 0(r14) +100: stw r15, 0(r17) + + /* + * Calc MAS1_TSIZE from r10 (which has pshift encoded) + * tlb_enc = (pshift - 10). + */ + subi r15, r10, 10 + mfspr r16, SPRN_MAS1 + rlwimi r16, r15, 7, 20, 24 + mtspr SPRN_MAS1, r16 + + /* copy the pshift for use later */ + mr r14, r10 + + /* fall through */ + +#endif /* CONFIG_HUGETLB_PAGE */ + /* * We set execute, because we don't have the granularity to * properly set this at the page level (Linux problem). * Many of these bits are software only. Bits we don't set * here we (properly should) assume have the appropriate value. 
*/ - - mfspr r12, SPRN_MAS2 -#ifdef CONFIG_PTE_64BIT - rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */ -#else - rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ -#endif - mtspr SPRN_MAS2, r12 - +finish_tlb_load_cont: #ifdef CONFIG_PTE_64BIT rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */ andi. r10, r11, _PAGE_DIRTY @@ -581,22 +650,40 @@ finish_tlb_load: andc r12, r12, r10 1: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */ rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */ - mtspr SPRN_MAS3, r12 +2: mtspr SPRN_MAS3, r12 BEGIN_MMU_FTR_SECTION srwi r10, r13, 12 /* grab RPN[12:31] */ mtspr SPRN_MAS7, r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #else li r10, (_PAGE_EXEC | _PAGE_PRESENT) + mr r13, r11 rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ and r12, r11, r10 andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ slwi r10, r12, 1 or r10, r10, r12 iseleq r12, r12, r10 - rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */ - mtspr SPRN_MAS3, r11 + rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ + mtspr SPRN_MAS3, r13 #endif + + mfspr r12, SPRN_MAS2 +#ifdef CONFIG_PTE_64BIT + rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */ +#else + rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ +#endif +#ifdef CONFIG_HUGETLB_PAGE + beq 6, 3f /* don't mask if page isn't huge */ + li r13, 1 + slw r13, r13, r14 + subi r13, r13, 1 + rlwinm r13, r13, 0, 0, 19 /* bottom bits used for WIMGE/etc */ + andc r12, r12, r13 /* mask off ea bits within the page */ +#endif +3: mtspr SPRN_MAS2, r12 + #ifdef CONFIG_E200 /* Round robin TLB1 entries assignment */ mfspr r12, SPRN_MAS0 @@ -622,11 +709,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) mtspr SPRN_MAS0,r12 #endif /* CONFIG_E200 */ +tlb_write_entry: tlbwe /* Done...restore registers and get out of here. 
*/ mfspr r10, SPRN_SPRG_THREAD - lwz r11, THREAD_NORMSAVE(3)(r10) +#ifdef CONFIG_HUGETLB_PAGE + beq 6, 8f /* skip restore for 4k page faults */ + lwz r14, THREAD_NORMSAVE(4)(r10) + lwz r15, THREAD_NORMSAVE(5)(r10) + lwz r16, THREAD_NORMSAVE(6)(r10) + lwz r17, THREAD_NORMSAVE(7)(r10) +#endif +8: lwz r11, THREAD_NORMSAVE(3)(r10) mtcr r11 lwz r13, THREAD_NORMSAVE(2)(r10) lwz r12, THREAD_NORMSAVE(1)(r10) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index bdca46e08382..991ee813d2a8 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o ifeq ($(CONFIG_HUGETLB_PAGE),y) obj-y += hugetlbpage.o obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o +obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o endif obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 26b2872b3d00..1f8b2a05e3d0 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -105,9 +105,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M; int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; EXPORT_SYMBOL_GPL(mmu_slb_size); -#ifdef CONFIG_HUGETLB_PAGE -unsigned int HPAGE_SHIFT; -#endif #ifdef CONFIG_PPC_64K_PAGES int mmu_ci_restrictions; #endif diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c new file mode 100644 index 000000000000..1295b7c1cdac --- /dev/null +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -0,0 +1,121 @@ +/* + * PPC Huge TLB Page Support for Book3E MMU + * + * Copyright (C) 2009 David Gibson, IBM Corporation. + * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor + * + */ +#include +#include + +static inline int mmu_get_tsize(int psize) +{ + return mmu_psize_defs[psize].enc; +} + +static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) +{ + int found = 0; + + mtspr(SPRN_MAS6, pid << 16); + if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) { + asm volatile( + "li %0,0\n" + "tlbsx. 0,%1\n" + "bne 1f\n" + "li %0,1\n" + "1:\n" + : "=&r"(found) : "r"(ea)); + } else { + asm volatile( + "tlbsx 0,%1\n" + "mfspr %0,0x271\n" + "srwi %0,%0,31\n" + : "=&r"(found) : "r"(ea)); + } + + return found; +} + +void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte) +{ + unsigned long mas1, mas2; + u64 mas7_3; + unsigned long psize, tsize, shift; + unsigned long flags; + +#ifdef CONFIG_PPC_FSL_BOOK3E + int index, lz, ncams; + struct vm_area_struct *vma; +#endif + + if (unlikely(is_kernel_addr(ea))) + return; + +#ifdef CONFIG_MM_SLICES + psize = mmu_get_tsize(get_slice_psize(mm, ea)); + tsize = mmu_get_psize(psize); + shift = mmu_psize_defs[psize].shift; +#else + vma = find_vma(mm, ea); + psize = vma_mmu_pagesize(vma); /* returns actual size in bytes */ + asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (psize)); + shift = 31 - lz; + tsize = 21 - lz; +#endif + + /* + * We can't be interrupted while we're setting up the MAS + * registers or after we've confirmed that no tlb exists.
+ */ + local_irq_save(flags); + + if (unlikely(book3e_tlb_exists(ea, mm->context.id))) { + local_irq_restore(flags); + return; + } + +#ifdef CONFIG_PPC_FSL_BOOK3E + ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + + /* We have to use the CAM(TLB1) on FSL parts for hugepages */ + index = __get_cpu_var(next_tlbcam_idx); + mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1)); + + /* Just round-robin the entries and wrap when we hit the end */ + if (unlikely(index == ncams - 1)) + __get_cpu_var(next_tlbcam_idx) = tlbcam_index; + else + __get_cpu_var(next_tlbcam_idx)++; +#endif + mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize); + mas2 = ea & ~((1UL << shift) - 1); + mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; + mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT; + mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK; + if (!pte_dirty(pte)) + mas7_3 &= ~(MAS3_SW|MAS3_UW); + + mtspr(SPRN_MAS1, mas1); + mtspr(SPRN_MAS2, mas2); + + if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { + mtspr(SPRN_MAS7_MAS3, mas7_3); + } else { + mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); + mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); + } + + asm volatile ("tlbwe"); + + local_irq_restore(flags); +} + +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + struct hstate *hstate = hstate_file(vma->vm_file); + unsigned long tsize = huge_page_shift(hstate) - 10; + + __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, tsize, 0); + +} diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0b9a5c1901b9..3a5f59dcbb33 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -1,7 +1,8 @@ /* - * PPC64 (POWER4) Huge TLB Page Support for Kernel. + * PPC Huge TLB Page Support for Kernel. * * Copyright (C) 2003 David Gibson, IBM Corporation. + * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor * * Based on the IA-32 version: * Copyright (C) 2002, Rohit Seth @@ -11,24 +12,39 @@ #include #include #include +#include +#include +#include #include #include #include +#include #define PAGE_SHIFT_64K 16 #define PAGE_SHIFT_16M 24 #define PAGE_SHIFT_16G 34 -#define MAX_NUMBER_GPAGES 1024 +unsigned int HPAGE_SHIFT; -/* Tracks the 16G pages after the device tree is scanned and before the - * huge_boot_pages list is ready. */ -static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; +/* + * Tracks gpages after the device tree is scanned and before the + * huge_boot_pages list is ready. On 64-bit implementations, this is + * just used to track 16G pages and so is a single array. 32-bit + * implementations may have more than one gpage size due to limitations + * of the memory allocators, so we need multiple arrays + */ +#ifdef CONFIG_PPC64 +#define MAX_NUMBER_GPAGES 1024 +static u64 gpage_freearray[MAX_NUMBER_GPAGES]; static unsigned nr_gpages; - -/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() - * will choke on pointers to hugepte tables, which is handy for - * catching screwups early. 
*/ +#else +#define MAX_NUMBER_GPAGES 128 +struct psize_gpages { + u64 gpage_list[MAX_NUMBER_GPAGES]; + unsigned int nr_gpages; +}; +static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT]; +#endif static inline int shift_to_mmu_psize(unsigned int shift) { @@ -49,25 +65,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) #define hugepd_none(hpd) ((hpd).pd == 0) -static inline pte_t *hugepd_page(hugepd_t hpd) -{ - BUG_ON(!hugepd_ok(hpd)); - return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000); -} - -static inline unsigned int hugepd_shift(hugepd_t hpd) -{ - return hpd.pd & HUGEPD_SHIFT_MASK; -} - -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift) -{ - unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); - pte_t *dir = hugepd_page(*hpdp); - - return dir + idx; -} - pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) { pgd_t *pg; @@ -93,7 +90,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (is_hugepd(pm)) hpdp = (hugepd_t *)pm; else if (!pmd_none(*pm)) { - return pte_offset_map(pm, ea); + return pte_offset_kernel(pm, ea); } } } @@ -114,8 +111,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, unsigned long address, unsigned pdshift, unsigned pshift) { - pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift), - GFP_KERNEL|__GFP_REPEAT); + struct kmem_cache *cachep; + pte_t *new; + +#ifdef CONFIG_PPC64 + cachep = PGT_CACHE(pdshift - pshift); +#else + int i; + int num_hugepd = 1 << (pshift - pdshift); + cachep = hugepte_cache; +#endif + + new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -124,10 +131,31 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, return -ENOMEM; spin_lock(&mm->page_table_lock); +#ifdef CONFIG_PPC64 if (!hugepd_none(*hpdp)) - kmem_cache_free(PGT_CACHE(pdshift - pshift), new); + kmem_cache_free(cachep, new); else - hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift; + hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; +#else + /* + * We have multiple higher-level entries that point to the same + * actual pte location. Fill in each as we go and backtrack on error. + * We need all of these so the DTLB pgtable walk code can find the + * right higher-level entry without knowing if it's a hugepage or not. + */ + for (i = 0; i < num_hugepd; i++, hpdp++) { + if (unlikely(!hugepd_none(*hpdp))) + break; + else + hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; + } + /* If we bailed from the for loop early, an error occurred, clean up */ + if (i < num_hugepd) { + for (i = i - 1 ; i >= 0; i--, hpdp--) + hpdp->pd = 0; + kmem_cache_free(cachep, new); + } +#endif spin_unlock(&mm->page_table_lock); return 0; } @@ -169,11 +197,132 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(hpdp, addr, pdshift); } +#ifdef CONFIG_PPC32 /* Build list of addresses of gigantic pages. This function is used in early * boot before the buddy or bootmem allocator is setup. 
*/ -void add_gpage(unsigned long addr, unsigned long page_size, - unsigned long number_of_pages) +void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) +{ + unsigned int idx = shift_to_mmu_psize(__ffs(page_size)); + int i; + + if (addr == 0) + return; + + gpage_freearray[idx].nr_gpages = number_of_pages; + + for (i = 0; i < number_of_pages; i++) { + gpage_freearray[idx].gpage_list[i] = addr; + addr += page_size; + } +} + +/* + * Moves the gigantic page addresses from the temporary list to the + * huge_boot_pages list. + */ +int alloc_bootmem_huge_page(struct hstate *hstate) +{ + struct huge_bootmem_page *m; + int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT); + int nr_gpages = gpage_freearray[idx].nr_gpages; + + if (nr_gpages == 0) + return 0; + +#ifdef CONFIG_HIGHMEM + /* + * If gpages can be in highmem we can't use the trick of storing the + * data structure in the page; allocate space for this + */ + m = alloc_bootmem(sizeof(struct huge_bootmem_page)); + m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; +#else + m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); +#endif + + list_add(&m->list, &huge_boot_pages); + gpage_freearray[idx].nr_gpages = nr_gpages; + gpage_freearray[idx].gpage_list[nr_gpages] = 0; + m->hstate = hstate; + + return 1; +} +/* + * Scan the command line hugepagesz= options for gigantic pages; store those in + * a list that we use to allocate the memory once all options are parsed. + */ + +unsigned long gpage_npages[MMU_PAGE_COUNT]; + +static int __init do_gpage_early_setup(char *param, char *val) +{ + static phys_addr_t size; + unsigned long npages; + + /* + * The hugepagesz and hugepages cmdline options are interleaved. We + * use the size variable to keep track of whether or not this was done + * properly and skip over instances where it is incorrect. Other + * command-line parsing code will issue warnings, so we don't need to. + * + */ + if ((strcmp(param, "default_hugepagesz") == 0) || + (strcmp(param, "hugepagesz") == 0)) { + size = memparse(val, NULL); + } else if (strcmp(param, "hugepages") == 0) { + if (size != 0) { + if (sscanf(val, "%lu", &npages) <= 0) + npages = 0; + gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; + size = 0; + } + } + return 0; +} + + +/* + * This function allocates physical space for pages that are larger than the + * buddy allocator can handle. We want to allocate these in highmem because + * the amount of lowmem is limited. This means that this function MUST be + * called before lowmem_end_addr is set up in MMU_init() in order for the lmb + * allocate to grab highmem. + */ +void __init reserve_hugetlb_gpages(void) +{ + static __initdata char cmdline[COMMAND_LINE_SIZE]; + phys_addr_t size, base; + int i; + + strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE); + parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup); + + /* + * Walk gpage list in reverse, allocating larger page sizes first. + * Skip over unsupported sizes, or sizes that have 0 gpages allocated. + * When we reach the point in the list where pages are no longer + * considered gpages, we're done. 
+ */ + for (i = MMU_PAGE_COUNT-1; i >= 0; i--) { + if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0) + continue; + else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT)) + break; + + size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i)); + base = memblock_alloc_base(size * gpage_npages[i], size, + MEMBLOCK_ALLOC_ANYWHERE); + add_gpage(base, size, gpage_npages[i]); + } +} + +#else /* PPC64 */ + +/* Build list of addresses of gigantic pages. This function is used in early + * boot before the buddy or bootmem allocator is setup. + */ +void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { if (!addr) return; @@ -199,19 +348,79 @@ int alloc_bootmem_huge_page(struct hstate *hstate) m->hstate = hstate; return 1; } +#endif int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { return 0; } +#ifdef CONFIG_PPC32 +#define HUGEPD_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) + +struct hugepd_freelist { + struct rcu_head rcu; + unsigned int index; + void *ptes[0]; +}; + +static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur); + +static void hugepd_free_rcu_callback(struct rcu_head *head) +{ + struct hugepd_freelist *batch = + container_of(head, struct hugepd_freelist, rcu); + unsigned int i; + + for (i = 0; i < batch->index; i++) + kmem_cache_free(hugepte_cache, batch->ptes[i]); + + free_page((unsigned long)batch); +} + +static void hugepd_free(struct mmu_gather *tlb, void *hugepte) +{ + struct hugepd_freelist **batchp; + + batchp = &__get_cpu_var(hugepd_freelist_cur); + + if (atomic_read(&tlb->mm->mm_users) < 2 || + cpumask_equal(mm_cpumask(tlb->mm), + cpumask_of(smp_processor_id()))) { + kmem_cache_free(hugepte_cache, hugepte); + return; + } + + if (*batchp == NULL) { + *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC); + (*batchp)->index = 0; + } + + (*batchp)->ptes[(*batchp)->index++] = hugepte; + if ((*batchp)->index == HUGEPD_FREELIST_SIZE) { + call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback); + *batchp = NULL; + } +} +#endif + static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, unsigned long start, unsigned long end, unsigned long floor, unsigned long ceiling) { pte_t *hugepte = hugepd_page(*hpdp); - unsigned shift = hugepd_shift(*hpdp); + int i; + unsigned long pdmask = ~((1UL << pdshift) - 1); + unsigned int num_hugepd = 1; + +#ifdef CONFIG_PPC64 + unsigned int shift = hugepd_shift(*hpdp); +#else + /* Note: On 32-bit the hpdp may be the first of several */ + num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift)); +#endif start &= pdmask; if (start < floor) @@ -224,9 +433,15 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (end - 1 > ceiling - 1) return; - hpdp->pd = 0; + for (i = 0; i < num_hugepd; i++, hpdp++) + hpdp->pd = 0; + tlb->need_flush = 1; +#ifdef CONFIG_PPC64 pgtable_free_tlb(tlb, hugepte, pdshift - shift); +#else + hugepd_free(tlb, hugepte); +#endif } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, @@ -331,18 +546,27 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, * too. 
*/ - pgd = pgd_offset(tlb->mm, addr); do { next = pgd_addr_end(addr, end); + pgd = pgd_offset(tlb->mm, addr); if (!is_hugepd(pgd)) { if (pgd_none_or_clear_bad(pgd)) continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); } else { +#ifdef CONFIG_PPC32 + /* + * Increment next by the size of the huge mapping since + * on 32-bit there may be more than one entry at the pgd + * level for a single hugepage, but all of them point to + * the same kmem cache that holds the hugepte. + */ + next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); +#endif free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, addr, next, floor, ceiling); } - } while (pgd++, addr = next, addr != end); + } while (addr = next, addr != end); } struct page * @@ -466,17 +690,35 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { +#ifdef CONFIG_MM_SLICES struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); +#else + return get_unmapped_area(file, addr, len, pgoff, flags); +#endif } unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { +#ifdef CONFIG_MM_SLICES unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); return 1UL << mmu_psize_to_shift(psize); +#else + if (!is_vm_hugetlb_page(vma)) + return PAGE_SIZE; + + return huge_page_size(hstate_vma(vma)); +#endif +} + +static inline bool is_power_of_4(unsigned long x) +{ + if (is_power_of_2(x)) + return (__ilog2(x) % 2) ? false : true; + return false; } static int __init add_huge_page_size(unsigned long long size) @@ -486,9 +728,14 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. */ +#ifdef CONFIG_PPC_FSL_BOOK3E + if ((size < PAGE_SIZE) || !is_power_of_4(size)) + return -EINVAL; +#else if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) return -EINVAL; +#endif if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) return -EINVAL; @@ -525,6 +772,46 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); +#ifdef CONFIG_FSL_BOOKE +struct kmem_cache *hugepte_cache; +static int __init hugetlbpage_init(void) +{ + int psize; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + unsigned shift; + + if (!mmu_psize_defs[psize].shift) + continue; + + shift = mmu_psize_to_shift(psize); + + /* Don't treat normal page sizes as huge... */ + if (shift != PAGE_SHIFT) + if (add_huge_page_size(1ULL << shift) < 0) + continue; + } + + /* + * Create a kmem cache for hugeptes. 
The bottom bits in the pte have + * size information encoded in them, so align them to allow this + */ + hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t), + HUGEPD_SHIFT_MASK + 1, 0, NULL); + if (hugepte_cache == NULL) + panic("%s: Unable to create kmem cache for hugeptes\n", + __func__); + + /* Default hpage size = 4M */ + if (mmu_psize_defs[MMU_PAGE_4M].shift) + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; + else + panic("%s: Unable to set default huge page size\n", __func__); + + + return 0; +} +#else static int __init hugetlbpage_init(void) { int psize; @@ -567,15 +854,23 @@ static int __init hugetlbpage_init(void) return 0; } - +#endif module_init(hugetlbpage_init); void flush_dcache_icache_hugepage(struct page *page) { int i; + void *start; BUG_ON(!PageCompound(page)); - for (i = 0; i < (1UL << compound_order(page)); i++) - __flush_dcache_icache(page_address(page+i)); + for (i = 0; i < (1UL << compound_order(page)); i++) { + if (!PageHighMem(page)) { + __flush_dcache_icache(page_address(page+i)); + } else { + start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE); + __flush_dcache_icache(start); + kunmap_atomic(start, KM_PPC_SYNC_ICACHE); + } + } } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index c77fef56dad6..161cefde5c15 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #include @@ -44,6 +46,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -123,6 +126,12 @@ void __init MMU_init(void) /* parse args from command line */ MMU_setup(); + /* + * Reserve gigantic pages for hugetlb. This MUST occur before + * lowmem_end_addr is initialized below. + */ + reserve_hugetlb_gpages(); + if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII memblock.memory.cnt = 1; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c781bbcf7338..ad9cf49dfb89 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -548,4 +548,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, return; hash_preload(vma->vm_mm, address, access, trap); #endif /* CONFIG_PPC_STD_MMU */ +#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \ + && defined(CONFIG_HUGETLB_PAGE) + if (is_vm_hugetlb_page(vma)) + book3e_hugetlb_preload(vma->vm_mm, address, *ptep); +#endif } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 336807de550e..5b63bd3da4a9 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -292,6 +292,11 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) mm->context.id = MMU_NO_CONTEXT; mm->context.active = 0; +#ifdef CONFIG_PPC_MM_SLICES + if (slice_mm_new_context(mm)) + slice_set_user_psize(mm, mmu_virtual_psize); +#endif + return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index af40c8768a78..214130a4edc6 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -212,7 +213,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, entry = set_access_flags_filter(entry, vma, dirty); changed = !pte_same(*(ptep), entry); if (changed) { - if (!(vma->vm_flags & VM_HUGETLB)) + if (!is_vm_hugetlb_page(vma)) assert_pte_locked(vma->vm_mm, address); __ptep_set_access_flags(ptep, entry); flush_tlb_page_nohash(vma, address); diff --git a/arch/powerpc/mm/tlb_low_64e.S 
b/arch/powerpc/mm/tlb_low_64e.S index 4ebb34bc01d6..dc4a5f385e41 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -553,24 +553,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq virt_page_table_tlb_miss_fault + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault #ifndef CONFIG_PPC_64K_PAGES /* Get to PUD entry */ rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq virt_page_table_tlb_miss_fault + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault #endif /* CONFIG_PPC_64K_PAGES */ /* Get to PMD entry */ rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq virt_page_table_tlb_miss_fault + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault /* Ok, we're all right, we can now create a kernel translation for * a 4K or 64K page from r16 -> r15. @@ -802,24 +802,24 @@ htw_tlb_miss: rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq htw_tlb_miss_fault + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault #ifndef CONFIG_PPC_64K_PAGES /* Get to PUD entry */ rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq htw_tlb_miss_fault + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault #endif /* CONFIG_PPC_64K_PAGES */ /* Get to PMD entry */ rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq htw_tlb_miss_fault + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault /* Ok, we're all right, we can now create an indirect entry for * a 1M or 256M page. diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index d32ec643c231..afc95c7304ae 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -36,14 +36,49 @@ #include #include #include +#include #include #include #include +#include #include "mmu_decl.h" -#ifdef CONFIG_PPC_BOOK3E +/* + * This struct lists the sw-supported page sizes. The hardware MMU may support + * other sizes not listed here. The .ind field is only used on MMUs that have + * indirect page table entries.
+ */ +#ifdef CONFIG_PPC_BOOK3E_MMU +#ifdef CONFIG_FSL_BOOKE +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { + [MMU_PAGE_4K] = { + .shift = 12, + .enc = BOOK3E_PAGESZ_4K, + }, + [MMU_PAGE_4M] = { + .shift = 22, + .enc = BOOK3E_PAGESZ_4M, + }, + [MMU_PAGE_16M] = { + .shift = 24, + .enc = BOOK3E_PAGESZ_16M, + }, + [MMU_PAGE_64M] = { + .shift = 26, + .enc = BOOK3E_PAGESZ_64M, + }, + [MMU_PAGE_256M] = { + .shift = 28, + .enc = BOOK3E_PAGESZ_256M, + }, + [MMU_PAGE_1G] = { + .shift = 30, + .enc = BOOK3E_PAGESZ_1GB, + }, +}; +#else struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { .shift = 12, @@ -77,6 +112,8 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { .enc = BOOK3E_PAGESZ_1GB, }, }; +#endif /* CONFIG_FSL_BOOKE */ + static inline int mmu_get_tsize(int psize) { return mmu_psize_defs[psize].enc; @@ -87,7 +124,7 @@ static inline int mmu_get_tsize(int psize) /* This isn't used on !Book3E for now */ return 0; } -#endif +#endif /* CONFIG_PPC_BOOK3E_MMU */ /* The variables below are currently only used on 64-bit Book3E * though this will probably be made common with other nohash @@ -266,6 +303,11 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { +#ifdef CONFIG_HUGETLB_PAGE + if (is_vm_hugetlb_page(vma)) + flush_hugetlb_page(vma, vmaddr); +#endif + __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, mmu_get_tsize(mmu_virtual_psize), 0); } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e06e39589a09..a85990c886e9 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -69,6 +69,7 @@ config PPC_BOOK3S_64 bool "Server processors" select PPC_FPU select PPC_HAVE_PMU_SUPPORT + select SYS_SUPPORTS_HUGETLBFS config PPC_BOOK3E_64 bool "Embedded processors" @@ -173,6 +174,7 @@ config BOOKE config FSL_BOOKE bool depends on (E200 || E500) && PPC32 + select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT default y # this is for common code between PPC32 & PPC64 FSL BOOKE @@ -296,7 +298,7 @@ config PPC_BOOK3E_MMU config PPC_MM_SLICES bool - default y if HUGETLB_PAGE || (PPC_STD_MMU_64 && PPC_64K_PAGES) + default y if (PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES) default n config VIRT_CPU_ACCOUNTING -- cgit v1.2.3 From 6083184269fd723affca4f6340e491950267622a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Aug 2011 20:44:21 +0000 Subject: powerpc/numa: Remove double of_node_put in hot_add_node_scn_to_nid During memory hotplug testing, I got the following warning: ERROR: Bad of_node_put() on /memory@0 of_node_release kref_put of_node_put of_find_node_by_type hot_add_node_scn_to_nid hot_add_scn_to_nid memory_add_physaddr_to_nid ... of_find_node_by_type() loop does the of_node_put for us so we only need to handle the case where we terminate the loop early. As suggested by Stephen Rothwell we can do the of_node_put unconditionally outside of the loop since of_node_put handles a NULL argument fine.
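The buggy pattern and its fix reduce to the following sketch (an abridged rendering of the hot_add_node_scn_to_nid() loop changed below):

	struct device_node *memory = NULL;

	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
		/* ... compute nid for this node ... */
		if (nid >= 0)
			break;		/* early exit: we still hold a reference */
	}
	/*
	 * of_find_node_by_type() already put each node it iterated past,
	 * so a single unconditional put after the loop covers the early
	 * break; on normal termination memory is NULL, which
	 * of_node_put() accepts.
	 */
	of_node_put(memory);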
Signed-off-by: Anton Blanchard Cc: stable@kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/numa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2164006fe170..2c1ae7a5fb53 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1214,11 +1214,12 @@ int hot_add_node_scn_to_nid(unsigned long scn_addr) break; } - of_node_put(memory); if (nid >= 0) break; } + of_node_put(memory); + return nid; } -- cgit v1.2.3 From 94db7c5e14f44b943febe54e089d077cd983d284 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Aug 2011 20:44:22 +0000 Subject: powerpc: Use for_each_node_by_type instead of open coding it Use for_each_node_by_type instead of open coding it. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/machine_kexec_64.c | 3 +-- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/mm/numa.c | 10 +++++----- 3 files changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 583af70c4b14..26ccbf77dd41 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -74,8 +74,7 @@ int default_machine_kexec_prepare(struct kimage *image) } /* We also should not overwrite the tce tables */ - for (node = of_find_node_by_type(NULL, "pci"); node != NULL; - node = of_find_node_by_type(node, "pci")) { + for_each_node_by_type(node, "pci") { basep = of_get_property(node, "linux,tce-base", NULL); sizep = of_get_property(node, "linux,tce-size", NULL); if (basep == NULL || sizep == NULL) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index aebef1320ed7..eade1fd8ee2e 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -278,7 +278,7 @@ static void __init initialize_cache_info(void) DBG(" -> initialize_cache_info()\n"); - for (np = NULL; (np = of_find_node_by_type(np, "cpu"));) { + for_each_node_by_type(np, "cpu") { num_cpus += 1; /* We're assuming *all* of the CPUs have the same diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2c1ae7a5fb53..00cc090cd1dc 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -710,7 +710,7 @@ static void __init parse_drconf_memory(struct device_node *memory) static int __init parse_numa_properties(void) { struct device_node *cpu = NULL; - struct device_node *memory = NULL; + struct device_node *memory; int default_nid = 0; unsigned long i; @@ -750,8 +750,8 @@ static int __init parse_numa_properties(void) } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); - memory = NULL; - while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + + for_each_node_by_type(memory, "memory") { unsigned long start; unsigned long size; int nid; @@ -1187,10 +1187,10 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory, */ int hot_add_node_scn_to_nid(unsigned long scn_addr) { - struct device_node *memory = NULL; + struct device_node *memory; int nid = -1; - while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + for_each_node_by_type(memory, "memory") { unsigned long start, size; int ranges; const unsigned int *memcell_buf; -- cgit v1.2.3 From dfbe93a222e74b6f96ad84eff2b04a0f864fac65 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Aug 2011 20:44:23 +0000 Subject: powerpc: Coding style cleanups While converting code to use 
for_each_node_by_type I noticed a number of coding style issues. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 20 +++++++++++++------- arch/powerpc/mm/numa.c | 7 ++++--- 2 files changed, 17 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index eade1fd8ee2e..d4168c93098f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -281,11 +281,11 @@ static void __init initialize_cache_info(void) for_each_node_by_type(np, "cpu") { num_cpus += 1; - /* We're assuming *all* of the CPUs have the same + /* + * We're assuming *all* of the CPUs have the same * d-cache and i-cache sizes... -Peter */ - - if ( num_cpus == 1 ) { + if (num_cpus == 1) { const u32 *sizep, *lsizep; u32 size, lsize; @@ -294,10 +294,13 @@ static void __init initialize_cache_info(void) sizep = of_get_property(np, "d-cache-size", NULL); if (sizep != NULL) size = *sizep; - lsizep = of_get_property(np, "d-cache-block-size", NULL); + lsizep = of_get_property(np, "d-cache-block-size", + NULL); /* fallback if block size missing */ if (lsizep == NULL) - lsizep = of_get_property(np, "d-cache-line-size", NULL); + lsizep = of_get_property(np, + "d-cache-line-size", + NULL); if (lsizep != NULL) lsize = *lsizep; if (sizep == 0 || lsizep == 0) @@ -314,9 +317,12 @@ static void __init initialize_cache_info(void) sizep = of_get_property(np, "i-cache-size", NULL); if (sizep != NULL) size = *sizep; - lsizep = of_get_property(np, "i-cache-block-size", NULL); + lsizep = of_get_property(np, "i-cache-block-size", + NULL); if (lsizep == NULL) - lsizep = of_get_property(np, "i-cache-line-size", NULL); + lsizep = of_get_property(np, + "i-cache-line-size", + NULL); if (lsizep != NULL) lsize = *lsizep; if (sizep == 0 || lsizep == 0) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 00cc090cd1dc..0bfb90c50571 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -709,7 +709,6 @@ static void __init parse_drconf_memory(struct device_node *memory) static int __init parse_numa_properties(void) { - struct device_node *cpu = NULL; struct device_node *memory; int default_nid = 0; unsigned long i; @@ -732,6 +731,7 @@ static int __init parse_numa_properties(void) * each node to be onlined must have NODE_DATA etc backing it. */ for_each_present_cpu(i) { + struct device_node *cpu; int nid; cpu = of_get_cpu_node(i, NULL); @@ -800,8 +800,9 @@ new_range: } /* - * Now do the same thing for each MEMBLOCK listed in the ibm,dynamic-memory - * property in the ibm,dynamic-reconfiguration-memory node. + * Now do the same thing for each MEMBLOCK listed in the + * ibm,dynamic-memory property in the + * ibm,dynamic-reconfiguration-memory node. */ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) -- cgit v1.2.3 From a11940978bd598e65996b4f807cf4904793f7025 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Aug 2011 20:44:24 +0000 Subject: powerpc: Fix oops when echoing bad values to /sys/devices/system/memory/probe If we echo an address the hypervisor doesn't like to /sys/devices/system/memory/probe we oops the box: # echo 0x10000000000 > /sys/devices/system/memory/probe kernel BUG at arch/powerpc/mm/hash_utils_64.c:541! The backtrace is: create_section_mapping arch_add_memory add_memory memory_probe_store sysdev_class_store sysfs_write_file vfs_write SyS_write In create_section_mapping we BUG if htab_bolt_mapping returned an error. 
A better approach is to return an error which will propagate back to userspace. Rerunning the test with this patch applied: # echo 0x10000000000 > /sys/devices/system/memory/probe -bash: echo: write error: Invalid argument Signed-off-by: Anton Blanchard Cc: stable@kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/sparsemem.h | 2 +- arch/powerpc/mm/hash_utils_64.c | 6 +++--- arch/powerpc/mm/mem.c | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 54a47ea2c3aa..0c5fa3145615 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -16,7 +16,7 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern void create_section_mapping(unsigned long start, unsigned long end); +extern int create_section_mapping(unsigned long start, unsigned long end); extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_NUMA extern int hot_add_scn_to_nid(unsigned long scn_addr); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 1f8b2a05e3d0..1628201c8cea 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -531,11 +531,11 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -void create_section_mapping(unsigned long start, unsigned long end) +int create_section_mapping(unsigned long start, unsigned long end) { - BUG_ON(htab_bolt_mapping(start, end, __pa(start), + return htab_bolt_mapping(start, end, __pa(start), pgprot_val(PAGE_KERNEL), mmu_linear_psize, - mmu_kernel_ssize)); + mmu_kernel_ssize); } int remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ad9cf49dfb89..5db316cad47b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -123,7 +123,8 @@ int arch_add_memory(int nid, u64 start, u64 size) pgdata = NODE_DATA(nid); start = (unsigned long)__va(start); - create_section_mapping(start, start + size); + if (create_section_mapping(start, start + size)) + return -EINVAL; /* this should work for most non-highmem platforms */ zone = pgdata->node_zones; -- cgit v1.2.3 From 8bdafa39a47265bc029838b35cc6585f69224afa Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 14 Sep 2011 09:43:15 +0000 Subject: powerpc: Fix deadlock in icswx code The icswx code introduced an A-B B-A deadlock: CPU0 CPU1 ---- ---- lock(&anon_vma->mutex); lock(&mm->mmap_sem); lock(&anon_vma->mutex); lock(&mm->mmap_sem); Instead of using the mmap_sem to keep mm_users constant, take the page table spinlock. 
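In sketch form, the use_cop()/drop_cop() paths end up nesting their locks like this (abridged from the diff below):

	spin_lock(&mm->page_table_lock);	/* pins mm_users instead of mmap_sem */
	spin_lock(mm->context.cop_lockp);
	/* ... update mm->context.acop / cop_pid ... */
	spin_unlock(mm->context.cop_lockp);
	spin_unlock(&mm->page_table_lock);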
Signed-off-by: Anton Blanchard Cc: Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mmu_context_hash64.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 3bafc3deca6d..4ff587e38250 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -136,8 +136,8 @@ int use_cop(unsigned long acop, struct mm_struct *mm) if (!mm || !acop) return -EINVAL; - /* We need to make sure mm_users doesn't change */ - down_read(&mm->mmap_sem); + /* The page_table_lock ensures mm_users won't change under us */ + spin_lock(&mm->page_table_lock); spin_lock(mm->context.cop_lockp); if (mm->context.cop_pid == COP_PID_NONE) { @@ -164,7 +164,7 @@ int use_cop(unsigned long acop, struct mm_struct *mm) out: spin_unlock(mm->context.cop_lockp); - up_read(&mm->mmap_sem); + spin_unlock(&mm->page_table_lock); return ret; } @@ -185,8 +185,8 @@ void drop_cop(unsigned long acop, struct mm_struct *mm) if (WARN_ON_ONCE(!mm)) return; - /* We need to make sure mm_users doesn't change */ - down_read(&mm->mmap_sem); + /* The page_table_lock ensures mm_users won't change under us */ + spin_lock(&mm->page_table_lock); spin_lock(mm->context.cop_lockp); mm->context.acop &= ~acop; @@ -213,7 +213,7 @@ void drop_cop(unsigned long acop, struct mm_struct *mm) } spin_unlock(mm->context.cop_lockp); - up_read(&mm->mmap_sem); + spin_unlock(&mm->page_table_lock); } EXPORT_SYMBOL_GPL(drop_cop); -- cgit v1.2.3 From 25c29f9e3242071bca1bee7ad919baf1888ae436 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 20 Sep 2011 19:58:10 +0000 Subject: powerpc: Fix hugetlb with CONFIG_PPC_MM_SLICES=y Commit 41151e77a4 ("powerpc: Hugetlb for BookE") added some #ifdef CONFIG_MM_SLICES conditionals to hugetlb_get_unmapped_area() and vma_mmu_pagesize(). Unfortunately this is not the correct config symbol; it should be CONFIG_PPC_MM_SLICES. The result is that attempting to use hugetlbfs on 64-bit Power server processors results in an infinite stack recursion between get_unmapped_area() and hugetlb_get_unmapped_area(). This fixes it by changing the #ifdef to use CONFIG_PPC_MM_SLICES in those functions and also in book3e_hugetlb_preload(). 
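The failure mode deserves a note: the preprocessor treats an #ifdef on a misspelled, never-defined symbol as simply false, so the intended code is dropped with no diagnostic at all. A minimal user-space sketch (hypothetical program, not the kernel code):

#include <stdio.h>

/* Stands in for a symbol the build system really does define. */
#define CONFIG_PPC_MM_SLICES 1

int main(void)
{
#ifdef CONFIG_MM_SLICES         /* typo: never defined, silently false */
        puts("slice-aware path");
#else
        puts("fallback path taken by mistake");
#endif
        return 0;
}

In the hugetlb case the silently selected fallback called back into the generic path, producing the recursion between get_unmapped_area() and hugetlb_get_unmapped_area() described above.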
Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hugetlbpage-book3e.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 1295b7c1cdac..343ad0b87261 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -52,7 +52,7 @@ void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte) if (unlikely(is_kernel_addr(ea))) return; -#ifdef CONFIG_MM_SLICES +#ifdef CONFIG_PPC_MM_SLICES psize = mmu_get_tsize(get_slice_psize(mm, ea)); tsize = mmu_get_psize(psize); shift = mmu_psize_defs[psize].shift; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 3a5f59dcbb33..48b65bedc04d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -690,7 +690,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { -#ifdef CONFIG_MM_SLICES +#ifdef CONFIG_PPC_MM_SLICES struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); @@ -702,7 +702,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { -#ifdef CONFIG_MM_SLICES +#ifdef CONFIG_PPC_MM_SLICES unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); return 1UL << mmu_psize_to_shift(psize); -- cgit v1.2.3 From 1dc91c3eb374ca01ec99dc0ca2a38babc509beb3 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 16 Sep 2011 10:39:59 -0500 Subject: powerpc/fsl-booke: Fix setup_initial_memory_limit to not blindly map On FSL Book-E devices we support multiple large TLB sizes and so we can get into situations in which the initial 1G TLB size is too big and we're asked for a size that is not mappable by a single entry (like 512M). The single entry is important because when we bring up secondary cores they need to ensure any data structure they need to access (eg PACA or stack) is always mapped. So we really need to determine what size will actually be mapped by the first TLB entry to ensure we limit early memory references to that region. We refactor the map_mem_in_cams() code to provide a helper function that we can use to determine the size of the first TLB entry while taking into account size and alignment constraints.
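The arithmetic in the new calc_cam_sz() helper is easy to exercise in isolation. Below is a user-space model of it (a sketch: a 64-bit host is assumed, and the 4-bit maximum-size field that the kernel reads from SPRN_TLB1CFG is passed in as a plain parameter):

#include <stdio.h>

/*
 * Model of calc_cam_sz(): TLBCAM entries cover power-of-4 sizes, so
 * the size exponent is rounded down to an even number of bits, then
 * clamped by the alignment of virt|phys and by the hardware maximum
 * of 4^max_field KiB, i.e. 2^(2*max_field + 10) bytes. ram must be
 * non-zero.
 */
static unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
                                 unsigned long phys, unsigned int max_field)
{
        unsigned int camsize = (63 - __builtin_clzl(ram)) & ~1U; /* __ilog2(ram) & ~1U */
        unsigned int align = __builtin_ctzl(virt | phys) & ~1U;  /* __ffs(virt | phys) & ~1U */
        unsigned int max_cam = max_field * 2 + 10;

        if (camsize > align)
                camsize = align;
        if (camsize > max_cam)
                camsize = max_cam;
        return 1UL << camsize;
}

int main(void)
{
        /*
         * 512 MiB of RAM at 1 GiB-aligned addresses with a 1 GiB
         * hardware maximum prints 0x10000000: only 256 MiB fits in the
         * first entry -- the "not mappable by a single entry (like
         * 512M)" case from the text above.
         */
        printf("0x%lx\n", calc_cam_sz(512UL << 20, 0xc0000000UL, 0x40000000UL, 10));
        return 0;
}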
Signed-off-by: Kumar Gala --- arch/powerpc/mm/fsl_booke_mmu.c | 31 +++++++++++++++++++------------ arch/powerpc/mm/mmu_decl.h | 2 ++ arch/powerpc/mm/tlb_nohash.c | 21 ++++++++++++++++++--- 3 files changed, 39 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index f7802c8bba0a..6f593bd27174 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -146,29 +146,36 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, loadcam_entry(index); } +unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, + phys_addr_t phys) +{ + unsigned int camsize = __ilog2(ram) & ~1U; + unsigned int align = __ffs(virt | phys) & ~1U; + unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf; + + /* Convert (4^max) kB to (2^max) bytes */ + max_cam = max_cam * 2 + 10; + + if (camsize > align) + camsize = align; + if (camsize > max_cam) + camsize = max_cam; + + return 1UL << camsize; +} + unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx) { int i; unsigned long virt = PAGE_OFFSET; phys_addr_t phys = memstart_addr; unsigned long amount_mapped = 0; - unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf; - - /* Convert (4^max) kB to (2^max) bytes */ - max_cam = max_cam * 2 + 10; /* Calculate CAM values */ for (i = 0; ram && i < max_cam_idx; i++) { - unsigned int camsize = __ilog2(ram) & ~1U; - unsigned int align = __ffs(virt | phys) & ~1U; unsigned long cam_sz; - if (camsize > align) - camsize = align; - if (camsize > max_cam) - camsize = max_cam; - - cam_sz = 1UL << camsize; + cam_sz = calc_cam_sz(ram, virt, phys); settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0); ram -= cam_sz; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index dd0a2589591d..83eb5d5f53d5 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -142,6 +142,8 @@ extern unsigned long mmu_mapin_ram(unsigned long top); #elif defined(CONFIG_PPC_FSL_BOOK3E) extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx); +extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt, + phys_addr_t phys); #ifdef CONFIG_PPC32 extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index afc95c7304ae..6c2eabf707b7 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -642,13 +642,28 @@ void __cpuinit early_init_mmu_secondary(void) void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - /* On Embedded 64-bit, we adjust the RMA size to match + /* On non-FSL Embedded 64-bit, we adjust the RMA size to match * the bolted TLB entry. We know for now that only 1G * entries are supported though that may eventually - * change. We crop it to the size of the first MEMBLOCK to + * change. + * + * on FSL Embedded 64-bit, we adjust the RMA size to match the + * first bolted TLB entry size. We still limit max to 1G even if + * the TLB could cover more. This is due to what the early init + * code is setup to do. + * + * We crop it to the size of the first MEMBLOCK to * avoid going over total available memory just in case... 
*/ - ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + unsigned long linear_sz; + linear_sz = calc_cam_sz(first_memblock_size, PAGE_OFFSET, + first_memblock_base); + ppc64_rma_size = min_t(u64, linear_sz, 0x40000000); + } else +#endif + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); /* Finally limit subsequent allocations */ memblock_set_current_limit(first_memblock_base + ppc64_rma_size); -- cgit v1.2.3 From 4559424a0c34f0cb22fa31bc24015a06dc064b32 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 12 Oct 2011 16:17:02 -0500 Subject: powerpc/fsl-booke: Fix settlbcam for 64-bit Currently, it does a cntlzd on the size and then subtracts it from 21; this doesn't take into account the varying size of a "long". Just use __ilog2 instead (and subtract the 10 needed to get to the tsize encoding). Also correct the comment about supported page sizes. Signed-off-by: Becky Bruce Signed-off-by: Kumar Gala --- arch/powerpc/mm/fsl_booke_mmu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 6f593bd27174..66a6fd38e9cd 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -101,17 +101,17 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa) /* * Set up a variable-size TLB entry (tlbcam). The parameters are not checked; - * in particular size must be a power of 4 between 4k and 256M (or 1G, for cpus - * that support extended page sizes). Note that while some cpus support a - * page size of 4G, we don't allow its use here. + * in particular size must be a power of 4 between 4k and the max supported by + * an implementation; max may further be limited by what can be represented in + * an unsigned long (for example, 32-bit implementations cannot support a 4GB + * size). */ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, unsigned long size, unsigned long flags, unsigned int pid) { - unsigned int tsize, lz; + unsigned int tsize; - asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (size)); - tsize = 21 - lz; + tsize = __ilog2(size) - 10; #ifdef CONFIG_SMP if ((flags & _PAGE_NO_CACHE) == 0) -- cgit v1.2.3 From 66b15db69c2553036cc25f6e2e74fe7e3aa2761e Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 27 May 2011 10:46:24 -0400 Subject: powerpc: add export.h to files making use of EXPORT_SYMBOL With module.h being implicitly everywhere via device.h, the absence of explicitly including something for EXPORT_SYMBOL went unnoticed. Since we are heading to fix things up and clean module.h out of the device.h file, we need to explicitly include these files now.
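The resulting hunks are uniform: per this commit's subject, each "+#include" line in the hunks that follow adds <linux/export.h>. A hypothetical sketch of the kind of file the patch touches:

/*
 * Hypothetical sketch of a file this patch touches: it exports symbols
 * but needs nothing else from <linux/module.h>, so the explicit
 * lightweight include is all that is missing.
 */
#include <linux/export.h>       /* EXPORT_SYMBOL, EXPORT_SYMBOL_GPL */

unsigned int foo_debug_level;   /* hypothetical symbol */
EXPORT_SYMBOL(foo_debug_level);

int foo_status(void)
{
        return foo_debug_level < 8;
}
EXPORT_SYMBOL_GPL(foo_status);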
Signed-off-by: Paul Gortmaker --- arch/powerpc/include/asm/lv1call.h | 1 + arch/powerpc/kernel/crash.c | 1 + arch/powerpc/kernel/dma-iommu.c | 1 + arch/powerpc/kernel/dma.c | 1 + arch/powerpc/kernel/ibmebus.c | 1 + arch/powerpc/kernel/isa-bridge.c | 1 + arch/powerpc/kernel/kvm.c | 1 + arch/powerpc/kernel/pci-common.c | 1 + arch/powerpc/kernel/pci_64.c | 1 + arch/powerpc/kernel/pci_dn.c | 1 + arch/powerpc/kernel/pci_of_scan.c | 1 + arch/powerpc/kvm/book3s.c | 1 + arch/powerpc/kvm/book3s_hv.c | 1 + arch/powerpc/kvm/book3s_hv_builtin.c | 1 + arch/powerpc/mm/hash_utils_64.c | 1 + arch/powerpc/mm/pgtable_64.c | 1 + arch/powerpc/platforms/52xx/mpc52xx_common.c | 1 + arch/powerpc/platforms/83xx/suspend.c | 1 + arch/powerpc/platforms/cell/axon_msi.c | 1 + arch/powerpc/platforms/cell/celleb_setup.c | 1 + arch/powerpc/platforms/cell/pmu.c | 1 + arch/powerpc/platforms/cell/qpace_setup.c | 1 + arch/powerpc/platforms/cell/setup.c | 1 + arch/powerpc/platforms/iseries/mf.c | 1 + arch/powerpc/platforms/iseries/setup.c | 1 + arch/powerpc/platforms/maple/setup.c | 1 + arch/powerpc/platforms/pasemi/setup.c | 1 + arch/powerpc/platforms/powermac/backlight.c | 1 + arch/powerpc/platforms/powermac/feature.c | 1 + arch/powerpc/platforms/powermac/setup.c | 1 + arch/powerpc/platforms/ps3/os-area.c | 1 + arch/powerpc/platforms/ps3/setup.c | 1 + arch/powerpc/platforms/ps3/spu.c | 1 + arch/powerpc/platforms/pseries/eeh.c | 1 + arch/powerpc/platforms/pseries/lpar.c | 1 + arch/powerpc/sysdev/cpm_common.c | 1 + arch/powerpc/sysdev/dcr.c | 1 + arch/powerpc/sysdev/fsl_gtm.c | 1 + arch/powerpc/sysdev/fsl_pmc.c | 1 + arch/powerpc/sysdev/mpc5xxx_clocks.c | 1 + arch/powerpc/sysdev/qe_lib/gpio.c | 1 + arch/powerpc/sysdev/scom.c | 1 + drivers/ps3/sys-manager-core.c | 1 + 43 files changed, 43 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/lv1call.h b/arch/powerpc/include/asm/lv1call.h index 81713acf7529..9cd5fc828a37 100644 --- a/arch/powerpc/include/asm/lv1call.h +++ b/arch/powerpc/include/asm/lv1call.h @@ -25,6 +25,7 @@ #if !defined(__ASSEMBLY__) #include +#include /* lv1 call declaration macros */ diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index cc6a9d5d69ab..d879809d5c45 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index e7554154a6de..6ad57719487f 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -5,6 +5,7 @@ * busses using the iommu infrastructure */ +#include #include /* diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 4f0959fbfbee..b04e01f303a9 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 28581f1ad2c0..2848856dc171 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -37,6 +37,7 @@ */ #include +#include #include #include #include diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index 4d5731b2429a..479752901ec6 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 
b06bdae04064..35f27646c4ff 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 32656f105250..ff41553b74c6 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index ab34046752bf..bcf4bf9e72d9 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 478f8d78716b..4e69deb89b37 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index fe0a5ad6f73e..b37d0b5a796e 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -15,6 +15,7 @@ */ #include +#include #include #include diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index f68a34d16035..a459479995c6 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 4644c7986d80..0cdbc07cec14 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index d43120355eec..286f13d601cf 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 26b2872b3d00..8352db050192 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 6e595f6496d4..ad36ede469cc 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index 41f3a7eda1de..369fd5457a3f 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 104faa8aa23c..edf66870d978 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index ac06903e136a..40a6e34793b4 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/celleb_setup.c 
b/arch/powerpc/platforms/cell/celleb_setup.c index d58d9bae4b9b..1d5a4d8ddad9 100644 --- a/arch/powerpc/platforms/cell/celleb_setup.c +++ b/arch/powerpc/platforms/cell/celleb_setup.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c index 69ed0d7f1646..1acf36010423 100644 --- a/arch/powerpc/platforms/cell/pmu.c +++ b/arch/powerpc/platforms/cell/pmu.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c index 51e290126bc1..7f9b6742f8b6 100644 --- a/arch/powerpc/platforms/cell/qpace_setup.c +++ b/arch/powerpc/platforms/cell/qpace_setup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index c73cf4c43fc2..0fc9b7256126 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c index 62dabe3c2bfa..254c1fc3d8dd 100644 --- a/arch/powerpc/platforms/iseries/mf.c +++ b/arch/powerpc/platforms/iseries/mf.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index c25a0815c26b..ea0acbd8966d 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 5b3388b9f911..4c372047c94e 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 7c858e6f843c..6f3558210554 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c index c2f3e861f5ea..a00096b1c713 100644 --- a/arch/powerpc/platforms/powermac/backlight.c +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index df423993f175..63d82bbc05e9 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index a028f08309d6..96580b189ec2 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index 5b759b669598..56d26bc4fd41 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -23,6 +23,7 @@ #include #include #include 
+#include #include #include #include diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 149bea2ce583..e8ec1b2bfffd 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 375a9f92158d..451fad1c92a8 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index ada6e07532ec..f1cb75e5441a 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index c9a29dae8c05..27a49508b410 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index d55d0ad0deab..f8488a64771d 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c index bb44aa9fd470..1bd0eba4d355 100644 --- a/arch/powerpc/sysdev/dcr.c +++ b/arch/powerpc/sysdev/dcr.c @@ -20,6 +20,7 @@ #undef DEBUG #include +#include #include #include diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 7dd2885321ad..02cf1e7e77fc 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #define GTCFR_STP(x) ((x) & 1 ? 
1 << 5 : 1 << 1) diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c index f122e8961d32..592a0f8d527a 100644 --- a/arch/powerpc/sysdev/fsl_pmc.c +++ b/arch/powerpc/sysdev/fsl_pmc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c index 34e12f9995fe..96f815a55dfd 100644 --- a/arch/powerpc/sysdev/mpc5xxx_clocks.c +++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c @@ -8,6 +8,7 @@ #include #include +#include unsigned int mpc5xxx_get_bus_frequency(struct device_node *node) diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c b/arch/powerpc/sysdev/qe_lib/gpio.c index 36bf845df127..e23f23cf9f5c 100644 --- a/arch/powerpc/sysdev/qe_lib/gpio.c +++ b/arch/powerpc/sysdev/qe_lib/gpio.c @@ -20,6 +20,7 @@ #include #include #include +#include #include struct qe_gpio_chip { diff --git a/arch/powerpc/sysdev/scom.c b/arch/powerpc/sysdev/scom.c index b2593ce30c9b..49a3ece1c6b3 100644 --- a/arch/powerpc/sysdev/scom.c +++ b/arch/powerpc/sysdev/scom.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/drivers/ps3/sys-manager-core.c b/drivers/ps3/sys-manager-core.c index 474225852b63..0e41737ea835 100644 --- a/drivers/ps3/sys-manager-core.c +++ b/drivers/ps3/sys-manager-core.c @@ -19,6 +19,7 @@ */ #include +#include #include #include -- cgit v1.2.3 From 930879488495e19178f8c63297fd4b9b4df9e9fc Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 29 Jul 2011 16:19:31 +1000 Subject: powerpc: include export.h for files using EXPORT_SYMBOL/THIS_MODULE Fix failures in powerpc associated with the previously allowed implicit module.h presence that now lead to things like this: arch/powerpc/mm/mmu_context_hash32.c:76:1: error: type defaults to 'int' in declaration of 'EXPORT_SYMBOL_GPL' arch/powerpc/mm/tlb_hash32.c:48:1: error: type defaults to 'int' in declaration of 'EXPORT_SYMBOL' arch/powerpc/kernel/pci_32.c:51:1: error: type defaults to 'int' in declaration of 'EXPORT_SYMBOL_GPL' arch/powerpc/kernel/iomap.c:36:1: error: type defaults to 'int' in declaration of 'EXPORT_SYMBOL' arch/powerpc/platforms/44x/canyonlands.c:126:1: error: type defaults to 'int' in declaration of 'EXPORT_SYMBOL' arch/powerpc/kvm/44x.c:168:59: error: 'THIS_MODULE' undeclared (first use in this function) [with several contributions from Stephen Rothwell ] Signed-off-by: Paul Gortmaker --- arch/powerpc/include/asm/machdep.h | 1 + arch/powerpc/kernel/iomap.c | 1 + arch/powerpc/kernel/pci_32.c | 1 + arch/powerpc/kvm/44x.c | 1 + arch/powerpc/kvm/book3s_pr.c | 1 + arch/powerpc/mm/dma-noncoherent.c | 1 + arch/powerpc/mm/mmu_context_hash32.c | 1 + arch/powerpc/mm/tlb_hash32.c | 1 + arch/powerpc/mm/tlb_nohash.c | 1 + arch/powerpc/platforms/44x/warp.c | 1 + arch/powerpc/platforms/pseries/pci_dlpar.c | 1 + arch/powerpc/sysdev/ppc4xx_msi.c | 1 + arch/powerpc/sysdev/qe_lib/usb.c | 1 + 13 files changed, 13 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 47cacddb14cf..7fa31cec41bc 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -12,6 +12,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index b25f6325fc70..262791807397 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git 
a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index bb154511db5e..fdd1a3d951dc 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index ca1f88b3dc59..7b612a76c701 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d417511abfb1..bc4d50dec78b 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -20,6 +20,7 @@ */ #include +#include #include #include diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index b42f76c4948d..329be36c0a8d 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -30,6 +30,7 @@ #include #include #include +#include #include diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index d0ee554e86e4..78fef6726e10 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c @@ -24,6 +24,7 @@ #include #include +#include #include #include diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index 9a445f64accd..558e30cce33e 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index d32ec643c231..0ff8ab5702a9 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -28,6 +28,7 @@ */ #include +#include #include #include #include diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 8f771395f424..4cfa49901c02 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 3bf4488aaec6..55d4ec1bd1ac 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -26,6 +26,7 @@ */ #include +#include #include #include #include diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 367af0241851..1c2d7af17bbe 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/usb.c b/arch/powerpc/sysdev/qe_lib/usb.c index 8105462078eb..9162828f5da7 100644 --- a/arch/powerpc/sysdev/qe_lib/usb.c +++ b/arch/powerpc/sysdev/qe_lib/usb.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 4b16f8e2d6d64249f0ed3ca7fe2a319d0dde2719 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 22 Jul 2011 18:24:23 -0400 Subject: powerpc: various straight conversions from module.h --> export.h All these files were including module.h just for the basic EXPORT_SYMBOL infrastructure. We can shift them off to the export.h header which is a way smaller footprint and thus realize some compile time gains. 
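The conversion is mechanical: per the subject line, every "-#include +#include" pair in the hunks that follow is the same one-line swap of <linux/module.h> for <linux/export.h>, sketched here on a hypothetical file:

/* before: pulled in struct module, module_init() and friends, unused here */
/* #include <linux/module.h> */
/* after: just the EXPORT_SYMBOL machinery, a much smaller header */
#include <linux/export.h>

int example_shared_counter;     /* hypothetical exported symbol */
EXPORT_SYMBOL(example_shared_counter);

Because module.h transitively drags in a large header graph, trimming it from 61 files is where the compile-time gain comes from.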
Signed-off-by: Paul Gortmaker --- arch/powerpc/kernel/btext.c | 2 +- arch/powerpc/kernel/clock.c | 2 +- arch/powerpc/kernel/cputable.c | 2 +- arch/powerpc/kernel/init_task.c | 2 +- arch/powerpc/kernel/io.c | 2 +- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/of_platform.c | 2 +- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kernel/pmc.c | 2 +- arch/powerpc/kernel/ppc_ksyms.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/prom.c | 2 +- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/setup-common.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/kernel/stacktrace.c | 2 +- arch/powerpc/kernel/sysfs.c | 2 +- arch/powerpc/kernel/time.c | 2 +- arch/powerpc/kernel/vio.c | 2 +- arch/powerpc/kvm/book3s_exports.c | 2 +- arch/powerpc/lib/checksum_wrappers_64.c | 2 +- arch/powerpc/lib/devres.c | 2 +- arch/powerpc/lib/locks.c | 2 +- arch/powerpc/lib/rheap.c | 2 +- arch/powerpc/mm/mem.c | 2 +- arch/powerpc/mm/mmu_context_hash64.c | 2 +- arch/powerpc/mm/numa.c | 2 +- arch/powerpc/mm/slice.c | 2 +- arch/powerpc/platforms/cell/beat.c | 2 +- arch/powerpc/platforms/cell/cbe_regs.c | 2 +- arch/powerpc/platforms/cell/interrupt.c | 2 +- arch/powerpc/platforms/cell/spu_callbacks.c | 2 +- arch/powerpc/platforms/cell/spu_fault.c | 2 +- arch/powerpc/platforms/cell/spu_manage.c | 2 +- arch/powerpc/platforms/cell/spufs/file.c | 2 +- arch/powerpc/platforms/cell/spufs/switch.c | 2 +- arch/powerpc/platforms/cell/spufs/syscalls.c | 2 +- arch/powerpc/platforms/iseries/hvlpconfig.c | 2 +- arch/powerpc/platforms/iseries/iommu.c | 2 +- arch/powerpc/platforms/iseries/ksyms.c | 2 +- arch/powerpc/platforms/iseries/lpevents.c | 2 +- arch/powerpc/platforms/iseries/vio.c | 2 +- arch/powerpc/platforms/iseries/viopath.c | 2 +- arch/powerpc/platforms/pasemi/dma_lib.c | 2 +- arch/powerpc/platforms/powermac/low_i2c.c | 2 +- arch/powerpc/platforms/powermac/nvram.c | 2 +- arch/powerpc/platforms/ps3/interrupt.c | 2 +- arch/powerpc/platforms/ps3/mm.c | 2 +- arch/powerpc/platforms/ps3/system-bus.c | 2 +- arch/powerpc/platforms/pseries/io_event_irq.c | 2 +- arch/powerpc/platforms/pseries/setup.c | 2 +- arch/powerpc/sysdev/bestcomm/sram.c | 2 +- arch/powerpc/sysdev/fsl_lbc.c | 2 +- arch/powerpc/sysdev/fsl_rio.c | 2 +- arch/powerpc/sysdev/fsl_soc.c | 2 +- arch/powerpc/sysdev/qe_lib/ucc.c | 2 +- arch/powerpc/sysdev/qe_lib/ucc_fast.c | 2 +- arch/powerpc/sysdev/qe_lib/ucc_slow.c | 2 +- arch/powerpc/sysdev/tsi108_dev.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- 61 files changed, 61 insertions(+), 61 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 60b3e377b1e4..ac8f52732fde 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/clock.c b/arch/powerpc/kernel/clock.c index ce668f545758..a764b47791e8 100644 --- a/arch/powerpc/kernel/clock.c +++ b/arch/powerpc/kernel/clock.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include struct clk_interface clk_functions; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index fa44ff538861..edae5bb06f1f 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c index 2375b7eb1c76..d076d465dbd1 100644 --- 
a/arch/powerpc/kernel/init_task.c +++ b/arch/powerpc/kernel/init_task.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 8dc7547c2377..886381f32c3d 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index d281fb6f12f3..5c3c46948d94 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -30,7 +30,7 @@ #undef DEBUG -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 59dbf6abaaf3..e1612dfb4a93 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0a5a899846bb..41456ff55e14 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index 461499b43cff..a841a9d136a2 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index f5ae872a2ef0..d3114a71dd32 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8f53954e75a3..9054ca9ab4f9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 174e1e96175e..da1cb57571d7 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d5ca8236315c..517b1d8f455b 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b1d738d12890..77bb77da05c1 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -12,7 +12,7 @@ #undef DEBUG -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index aebef1320ed7..dc6655594e9d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -12,7 +12,7 @@ #undef DEBUG -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 7bf2187dfd99..37f4c9862310 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -18,7 +18,7 @@ #undef DEBUG #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index b0dbb1daa4df..3d30ef1038e5 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -10,7 +10,7 @@ * 2 of the License, or (at 
your option) any later version. */ -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index f0f2199e64e1..ce035c1905f0 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 03b29a6759ab..522bb1dfc353 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -33,7 +33,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index ee09398ed05b..b57e163c5e0e 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c index f7f63a00ab1f..a150817d6d4c 100644 --- a/arch/powerpc/kvm/book3s_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c @@ -17,7 +17,7 @@ * Authors: Alexander Graf */ -#include +#include #include #ifdef CONFIG_KVM_BOOK3S_64_HV diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c index 769b817fbb32..08e3a3356c40 100644 --- a/arch/powerpc/lib/checksum_wrappers_64.c +++ b/arch/powerpc/lib/checksum_wrappers_64.c @@ -17,7 +17,7 @@ * * Author: Anton Blanchard */ -#include +#include #include #include #include diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c index e91615abae66..8df55fc3aad6 100644 --- a/arch/powerpc/lib/devres.c +++ b/arch/powerpc/lib/devres.c @@ -10,7 +10,7 @@ #include /* devres_*(), devm_ioremap_release() */ #include #include /* ioremap_prot() */ -#include /* EXPORT_SYMBOL() */ +#include /* EXPORT_SYMBOL() */ /** * devm_ioremap_prot - Managed ioremap_prot() diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 9b8182e82166..a6ebba56fdd4 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c index 45907c1dae66..a1060a868e69 100644 --- a/arch/powerpc/lib/rheap.c +++ b/arch/powerpc/lib/rheap.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c781bbcf7338..507963ee9aa4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -17,7 +17,7 @@ * */ -#include +#include #include #include #include diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 3bafc3deca6d..f599f91255ce 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 2164006fe170..1a7c44d8d669 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index ba5194817f8a..73709f7ce92c 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/beat.c b/arch/powerpc/platforms/cell/beat.c index 48c690ea65da..232fc384e855 100644 --- 
a/arch/powerpc/platforms/cell/beat.c +++ b/arch/powerpc/platforms/cell/beat.c @@ -18,7 +18,7 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index f3917e7a5b44..1428d583c238 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 3e4eba603e6b..96a433dd2d64 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index fec1495e6b12..75d613313f10 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -5,7 +5,7 @@ #undef DEBUG #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/cell/spu_fault.c b/arch/powerpc/platforms/cell/spu_fault.c index d06ba87f1a19..641e7273d75a 100644 --- a/arch/powerpc/platforms/cell/spu_fault.c +++ b/arch/powerpc/platforms/cell/spu_fault.c @@ -22,7 +22,7 @@ */ #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 4e5c91489c02..2bb6977c0a5a 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index fb59c46e9e9e..0cfece4cf6ef 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 3df9a36eb2f5..dde35551e744 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -32,7 +32,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c index 609e016e92d0..71a5b5207266 100644 --- a/arch/powerpc/platforms/cell/spufs/syscalls.c +++ b/arch/powerpc/platforms/cell/spufs/syscalls.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/iseries/hvlpconfig.c b/arch/powerpc/platforms/iseries/hvlpconfig.c index f0475f0b1853..f62a0c5fa670 100644 --- a/arch/powerpc/platforms/iseries/hvlpconfig.c +++ b/arch/powerpc/platforms/iseries/hvlpconfig.c @@ -16,7 +16,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include +#include #include #include "it_lp_naca.h" diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index d8b76335bd13..2f3d9110248c 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/iseries/ksyms.c b/arch/powerpc/platforms/iseries/ksyms.c index 2430848b98e7..997e234fb8b7 100644 --- a/arch/powerpc/platforms/iseries/ksyms.c +++ b/arch/powerpc/platforms/iseries/ksyms.c @@ -6,7 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c index b0f8a857ec02..202e22798d30 100644 --- a/arch/powerpc/platforms/iseries/lpevents.c +++ b/arch/powerpc/platforms/iseries/lpevents.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c index b6db7cef83b4..04be62d368a6 100644 --- a/arch/powerpc/platforms/iseries/vio.c +++ b/arch/powerpc/platforms/iseries/vio.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c index 2376069cdc14..40dad0840eb3 100644 --- a/arch/powerpc/platforms/iseries/viopath.c +++ b/arch/powerpc/platforms/iseries/viopath.c @@ -27,7 +27,7 @@ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index d19907c2047f..f3defd8a2806 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index e9c8a607268e..996c5ff7824b 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 695443bfdb08..54d227127c9f 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -8,7 +8,7 @@ * * Todo: - add support for the OF persistent properties */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 
600ed2c0ed59..404bc52b7806 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index c2045880e674..72714ad27842 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 23083c397528..169c4ef18d09 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c index 2c4dd1fb8333..1a709bc48ce1 100644 --- a/arch/powerpc/platforms/pseries/io_event_irq.c +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0969fd98c4fa..c3408ca8855e 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/sysdev/bestcomm/sram.c b/arch/powerpc/sysdev/bestcomm/sram.c index 1225012a681a..b6db23e085fb 100644 --- a/arch/powerpc/sysdev/bestcomm/sram.c +++ b/arch/powerpc/sysdev/bestcomm/sram.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c index 3d106a4e6d8d..c4d96fa32ba5 100644 --- a/arch/powerpc/sysdev/fsl_lbc.c +++ b/arch/powerpc/sysdev/fsl_lbc.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index c65f75aa7ff7..8d9b9c42260c 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -23,7 +23,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 2d66275e489f..e8f385fbf549 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/ucc.c b/arch/powerpc/sysdev/qe_lib/ucc.c index fa589b21dbcd..04677505f20f 100644 --- a/arch/powerpc/sysdev/qe_lib/ucc.c +++ b/arch/powerpc/sysdev/qe_lib/ucc.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/ucc_fast.c b/arch/powerpc/sysdev/qe_lib/ucc_fast.c index 25fbbfaa837d..fba02440d122 100644 --- a/arch/powerpc/sysdev/qe_lib/ucc_fast.c +++ b/arch/powerpc/sysdev/qe_lib/ucc_fast.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/sysdev/qe_lib/ucc_slow.c b/arch/powerpc/sysdev/qe_lib/ucc_slow.c index e1d6a1340157..524c0ead941d 100644 --- a/arch/powerpc/sysdev/qe_lib/ucc_slow.c +++ b/arch/powerpc/sysdev/qe_lib/ucc_slow.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c index 9f51f97abb5d..2370e1c63379 100644 --- 
a/arch/powerpc/sysdev/tsi108_dev.c +++ b/arch/powerpc/sysdev/tsi108_dev.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 42541bbcc7fa..0f5643fb9747 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 70b50f94f1644e2aa7cb374819cfd93f3c28d725 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 2 Nov 2011 13:36:59 -0700 Subject: mm: thp: tail page refcounting fix Michel, while working on the working set estimation code, noticed that calling get_page_unless_zero() on a random pfn_to_page(random_pfn) wasn't safe if the pfn ended up being a tail page of a transparent hugepage under splitting by __split_huge_page_refcount(). He then found the problem could also theoretically materialize with page_cache_get_speculative() during the speculative radix tree lookups that use get_page_unless_zero() on SMP, if the radix tree page is freed and reallocated and get_user_pages() is called on it before page_cache_get_speculative() has a chance to call get_page_unless_zero(). So the best way to fix the problem is to keep page_tail->_count zero at all times. This will guarantee that get_page_unless_zero() can never succeed on any tail page. page_tail->_mapcount is guaranteed zero and is unused for all tail pages of a compound page, so we can simply account the tail page references there and transfer them to tail_page->_count in __split_huge_page_refcount() (in addition to the head_page->_mapcount). While debugging this s/_count/_mapcount/ change I also noticed that direct-io.c calls get_page() on pages returned by get_user_pages(). That wasn't entirely safe because the two atomic_incs in get_page() weren't atomic; other get_user_pages() users, such as the secondary-MMU page fault code that establishes shadow pagetables, never call a superfluous get_page() after get_user_pages() returns. It's safer to make get_page universally safe for tail pages and to use get_page_foll() within follow_page (inside get_user_pages()). get_page_foll() is safe to do the refcounting for tail pages without taking any locks because it is run within PT lock protected critical sections (PT lock for pte and page_table_lock for pmd_trans_huge). The standard get_page() as invoked by direct-io instead will now take the compound_lock but still only for tail pages. The direct-io paths are usually I/O bound and the compound_lock is per THP so very fine-grained, so there's no risk of scalability issues with it. A simple direct-io benchmark with the full lockdep prove-locking and spinlock debugging infrastructure enabled shows identical performance and no overhead. So it's worth it. Ideally direct-io should stop calling get_page() on pages returned by get_user_pages(). The spinlock in get_page() is already optimized away for no-THP builds but doing get_page() on tail pages returned by GUP is generally a rare operation and usually only run in I/O paths. This new refcounting on page_tail->_mapcount in addition to avoiding new RCU critical sections will also allow the working set estimation code to work without any further complexity associated with the tail page refcounting with THP.
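The invariant can be shown compactly. Below is a hedged schematic in plain C11 atomics, with a hypothetical two-counter page structure in place of struct page; it illustrates the rule the patch establishes and is not the kernel code itself:

#include <stdatomic.h>

/*
 * Schematic of the new rule: a tail page's _count is held at zero for
 * its whole life; references taken on a tail through gup are parked on
 * _mapcount, with a matching reference on the head's _count.
 */
struct fake_page {
        atomic_int _count;      /* real refcount; always 0 on tails */
        atomic_int _mapcount;   /* starts at -1; absorbs tail gup refs */
};

/* Modeled on get_page_foll() for a tail page (runs under PT lock). */
static void get_tail_ref(struct fake_page *head, struct fake_page *tail)
{
        atomic_fetch_add(&head->_count, 1);     /* keep the compound alive */
        atomic_fetch_add(&tail->_mapcount, 1);  /* tail->_count untouched */
}

/* Modeled on get_page_unless_zero(), i.e. atomic_inc_not_zero(). */
static int page_ref_unless_zero(struct fake_page *page)
{
        int c = atomic_load(&page->_count);

        while (c != 0)
                if (atomic_compare_exchange_weak(&page->_count, &c, c + 1))
                        return 1;
        return 0;
}

int main(void)
{
        struct fake_page head = { 1, -1 }, tail = { 0, -1 };

        get_tail_ref(&head, &tail);
        /* Always 0 now: speculative lookups can never pin a tail page. */
        return page_ref_unless_zero(&tail);
}

At split time, __split_huge_page_refcount() folds the references parked on each tail's _mapcount (plus the head's mapcount) back into that tail's _count and subtracts the accumulated total from the head, as the hunks below show.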
Signed-off-by: Andrea Arcangeli Reported-by: Michel Lespinasse Reviewed-by: Michel Lespinasse Reviewed-by: Minchan Kim Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Cc: David Gibson Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/gup.c | 5 +-- arch/x86/mm/gup.c | 5 +-- include/linux/mm.h | 56 +++++++++++++------------------- include/linux/mm_types.h | 21 +++++++++--- mm/huge_memory.c | 37 ++++++++++++++------- mm/internal.h | 46 +++++++++++++++++++++++++++ mm/memory.c | 2 +- mm/swap.c | 83 +++++++++++++++++++++++++++++++----------------- 8 files changed, 171 insertions(+), 84 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index fec13200868f..b9e1c7ff5f6d 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -22,8 +22,9 @@ static inline void get_huge_page_tail(struct page *page) * __split_huge_page_refcount() cannot run * from under us. */ - VM_BUG_ON(atomic_read(&page->_count) < 0); - atomic_inc(&page->_count); + VM_BUG_ON(page_mapcount(page) < 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); + atomic_inc(&page->_mapcount); } /* diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index dbe34b931374..3b5032a62b0f 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -114,8 +114,9 @@ static inline void get_huge_page_tail(struct page *page) * __split_huge_page_refcount() cannot run * from under us. */ - VM_BUG_ON(atomic_read(&page->_count) < 0); - atomic_inc(&page->_count); + VM_BUG_ON(page_mapcount(page) < 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); + atomic_inc(&page->_mapcount); } static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, diff --git a/include/linux/mm.h b/include/linux/mm.h index 3b3e3b8bb706..f81b7b41930c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -356,36 +356,39 @@ static inline struct page *compound_head(struct page *page) return page; } +/* + * The atomic page->_mapcount, starts from -1: so that transitions + * both from it and to it can be tracked, using atomic_inc_and_test + * and atomic_add_negative(-1). + */ +static inline void reset_page_mapcount(struct page *page) +{ + atomic_set(&(page)->_mapcount, -1); +} + +static inline int page_mapcount(struct page *page) +{ + return atomic_read(&(page)->_mapcount) + 1; +} + static inline int page_count(struct page *page) { return atomic_read(&compound_head(page)->_count); } +extern bool __get_page_tail(struct page *page); + static inline void get_page(struct page *page) { + if (unlikely(PageTail(page))) + if (likely(__get_page_tail(page))) + return; /* * Getting a normal page or the head of a compound page - * requires to already have an elevated page->_count. Only if - * we're getting a tail page, the elevated page->_count is - * required only in the head page, so for tail pages the - * bugcheck only verifies that the page->_count isn't - * negative. + * requires to already have an elevated page->_count. */ - VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page)); + VM_BUG_ON(atomic_read(&page->_count) <= 0); atomic_inc(&page->_count); - /* - * Getting a tail page will elevate both the head and tail - * page->_count(s). - */ - if (unlikely(PageTail(page))) { - /* - * This is safe only because - * __split_huge_page_refcount can't run under - * get_page(). 
- */ - VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); - atomic_inc(&page->first_page->_count); - } } static inline struct page *virt_to_head_page(const void *x) @@ -803,21 +806,6 @@ static inline pgoff_t page_index(struct page *page) return page->index; } -/* - * The atomic page->_mapcount, like _count, starts from -1: - * so that transitions both from it and to it can be tracked, - * using atomic_inc_and_test and atomic_add_negative(-1). - */ -static inline void reset_page_mapcount(struct page *page) -{ - atomic_set(&(page)->_mapcount, -1); -} - -static inline int page_mapcount(struct page *page) -{ - return atomic_read(&(page)->_mapcount) + 1; -} - /* * Return true if this page is mapped into pagetables. */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3e01a19a91e8..5b42f1b34eb7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -62,10 +62,23 @@ struct page { struct { union { - atomic_t _mapcount; /* Count of ptes mapped in mms, - * to show when page is mapped - * & limit reverse map searches. - */ + /* + * Count of ptes mapped in + * mms, to show when page is + * mapped & limit reverse map + * searches. + * + * Used also for tail pages + * refcounting instead of + * _count. Tail pages cannot + * be mapped and keeping the + * tail page _count zero at + * all times guarantees + * get_page_unless_zero() will + * never succeed on tail + * pages. + */ + atomic_t _mapcount; struct { unsigned inuse:16; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 860ec211ddd6..4298abaae153 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -990,7 +990,7 @@ struct page *follow_trans_huge_pmd(struct mm_struct *mm, page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; VM_BUG_ON(!PageCompound(page)); if (flags & FOLL_GET) - get_page(page); + get_page_foll(page); out: return page; @@ -1202,6 +1202,7 @@ static void __split_huge_page_refcount(struct page *page) unsigned long head_index = page->index; struct zone *zone = page_zone(page); int zonestat; + int tail_count = 0; /* prevent PageLRU to go away from under us, and freeze lru stats */ spin_lock_irq(&zone->lru_lock); @@ -1210,11 +1211,27 @@ static void __split_huge_page_refcount(struct page *page) for (i = 1; i < HPAGE_PMD_NR; i++) { struct page *page_tail = page + i; - /* tail_page->_count cannot change */ - atomic_sub(atomic_read(&page_tail->_count), &page->_count); - BUG_ON(page_count(page) <= 0); - atomic_add(page_mapcount(page) + 1, &page_tail->_count); - BUG_ON(atomic_read(&page_tail->_count) <= 0); + /* tail_page->_mapcount cannot change */ + BUG_ON(page_mapcount(page_tail) < 0); + tail_count += page_mapcount(page_tail); + /* check for overflow */ + BUG_ON(tail_count < 0); + BUG_ON(atomic_read(&page_tail->_count) != 0); + /* + * tail_page->_count is zero and not changing from + * under us. But get_page_unless_zero() may be running + * from under us on the tail_page. If we used + * atomic_set() below instead of atomic_add(), we + * would then run atomic_set() concurrently with + * get_page_unless_zero(), and atomic_set() is + * implemented in C not using locked ops. spin_unlock + * on x86 sometime uses locked ops because of PPro + * errata 66, 92, so unless somebody can guarantee + * atomic_set() here would be safe on all archs (and + * not only on x86), it's safer to use atomic_add(). 
+ */ + atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1, + &page_tail->_count); /* after clearing PageTail the gup refcount can be released */ smp_mb(); @@ -1232,10 +1249,7 @@ static void __split_huge_page_refcount(struct page *page) (1L << PG_uptodate))); page_tail->flags |= (1L << PG_dirty); - /* - * 1) clear PageTail before overwriting first_page - * 2) clear PageTail before clearing PageHead for VM_BUG_ON - */ + /* clear PageTail before overwriting first_page */ smp_wmb(); /* @@ -1252,7 +1266,6 @@ static void __split_huge_page_refcount(struct page *page) * status is achieved setting a reserved bit in the * pmd, not by clearing the present bit. */ - BUG_ON(page_mapcount(page_tail)); page_tail->_mapcount = page->_mapcount; BUG_ON(page_tail->mapping); @@ -1269,6 +1282,8 @@ static void __split_huge_page_refcount(struct page *page) lru_add_page_tail(zone, page, page_tail); } + atomic_sub(tail_count, &page->_count); + BUG_ON(atomic_read(&page->_count) <= 0); __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); diff --git a/mm/internal.h b/mm/internal.h index d071d380fb49..2189af491783 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -37,6 +37,52 @@ static inline void __put_page(struct page *page) atomic_dec(&page->_count); } +static inline void __get_page_tail_foll(struct page *page, + bool get_page_head) +{ + /* + * If we're getting a tail page, the elevated page->_count is + * required only in the head page and we will elevate the head + * page->_count and tail page->_mapcount. + * + * We elevate page_tail->_mapcount for tail pages to force + * page_tail->_count to be zero at all times to avoid getting + * false positives from get_page_unless_zero() with + * speculative page access (like in + * page_cache_get_speculative()) on tail pages. + */ + VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); + VM_BUG_ON(page_mapcount(page) < 0); + if (get_page_head) + atomic_inc(&page->first_page->_count); + atomic_inc(&page->_mapcount); +} + +/* + * This is meant to be called as the FOLL_GET operation of + * follow_page() and it must be called while holding the proper PT + * lock while the pte (or pmd_trans_huge) is still mapping the page. + */ +static inline void get_page_foll(struct page *page) +{ + if (unlikely(PageTail(page))) + /* + * This is safe only because + * __split_huge_page_refcount() can't run under + * get_page_foll() because we hold the proper PT lock. + */ + __get_page_tail_foll(page, true); + else { + /* + * Getting a normal page or the head of a compound page + * requires to already have an elevated page->_count. 
+ */ + VM_BUG_ON(atomic_read(&page->_count) <= 0); + atomic_inc(&page->_count); + } +} + extern unsigned long highest_memmap_pfn; /* diff --git a/mm/memory.c b/mm/memory.c index a56e3ba816b2..b2b87315cdc6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1503,7 +1503,7 @@ split_fallthrough: } if (flags & FOLL_GET) - get_page(page); + get_page_foll(page); if (flags & FOLL_TOUCH) { if ((flags & FOLL_WRITE) && !pte_dirty(pte) && !PageDirty(page)) diff --git a/mm/swap.c b/mm/swap.c index 3a442f18b0b3..87627f181c3f 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -78,39 +78,22 @@ static void put_compound_page(struct page *page) { if (unlikely(PageTail(page))) { /* __split_huge_page_refcount can run under us */ - struct page *page_head = page->first_page; - smp_rmb(); - /* - * If PageTail is still set after smp_rmb() we can be sure - * that the page->first_page we read wasn't a dangling pointer. - * See __split_huge_page_refcount() smp_wmb(). - */ - if (likely(PageTail(page) && get_page_unless_zero(page_head))) { + struct page *page_head = compound_trans_head(page); + + if (likely(page != page_head && + get_page_unless_zero(page_head))) { unsigned long flags; /* - * Verify that our page_head wasn't converted - * to a a regular page before we got a - * reference on it. + * page_head wasn't a dangling pointer but it + * may not be a head page anymore by the time + * we obtain the lock. That is ok as long as it + * can't be freed from under us. */ - if (unlikely(!PageHead(page_head))) { - /* PageHead is cleared after PageTail */ - smp_rmb(); - VM_BUG_ON(PageTail(page)); - goto out_put_head; - } - /* - * Only run compound_lock on a valid PageHead, - * after having it pinned with - * get_page_unless_zero() above. - */ - smp_mb(); - /* page_head wasn't a dangling pointer */ flags = compound_lock_irqsave(page_head); if (unlikely(!PageTail(page))) { /* __split_huge_page_refcount run before us */ compound_unlock_irqrestore(page_head, flags); VM_BUG_ON(PageHead(page_head)); - out_put_head: if (put_page_testzero(page_head)) __put_single_page(page_head); out_put_single: @@ -121,16 +104,17 @@ static void put_compound_page(struct page *page) VM_BUG_ON(page_head != page->first_page); /* * We can release the refcount taken by - * get_page_unless_zero now that - * split_huge_page_refcount is blocked on the - * compound_lock. + * get_page_unless_zero() now that + * __split_huge_page_refcount() is blocked on + * the compound_lock. */ if (put_page_testzero(page_head)) VM_BUG_ON(1); /* __split_huge_page_refcount will wait now */ - VM_BUG_ON(atomic_read(&page->_count) <= 0); - atomic_dec(&page->_count); + VM_BUG_ON(page_mapcount(page) <= 0); + atomic_dec(&page->_mapcount); VM_BUG_ON(atomic_read(&page_head->_count) <= 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); compound_unlock_irqrestore(page_head, flags); if (put_page_testzero(page_head)) { if (PageHead(page_head)) @@ -160,6 +144,45 @@ void put_page(struct page *page) } EXPORT_SYMBOL(put_page); +/* + * This function is exported but must not be called by anything other + * than get_page(). It implements the slow path of get_page(). + */ +bool __get_page_tail(struct page *page) +{ + /* + * This takes care of get_page() if run on a tail page + * returned by one of the get_user_pages/follow_page variants. + * get_user_pages/follow_page itself doesn't need the compound + * lock because it runs __get_page_tail_foll() under the + * proper PT lock that already serializes against + * split_huge_page(). 
+ */ + unsigned long flags; + bool got = false; + struct page *page_head = compound_trans_head(page); + + if (likely(page != page_head && get_page_unless_zero(page_head))) { + /* + * page_head wasn't a dangling pointer but it + * may not be a head page anymore by the time + * we obtain the lock. That is ok as long as it + * can't be freed from under us. + */ + flags = compound_lock_irqsave(page_head); + /* here __split_huge_page_refcount won't run anymore */ + if (likely(PageTail(page))) { + __get_page_tail_foll(page, false); + got = true; + } + compound_unlock_irqrestore(page_head, flags); + if (unlikely(!got)) + put_page(page_head); + } + return got; +} +EXPORT_SYMBOL(__get_page_tail); + /** * put_pages_list() - release a list of pages * @pages: list of pages threaded on page->lru -- cgit v1.2.3 From 2839bdc1bfc0af76a2f0f11eca011590520a04fa Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 2 Nov 2011 13:37:03 -0700 Subject: powerpc: remove superfluous PageTail checks on the pte gup_fast This part of gup_fast doesn't seem capable of handling hugetlbfs ptes; those should be handled by gup_hugepd only, so these checks are superfluous. Plus, if this wasn't a noop, it would have oopsed, because the insistence on using the speculative refcounting would trigger a VM_BUG_ON if a tail page were encountered in page_cache_get_speculative(). Signed-off-by: Andrea Arcangeli Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Acked-by: David Gibson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/gup.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index b9e1c7ff5f6d..d7efdbf640c7 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -16,17 +16,6 @@ #ifdef __HAVE_ARCH_PTE_SPECIAL -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(page_mapcount(page) < 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); - atomic_inc(&page->_mapcount); -} - /* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much @@ -58,8 +47,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, put_page(page); return 0; } - if (PageTail(page)) - get_huge_page_tail(page); pages[*nr] = page; (*nr)++; -- cgit v1.2.3 From 405e44f2e312dd5dd63e5a9f459bffcbcd4368ef Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 2 Nov 2011 13:37:08 -0700 Subject: powerpc: gup_hugepte() don't put_page() the wrong page "page" may have changed to point to the next hugepage after the loop completed. The references have been taken on the head page, so the put_page must happen there too. This is a longstanding issue predating thp inclusion. It's totally unclear why these page_cache_add_speculative and pte_val(pte) != pte_val(*ptep) checks are necessary across all the powerpc gup_fast code, when x86 doesn't need any of that: there's no way the page can be freed with irqs disabled, so we're guaranteed the atomic_inc will happen on a page with page_count > 0 (so the speculative check isn't needed). The pte check is also meaningless on x86: no need to roll back on x86 if the pte changed, because the pte can still change a CPU tick after the check succeeded and it won't be rolled back in that case.
The important thing is we got a reference on a valid page that was mapped there a CPU tick ago. So, not knowing the soft tlb refill code of ppc64 in great detail, I'm not removing the "speculative" page_count increase and the pte checks across all the code, but unless there's a strong reason for them they should be cleaned up later too. If a pte can change from huge to non-huge (as could happen with THP), passing a pte_t *ptep to gup_hugepte() would also require repeating the is_hugepd check in gup_hugepte(), but that shouldn't happen with hugetlbfs alone, so I'm not altering that. Signed-off-by: Andrea Arcangeli Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Acked-by: David Gibson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0b9a5c1901b9..b649c288af90 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -429,7 +429,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add if (unlikely(pte_val(pte) != pte_val(*ptep))) { /* Could be optimized better */ while (*nr) { - put_page(page); + put_page(head); (*nr)--; } } -- cgit v1.2.3 From 8596468487e2062cae2aad56e973784e03959245 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 2 Nov 2011 13:37:11 -0700 Subject: powerpc: gup_hugepte() avoid freeing the head page too many times We have only taken "refs" pins on the head page, not "*nr" pins. Signed-off-by: Andrea Arcangeli Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Acked-by: David Gibson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hugetlbpage.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b649c288af90..78b14abded65 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -428,10 +428,9 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add if (unlikely(pte_val(pte) != pte_val(*ptep))) { /* Could be optimized better */ - while (*nr) { + *nr -= refs; + while (refs--) put_page(head); - (*nr)--; - } } return 1; -- cgit v1.2.3 From 3526741f0964c88bc2ce511e1078359052bf225b Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 2 Nov 2011 13:37:15 -0700 Subject: powerpc: gup_hugepte() support THP-based tail refcounting Up to this point the code assumed old refcounting for hugepages (pre-thp). This updates the code directly to the THP mapcount tail page refcounting.
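In outline, the scheme the diff below adopts (an illustrative sketch, not the exact hunk): every real pin is taken on the head page, and each tail page in the pinned range then receives a _mapcount reference so a later __split_huge_page_refcount() can transfer those references to the tails:

/* Illustrative sketch of the tail accounting added to gup_hugepte() */
head = pte_page(pte);
page = head + ((addr & (sz - 1)) >> PAGE_SHIFT);
tail = page;				/* remember the first subpage */
refs = 0;
do {
	pages[*nr] = page;		/* each slot is backed by a head pin */
	(*nr)++;
	page++;
	refs++;
} while (addr += PAGE_SIZE, addr != end);

/* head pins taken elsewhere, e.g. page_cache_add_speculative(head, refs) */
while (refs--) {
	if (PageTail(tail))
		get_huge_page_tail(tail);	/* bumps tail->_mapcount */
	tail++;
}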
Signed-off-by: Andrea Arcangeli Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Cc: David Gibson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hugetlbpage.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 78b14abded65..a618ef01bfad 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -385,12 +385,23 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } +static inline void get_huge_page_tail(struct page *page) +{ + /* + * __split_huge_page_refcount() cannot run + * from under us. + */ + VM_BUG_ON(page_mapcount(page) < 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); + atomic_inc(&page->_mapcount); +} + static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask; unsigned long pte_end; - struct page *head, *page; + struct page *head, *page, *tail; pte_t pte; int refs; @@ -413,6 +424,7 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add head = pte_page(pte); page = head + ((addr & (sz-1)) >> PAGE_SHIFT); + tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -431,6 +443,16 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add *nr -= refs; while (refs--) put_page(head); + } else { + /* + * Any tail page need their mapcount reference taken + * before we return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } } return 1; -- cgit v1.2.3 From cf592bf768c4fa40282b8fce58a80820065de2cb Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 2 Nov 2011 13:37:19 -0700 Subject: powerpc: gup_huge_pmd() return 0 if pte changes powerpc didn't return 0 in that case; if it's rolling back the *nr pointer, it should also return zero to avoid adding pages to the array at the wrong offset. Signed-off-by: Andrea Arcangeli Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Acked-by: David Gibson Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hugetlbpage.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a618ef01bfad..1c59d94f5942 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -443,16 +443,17 @@ static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long add *nr -= refs; while (refs--) put_page(head); - } else { - /* - * Any tail page need their mapcount reference taken - * before we return. - */ - while (refs--) { - if (PageTail(tail)) - get_huge_page_tail(tail); - tail++; - } + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; } return 1; -- cgit v1.2.3 From b35a35b556f5e6b7993ad0baf20173e75c09ce8c Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 2 Nov 2011 13:37:36 -0700 Subject: thp: share get_huge_page_tail() This avoids duplicating the function in every arch gup_fast.
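With the helper moved into include/linux/mm.h (see the diff below), an architecture's fast-gup path no longer needs a private copy; a hypothetical caller could look like this (sketch only, combining the head pinning and the tail accounting; names follow the kernel but the fragment is illustrative):

/* Hypothetical arch fast-gup fragment reusing the shared helper */
atomic_add(refs, &head->_count);	/* one head pin per subpage */
while (refs--) {
	if (PageTail(page))
		get_huge_page_tail(page);	/* tail-side accounting */
	page++;
}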
Signed-off-by: Andrea Arcangeli Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Cc: David Gibson Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/hugetlbpage.c | 11 ----------- arch/s390/mm/gup.c | 11 ----------- arch/sparc/mm/gup.c | 11 ----------- arch/x86/mm/gup.c | 11 ----------- include/linux/mm.h | 11 +++++++++++ 5 files changed, 11 insertions(+), 44 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 1c59d94f5942..da5eb3885702 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -385,17 +385,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(page_mapcount(page) < 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); - atomic_inc(&page->_mapcount); -} - static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index da33a0281d9d..65cb06e2af4e 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -48,17 +48,6 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, return 1; } -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(page_mapcount(page) < 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); - atomic_inc(&page->_mapcount); -} - static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index afcebac144fb..42c55df3aec3 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -12,17 +12,6 @@ #include #include -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(page_mapcount(page) < 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); - atomic_inc(&page->_mapcount); -} - /* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 3b5032a62b0f..ea305856151c 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -108,17 +108,6 @@ static inline void get_head_page_multiple(struct page *page, int nr) SetPageReferenced(page); } -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(page_mapcount(page) < 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); - atomic_inc(&page->_mapcount); -} - static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { diff --git a/include/linux/mm.h b/include/linux/mm.h index f81b7b41930c..3dc3a8c2c485 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -376,6 +376,17 @@ static inline int page_count(struct page *page) return atomic_read(&compound_head(page)->_count); } +static inline void get_huge_page_tail(struct page *page) +{ + /* + * __split_huge_page_refcount() cannot run + * from under us. 
+ */ VM_BUG_ON(page_mapcount(page) < 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); + atomic_inc(&page->_mapcount); +} + extern bool __get_page_tail(struct page *page); static inline void get_page(struct page *page) -- cgit v1.2.3 From c40dd2f76644016ca7677545fc846ec2470d70a1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 2 Nov 2011 14:56:12 +0000 Subject: powerpc: Add System RAM to /proc/iomem We've resisted adding System RAM to /proc/iomem because it is the wrong place for it. Unfortunately we continue to find tools that rely on this behaviour, so give up and add it in. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mem.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 16da595ff402..2dd6bdd31fe1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -555,3 +556,32 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, book3e_hugetlb_preload(vma->vm_mm, address, *ptep); #endif } + +/* + * System memory should not be in /proc/iomem but various tools expect it + * (eg kdump). + */ +static int add_system_ram_resources(void) +{ + struct memblock_region *reg; + + for_each_memblock(memory, reg) { + struct resource *res; + unsigned long base = reg->base; + unsigned long size = reg->size; + + res = kzalloc(sizeof(struct resource), GFP_KERNEL); + WARN_ON(!res); + + if (res) { + res->name = "System RAM"; + res->start = base; + res->end = base + size - 1; + res->flags = IORESOURCE_MEM; + WARN_ON(request_resource(&iomem_resource, res) < 0); + } + } + + return 0; +} +subsys_initcall(add_system_ram_resources); -- cgit v1.2.3 From 1c8ee73395af762726e9eb628636d3b763618c60 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Fri, 28 Oct 2011 04:25:32 +0000 Subject: powerpc/numa: NUMA topology support for PowerNV This patch adds support for NUMA topology on PowerNV platforms running OPAL firmware. It checks the platform type at run time and sets the affinity form appropriately so that the NUMA topology can be discovered correctly.
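At run time the selection reduces to the following (a condensed sketch of the diff below):

/* Condensed logic: choose the affinity source by firmware type */
if (firmware_has_feature(FW_FEATURE_OPAL)) {
	root = of_find_node_by_path("/ibm,opal");
	form1_affinity = 1;		/* OPAL systems always use form 1 */
} else {
	root = of_find_node_by_path("/rtas");
	/* form 1 is advertised via byte 5 of ibm,architecture-vec-5 */
}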
Signed-off-by: Dipankar Sarma Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/numa.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index c7dd4dec4df8..b22a83a91cb8 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -315,7 +315,10 @@ static int __init find_min_common_depth(void) struct device_node *root; const char *vec5; - root = of_find_node_by_path("/rtas"); + if (firmware_has_feature(FW_FEATURE_OPAL)) + root = of_find_node_by_path("/ibm,opal"); + else + root = of_find_node_by_path("/rtas"); if (!root) root = of_find_node_by_path("/"); @@ -344,12 +347,19 @@ static int __init find_min_common_depth(void) #define VEC5_AFFINITY_BYTE 5 #define VEC5_AFFINITY 0x80 - chosen = of_find_node_by_path("/chosen"); - if (chosen) { - vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL); - if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) { - dbg("Using form 1 affinity\n"); - form1_affinity = 1; + + if (firmware_has_feature(FW_FEATURE_OPAL)) + form1_affinity = 1; + else { + chosen = of_find_node_by_path("/chosen"); + if (chosen) { + vec5 = of_get_property(chosen, + "ibm,architecture-vec-5", NULL); + if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & + VEC5_AFFINITY)) { + dbg("Using form 1 affinity\n"); + form1_affinity = 1; + } } } -- cgit v1.2.3 From 13020be8be2d5843c3810169cdbcc51f07ff020f Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 24 Nov 2011 09:40:07 +0000 Subject: powerpc: Fix compilation with hugetlbfs enabled arch/powerpc/mm/hugetlbpage.c: In function 'reserve_hugetlb_gpages': arch/powerpc/mm/hugetlbpage.c:312:2: error: implicit declaration of function 'parse_args' Signed-off-by: Kumar Gala Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hugetlbpage.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 5964371303ac..8558b572e55d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3
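For context: reserve_hugetlb_gpages() calls parse_args(), whose declaration lives in include/linux/moduleparam.h, so that is presumably the header being included above. A minimal sketch of the failure and the fix:

/*
 * Sketch only: without the declaring header, the parse_args() call is
 * an implicit declaration and the build fails with the error quoted in
 * the commit message.
 */
#include <linux/moduleparam.h>	/* assumed: declares parse_args() */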