diff options
Diffstat (limited to 'arch/powerpc/kernel')
41 files changed, 735 insertions, 290 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 83afacd3ba7b..bb282dd81612 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -128,6 +128,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o endif +obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o # Disable GCOV in odd or sensitive code diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 52c7ad78242e..85b05c463fae 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -533,6 +533,7 @@ int main(void) HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); + HSTATE_FIELD(HSTATE_SPRG3, sprg3); HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S index ebc62f42a237..61f079e05b61 100644 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -100,19 +100,19 @@ _icswx_skip_guest: lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h mtspr SPRN_MMUCR0, r4 li r4,A2_IERAT_SIZE-1 - PPC_ERATWE(r4,r4,3) + PPC_ERATWE(R4,R4,3) /* Now set the D-ERAT watermark to 31 */ lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h mtspr SPRN_MMUCR0, r4 li r4,A2_DERAT_SIZE-1 - PPC_ERATWE(r4,r4,3) + PPC_ERATWE(R4,R4,3) /* And invalidate the beast just in case. That won't get rid of * a bolted entry though it will be in LRU and so will go away eventually * but let's not bother for now */ - PPC_ERATILX(0,0,0) + PPC_ERATILX(0,0,R0) 1: blr diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index bcfdcd22c766..2d7bb8ced136 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -109,6 +109,7 @@ static u64 dma_iommu_get_required_mask(struct device *dev) struct dma_map_ops dma_iommu_ops = { .alloc = dma_iommu_alloc_coherent, .free = dma_iommu_free_coherent, + .mmap = dma_direct_mmap_coherent, .map_sg = dma_iommu_map_sg, .unmap_sg = dma_iommu_unmap_sg, .dma_supported = dma_iommu_dma_supported, diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 4ab88dafb235..46943651da23 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -49,6 +49,7 @@ static u64 swiotlb_powerpc_get_required(struct device *dev) struct dma_map_ops swiotlb_dma_ops = { .alloc = dma_direct_alloc_coherent, .free = dma_direct_free_coherent, + .mmap = dma_direct_mmap_coherent, .map_sg = swiotlb_map_sg_attrs, .unmap_sg = swiotlb_unmap_sg_attrs, .dma_supported = swiotlb_dma_supported, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index b1ec983dcec8..355b9d84b0f8 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -11,6 +11,8 @@ #include <linux/gfp.h> #include <linux/memblock.h> #include <linux/export.h> +#include <linux/pci.h> +#include <asm/vio.h> #include <asm/bug.h> #include <asm/abs_addr.h> #include <asm/machdep.h> @@ -65,6 +67,24 @@ void dma_direct_free_coherent(struct device *dev, size_t size, #endif } +int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t handle, size_t size, + struct dma_attrs *attrs) +{ + unsigned long pfn; + +#ifdef CONFIG_NOT_COHERENT_CACHE + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr); +#else + pfn = page_to_pfn(virt_to_page(cpu_addr)); +#endif + return remap_pfn_range(vma, vma->vm_start, + pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction direction, struct dma_attrs *attrs) @@ -154,6 +174,7 @@ static inline void dma_direct_sync_single(struct device *dev, struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc_coherent, .free = dma_direct_free_coherent, + .mmap = dma_direct_mmap_coherent, .map_sg = dma_direct_map_sg, .unmap_sg = dma_direct_unmap_sg, .dma_supported = dma_direct_dma_supported, @@ -205,26 +226,15 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask); static int __init dma_init(void) { - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); +#ifdef CONFIG_PCI + dma_debug_add_bus(&pci_bus_type); +#endif +#ifdef CONFIG_IBMVIO + dma_debug_add_bus(&vio_bus_type); +#endif return 0; } fs_initcall(dma_init); -int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t handle, size_t size) -{ - unsigned long pfn; - -#ifdef CONFIG_NOT_COHERENT_CACHE - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr); -#else - pfn = page_to_pfn(virt_to_page(cpu_addr)); -#endif - return remap_pfn_range(vma, vma->vm_start, - pfn + vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); -} -EXPORT_SYMBOL_GPL(dma_mmap_coherent); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ba3aeb4bc06a..ead5016b02d0 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -89,6 +89,10 @@ crit_transfer_to_handler: mfspr r0,SPRN_SRR1 stw r0,_SRR1(r11) + /* set the stack limit to the current stack + * and set the limit to protect the thread_info + * struct + */ mfspr r8,SPRN_SPRG_THREAD lwz r0,KSP_LIMIT(r8) stw r0,SAVED_KSP_LIMIT(r11) @@ -109,6 +113,10 @@ crit_transfer_to_handler: mfspr r0,SPRN_SRR1 stw r0,crit_srr1@l(0) + /* set the stack limit to the current stack + * and set the limit to protect the thread_info + * struct + */ mfspr r8,SPRN_SPRG_THREAD lwz r0,KSP_LIMIT(r8) stw r0,saved_ksp_limit@l(0) @@ -158,7 +166,7 @@ transfer_to_handler: tophys(r11,r11) addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_CPU(r9) slwi r9,r9,3 add r11,r11,r9 @@ -179,7 +187,7 @@ transfer_to_handler: ble- stack_ovf /* then the kernel stack overflowed */ 5: #if defined(CONFIG_6xx) || defined(CONFIG_E500) - rlwinm r9,r1,0,0,31-THREAD_SHIFT + CURRENT_THREAD_INFO(r9, r1) tophys(r9,r9) /* check local flags */ lwz r12,TI_LOCAL_FLAGS(r9) mtcrf 0x01,r12 @@ -226,13 +234,7 @@ reenable_mmu: /* re-enable mmu so we can */ stw r3,16(r1) stw r4,20(r1) stw r5,24(r1) - andi. r12,r12,MSR_PR - b 11f - bl trace_hardirqs_off - b 12f -11: bl trace_hardirqs_off -12: lwz r5,24(r1) lwz r4,20(r1) lwz r3,16(r1) @@ -333,7 +335,7 @@ _GLOBAL(DoSyscall) mtmsr r11 1: #endif /* CONFIG_TRACE_IRQFLAGS */ - rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + CURRENT_THREAD_INFO(r10, r1) lwz r11,TI_FLAGS(r10) andi. r11,r11,_TIF_SYSCALL_T_OR_A bne- syscall_dotrace @@ -354,7 +356,7 @@ ret_from_syscall: bl do_show_syscall_exit #endif mr r6,r3 - rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + CURRENT_THREAD_INFO(r12, r1) /* disable interrupts so current_thread_info()->flags can't change */ LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ /* Note: We don't bother telling lockdep about it */ @@ -815,7 +817,7 @@ ret_from_except: user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_FLAGS(r9) andi. r0,r9,_TIF_USER_WORK_MASK bne do_work @@ -835,7 +837,7 @@ restore_user: /* N.B. the only way to get here is from the beq following ret_from_except. */ resume_kernel: /* check current_thread_info->preempt_count */ - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r0,TI_PREEMPT(r9) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore @@ -852,7 +854,7 @@ resume_kernel: bl trace_hardirqs_off #endif 1: bl preempt_schedule_irq - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r3,TI_FLAGS(r9) andi. r0,r3,_TIF_NEED_RESCHED bne- 1b @@ -1122,7 +1124,7 @@ ret_from_debug_exc: lwz r10,SAVED_KSP_LIMIT(r1) stw r10,KSP_LIMIT(r9) lwz r9,THREAD_INFO-THREAD(r9) - rlwinm r10,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r10, r1) lwz r10,TI_PREEMPT(r10) stw r10,TI_PREEMPT(r9) RESTORE_xSRR(SRR0,SRR1); @@ -1156,7 +1158,7 @@ load_dbcr0: lis r11,global_dbcr0@ha addi r11,r11,global_dbcr0@l #ifdef CONFIG_SMP - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_CPU(r9) slwi r9,r9,3 add r11,r11,r9 @@ -1197,7 +1199,7 @@ recheck: LOAD_MSR_KERNEL(r10,MSR_KERNEL) SYNC MTMSRD(r10) /* disable interrupts */ - rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_FLAGS(r9) andi. r0,r9,_TIF_NEED_RESCHED bne- do_resched diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 5971c85df136..4b01a25e29ef 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -146,7 +146,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) REST_2GPRS(7,r1) addi r9,r1,STACK_FRAME_OVERHEAD #endif - clrrdi r11,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r11, r1) ld r10,TI_FLAGS(r11) andi. r11,r10,_TIF_SYSCALL_T_OR_A bne- syscall_dotrace @@ -181,7 +181,7 @@ syscall_exit: bl .do_show_syscall_exit ld r3,RESULT(r1) #endif - clrrdi r12,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r12, r1) ld r8,_MSR(r1) #ifdef CONFIG_PPC_BOOK3S @@ -197,7 +197,16 @@ syscall_exit: wrteei 0 #else ld r10,PACAKMSR(r13) - mtmsrd r10,1 + /* + * For performance reasons we clear RI the same time that we + * clear EE. We only need to clear RI just before we restore r13 + * below, but batching it with EE saves us one expensive mtmsrd call. + * We have to be careful to restore RI if we branch anywhere from + * here (eg syscall_exit_work). + */ + li r9,MSR_RI + andc r11,r10,r9 + mtmsrd r11,1 #endif /* CONFIG_PPC_BOOK3E */ ld r9,TI_FLAGS(r12) @@ -214,17 +223,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) - /* - * Clear RI before restoring r13. If we are returning to - * userspace and we take an exception after restoring r13, - * we end up corrupting the userspace r13 value. - */ -#ifdef CONFIG_PPC_BOOK3S - /* No MSR:RI on BookE */ - li r12,MSR_RI - andc r11,r10,r12 - mtmsrd r11,1 /* clear MSR.RI */ -#endif /* CONFIG_PPC_BOOK3S */ beq- 1f ACCOUNT_CPU_USER_EXIT(r11, r12) @@ -262,7 +260,7 @@ syscall_dotrace: ld r7,GPR7(r1) ld r8,GPR8(r1) addi r9,r1,STACK_FRAME_OVERHEAD - clrrdi r10,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r10, r1) ld r10,TI_FLAGS(r10) b .Lsyscall_dotrace_cont @@ -271,6 +269,9 @@ syscall_enosys: b syscall_exit syscall_exit_work: +#ifdef CONFIG_PPC_BOOK3S + mtmsrd r10,1 /* Restore RI */ +#endif /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. If TIF_NOERROR is set, just save r3 as it is. */ @@ -499,7 +500,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) 2: #endif /* !CONFIG_PPC_BOOK3S */ - clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ + CURRENT_THREAD_INFO(r7, r8) /* base of new stack */ /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE because we don't need to leave the 288-byte ABI gap at the top of the kernel stack. */ @@ -558,7 +559,7 @@ _GLOBAL(ret_from_except_lite) mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ - clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */ + CURRENT_THREAD_INFO(r9, r1) ld r3,_MSR(r1) ld r4,TI_FLAGS(r9) andi. r3,r3,MSR_PR @@ -601,7 +602,7 @@ resume_kernel: 1: bl .preempt_schedule_irq /* Re-test flags and eventually loop */ - clrrdi r9,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r9, r1) ld r4,TI_FLAGS(r9) andi. r0,r4,_TIF_NEED_RESCHED bne 1b diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S new file mode 100644 index 000000000000..697b390ebfd8 --- /dev/null +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2012 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* Hypercall entry point. Will be patched with device tree instructions. */ +.global epapr_hypercall_start +epapr_hypercall_start: + li r3, -1 + nop + nop + nop + blr diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c new file mode 100644 index 000000000000..028aeae370b6 --- /dev/null +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -0,0 +1,52 @@ +/* + * ePAPR para-virtualization support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2012 Freescale Semiconductor, Inc. + */ + +#include <linux/of.h> +#include <asm/epapr_hcalls.h> +#include <asm/cacheflush.h> +#include <asm/code-patching.h> + +bool epapr_paravirt_enabled; + +static int __init epapr_paravirt_init(void) +{ + struct device_node *hyper_node; + const u32 *insts; + int len, i; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return -ENODEV; + + insts = of_get_property(hyper_node, "hcall-instructions", &len); + if (!insts) + return -ENODEV; + + if (len % 4 || len > (4 * 4)) + return -ENODEV; + + for (i = 0; i < (len / 4); i++) + patch_instruction(epapr_hypercall_start + i, insts[i]); + + epapr_paravirt_enabled = true; + + return 0; +} + +early_initcall(epapr_paravirt_init); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7215cc2495df..98be7f0cd227 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -222,7 +222,7 @@ exc_##n##_bad_stack: \ * interrupts happen before the wait instruction. */ #define CHECK_NAPPING() \ - clrrdi r11,r1,THREAD_SHIFT; \ + CURRENT_THREAD_INFO(r11, r1); \ ld r10,TI_LOCAL_FLAGS(r11); \ andi. r9,r10,_TLF_NAPPING; \ beq+ 1f; \ @@ -903,7 +903,7 @@ skpinv: addi r6,r6,1 /* Increment */ bne 1b /* If not, repeat */ /* Invalidate all TLBs */ - PPC_TLBILX_ALL(0,0) + PPC_TLBILX_ALL(0,R0) sync isync @@ -961,7 +961,7 @@ skpinv: addi r6,r6,1 /* Increment */ tlbwe /* Invalidate TLB1 */ - PPC_TLBILX_ALL(0,0) + PPC_TLBILX_ALL(0,R0) sync isync @@ -1020,7 +1020,7 @@ skpinv: addi r6,r6,1 /* Increment */ tlbwe /* Invalidate TLB1 */ - PPC_TLBILX_ALL(0,0) + PPC_TLBILX_ALL(0,R0) sync isync @@ -1138,7 +1138,7 @@ a2_tlbinit_after_iprot_flush: tlbwe #endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ - PPC_TLBILX(0,0,0) + PPC_TLBILX(0,0,R0) sync isync diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1c06d2971545..e894515e77bb 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -239,6 +239,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) * out of line to handle them */ . = 0xe00 +hv_exception_trampoline: b h_data_storage_hv . = 0xe20 b h_instr_storage_hv @@ -851,7 +852,7 @@ BEGIN_FTR_SECTION bne- do_ste_alloc /* If so handle it */ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) - clrrdi r11,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r11, r1) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ bne 77f /* then don't call hash_page now */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index de369558bf0a..e0ada05f2df3 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -26,7 +26,7 @@ #include <asm/ptrace.h> #ifdef CONFIG_VSX -#define REST_32FPVSRS(n,c,base) \ +#define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ @@ -35,7 +35,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: REST_32VSRS(n,c,base); \ 3: -#define SAVE_32FPVSRS(n,c,base) \ +#define __SAVE_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ @@ -44,9 +44,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else -#define REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) -#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) +#define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) +#define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) +#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) /* * This task wants to use the FPU now. @@ -79,7 +81,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) beq 1f toreal(r4) addi r4,r4,THREAD /* want last_task_used_math->thread */ - SAVE_32FPVSRS(0, r5, r4) + SAVE_32FPVSRS(0, R5, R4) mffs fr0 stfd fr0,THREAD_FPSCR(r4) PPC_LL r5,PT_REGS(r4) @@ -106,7 +108,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif lfd fr0,THREAD_FPSCR(r5) MTFSF_L(fr0) - REST_32FPVSRS(0, r4, r5) + REST_32FPVSRS(0, R4, R5) #ifndef CONFIG_SMP subi r4,r5,THREAD fromreal(r4) @@ -140,7 +142,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r5,0 - SAVE_32FPVSRS(0, r4 ,r3) + SAVE_32FPVSRS(0, R4 ,R3) mffs fr0 stfd fr0,THREAD_FPSCR(r3) beq 1f diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index bf99cfa6bbfe..1fb78561096a 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -63,11 +63,9 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) return -EINVAL; /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, &new, MCOUNT_INSN_SIZE)) + if (patch_instruction((unsigned int *)ip, new)) return -EPERM; - flush_icache_range(ip, ip + 8); - return 0; } @@ -212,12 +210,9 @@ __ftrace_make_nop(struct module *mod, */ op = 0x48000008; /* b +8 */ - if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) + if (patch_instruction((unsigned int *)ip, op)) return -EPERM; - - flush_icache_range(ip, ip + 8); - return 0; } @@ -245,9 +240,9 @@ __ftrace_make_nop(struct module *mod, /* * On PPC32 the trampoline looks like: - * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha - * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l - * 0x7d, 0x69, 0x03, 0xa6 mtctr r11 + * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha + * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l + * 0x7d, 0x89, 0x03, 0xa6 mtctr r12 * 0x4e, 0x80, 0x04, 0x20 bctr */ @@ -262,9 +257,9 @@ __ftrace_make_nop(struct module *mod, pr_devel(" %08x %08x ", jmp[0], jmp[1]); /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d600000) || - ((jmp[1] & 0xffff0000) != 0x396b0000) || - (jmp[2] != 0x7d6903a6) || + if (((jmp[0] & 0xffff0000) != 0x3d800000) || + ((jmp[1] & 0xffff0000) != 0x398c0000) || + (jmp[2] != 0x7d8903a6) || (jmp[3] != 0x4e800420)) { printk(KERN_ERR "Not a trampoline\n"); return -EINVAL; @@ -286,11 +281,9 @@ __ftrace_make_nop(struct module *mod, op = PPC_INST_NOP; - if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) + if (patch_instruction((unsigned int *)ip, op)) return -EPERM; - flush_icache_range(ip, ip + 8); - return 0; } #endif /* PPC64 */ @@ -426,11 +419,9 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) pr_devel("write to %lx\n", rec->ip); - if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) + if (patch_instruction((unsigned int *)ip, op)) return -EPERM; - flush_icache_range(ip, ip + 8); - return 0; } #endif /* CONFIG_PPC64 */ @@ -484,6 +475,58 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } +static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) +{ + unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR; + int ret; + + ret = ftrace_update_record(rec, enable); + + switch (ret) { + case FTRACE_UPDATE_IGNORE: + return 0; + case FTRACE_UPDATE_MAKE_CALL: + return ftrace_make_call(rec, ftrace_addr); + case FTRACE_UPDATE_MAKE_NOP: + return ftrace_make_nop(NULL, rec, ftrace_addr); + } + + return 0; +} + +void ftrace_replace_code(int enable) +{ + struct ftrace_rec_iter *iter; + struct dyn_ftrace *rec; + int ret; + + for (iter = ftrace_rec_iter_start(); iter; + iter = ftrace_rec_iter_next(iter)) { + rec = ftrace_rec_iter_record(iter); + ret = __ftrace_replace_code(rec, enable); + if (ret) { + ftrace_bug(ret, rec->ip); + return; + } + } +} + +void arch_ftrace_update_code(int command) +{ + if (command & FTRACE_UPDATE_CALLS) + ftrace_replace_code(1); + else if (command & FTRACE_DISABLE_CALLS) + ftrace_replace_code(0); + + if (command & FTRACE_UPDATE_TRACE_FUNC) + ftrace_update_ftrace_func(ftrace_trace_function); + + if (command & FTRACE_START_FUNC_RET) + ftrace_enable_ftrace_graph_caller(); + else if (command & FTRACE_STOP_FUNC_RET) + ftrace_disable_ftrace_graph_caller(); +} + int __init ftrace_dyn_arch_init(void *data) { /* caller expects data to be zero */ @@ -587,18 +630,17 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) { - *parent = old; - return; - } - trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; *parent = old; + return; } + + if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) + *parent = old; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 1f4434a38608..0f59863c3ade 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -192,7 +192,7 @@ _ENTRY(__early_start) li r0,0 stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) - rlwinm r22,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + CURRENT_THREAD_INFO(r22, r1) stw r24, TI_CPU(r22) bl early_init @@ -556,8 +556,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) - bne load_up_spe - addi r3,r1,STACK_FRAME_OVERHEAD + beq 1f + bl load_up_spe + b fast_exception_return +1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #else EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ @@ -778,7 +780,7 @@ tlb_write_entry: /* Note that the SPE support is closely modeled after the AltiVec * support. Changes to one are likely to be applicable to the * other! */ -load_up_spe: +_GLOBAL(load_up_spe) /* * Disable SPE for the task which had SPE previously, * and save its SPE registers in its thread_struct. @@ -826,20 +828,7 @@ load_up_spe: subi r4,r5,THREAD stw r4,last_task_used_spe@l(r3) #endif /* !CONFIG_SMP */ - /* restore registers and return */ -2: REST_4GPRS(3, r11) - lwz r10,_CCR(r11) - REST_GPR(1, r11) - mtcr r10 - lwz r10,_LINK(r11) - mtlr r10 - REST_GPR(10, r11) - mtspr SPRN_SRR1,r9 - mtspr SPRN_SRR0,r12 - REST_GPR(9, r11) - REST_GPR(12, r11) - lwz r11,GPR11(r11) - rfi + blr /* * SPE unavailable trap from kernel - print a message, but let diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 2bc0584be81c..f3a82dde61db 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -111,7 +111,7 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp) * and the single_step_dabr_instruction(), then cleanup the breakpoint * restoration variables to prevent dangling pointers. */ - if (bp->ctx->task) + if (bp->ctx && bp->ctx->task) bp->ctx->task->thread.last_hit_ubp = NULL; } diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 15c611de1ee2..1686916cc7f0 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -135,7 +135,7 @@ BEGIN_FTR_SECTION DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - rlwinm r9,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + CURRENT_THREAD_INFO(r9, r1) lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */ ori r8,r8,_TLF_NAPPING /* so when we take an exception */ stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */ @@ -158,7 +158,7 @@ _GLOBAL(power_save_ppc32_restore) stw r9,_NIP(r11) /* make it do a blr */ #ifdef CONFIG_SMP - rlwinm r12,r11,0,0,31-THREAD_SHIFT + CURRENT_THREAD_INFO(r12, r11) lwz r11,TI_CPU(r12) /* get cpu number * 4 */ slwi r11,r11,2 #else diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index ff007b59448d..4c7cb4008585 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -60,7 +60,7 @@ _GLOBAL(book3e_idle) 1: /* Let's set the _TLF_NAPPING flag so interrupts make us return * to the right spot */ - clrrdi r11,r1,THREAD_SHIFT + CURRENT_THREAD_INFO(r11, r1) ld r10,TI_LOCAL_FLAGS(r11) ori r10,r10,_TLF_NAPPING std r10,TI_LOCAL_FLAGS(r11) diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 4f0ab85f3788..15448668988d 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -21,7 +21,7 @@ .text _GLOBAL(e500_idle) - rlwinm r3,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + CURRENT_THREAD_INFO(r3, r1) lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */ ori r4,r4,_TLF_NAPPING /* so when we take an exception */ stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */ @@ -96,7 +96,7 @@ _GLOBAL(power_save_ppc32_restore) stw r9,_NIP(r11) /* make it do a blr */ #ifdef CONFIG_SMP - rlwinm r12,r1,0,0,31-THREAD_SHIFT + CURRENT_THREAD_INFO(r12, r1) lwz r11,TI_CPU(r12) /* get cpu number * 4 */ slwi r11,r11,2 #else diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index 2c71b0fc9f91..e3edaa189911 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -59,7 +59,7 @@ BEGIN_FTR_SECTION DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - clrrdi r9,r1,THREAD_SHIFT /* current thread_info */ + CURRENT_THREAD_INFO(r9, r1) ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */ ori r8,r8,_TLF_NAPPING /* so when we take an exception */ std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */ diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 359f078571c7..ff5a6ce027b8 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -33,6 +33,9 @@ #include <linux/bitmap.h> #include <linux/iommu-helper.h> #include <linux/crash_dump.h> +#include <linux/hash.h> +#include <linux/fault-inject.h> +#include <linux/pci.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/iommu.h> @@ -40,6 +43,7 @@ #include <asm/machdep.h> #include <asm/kdump.h> #include <asm/fadump.h> +#include <asm/vio.h> #define DBG(...) @@ -58,6 +62,114 @@ static int __init setup_iommu(char *str) __setup("iommu=", setup_iommu); +static DEFINE_PER_CPU(unsigned int, iommu_pool_hash); + +/* + * We precalculate the hash to avoid doing it on every allocation. + * + * The hash is important to spread CPUs across all the pools. For example, + * on a POWER7 with 4 way SMT we want interrupts on the primary threads and + * with 4 pools all primary threads would map to the same pool. + */ +static int __init setup_iommu_pool_hash(void) +{ + unsigned int i; + + for_each_possible_cpu(i) + per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS); + + return 0; +} +subsys_initcall(setup_iommu_pool_hash); + +#ifdef CONFIG_FAIL_IOMMU + +static DECLARE_FAULT_ATTR(fail_iommu); + +static int __init setup_fail_iommu(char *str) +{ + return setup_fault_attr(&fail_iommu, str); +} +__setup("fail_iommu=", setup_fail_iommu); + +static bool should_fail_iommu(struct device *dev) +{ + return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1); +} + +static int __init fail_iommu_debugfs(void) +{ + struct dentry *dir = fault_create_debugfs_attr("fail_iommu", + NULL, &fail_iommu); + + return IS_ERR(dir) ? PTR_ERR(dir) : 0; +} +late_initcall(fail_iommu_debugfs); + +static ssize_t fail_iommu_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", dev->archdata.fail_iommu); +} + +static ssize_t fail_iommu_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + int i; + + if (count > 0 && sscanf(buf, "%d", &i) > 0) + dev->archdata.fail_iommu = (i == 0) ? 0 : 1; + + return count; +} + +static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show, + fail_iommu_store); + +static int fail_iommu_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE) { + if (device_create_file(dev, &dev_attr_fail_iommu)) + pr_warn("Unable to create IOMMU fault injection sysfs " + "entries\n"); + } else if (action == BUS_NOTIFY_DEL_DEVICE) { + device_remove_file(dev, &dev_attr_fail_iommu); + } + + return 0; +} + +static struct notifier_block fail_iommu_bus_notifier = { + .notifier_call = fail_iommu_bus_notify +}; + +static int __init fail_iommu_setup(void) +{ +#ifdef CONFIG_PCI + bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier); +#endif +#ifdef CONFIG_IBMVIO + bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier); +#endif + + return 0; +} +/* + * Must execute after PCI and VIO subsystem have initialised but before + * devices are probed. + */ +arch_initcall(fail_iommu_setup); +#else +static inline bool should_fail_iommu(struct device *dev) +{ + return false; +} +#endif + static unsigned long iommu_range_alloc(struct device *dev, struct iommu_table *tbl, unsigned long npages, @@ -71,6 +183,9 @@ static unsigned long iommu_range_alloc(struct device *dev, int pass = 0; unsigned long align_mask; unsigned long boundary_size; + unsigned long flags; + unsigned int pool_nr; + struct iommu_pool *pool; align_mask = 0xffffffffffffffffl >> (64 - align_order); @@ -83,36 +198,49 @@ static unsigned long iommu_range_alloc(struct device *dev, return DMA_ERROR_CODE; } - if (handle && *handle) - start = *handle; + if (should_fail_iommu(dev)) + return DMA_ERROR_CODE; + + /* + * We don't need to disable preemption here because any CPU can + * safely use any IOMMU pool. + */ + pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1); + + if (largealloc) + pool = &(tbl->large_pool); else - start = largealloc ? tbl->it_largehint : tbl->it_hint; + pool = &(tbl->pools[pool_nr]); - /* Use only half of the table for small allocs (15 pages or less) */ - limit = largealloc ? tbl->it_size : tbl->it_halfpoint; + spin_lock_irqsave(&(pool->lock), flags); - if (largealloc && start < tbl->it_halfpoint) - start = tbl->it_halfpoint; +again: + if ((pass == 0) && handle && *handle) + start = *handle; + else + start = pool->hint; + + limit = pool->end; /* The case below can happen if we have a small segment appended * to a large, or when the previous alloc was at the very end of * the available space. If so, go back to the initial start. */ if (start >= limit) - start = largealloc ? tbl->it_largehint : tbl->it_hint; - - again: + start = pool->start; if (limit + tbl->it_offset > mask) { limit = mask - tbl->it_offset + 1; /* If we're constrained on address range, first try * at the masked hint to avoid O(n) search complexity, - * but on second pass, start at 0. + * but on second pass, start at 0 in pool 0. */ - if ((start & mask) >= limit || pass > 0) - start = 0; - else + if ((start & mask) >= limit || pass > 0) { + pool = &(tbl->pools[0]); + start = pool->start; + } else { start &= mask; + } } if (dev) @@ -126,16 +254,25 @@ static unsigned long iommu_range_alloc(struct device *dev, tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT, align_mask); if (n == -1) { - if (likely(pass < 2)) { - /* First failure, just rescan the half of the table. - * Second failure, rescan the other half of the table. - */ - start = (largealloc ^ pass) ? tbl->it_halfpoint : 0; - limit = pass ? tbl->it_size : limit; + if (likely(pass == 0)) { + /* First try the pool from the start */ + pool->hint = pool->start; pass++; goto again; + + } else if (pass <= tbl->nr_pools) { + /* Now try scanning all the other pools */ + spin_unlock(&(pool->lock)); + pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1); + pool = &tbl->pools[pool_nr]; + spin_lock(&(pool->lock)); + pool->hint = pool->start; + pass++; + goto again; + } else { - /* Third failure, give up */ + /* Give up */ + spin_unlock_irqrestore(&(pool->lock), flags); return DMA_ERROR_CODE; } } @@ -145,10 +282,10 @@ static unsigned long iommu_range_alloc(struct device *dev, /* Bump the hint to a new block for small allocs. */ if (largealloc) { /* Don't bump to new block to avoid fragmentation */ - tbl->it_largehint = end; + pool->hint = end; } else { /* Overflow will be taken care of at the next allocation */ - tbl->it_hint = (end + tbl->it_blocksize - 1) & + pool->hint = (end + tbl->it_blocksize - 1) & ~(tbl->it_blocksize - 1); } @@ -156,6 +293,8 @@ static unsigned long iommu_range_alloc(struct device *dev, if (handle) *handle = end; + spin_unlock_irqrestore(&(pool->lock), flags); + return n; } @@ -165,18 +304,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, unsigned long mask, unsigned int align_order, struct dma_attrs *attrs) { - unsigned long entry, flags; + unsigned long entry; dma_addr_t ret = DMA_ERROR_CODE; int build_fail; - spin_lock_irqsave(&(tbl->it_lock), flags); - entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order); - if (unlikely(entry == DMA_ERROR_CODE)) { - spin_unlock_irqrestore(&(tbl->it_lock), flags); + if (unlikely(entry == DMA_ERROR_CODE)) return DMA_ERROR_CODE; - } entry += tbl->it_offset; /* Offset into real TCE table */ ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ @@ -193,8 +328,6 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, */ if (unlikely(build_fail)) { __iommu_free(tbl, ret, npages); - - spin_unlock_irqrestore(&(tbl->it_lock), flags); return DMA_ERROR_CODE; } @@ -202,16 +335,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, if (ppc_md.tce_flush) ppc_md.tce_flush(tbl); - spin_unlock_irqrestore(&(tbl->it_lock), flags); - /* Make sure updates are seen by hardware */ mb(); return ret; } -static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, - unsigned int npages) +static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) { unsigned long entry, free_entry; @@ -231,20 +362,57 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index); WARN_ON(1); } - return; + + return false; + } + + return true; +} + +static struct iommu_pool *get_pool(struct iommu_table *tbl, + unsigned long entry) +{ + struct iommu_pool *p; + unsigned long largepool_start = tbl->large_pool.start; + + /* The large pool is the last pool at the top of the table */ + if (entry >= largepool_start) { + p = &tbl->large_pool; + } else { + unsigned int pool_nr = entry / tbl->poolsize; + + BUG_ON(pool_nr > tbl->nr_pools); + p = &tbl->pools[pool_nr]; } + return p; +} + +static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, + unsigned int npages) +{ + unsigned long entry, free_entry; + unsigned long flags; + struct iommu_pool *pool; + + entry = dma_addr >> IOMMU_PAGE_SHIFT; + free_entry = entry - tbl->it_offset; + + pool = get_pool(tbl, free_entry); + + if (!iommu_free_check(tbl, dma_addr, npages)) + return; + ppc_md.tce_free(tbl, entry, npages); + + spin_lock_irqsave(&(pool->lock), flags); bitmap_clear(tbl->it_map, free_entry, npages); + spin_unlock_irqrestore(&(pool->lock), flags); } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned int npages) { - unsigned long flags; - - spin_lock_irqsave(&(tbl->it_lock), flags); - __iommu_free(tbl, dma_addr, npages); /* Make sure TLB cache is flushed if the HW needs it. We do @@ -253,8 +421,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, */ if (ppc_md.tce_flush) ppc_md.tce_flush(tbl); - - spin_unlock_irqrestore(&(tbl->it_lock), flags); } int iommu_map_sg(struct device *dev, struct iommu_table *tbl, @@ -263,7 +429,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct dma_attrs *attrs) { dma_addr_t dma_next = 0, dma_addr; - unsigned long flags; struct scatterlist *s, *outs, *segstart; int outcount, incount, i, build_fail = 0; unsigned int align; @@ -285,8 +450,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, DBG("sg mapping %d elements:\n", nelems); - spin_lock_irqsave(&(tbl->it_lock), flags); - max_seg_size = dma_get_max_seg_size(dev); for_each_sg(sglist, s, nelems, i) { unsigned long vaddr, npages, entry, slen; @@ -369,8 +532,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (ppc_md.tce_flush) ppc_md.tce_flush(tbl); - spin_unlock_irqrestore(&(tbl->it_lock), flags); - DBG("mapped %d elements:\n", outcount); /* For the sake of iommu_unmap_sg, we clear out the length in the @@ -402,7 +563,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (s == outs) break; } - spin_unlock_irqrestore(&(tbl->it_lock), flags); return 0; } @@ -412,15 +572,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, struct dma_attrs *attrs) { struct scatterlist *sg; - unsigned long flags; BUG_ON(direction == DMA_NONE); if (!tbl) return; - spin_lock_irqsave(&(tbl->it_lock), flags); - sg = sglist; while (nelems--) { unsigned int npages; @@ -440,8 +597,6 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, */ if (ppc_md.tce_flush) ppc_md.tce_flush(tbl); - - spin_unlock_irqrestore(&(tbl->it_lock), flags); } static void iommu_table_clear(struct iommu_table *tbl) @@ -494,9 +649,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) unsigned long sz; static int welcomed = 0; struct page *page; - - /* Set aside 1/4 of the table for large allocations. */ - tbl->it_halfpoint = tbl->it_size * 3 / 4; + unsigned int i; + struct iommu_pool *p; /* number of bytes needed for the bitmap */ sz = (tbl->it_size + 7) >> 3; @@ -515,9 +669,28 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) if (tbl->it_offset == 0) set_bit(0, tbl->it_map); - tbl->it_hint = 0; - tbl->it_largehint = tbl->it_halfpoint; - spin_lock_init(&tbl->it_lock); + /* We only split the IOMMU table if we have 1GB or more of space */ + if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024)) + tbl->nr_pools = IOMMU_NR_POOLS; + else + tbl->nr_pools = 1; + + /* We reserve the top 1/4 of the table for large allocations */ + tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools; + + for (i = 0; i < tbl->nr_pools; i++) { + p = &tbl->pools[i]; + spin_lock_init(&(p->lock)); + p->start = tbl->poolsize * i; + p->hint = p->start; + p->end = p->start + tbl->poolsize; + } + + p = &tbl->large_pool; + spin_lock_init(&(p->lock)); + p->start = tbl->poolsize * i; + p->hint = p->start; + p->end = tbl->it_size; iommu_table_clear(tbl); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 1b415027ec0e..1f017bb7a7ce 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -229,7 +229,7 @@ notrace void arch_local_irq_restore(unsigned long en) */ if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) __hard_irq_disable(); -#ifdef CONFIG_TRACE_IRQFLAG +#ifdef CONFIG_TRACE_IRQFLAGS else { /* * We should already be hard disabled here. We had bugs @@ -286,6 +286,52 @@ void notrace restore_interrupts(void) __hard_irq_enable(); } +/* + * This is a helper to use when about to go into idle low-power + * when the latter has the side effect of re-enabling interrupts + * (such as calling H_CEDE under pHyp). + * + * You call this function with interrupts soft-disabled (this is + * already the case when ppc_md.power_save is called). The function + * will return whether to enter power save or just return. + * + * In the former case, it will have notified lockdep of interrupts + * being re-enabled and generally sanitized the lazy irq state, + * and in the latter case it will leave with interrupts hard + * disabled and marked as such, so the local_irq_enable() call + * in cpu_idle() will properly re-enable everything. + */ +bool prep_irq_for_idle(void) +{ + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + hard_irq_disable(); + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + /* + * Mark interrupts as soft-enabled and clear the + * PACA_IRQ_HARD_DIS from the pending mask since we + * are about to hard enable as well as a side effect + * of entering the low power state. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + local_paca->soft_enabled = 1; + + /* Tell the caller to enter the low power state */ + return true; +} + #endif /* CONFIG_PPC64 */ int arch_show_interrupts(struct seq_file *p, int prec) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 62bdf2389669..867db1de8949 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -31,6 +31,7 @@ #include <asm/cacheflush.h> #include <asm/disassemble.h> #include <asm/ppc-opcode.h> +#include <asm/epapr_hcalls.h> #define KVM_MAGIC_PAGE (-4096L) #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) @@ -302,7 +303,7 @@ static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one) if (imm_one) { p[kvm_emulate_wrtee_reg_offs] = - KVM_INST_LI | __PPC_RT(30) | MSR_EE; + KVM_INST_LI | __PPC_RT(R30) | MSR_EE; } else { /* Make clobbered registers work too */ switch (get_rt(rt)) { @@ -726,7 +727,7 @@ unsigned long kvm_hypercall(unsigned long *in, unsigned long register r11 asm("r11") = nr; unsigned long register r12 asm("r12"); - asm volatile("bl kvm_hypercall_start" + asm volatile("bl epapr_hypercall_start" : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), "=r"(r12) @@ -747,29 +748,6 @@ unsigned long kvm_hypercall(unsigned long *in, } EXPORT_SYMBOL_GPL(kvm_hypercall); -static int kvm_para_setup(void) -{ - extern u32 kvm_hypercall_start; - struct device_node *hyper_node; - u32 *insts; - int len, i; - - hyper_node = of_find_node_by_path("/hypervisor"); - if (!hyper_node) - return -1; - - insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len); - if (len % 4) - return -1; - if (len > (4 * 4)) - return -1; - - for (i = 0; i < (len / 4); i++) - kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]); - - return 0; -} - static __init void kvm_free_tmp(void) { unsigned long start, end; @@ -791,7 +769,7 @@ static int __init kvm_guest_init(void) if (!kvm_para_available()) goto free_tmp; - if (kvm_para_setup()) + if (!epapr_paravirt_enabled) goto free_tmp; if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index e291cf3cf954..e100ff324a85 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -24,16 +24,6 @@ #include <asm/page.h> #include <asm/asm-offsets.h> -/* Hypercall entry point. Will be patched with device tree instructions. */ - -.global kvm_hypercall_start -kvm_hypercall_start: - li r3, -1 - nop - nop - nop - blr - #define KVM_MAGIC_PAGE (-4096) #ifdef CONFIG_64BIT @@ -132,7 +122,7 @@ kvm_emulate_mtmsrd_len: .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 -#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI) +#define MSR_SAFE_BITS (MSR_EE | MSR_RI) #define MSR_CRITICAL_BITS ~MSR_SAFE_BITS .global kvm_emulate_mtmsr diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 386d57f66f28..407e293aad2f 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -179,7 +179,7 @@ _GLOBAL(low_choose_750fx_pll) mtspr SPRN_HID1,r4 /* Store new HID1 image */ - rlwinm r6,r1,0,0,(31-THREAD_SHIFT) + CURRENT_THREAD_INFO(r6, r1) lwz r6,TI_CPU(r6) slwi r6,r6,2 addis r6,r6,nap_save_hid1@ha @@ -699,7 +699,7 @@ _GLOBAL(kernel_thread) #ifdef CONFIG_SMP _GLOBAL(start_secondary_resume) /* Reset stack */ - rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + CURRENT_THREAD_INFO(r1, r1) addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD li r3,0 stw r3,0(r1) /* Zero the stack frame pointer */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 616921ef1439..565b78625a32 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -301,11 +301,6 @@ _GLOBAL(real_writeb) #ifdef CONFIG_PPC_PASEMI -/* No support in all binutils for these yet, so use defines */ -#define LBZCIX(RT,RA,RB) .long (0x7c0006aa|(RT<<21)|(RA<<16)|(RB << 11)) -#define STBCIX(RS,RA,RB) .long (0x7c0007aa|(RS<<21)|(RA<<16)|(RB << 11)) - - _GLOBAL(real_205_readb) mfmsr r7 ori r0,r7,MSR_DR @@ -314,7 +309,7 @@ _GLOBAL(real_205_readb) mtmsrd r0 sync isync - LBZCIX(r3,0,r3) + LBZCIX(R3,R0,R3) isync mtmsrd r7 sync @@ -329,7 +324,7 @@ _GLOBAL(real_205_writeb) mtmsrd r0 sync isync - STBCIX(r3,0,r4) + STBCIX(R3,R0,R4) isync mtmsrd r7 sync diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 8e78e93c8185..2aa04f29e1de 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -200,11 +200,6 @@ int pcibios_add_platform_entries(struct pci_dev *pdev) return device_create_file(&pdev->dev, &dev_attr_devspec); } -char __devinit *pcibios_setup(char *str) -{ - return str; -} - /* * Reads the interrupt pin to determine if interrupt is use by card. * If the interrupt is used, then gets the interrupt line from the @@ -248,8 +243,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) } else { pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n", oirq.size, oirq.specifier[0], oirq.specifier[1], - oirq.controller ? oirq.controller->full_name : - "<default>"); + of_node_full_name(oirq.controller)); virq = irq_create_of_mapping(oirq.controller, oirq.specifier, oirq.size); @@ -1628,8 +1622,7 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) struct device_node *node = hose->dn; int mode; - pr_debug("PCI: Scanning PHB %s\n", - node ? node->full_name : "<NO NAME>"); + pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node)); /* Get some IO space for the new PHB */ pcibios_setup_phb_io_space(hose); @@ -1637,6 +1630,11 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) /* Wire up PHB bus resources */ pcibios_setup_phb_resources(hose, &resources); + hose->busn.start = hose->first_busno; + hose->busn.end = hose->last_busno; + hose->busn.flags = IORESOURCE_BUS; + pci_add_resource(&resources, &hose->busn); + /* Create an empty bus for the toplevel */ bus = pci_create_root_bus(hose->parent, hose->first_busno, hose->ops, hose, &resources); @@ -1646,7 +1644,6 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) pci_free_resource_list(&resources); return; } - bus->secondary = hose->first_busno; hose->bus = bus; /* Get probe mode and perform scan */ @@ -1654,13 +1651,14 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) if (node && ppc_md.pci_probe_mode) mode = ppc_md.pci_probe_mode(bus); pr_debug(" probe mode: %d\n", mode); - if (mode == PCI_PROBE_DEVTREE) { - bus->subordinate = hose->last_busno; + if (mode == PCI_PROBE_DEVTREE) of_scan_bus(node, bus); - } - if (mode == PCI_PROBE_NORMAL) - hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); + if (mode == PCI_PROBE_NORMAL) { + pci_bus_update_busn_res_end(bus, 255); + hose->last_busno = pci_scan_child_bus(bus); + pci_bus_update_busn_res_end(bus, hose->last_busno); + } /* Platform gets a chance to do some global fixups before * we proceed to resource allocation diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 94a54f61d341..4ff190ff24a0 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -236,7 +236,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) { bus = pci_bus_b(ln); - if (in_bus >= bus->number && in_bus <= bus->subordinate) + if (in_bus >= bus->number && in_bus <= bus->busn_res.end) break; bus = NULL; } diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 89dde171a6fa..30378a19f65d 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -198,7 +198,6 @@ EXPORT_SYMBOL(of_create_pci_dev); /** * of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes - * @node: device tree node of bridge * @dev: pci_dev structure for the bridge * * of_scan_bus() calls this routine for each PCI bridge that it finds, and @@ -240,7 +239,7 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev) } bus->primary = dev->bus->number; - bus->subordinate = busrange[1]; + pci_bus_insert_busn_res(bus, busrange[0], busrange[1]); bus->bridge_ctl = 0; /* parse ranges property */ diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 4174b4b23246..2c0ee6405633 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -709,7 +709,7 @@ static int __init rtas_flash_init(void) if (rtas_token("ibm,update-flash-64-and-reboot") == RTAS_UNKNOWN_SERVICE) { - printk(KERN_ERR "rtas_flash: no firmware flash support\n"); + pr_info("rtas_flash: no firmware flash support\n"); return 1; } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index afd4f051f3f2..bdc499c17872 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -720,6 +720,33 @@ static int powerpc_debugfs_init(void) arch_initcall(powerpc_debugfs_init); #endif +#ifdef CONFIG_BOOKE_WDT +extern u32 booke_wdt_enabled; +extern u32 booke_wdt_period; + +/* Checks wdt=x and wdt_period=xx command-line option */ +notrace int __init early_parse_wdt(char *p) +{ + if (p && strncmp(p, "0", 1) != 0) + booke_wdt_enabled = 1; + + return 0; +} +early_param("wdt", early_parse_wdt); + +int __init early_parse_wdt_period(char *p) +{ + unsigned long ret; + if (p) { + if (!kstrtol(p, 0, &ret)) + booke_wdt_period = ret; + } + + return 0; +} +early_param("wdt_period", early_parse_wdt_period); +#endif /* CONFIG_BOOKE_WDT */ + void ppc_printk_progress(char *s, unsigned short hex) { pr_info("%s\n", s); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index ec8a53fa9e8f..a8f54ecb091f 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -149,30 +149,6 @@ notrace void __init machine_init(u64 dt_ptr) ppc_md.progress("id mach(): done", 0x200); } -#ifdef CONFIG_BOOKE_WDT -extern u32 booke_wdt_enabled; -extern u32 booke_wdt_period; - -/* Checks wdt=x and wdt_period=xx command-line option */ -notrace int __init early_parse_wdt(char *p) -{ - if (p && strncmp(p, "0", 1) != 0) - booke_wdt_enabled = 1; - - return 0; -} -early_param("wdt", early_parse_wdt); - -int __init early_parse_wdt_period (char *p) -{ - if (p) - booke_wdt_period = simple_strtoul(p, NULL, 0); - - return 0; -} -early_param("wdt_period", early_parse_wdt_period); -#endif /* CONFIG_BOOKE_WDT */ - /* Checks "l2cr=xxxx" command-line option */ int __init ppc_setup_l2cr(char *str) { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e4cb34322de4..0321007086f7 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -48,6 +48,7 @@ #ifdef CONFIG_PPC64 #include <asm/paca.h> #endif +#include <asm/vdso.h> #include <asm/debug.h> #ifdef DEBUG @@ -570,8 +571,9 @@ void __devinit start_secondary(void *unused) #ifdef CONFIG_PPC64 if (system_state == SYSTEM_RUNNING) vdso_data->processorCount++; + + vdso_getcpu_init(); #endif - ipi_call_lock(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); /* Update sibling maps */ @@ -601,7 +603,6 @@ void __devinit start_secondary(void *unused) of_node_put(np); } of_node_put(l2_cache); - ipi_call_unlock(); local_irq_enable(); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 9eb5b9b536a7..b67db22e102d 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -706,6 +706,34 @@ static void __init vdso_setup_syscall_map(void) } } +#ifdef CONFIG_PPC64 +int __cpuinit vdso_getcpu_init(void) +{ + unsigned long cpu, node, val; + + /* + * SPRG3 contains the CPU in the bottom 16 bits and the NUMA node in + * the next 16 bits. The VDSO uses this to implement getcpu(). + */ + cpu = get_cpu(); + WARN_ON_ONCE(cpu > 0xffff); + + node = cpu_to_node(cpu); + WARN_ON_ONCE(node > 0xffff); + + val = (cpu & 0xfff) | ((node & 0xffff) << 16); + mtspr(SPRN_SPRG3, val); +#ifdef CONFIG_KVM_BOOK3S_HANDLER + get_paca()->kvm_hstate.sprg3 = val; +#endif + + put_cpu(); + + return 0; +} +/* We need to call this before SMP init */ +early_initcall(vdso_getcpu_init); +#endif static int __init vdso_init(void) { diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 9a7946c41738..53e6c9b979ec 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -1,7 +1,9 @@ # List of files in the vdso, has to be asm only for now -obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o +obj-vdso32-$(CONFIG_PPC64) = getcpu.o +obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \ + $(obj-vdso32-y) # Build rules diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S new file mode 100644 index 000000000000..47afd08c90f7 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -0,0 +1,45 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <asm/ppc_asm.h> +#include <asm/vdso.h> + + .text +/* + * Exact prototype of getcpu + * + * int __kernel_getcpu(unsigned *cpu, unsigned *node); + * + */ +V_FUNCTION_BEGIN(__kernel_getcpu) + .cfi_startproc + mfspr r5,SPRN_USPRG3 + cmpdi cr0,r3,0 + cmpdi cr1,r4,0 + clrlwi r6,r5,16 + rlwinm r7,r5,16,31-15,31-0 + beq cr0,1f + stw r6,0(r3) +1: beq cr1,2f + stw r7,0(r4) +2: crclr cr0*4+so + li r3,0 /* always success */ + blr + .cfi_endproc +V_FUNCTION_END(__kernel_getcpu) diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 0546bcd49cd0..43200ba2e570 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -147,6 +147,9 @@ VERSION __kernel_sync_dicache_p5; __kernel_sigtramp32; __kernel_sigtramp_rt32; +#ifdef CONFIG_PPC64 + __kernel_getcpu; +#endif local: *; }; diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 8c500d8622e4..effca9404b17 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -1,6 +1,6 @@ # List of files in the vdso, has to be asm only for now -obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o +obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o # Build rules diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S new file mode 100644 index 000000000000..47afd08c90f7 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/getcpu.S @@ -0,0 +1,45 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard <anton@au.ibm.com> + */ +#include <asm/ppc_asm.h> +#include <asm/vdso.h> + + .text +/* + * Exact prototype of getcpu + * + * int __kernel_getcpu(unsigned *cpu, unsigned *node); + * + */ +V_FUNCTION_BEGIN(__kernel_getcpu) + .cfi_startproc + mfspr r5,SPRN_USPRG3 + cmpdi cr0,r3,0 + cmpdi cr1,r4,0 + clrlwi r6,r5,16 + rlwinm r7,r5,16,31-15,31-0 + beq cr0,1f + stw r6,0(r3) +1: beq cr1,2f + stw r7,0(r4) +2: crclr cr0*4+so + li r3,0 /* always success */ + blr + .cfi_endproc +V_FUNCTION_END(__kernel_getcpu) diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 0e615404e247..e6c1758f3588 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -146,6 +146,7 @@ VERSION __kernel_sync_dicache; __kernel_sync_dicache_p5; __kernel_sigtramp_rt64; + __kernel_getcpu; local: *; }; diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index cb87301ccd55..02b32216bbc3 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -37,8 +37,6 @@ #include <asm/page.h> #include <asm/hvcall.h> -static struct bus_type vio_bus_type; - static struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = "vio", .type = "", @@ -613,6 +611,7 @@ static u64 vio_dma_get_required_mask(struct device *dev) struct dma_map_ops vio_dma_mapping_ops = { .alloc = vio_dma_iommu_alloc_coherent, .free = vio_dma_iommu_free_coherent, + .mmap = dma_direct_mmap_coherent, .map_sg = vio_dma_iommu_map_sg, .unmap_sg = vio_dma_iommu_unmap_sg, .map_page = vio_dma_iommu_map_page, @@ -625,7 +624,7 @@ struct dma_map_ops vio_dma_mapping_ops = { * vio_cmo_set_dev_desired - Set desired entitlement for a device * * @viodev: struct vio_dev for device to alter - * @new_desired: new desired entitlement level in bytes + * @desired: new desired entitlement level in bytes * * For use by devices to request a change to their entitlement at runtime or * through sysfs. The desired entitlement level is changed and a balancing @@ -1262,7 +1261,7 @@ static int vio_bus_remove(struct device *dev) /** * vio_register_driver: - Register a new vio driver - * @drv: The vio_driver structure to be registered. + * @viodrv: The vio_driver structure to be registered. */ int __vio_register_driver(struct vio_driver *viodrv, struct module *owner, const char *mod_name) @@ -1282,7 +1281,7 @@ EXPORT_SYMBOL(__vio_register_driver); /** * vio_unregister_driver - Remove registration of vio driver. - * @driver: The vio_driver struct to be removed form registration + * @viodrv: The vio_driver struct to be removed form registration */ void vio_unregister_driver(struct vio_driver *viodrv) { @@ -1296,8 +1295,7 @@ static void __devinit vio_dev_release(struct device *dev) struct iommu_table *tbl = get_iommu_table_base(dev); if (tbl) - iommu_free_table(tbl, dev->of_node ? - dev->of_node->full_name : dev_name(dev)); + iommu_free_table(tbl, of_node_full_name(dev->of_node)); of_node_put(dev->of_node); kfree(to_vio_dev(dev)); } @@ -1397,21 +1395,27 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) viodev->name = of_node->name; viodev->dev.of_node = of_node_get(of_node); - if (firmware_has_feature(FW_FEATURE_CMO)) - vio_cmo_set_dma_ops(viodev); - else - set_dma_ops(&viodev->dev, &dma_iommu_ops); - set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev)); set_dev_node(&viodev->dev, of_node_to_nid(of_node)); /* init generic 'struct device' fields: */ viodev->dev.parent = &vio_bus_device.dev; viodev->dev.bus = &vio_bus_type; viodev->dev.release = vio_dev_release; - /* needed to ensure proper operation of coherent allocations - * later, in case driver doesn't set it explicitly */ - dma_set_mask(&viodev->dev, DMA_BIT_MASK(64)); - dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64)); + + if (of_get_property(viodev->dev.of_node, "ibm,my-dma-window", NULL)) { + if (firmware_has_feature(FW_FEATURE_CMO)) + vio_cmo_set_dma_ops(viodev); + else + set_dma_ops(&viodev->dev, &dma_iommu_ops); + + set_iommu_table_base(&viodev->dev, + vio_build_iommu_table(viodev)); + + /* needed to ensure proper operation of coherent allocations + * later, in case driver doesn't set it explicitly */ + dma_set_mask(&viodev->dev, DMA_BIT_MASK(64)); + dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64)); + } /* register with generic device framework */ if (device_register(&viodev->dev)) { @@ -1491,12 +1495,18 @@ static int __init vio_bus_init(void) if (firmware_has_feature(FW_FEATURE_CMO)) vio_cmo_bus_init(); + return 0; +} +postcore_initcall(vio_bus_init); + +static int __init vio_device_init(void) +{ vio_bus_scan_register_devices("vdevice"); vio_bus_scan_register_devices("ibm,platform-facilities"); return 0; } -__initcall(vio_bus_init); +device_initcall(vio_device_init); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1509,7 +1519,7 @@ static ssize_t devspec_show(struct device *dev, { struct device_node *of_node = dev->of_node; - return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none"); + return sprintf(buf, "%s\n", of_node_full_name(of_node)); } static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, @@ -1568,7 +1578,7 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env) return 0; } -static struct bus_type vio_bus_type = { +struct bus_type vio_bus_type = { .name = "vio", .dev_attrs = vio_dev_attrs, .uevent = vio_hotplug, |