diff options
author | Michal Marek <mmarek@suse.cz> | 2010-08-04 13:59:13 +0200 |
---|---|---|
committer | Michal Marek <mmarek@suse.cz> | 2010-08-04 13:59:13 +0200 |
commit | 772320e84588dcbe1600ffb83e5f328f2209ac2a (patch) | |
tree | a7de21b79340aeaa17c58126f6b801b82c77b53a /arch/powerpc/kernel | |
parent | modpost: support objects with more than 64k sections (diff) | |
parent | Linux 2.6.35 (diff) | |
download | linux-772320e84588dcbe1600ffb83e5f328f2209ac2a.tar.xz linux-772320e84588dcbe1600ffb83e5f328f2209ac2a.zip |
Merge commit 'v2.6.35' into kbuild/kbuild
Conflicts:
arch/powerpc/Makefile
Diffstat (limited to 'arch/powerpc/kernel')
72 files changed, 4011 insertions, 2135 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c002b0410219..58d0572de6f9 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -57,8 +57,12 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_E500) += idle_e500.o obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o -obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o \ - swsusp_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o +ifeq ($(CONFIG_FSL_BOOKE),y) +obj-$(CONFIG_HIBERNATION) += swsusp_booke.o +else +obj-$(CONFIG_HIBERNATION) += swsusp_$(CONFIG_WORD_SIZE).o +endif obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_44x) += cpu_setup_44x.o @@ -98,11 +102,16 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o perf_callchain.o +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o + +obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o +obj-$(CONFIG_FSL_EMB_PERF_EVENT) += perf_event_fsl_emb.o +obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o + obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o ifneq ($(CONFIG_PPC_INDIRECT_IO),y) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a6c2b63227b3..496cc5b3984f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -50,6 +50,9 @@ #endif #ifdef CONFIG_KVM #include <linux/kvm_host.h> +#ifndef CONFIG_BOOKE +#include <asm/kvm_book3s.h> +#endif #endif #ifdef CONFIG_PPC32 @@ -105,6 +108,9 @@ int main(void) DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ +#ifdef 
CONFIG_KVM_BOOK3S_32_HANDLER + DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); +#endif DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); @@ -133,7 +139,6 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, @@ -184,6 +189,7 @@ int main(void) #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); + DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); @@ -191,9 +197,9 @@ int main(void) DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); #ifdef CONFIG_KVM_BOOK3S_64_HANDLER - DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); - DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); - DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); + DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); + DEFINE(SVCPU_SLB, offsetof(struct kvmppc_book3s_shadow_vcpu, slb)); + DEFINE(SVCPU_SLB_MAX, offsetof(struct kvmppc_book3s_shadow_vcpu, slb_max)); #endif #endif /* CONFIG_PPC64 */ @@ -204,8 +210,8 @@ int main(void) /* Interrupt register frame */ DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); -#ifdef CONFIG_PPC64 
DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); +#ifdef CONFIG_PPC64 /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); @@ -388,11 +394,6 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); - DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); - DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); - DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); - DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); - DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); @@ -400,27 +401,81 @@ int main(void) DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); - DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); - DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); - DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); - - /* book3s_64 */ -#ifdef CONFIG_PPC64 - DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); + /* book3s */ +#ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); - DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); DEFINE(VCPU_HIGHMEM_HANDLER, 
offsetof(struct kvm_vcpu, arch.highmem_handler)); + DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); + DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - + offsetof(struct kvmppc_vcpu_book3s, vcpu)); + DEFINE(SVCPU_CR, offsetof(struct kvmppc_book3s_shadow_vcpu, cr)); + DEFINE(SVCPU_XER, offsetof(struct kvmppc_book3s_shadow_vcpu, xer)); + DEFINE(SVCPU_CTR, offsetof(struct kvmppc_book3s_shadow_vcpu, ctr)); + DEFINE(SVCPU_LR, offsetof(struct kvmppc_book3s_shadow_vcpu, lr)); + DEFINE(SVCPU_PC, offsetof(struct kvmppc_book3s_shadow_vcpu, pc)); + DEFINE(SVCPU_R0, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[0])); + DEFINE(SVCPU_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[1])); + DEFINE(SVCPU_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[2])); + DEFINE(SVCPU_R3, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[3])); + DEFINE(SVCPU_R4, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[4])); + DEFINE(SVCPU_R5, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[5])); + DEFINE(SVCPU_R6, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[6])); + DEFINE(SVCPU_R7, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[7])); + DEFINE(SVCPU_R8, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[8])); + DEFINE(SVCPU_R9, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[9])); + DEFINE(SVCPU_R10, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[10])); + DEFINE(SVCPU_R11, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[11])); + DEFINE(SVCPU_R12, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[12])); + DEFINE(SVCPU_R13, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[13])); + DEFINE(SVCPU_HOST_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r1)); + DEFINE(SVCPU_HOST_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r2)); + DEFINE(SVCPU_VMHANDLER, offsetof(struct kvmppc_book3s_shadow_vcpu, + vmhandler)); + DEFINE(SVCPU_SCRATCH0, offsetof(struct kvmppc_book3s_shadow_vcpu, + scratch0)); + DEFINE(SVCPU_SCRATCH1, 
offsetof(struct kvmppc_book3s_shadow_vcpu, + scratch1)); + DEFINE(SVCPU_IN_GUEST, offsetof(struct kvmppc_book3s_shadow_vcpu, + in_guest)); + DEFINE(SVCPU_FAULT_DSISR, offsetof(struct kvmppc_book3s_shadow_vcpu, + fault_dsisr)); + DEFINE(SVCPU_FAULT_DAR, offsetof(struct kvmppc_book3s_shadow_vcpu, + fault_dar)); + DEFINE(SVCPU_LAST_INST, offsetof(struct kvmppc_book3s_shadow_vcpu, + last_inst)); + DEFINE(SVCPU_SHADOW_SRR1, offsetof(struct kvmppc_book3s_shadow_vcpu, + shadow_srr1)); +#ifdef CONFIG_PPC_BOOK3S_32 + DEFINE(SVCPU_SR, offsetof(struct kvmppc_book3s_shadow_vcpu, sr)); #endif +#else + DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); + DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); + DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); + DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); + DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); +#endif /* CONFIG_PPC_BOOK3S */ #endif #ifdef CONFIG_44x DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif +#ifdef CONFIG_FSL_BOOKE + DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); + DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0)); + DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1)); + DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2)); + DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3)); + DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7)); +#endif #ifdef CONFIG_KVM_EXIT_TIMING DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 26e58630ed7b..625942ae5585 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -7,7 +7,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/sections.h> 
#include <asm/prom.h> diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index bb37b1d19a58..a3c684b4c862 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -19,6 +19,7 @@ #include <linux/notifier.h> #include <linux/of.h> #include <linux/percpu.h> +#include <linux/slab.h> #include <asm/prom.h> #include "cacheinfo.h" @@ -642,7 +643,7 @@ static struct kobj_attribute *cache_index_opt_attrs[] = { &cache_assoc_attr, }; -static struct sysfs_ops cache_index_ops = { +static const struct sysfs_ops cache_index_ops = { .show = cache_index_show, }; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 2fc82bac3bbc..87aa0f3c6047 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1701,6 +1701,35 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, + { /* 476 core */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x11a50000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* 476 iss */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00050000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1808,10 +1837,10 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", 
/* xxx - galak, e500mc? */ + .oprofile_cpu_type = "ppc/e500mc", .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e500mc, - .machine_check = machine_check_e500, + .machine_check = machine_check_e500mc, .platform = "ppce500mc", }, { /* default match */ diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 6f4613dd05ef..417f7b05a9ce 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -24,7 +24,7 @@ #include <linux/init.h> #include <linux/irq.h> #include <linux/types.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/processor.h> #include <asm/machdep.h> @@ -162,6 +162,34 @@ static void crash_kexec_prepare_cpus(int cpu) /* Leave the IPI callback set */ } +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#ifdef CONFIG_PPC_STD_MMU_64 +static void crash_kexec_wait_realmode(int cpu) +{ + unsigned int msecs; + int i; + + msecs = 10000; + for (i=0; i < NR_CPUS && msecs > 0; i++) { + if (i == cpu) + continue; + + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i)) { + break; + } + if (!cpu_online(i)) { + break; + } + msecs--; + mdelay(1); + } + } + mb(); +} +#endif + /* * This function will be called by secondary cpus or by kexec cpu * if soft-reset is activated to stop some CPUs. 
@@ -347,10 +375,12 @@ int crash_shutdown_unregister(crash_shutdown_t handler) EXPORT_SYMBOL(crash_shutdown_unregister); static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; static int handle_fault(struct pt_regs *regs) { - longjmp(crash_shutdown_buf, 1); + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); return 0; } @@ -375,11 +405,14 @@ void default_machine_crash_shutdown(struct pt_regs *regs) for_each_irq(i) { struct irq_desc *desc = irq_to_desc(i); + if (!desc || !desc->chip || !desc->chip->eoi) + continue; + if (desc->status & IRQ_INPROGRESS) desc->chip->eoi(i); if (!(desc->status & IRQ_DISABLED)) - desc->chip->disable(i); + desc->chip->shutdown(i); } /* @@ -388,6 +421,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) */ old_handler = __debugger_fault_handler; __debugger_fault_handler = handle_fault; + crash_shutdown_cpu = smp_processor_id(); for (i = 0; crash_shutdown_handles[i]; i++) { if (setjmp(crash_shutdown_buf) == 0) { /* @@ -401,6 +435,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) asm volatile("sync; isync"); } } + crash_shutdown_cpu = -1; __debugger_fault_handler = old_handler; /* @@ -412,6 +447,9 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_kexec_prepare_cpus(crashing_cpu); cpu_set(crashing_cpu, cpus_in_crash); crash_kexec_stop_spus(); +#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP) + crash_kexec_wait_realmode(crashing_cpu); +#endif if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); } diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 5fb667a60894..40f524643ba6 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,7 +13,7 @@ #include <linux/crash_dump.h> #include <linux/bootmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/code-patching.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -33,7 +33,7 @@ unsigned long long 
elfcorehdr_addr = ELFCORE_ADDR_MAX; #ifndef CONFIG_RELOCATABLE void __init reserve_kdump_trampoline(void) { - lmb_reserve(0, KDUMP_RESERVE_LIMIT); + memblock_reserve(0, KDUMP_RESERVE_LIMIT); } static void __init create_trampoline(unsigned long addr) diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 59c928564a03..02f724f36753 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -1,7 +1,8 @@ /* * Contains routines needed to support swiotlb for ppc. * - * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor + * Copyright (C) 2009-2010 Freescale Semiconductor, Inc. + * Author: Becky Bruce * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -38,8 +39,8 @@ struct dma_map_ops swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, - .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, - .sync_single_range_for_device = swiotlb_sync_single_range_for_device, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, .mapping_error = swiotlb_dma_mapping_error, @@ -70,7 +71,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb, sd->max_direct_dma_addr = 0; /* May need to bounce if the device can't address all of DRAM */ - if (dma_get_mask(dev) < lmb_end_of_DRAM()) + if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM()) set_dma_ops(dev, &swiotlb_dma_ops); return NOTIFY_DONE; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 6215062caf8c..84d6367ec003 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -8,7 +8,8 @@ #include <linux/device.h> #include <linux/dma-mapping.h> #include <linux/dma-debug.h> -#include <linux/lmb.h> 
+#include <linux/gfp.h> +#include <linux/memblock.h> #include <asm/bug.h> #include <asm/abs_addr.h> @@ -88,7 +89,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask) /* Could be improved so platforms can set the limit in case * they have limited DMA windows */ - return mask >= (lmb_end_of_DRAM() - 1); + return mask >= (memblock_end_of_DRAM() - 1); #else return 1; #endif @@ -126,11 +127,11 @@ static inline void dma_direct_sync_sg(struct device *dev, __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction); } -static inline void dma_direct_sync_single_range(struct device *dev, - dma_addr_t dma_handle, unsigned long offset, size_t size, - enum dma_data_direction direction) +static inline void dma_direct_sync_single(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) { - __dma_sync(bus_to_virt(dma_handle+offset), size, direction); + __dma_sync(bus_to_virt(dma_handle), size, direction); } #endif @@ -143,8 +144,8 @@ struct dma_map_ops dma_direct_ops = { .map_page = dma_direct_map_page, .unmap_page = dma_direct_unmap_page, #ifdef CONFIG_NOT_COHERENT_CACHE - .sync_single_range_for_cpu = dma_direct_sync_single_range, - .sync_single_range_for_device = dma_direct_sync_single_range, + .sync_single_for_cpu = dma_direct_sync_single, + .sync_single_for_device = dma_direct_sync_single, .sync_sg_for_cpu = dma_direct_sync_sg, .sync_sg_for_device = dma_direct_sync_sg, #endif diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c new file mode 100644 index 000000000000..7c07de0d8943 --- /dev/null +++ b/arch/powerpc/kernel/e500-pmu.c @@ -0,0 +1,129 @@ +/* + * Performance counter support for e500 family processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * Copyright 2010 Freescale Semiconductor, Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/string.h> +#include <linux/perf_event.h> +#include <asm/reg.h> +#include <asm/cputable.h> + +/* + * Map of generic hardware event types to hardware events + * Zero if unsupported + */ +static int e500_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 1, + [PERF_COUNT_HW_INSTRUCTIONS] = 2, + [PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12, + [PERF_COUNT_HW_BRANCH_MISSES] = 15, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + /* + * D-cache misses are not split into read/write/prefetch; + * use raw event 41. + */ + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 27, 0 }, + [C(OP_WRITE)] = { 28, 0 }, + [C(OP_PREFETCH)] = { 29, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 2, 60 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + /* + * Assuming LL means L2, it's not a good match for this model. + * It allocates only on L1 castout or explicit prefetch, and + * does not have separate read/write events (but it does have + * separate instruction/data events). + */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + /* + * There are data/instruction MMU misses, but that's a miss on + * the chip's internal level-one TLB which is probably not + * what the user wants. Instead, unified level-two TLB misses + * are reported here. 
+ */ + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 26, 66 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 12, 15 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + +static int num_events = 128; + +/* Upper half of event id is PMLCb, for threshold events */ +static u64 e500_xlate_event(u64 event_id) +{ + u32 event_low = (u32)event_id; + u64 ret; + + if (event_low >= num_events) + return 0; + + ret = FSL_EMB_EVENT_VALID; + + if (event_low >= 76 && event_low <= 81) { + ret |= FSL_EMB_EVENT_RESTRICTED; + ret |= event_id & + (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH); + } else if (event_id & + (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) { + /* Threshold requested on non-threshold event */ + return 0; + } + + return ret; +} + +static struct fsl_emb_pmu e500_pmu = { + .name = "e500 family", + .n_counter = 4, + .n_restricted = 2, + .xlate_event = e500_xlate_event, + .n_generic = ARRAY_SIZE(e500_generic_events), + .generic_events = e500_generic_events, + .cache_events = &e500_cache_events, +}; + +static int init_e500_pmu(void) +{ + if (!cur_cpu_spec->oprofile_cpu_type) + return -ENODEV; + + if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc")) + num_events = 256; + else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500")) + return -ENODEV; + + return register_fsl_emb_pmu(&e500_pmu); +} + +arch_initcall(init_e500_pmu); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1175a8539e6c..ed4aeb96398b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -373,11 +373,13 @@ syscall_exit_cont: bnel- load_dbcr0 #endif #ifdef CONFIG_44x +BEGIN_MMU_FTR_SECTION lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 bne- 2f 1: +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x) #endif /* CONFIG_44x */ BEGIN_FTR_SECTION lwarx r7,0,r1 @@ -848,6 +850,9 
@@ resume_kernel: /* interrupts are hard-disabled at this point */ restore: #ifdef CONFIG_44x +BEGIN_MMU_FTR_SECTION + b 1f +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index bdcb557d470a..42e9d908914a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); -#ifdef CONFIG_PERF_EVENTS - /* check paca->perf_event_pending if we're enabling ints */ - lbz r3,PACAPERFPEND(r13) - and. r3,r3,r5 - beq 27f - bl .perf_event_do_pending -27: -#endif /* CONFIG_PERF_EVENTS */ - /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ @@ -791,9 +782,8 @@ _GLOBAL(enter_rtas) li r9,1 rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) - ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP + ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI andc r6,r0,r9 - ori r6,r6,MSR_RI sync /* disable interrupts so SRR0/1 */ mtmsrd r0 /* don't get trashed */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e3be98ffe2a7..3e423fbad6bc 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -735,8 +735,11 @@ _STATIC(do_hash_page) std r3,_DAR(r1) std r4,_DSISR(r1) - andis. r0,r4,0xa450 /* weird error? */ + andis. r0,r4,0xa410 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ + andis. r0,r4,DSISR_DABRMATCH@h + bne- handle_dabr_fault + BEGIN_FTR_SECTION andis. r0,r4,0x0020 /* Is it a segment table fault? 
*/ bne- do_ste_alloc /* If so handle it */ @@ -823,6 +826,14 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) bl .raw_local_irq_restore b 11f +/* We have a data breakpoint exception - handle it */ +handle_dabr_fault: + ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_dabr + b .ret_from_except_lite + /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: ENABLE_INTS diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 1679a70bbcad..6b1f4271eb53 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -17,5 +17,5 @@ #include <asm/firmware.h> -unsigned long powerpc_firmware_features; +unsigned long powerpc_firmware_features __read_mostly; EXPORT_SYMBOL_GPL(powerpc_firmware_features); diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S new file mode 100644 index 000000000000..a92c79be2728 --- /dev/null +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -0,0 +1,235 @@ + +/* 1. Find the index of the entry we're executing in */ + bl invstr /* Find our address */ +invstr: mflr r6 /* Make it accessible */ + mfmsr r7 + rlwinm r4,r7,27,31,31 /* extract MSR[IS] */ + mfspr r7, SPRN_PID0 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* search MSR[IS], SPID=PID0 */ + mfspr r7,SPRN_MAS1 + andis. r7,r7,MAS1_VALID@h + bne match_TLB + + mfspr r7,SPRN_MMUCFG + rlwinm r7,r7,21,28,31 /* extract MMUCFG[NPIDS] */ + cmpwi r7,3 + bne match_TLB /* skip if NPIDS != 3 */ + + mfspr r7,SPRN_PID1 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* search MSR[IS], SPID=PID1 */ + mfspr r7,SPRN_MAS1 + andis. 
r7,r7,MAS1_VALID@h + bne match_TLB + mfspr r7, SPRN_PID2 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* Fall through, we had to match */ + +match_TLB: + mfspr r7,SPRN_MAS0 + rlwinm r3,r7,16,20,31 /* Extract MAS0(Entry) */ + + mfspr r7,SPRN_MAS1 /* Insure IPROT set */ + oris r7,r7,MAS1_IPROT@h + mtspr SPRN_MAS1,r7 + tlbwe + +/* 2. Invalidate all entries except the entry we're executing in */ + mfspr r9,SPRN_TLB1CFG + andi. r9,r9,0xfff + li r6,0 /* Set Entry counter to 0 */ +1: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */ + mtspr SPRN_MAS0,r7 + tlbre + mfspr r7,SPRN_MAS1 + rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */ + cmpw r3,r6 + beq skpinv /* Dont update the current execution TLB */ + mtspr SPRN_MAS1,r7 + tlbwe + isync +skpinv: addi r6,r6,1 /* Increment */ + cmpw r6,r9 /* Are we done? */ + bne 1b /* If not, repeat */ + + /* Invalidate TLB0 */ + li r6,0x04 + tlbivax 0,r6 + TLBSYNC + /* Invalidate TLB1 */ + li r6,0x0c + tlbivax 0,r6 + TLBSYNC + +/* 3. Setup a temp mapping and jump to it */ + andi. 
r5, r3, 0x1 /* Find an entry not used and is non-zero */ + addi r5, r5, 0x1 + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r7 + tlbre + + /* grab and fixup the RPN */ + mfspr r6,SPRN_MAS1 /* extract MAS1[SIZE] */ + rlwinm r6,r6,25,27,31 + li r8,-1 + addi r6,r6,10 + slw r6,r8,r6 /* convert to mask */ + + bl 1f /* Find our address */ +1: mflr r7 + + mfspr r8,SPRN_MAS3 +#ifdef CONFIG_PHYS_64BIT + mfspr r23,SPRN_MAS7 +#endif + and r8,r6,r8 + subfic r9,r6,-4096 + and r9,r9,r7 + + or r25,r8,r9 + ori r8,r25,(MAS3_SX|MAS3_SW|MAS3_SR) + + /* Just modify the entry ID and EPN for the temp mapping */ + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */ + mtspr SPRN_MAS0,r7 + xori r6,r4,1 /* Setup TMP mapping in the other Address space */ + slwi r6,r6,12 + oris r6,r6,(MAS1_VALID|MAS1_IPROT)@h + ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_4K))@l + mtspr SPRN_MAS1,r6 + mfspr r6,SPRN_MAS2 + li r7,0 /* temp EPN = 0 */ + rlwimi r7,r6,0,20,31 + mtspr SPRN_MAS2,r7 + mtspr SPRN_MAS3,r8 + tlbwe + + xori r6,r4,1 + slwi r6,r6,5 /* setup new context with other address space */ + bl 1f /* Find our address */ +1: mflr r9 + rlwimi r7,r9,0,20,31 + addi r7,r7,(2f - 1b) + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r6 + rfi +2: +/* 4. Clear out PIDs & Search info */ + li r6,0 + mtspr SPRN_MAS6,r6 + mtspr SPRN_PID0,r6 + + mfspr r7,SPRN_MMUCFG + rlwinm r7,r7,21,28,31 /* extract MMUCFG[NPIDS] */ + cmpwi r7,3 + bne 2f /* skip if NPIDS != 3 */ + + mtspr SPRN_PID1,r6 + mtspr SPRN_PID2,r6 + +/* 5. 
Invalidate mapping we started in */ +2: + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r7 + tlbre + mfspr r6,SPRN_MAS1 + rlwinm r6,r6,0,2,0 /* clear IPROT */ + mtspr SPRN_MAS1,r6 + tlbwe + /* Invalidate TLB1 */ + li r9,0x0c + tlbivax 0,r9 + TLBSYNC + +/* The mapping only needs to be cache-coherent on SMP */ +#ifdef CONFIG_SMP +#define M_IF_SMP MAS2_M +#else +#define M_IF_SMP 0 +#endif + +#if defined(ENTRY_MAPPING_BOOT_SETUP) + +/* 6. Setup KERNELBASE mapping in TLB1[0] */ + lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */ + mtspr SPRN_MAS0,r6 + lis r6,(MAS1_VALID|MAS1_IPROT)@h + ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l + mtspr SPRN_MAS1,r6 + lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@h + ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@l + mtspr SPRN_MAS2,r6 + mtspr SPRN_MAS3,r8 + tlbwe + +/* 7. Jump to KERNELBASE mapping */ + lis r6,(KERNELBASE & ~0xfff)@h + ori r6,r6,(KERNELBASE & ~0xfff)@l + +#elif defined(ENTRY_MAPPING_KEXEC_SETUP) +/* + * 6. Setup a 1:1 mapping in TLB1. Esel 0 is unsued, 1 or 2 contains the tmp + * mapping so we start at 3. We setup 8 mappings, each 256MiB in size. This + * will cover the first 2GiB of memory. + */ + + lis r10, (MAS1_VALID|MAS1_IPROT)@h + ori r10,r10, (MAS1_TSIZE(BOOK3E_PAGESZ_256M))@l + li r11, 0 + li r0, 8 + mtctr r0 + +next_tlb_setup: + addi r0, r11, 3 + rlwinm r0, r0, 16, 4, 15 // Compute esel + rlwinm r9, r11, 28, 0, 3 // Compute [ER]PN + oris r0, r0, (MAS0_TLBSEL(1))@h + mtspr SPRN_MAS0,r0 + mtspr SPRN_MAS1,r10 + mtspr SPRN_MAS2,r9 + ori r9, r9, (MAS3_SX|MAS3_SW|MAS3_SR) + mtspr SPRN_MAS3,r9 + tlbwe + addi r11, r11, 1 + bdnz+ next_tlb_setup + +/* 7. Jump to our 1:1 mapping */ + mr r6, r25 +#else + #error You need to specify the mapping or not use this at all. 
+#endif + + lis r7,MSR_KERNEL@h + ori r7,r7,MSR_KERNEL@l + bl 1f /* Find our address */ +1: mflr r9 + rlwimi r6,r9,0,20,31 + addi r6,r6,(2f - 1b) + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r7 + rfi /* start execution out of TLB1[0] entry */ + +/* 8. Clear out the temp mapping */ +2: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */ + mtspr SPRN_MAS0,r7 + tlbre + mfspr r8,SPRN_MAS1 + rlwinm r8,r8,0,2,0 /* clear IPROT */ + mtspr SPRN_MAS1,r8 + tlbwe + /* Invalidate TLB1 */ + li r9,0x0c + tlbivax 0,r9 + TLBSYNC diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index e025e89fe93e..98c4b29a56f4 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -33,6 +33,7 @@ #include <asm/asm-offsets.h> #include <asm/ptrace.h> #include <asm/bug.h> +#include <asm/kvm_book3s_asm.h> /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ #define LOAD_BAT(n, reg, RA, RB) \ @@ -303,6 +304,7 @@ __secondary_hold_acknowledge: */ #define EXCEPTION(n, label, hdlr, xfer) \ . = n; \ + DO_KVM n; \ label: \ EXCEPTION_PROLOG; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -358,6 +360,7 @@ i##n: \ * -- paulus. */ . = 0x200 + DO_KVM 0x200 mtspr SPRN_SPRG_SCRATCH0,r10 mtspr SPRN_SPRG_SCRATCH1,r11 mfcr r10 @@ -381,6 +384,7 @@ i##n: \ /* Data access exception. */ . = 0x300 + DO_KVM 0x300 DataAccess: EXCEPTION_PROLOG mfspr r10,SPRN_DSISR @@ -397,6 +401,7 @@ DataAccess: /* Instruction access exception. */ . = 0x400 + DO_KVM 0x400 InstructionAccess: EXCEPTION_PROLOG andis. r0,r9,0x4000 /* no pte found? */ @@ -413,6 +418,7 @@ InstructionAccess: /* Alignment exception */ . = 0x600 + DO_KVM 0x600 Alignment: EXCEPTION_PROLOG mfspr r4,SPRN_DAR @@ -427,6 +433,7 @@ Alignment: /* Floating-point unavailable */ . = 0x800 + DO_KVM 0x800 FPUnavailable: BEGIN_FTR_SECTION /* @@ -450,6 +457,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) /* System call */ . 
= 0xc00 + DO_KVM 0xc00 SystemCall: EXCEPTION_PROLOG EXC_XFER_EE_LITE(0xc00, DoSyscall) @@ -467,9 +475,11 @@ SystemCall: * by executing an altivec instruction. */ . = 0xf00 + DO_KVM 0xf00 b PerformanceMonitor . = 0xf20 + DO_KVM 0xf20 b AltiVecUnavailable /* @@ -882,6 +892,10 @@ __secondary_start: RFI #endif /* CONFIG_SMP */ +#ifdef CONFIG_KVM_BOOK3S_HANDLER +#include "../kvm/book3s_rmhandlers.S" +#endif + /* * Those generic dummy functions are kept for CPUs not * included in CONFIG_6xx diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 711368b993f2..5ab484ef06a7 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -37,6 +37,7 @@ #include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> +#include <asm/synch.h> #include "head_booke.h" @@ -69,165 +70,7 @@ _ENTRY(_start); mr r27,r7 li r24,0 /* CPU number */ -/* - * In case the firmware didn't do it, we apply some workarounds - * that are good for all 440 core variants here - */ - mfspr r3,SPRN_CCR0 - rlwinm r3,r3,0,0,27 /* disable icache prefetch */ - isync - mtspr SPRN_CCR0,r3 - isync - sync - -/* - * Set up the initial MMU state - * - * We are still executing code at the virtual address - * mappings set by the firmware for the base of RAM. - * - * We first invalidate all TLB entries but the one - * we are running from. We then load the KERNELBASE - * mappings so we can begin to use kernel addresses - * natively and so the interrupt vector locations are - * permanently pinned (necessary since Book E - * implementations always have translation enabled). - * - * TODO: Use the known TLB entry we are running from to - * determine which physical region we are located - * in. This can be used to determine where in RAM - * (on a shared CPU system) or PCI memory space - * (on a DRAMless system) we are located. - * For now, we assume a perfect world which means - * we are located at the base of DRAM (physical 0). 
- */ - -/* - * Search TLB for entry that we are currently using. - * Invalidate all entries but the one we are using. - */ - /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ - mfspr r3,SPRN_PID /* Get PID */ - mfmsr r4 /* Get MSR */ - andi. r4,r4,MSR_IS@l /* TS=1? */ - beq wmmucr /* If not, leave STS=0 */ - oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */ -wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ - sync - - bl invstr /* Find our address */ -invstr: mflr r5 /* Make it accessible */ - tlbsx r23,0,r5 /* Find entry we are in */ - li r4,0 /* Start at TLB entry 0 */ - li r3,0 /* Set PAGEID inval value */ -1: cmpw r23,r4 /* Is this our entry? */ - beq skpinv /* If so, skip the inval */ - tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ -skpinv: addi r4,r4,1 /* Increment */ - cmpwi r4,64 /* Are we done? */ - bne 1b /* If not, repeat */ - isync /* If so, context change */ - -/* - * Configure and load pinned entry into TLB slot 63. - */ - - lis r3,PAGE_OFFSET@h - ori r3,r3,PAGE_OFFSET@l - - /* Kernel is at the base of RAM */ - li r4, 0 /* Load the kernel physical address */ - - /* Load the kernel PID = 0 */ - li r0,0 - mtspr SPRN_PID,r0 - sync - - /* Initialize MMUCR */ - li r5,0 - mtspr SPRN_MMUCR,r5 - sync - - /* pageid fields */ - clrrwi r3,r3,10 /* Mask off the effective page number */ - ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M - - /* xlat fields */ - clrrwi r4,r4,10 /* Mask off the real page number */ - /* ERPN is 0 for first 4GB page */ - - /* attrib fields */ - /* Added guarded bit to protect against speculative loads/stores */ - li r5,0 - ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) - - li r0,63 /* TLB slot 63 */ - - tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ - tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ - tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ - - /* Force context change */ - mfmsr r0 - mtspr SPRN_SRR1, r0 - lis r0,3f@h - ori r0,r0,3f@l - mtspr 
SPRN_SRR0,r0 - sync - rfi - - /* If necessary, invalidate original entry we used */ -3: cmpwi r23,63 - beq 4f - li r6,0 - tlbwe r6,r23,PPC44x_TLB_PAGEID - isync - -4: -#ifdef CONFIG_PPC_EARLY_DEBUG_44x - /* Add UART mapping for early debug. */ - - /* pageid fields */ - lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h - ori r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K - - /* xlat fields */ - lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h - ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH - - /* attrib fields */ - li r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G) - li r0,62 /* TLB slot 0 */ - - tlbwe r3,r0,PPC44x_TLB_PAGEID - tlbwe r4,r0,PPC44x_TLB_XLAT - tlbwe r5,r0,PPC44x_TLB_ATTRIB - - /* Force context change */ - isync -#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ - - /* Establish the interrupt vector offsets */ - SET_IVOR(0, CriticalInput); - SET_IVOR(1, MachineCheck); - SET_IVOR(2, DataStorage); - SET_IVOR(3, InstructionStorage); - SET_IVOR(4, ExternalInput); - SET_IVOR(5, Alignment); - SET_IVOR(6, Program); - SET_IVOR(7, FloatingPointUnavailable); - SET_IVOR(8, SystemCall); - SET_IVOR(9, AuxillaryProcessorUnavailable); - SET_IVOR(10, Decrementer); - SET_IVOR(11, FixedIntervalTimer); - SET_IVOR(12, WatchdogTimer); - SET_IVOR(13, DataTLBError); - SET_IVOR(14, InstructionTLBError); - SET_IVOR(15, DebugCrit); - - /* Establish the interrupt vector base */ - lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ - mtspr SPRN_IVPR,r4 + bl init_cpu_state /* * This is where the main kernel code starts. @@ -349,7 +192,7 @@ interrupt_base: #endif /* Data TLB Error Interrupt */ - START_EXCEPTION(DataTLBError) + START_EXCEPTION(DataTLBError44x) mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_WSCRATCH1, r11 mtspr SPRN_SPRG_WSCRATCH2, r12 @@ -440,7 +283,7 @@ tlb_44x_patch_hwater_D: mfspr r10,SPRN_DEAR /* Jump to common tlb load */ - b finish_tlb_load + b finish_tlb_load_44x 2: /* The bailout. 
Restore registers to pre-exception conditions @@ -460,7 +303,7 @@ tlb_44x_patch_hwater_D: * information from different registers and bailout * to a different point. */ - START_EXCEPTION(InstructionTLBError) + START_EXCEPTION(InstructionTLBError44x) mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mtspr SPRN_SPRG_WSCRATCH1, r11 mtspr SPRN_SPRG_WSCRATCH2, r12 @@ -536,7 +379,7 @@ tlb_44x_patch_hwater_I: mfspr r10,SPRN_SRR0 /* Jump to common TLB load point */ - b finish_tlb_load + b finish_tlb_load_44x 2: /* The bailout. Restore registers to pre-exception conditions @@ -550,15 +393,7 @@ tlb_44x_patch_hwater_I: mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage - /* Debug Interrupt */ - DEBUG_CRIT_EXCEPTION - -/* - * Local functions - */ - /* - * Both the instruction and data TLB miss get to this * point to load the TLB. * r10 - EA of fault @@ -568,7 +403,7 @@ tlb_44x_patch_hwater_I: * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ -finish_tlb_load: +finish_tlb_load_44x: /* Combine RPN & ERPN an write WS 0 */ rlwimi r11,r12,0,0,31-PAGE_SHIFT tlbwe r11,r13,PPC44x_TLB_XLAT @@ -601,6 +436,227 @@ finish_tlb_load: mfspr r10, SPRN_SPRG_RSCRATCH0 rfi /* Force context change */ +/* TLB error interrupts for 476 + */ +#ifdef CONFIG_PPC_47x + START_EXCEPTION(DataTLBError47x) + mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1,r11 + mtspr SPRN_SPRG_WSCRATCH2,r12 + mtspr SPRN_SPRG_WSCRATCH3,r13 + mfcr r11 + mtspr SPRN_SPRG_WSCRATCH4,r11 + mfspr r10,SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. 
+ */ + lis r11,PAGE_OFFSET@h + cmplw cr0,r10,r11 + blt+ 3f + lis r11,swapper_pg_dir@h + ori r11,r11, swapper_pg_dir@l + li r12,0 /* MMUCR = 0 */ + b 4f + + /* Get the PGD for the current thread and setup MMUCR */ +3: mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + mfspr r12,SPRN_PID /* Get PID */ +4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ + + /* Mask of required permission bits. Note that while we + * do copy ESR:ST to _PAGE_RW position as trying to write + * to an RO page is pretty common, we don't do it with + * _PAGE_DIRTY. We could do it, but it's a fairly rare + * event so I'd rather take the overhead when it happens + * rather than adding an instruction here. We should measure + * whether the whole thing is worth it in the first place + * as we could avoid loading SPRN_ESR completely in the first + * place... + * + * TODO: Is it worth doing that mfspr & rlwimi in the first + * place or can we save a couple of instructions here ? + */ + mfspr r12,SPRN_ESR + li r13,_PAGE_PRESENT|_PAGE_ACCESSED + rlwimi r13,r12,10,30,30 + + /* Load the PTE */ + /* Compute pgdir/pmd offset */ + rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29 + lwzx r11,r12,r11 /* Get pgd/pmd entry */ + + /* Word 0 is EPN,V,TS,DSIZ */ + li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE + rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/ + li r12,0 + tlbwe r10,r12,0 + + /* XXX can we do better ? Need to make sure tlbwe has established + * latch V bit in MMUCR0 before the PTE is loaded further down */ +#ifdef CONFIG_SMP + isync +#endif + + rlwinm. r12,r11,0,0,20 /* Extract pt base address */ + /* Compute pte address */ + rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28 + beq 2f /* Bail if no table */ + lwz r11,0(r12) /* Get high word of pte entry */ + + /* XXX can we do better ? maybe insert a known 0 bit from r11 into the + * bottom of r12 to create a data dependency... 
We can also use r10 + * as destination nowadays + */ +#ifdef CONFIG_SMP + lwsync +#endif + lwz r12,4(r12) /* Get low word of pte entry */ + + andc. r13,r13,r12 /* Check permission */ + + /* Jump to common tlb load */ + beq finish_tlb_load_47x + +2: /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11,SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13,SPRN_SPRG_RSCRATCH3 + mfspr r12,SPRN_SPRG_RSCRATCH2 + mfspr r11,SPRN_SPRG_RSCRATCH1 + mfspr r10,SPRN_SPRG_RSCRATCH0 + b DataStorage + + /* Instruction TLB Error Interrupt */ + /* + * Nearly the same as above, except we get our + * information from different registers and bailout + * to a different point. + */ + START_EXCEPTION(InstructionTLBError47x) + mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */ + mtspr SPRN_SPRG_WSCRATCH1,r11 + mtspr SPRN_SPRG_WSCRATCH2,r12 + mtspr SPRN_SPRG_WSCRATCH3,r13 + mfcr r11 + mtspr SPRN_SPRG_WSCRATCH4,r11 + mfspr r10,SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11,PAGE_OFFSET@h + cmplw cr0,r10,r11 + blt+ 3f + lis r11,swapper_pg_dir@h + ori r11,r11, swapper_pg_dir@l + li r12,0 /* MMUCR = 0 */ + b 4f + + /* Get the PGD for the current thread and setup MMUCR */ +3: mfspr r11,SPRN_SPRG_THREAD + lwz r11,PGDIR(r11) + mfspr r12,SPRN_PID /* Get PID */ +4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ + + /* Make up the required permissions */ + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + + /* Load PTE */ + /* Compute pgdir/pmd offset */ + rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29 + lwzx r11,r12,r11 /* Get pgd/pmd entry */ + + /* Word 0 is EPN,V,TS,DSIZ */ + li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE + rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/ + li r12,0 + tlbwe r10,r12,0 + + /* XXX can we do better ? 
Need to make sure tlbwe has established + * latch V bit in MMUCR0 before the PTE is loaded further down */ +#ifdef CONFIG_SMP + isync +#endif + + rlwinm. r12,r11,0,0,20 /* Extract pt base address */ + /* Compute pte address */ + rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28 + beq 2f /* Bail if no table */ + + lwz r11,0(r12) /* Get high word of pte entry */ + /* XXX can we do better ? maybe insert a known 0 bit from r11 into the + * bottom of r12 to create a data dependency... We can also use r10 + * as destination nowadays + */ +#ifdef CONFIG_SMP + lwsync +#endif + lwz r12,4(r12) /* Get low word of pte entry */ + + andc. r13,r13,r12 /* Check permission */ + + /* Jump to common TLB load point */ + beq finish_tlb_load_47x + +2: /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 + b InstructionStorage + +/* + * Both the instruction and data TLB miss get to this + * point to load the TLB. + * r10 - free to use + * r11 - PTE high word value + * r12 - PTE low word value + * r13 - free to use + * MMUCR - loaded with proper value when we get here + * Upon exit, we reload everything and RFI. + */ +finish_tlb_load_47x: + /* Combine RPN & ERPN an write WS 1 */ + rlwimi r11,r12,0,0,31-PAGE_SHIFT + tlbwe r11,r13,1 + + /* And make up word 2 */ + li r10,0xf85 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + and r11,r12,r10 /* Mask PTE bits to keep */ + andi. r10,r12,_PAGE_USER /* User page ? */ + beq 1f /* nope, leave U bits empty */ + rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +1: tlbwe r11,r13,2 + + /* Done...restore registers and get out of here. 
+ */ + mfspr r11, SPRN_SPRG_RSCRATCH4 + mtcr r11 + mfspr r13, SPRN_SPRG_RSCRATCH3 + mfspr r12, SPRN_SPRG_RSCRATCH2 + mfspr r11, SPRN_SPRG_RSCRATCH1 + mfspr r10, SPRN_SPRG_RSCRATCH0 + rfi + +#endif /* CONFIG_PPC_47x */ + + /* Debug Interrupt */ + /* + * This statement needs to exist at the end of the IVPR + * definition just in case you end up taking a debug + * exception within another exception. + */ + DEBUG_CRIT_EXCEPTION + /* * Global functions */ @@ -647,6 +703,428 @@ _GLOBAL(set_context) blr /* + * Init CPU state. This is called at boot time or for secondary CPUs + * to setup initial TLB entries, setup IVORs, etc... + * + */ +_GLOBAL(init_cpu_state) + mflr r22 +#ifdef CONFIG_PPC_47x + /* We use the PVR to differenciate 44x cores from 476 */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,PVR_476@h + beq head_start_47x + cmplwi cr0,r3,PVR_476_ISS@h + beq head_start_47x +#endif /* CONFIG_PPC_47x */ + +/* + * In case the firmware didn't do it, we apply some workarounds + * that are good for all 440 core variants here + */ + mfspr r3,SPRN_CCR0 + rlwinm r3,r3,0,0,27 /* disable icache prefetch */ + isync + mtspr SPRN_CCR0,r3 + isync + sync + +/* + * Set up the initial MMU state for 44x + * + * We are still executing code at the virtual address + * mappings set by the firmware for the base of RAM. + * + * We first invalidate all TLB entries but the one + * we are running from. We then load the KERNELBASE + * mappings so we can begin to use kernel addresses + * natively and so the interrupt vector locations are + * permanently pinned (necessary since Book E + * implementations always have translation enabled). + * + * TODO: Use the known TLB entry we are running from to + * determine which physical region we are located + * in. This can be used to determine where in RAM + * (on a shared CPU system) or PCI memory space + * (on a DRAMless system) we are located. + * For now, we assume a perfect world which means + * we are located at the base of DRAM (physical 0). 
+ */ + +/* + * Search TLB for entry that we are currently using. + * Invalidate all entries but the one we are using. + */ + /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ + mfspr r3,SPRN_PID /* Get PID */ + mfmsr r4 /* Get MSR */ + andi. r4,r4,MSR_IS@l /* TS=1? */ + beq wmmucr /* If not, leave STS=0 */ + oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */ +wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ + sync + + bl invstr /* Find our address */ +invstr: mflr r5 /* Make it accessible */ + tlbsx r23,0,r5 /* Find entry we are in */ + li r4,0 /* Start at TLB entry 0 */ + li r3,0 /* Set PAGEID inval value */ +1: cmpw r23,r4 /* Is this our entry? */ + beq skpinv /* If so, skip the inval */ + tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ +skpinv: addi r4,r4,1 /* Increment */ + cmpwi r4,64 /* Are we done? */ + bne 1b /* If not, repeat */ + isync /* If so, context change */ + +/* + * Configure and load pinned entry into TLB slot 63. + */ + + lis r3,PAGE_OFFSET@h + ori r3,r3,PAGE_OFFSET@l + + /* Kernel is at the base of RAM */ + li r4, 0 /* Load the kernel physical address */ + + /* Load the kernel PID = 0 */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Initialize MMUCR */ + li r5,0 + mtspr SPRN_MMUCR,r5 + sync + + /* pageid fields */ + clrrwi r3,r3,10 /* Mask off the effective page number */ + ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M + + /* xlat fields */ + clrrwi r4,r4,10 /* Mask off the real page number */ + /* ERPN is 0 for first 4GB page */ + + /* attrib fields */ + /* Added guarded bit to protect against speculative loads/stores */ + li r5,0 + ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) + + li r0,63 /* TLB slot 63 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ + tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ + tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ + + /* Force context change */ + mfmsr r0 + mtspr SPRN_SRR1, r0 + lis r0,3f@h + ori r0,r0,3f@l + mtspr 
SPRN_SRR0,r0 + sync + rfi + + /* If necessary, invalidate original entry we used */ +3: cmpwi r23,63 + beq 4f + li r6,0 + tlbwe r6,r23,PPC44x_TLB_PAGEID + isync + +4: +#ifdef CONFIG_PPC_EARLY_DEBUG_44x + /* Add UART mapping for early debug. */ + + /* pageid fields */ + lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h + ori r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K + + /* xlat fields */ + lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h + ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH + + /* attrib fields */ + li r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G) + li r0,62 /* TLB slot 0 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID + tlbwe r4,r0,PPC44x_TLB_XLAT + tlbwe r5,r0,PPC44x_TLB_ATTRIB + + /* Force context change */ + isync +#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheck); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError44x); + SET_IVOR(14, InstructionTLBError44x); + SET_IVOR(15, DebugCrit); + + b head_start_common + + +#ifdef CONFIG_PPC_47x + +#ifdef CONFIG_SMP + +/* Entry point for secondary 47x processors */ +_GLOBAL(start_secondary_47x) + mr r24,r3 /* CPU number */ + + bl init_cpu_state + + /* Now we need to bolt the rest of kernel memory which + * is done in C code. We must be careful because our task + * struct or our stack can (and will probably) be out + * of reach of the initial 256M TLB entry, so we use a + * small temporary stack in .bss for that. 
This works + * because only one CPU at a time can be in this code + */ + lis r1,temp_boot_stack@h + ori r1,r1,temp_boot_stack@l + addi r1,r1,1024-STACK_FRAME_OVERHEAD + li r0,0 + stw r0,0(r1) + bl mmu_init_secondary + + /* Now we can get our task struct and real stack pointer */ + + /* Get current_thread_info and current */ + lis r1,secondary_ti@ha + lwz r1,secondary_ti@l(r1) + lwz r2,TI_TASK(r1) + + /* Current stack pointer */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r0,0 + stw r0,0(r1) + + /* Kernel stack for exception entry in SPRG3 */ + addi r4,r2,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + + b start_secondary + +#endif /* CONFIG_SMP */ + +/* + * Set up the initial MMU state for 44x + * + * We are still executing code at the virtual address + * mappings set by the firmware for the base of RAM. + */ + +head_start_47x: + /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ + mfspr r3,SPRN_PID /* Get PID */ + mfmsr r4 /* Get MSR */ + andi. r4,r4,MSR_IS@l /* TS=1? */ + beq 1f /* If not, leave STS=0 */ + oris r3,r3,PPC47x_MMUCR_STS@h /* Set STS=1 */ +1: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ + sync + + /* Find the entry we are running from */ + bl 1f +1: mflr r23 + tlbsx r23,0,r23 + tlbre r24,r23,0 + tlbre r25,r23,1 + tlbre r26,r23,2 + +/* + * Cleanup time + */ + + /* Initialize MMUCR */ + li r5,0 + mtspr SPRN_MMUCR,r5 + sync + +clear_all_utlb_entries: + + #; Set initial values. + + addis r3,0,0x8000 + addi r4,0,0 + addi r5,0,0 + b clear_utlb_entry + + #; Align the loop to speed things up. + + .align 6 + +clear_utlb_entry: + + tlbwe r4,r3,0 + tlbwe r5,r3,1 + tlbwe r5,r3,2 + addis r3,r3,0x2000 + cmpwi r3,0 + bne clear_utlb_entry + addis r3,0,0x8000 + addis r4,r4,0x100 + cmpwi r4,0 + bne clear_utlb_entry + + #; Restore original entry. 
+ + oris r23,r23,0x8000 /* specify the way */ + tlbwe r24,r23,0 + tlbwe r25,r23,1 + tlbwe r26,r23,2 + +/* + * Configure and load pinned entry into TLB for the kernel core + */ + + lis r3,PAGE_OFFSET@h + ori r3,r3,PAGE_OFFSET@l + + /* Kernel is at the base of RAM */ + li r4, 0 /* Load the kernel physical address */ + + /* Load the kernel PID = 0 */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Word 0 */ + clrrwi r3,r3,12 /* Mask off the effective page number */ + ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_256M + + /* Word 1 */ + clrrwi r4,r4,12 /* Mask off the real page number */ + /* ERPN is 0 for first 4GB page */ + /* Word 2 */ + li r5,0 + ori r5,r5,PPC47x_TLB2_S_RWX +#ifdef CONFIG_SMP + ori r5,r5,PPC47x_TLB2_M +#endif + + /* We write to way 0 and bolted 0 */ + lis r0,0x8800 + tlbwe r3,r0,0 + tlbwe r4,r0,1 + tlbwe r5,r0,2 + +/* + * Configure SSPCR, ISPCR and USPCR for now to search everything, we can fix + * them up later + */ + LOAD_REG_IMMEDIATE(r3, 0x9abcdef0) + mtspr SPRN_SSPCR,r3 + mtspr SPRN_USPCR,r3 + LOAD_REG_IMMEDIATE(r3, 0x12345670) + mtspr SPRN_ISPCR,r3 + + /* Force context change */ + mfmsr r0 + mtspr SPRN_SRR1, r0 + lis r0,3f@h + ori r0,r0,3f@l + mtspr SPRN_SRR0,r0 + sync + rfi + + /* Invalidate original entry we used */ +3: + rlwinm r24,r24,0,21,19 /* clear the "valid" bit */ + tlbwe r24,r23,0 + addi r24,0,0 + tlbwe r24,r23,1 + tlbwe r24,r23,2 + isync /* Clear out the shadow TLB entries */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_44x + /* Add UART mapping for early debug. 
*/ + + /* Word 0 */ + lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h + ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_TS | PPC47x_TLB0_1M + + /* Word 1 */ + lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h + ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH + + /* Word 2 */ + li r5,(PPC47x_TLB2_S_RW | PPC47x_TLB2_IMG) + + /* Bolted in way 0, bolt slot 5, we -hope- we don't hit the same + * congruence class as the kernel, we need to make sure of it at + * some point + */ + lis r0,0x8d00 + tlbwe r3,r0,0 + tlbwe r4,r0,1 + tlbwe r5,r0,2 + + /* Force context change */ + isync +#endif /* CONFIG_PPC_EARLY_DEBUG_44x */ + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheckA); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError47x); + SET_IVOR(14, InstructionTLBError47x); + SET_IVOR(15, DebugCrit); + + /* We configure icbi to invalidate 128 bytes at a time since the + * current 32-bit kernel code isn't too happy with icache != dcache + * block size + */ + mfspr r3,SPRN_CCR0 + oris r3,r3,0x0020 + mtspr SPRN_CCR0,r3 + isync + +#endif /* CONFIG_PPC_47x */ + +/* + * Here we are back to code that is common between 44x and 47x + * + * We proceed to further kernel initialization and return to the + * main kernel entry + */ +head_start_common: + /* Establish the interrupt vector base */ + lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ + mtspr SPRN_IVPR,r4 + + addis r22,r22,KERNELBASE@h + mtlr r22 + isync + blr + +/* * We put a few things here that have to be page-aligned. This stuff * goes at the beginning of the data segment, which is page-aligned. 
*/ @@ -671,3 +1149,9 @@ swapper_pg_dir: */ abatron_pteptrs: .space 8 + +#ifdef CONFIG_SMP + .align 12 +temp_boot_stack: + .space 1024 +#endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 925807488022..844a44b64472 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -37,7 +37,7 @@ #include <asm/firmware.h> #include <asm/page_64.h> #include <asm/irqflags.h> -#include <asm/kvm_book3s_64_asm.h> +#include <asm/kvm_book3s_asm.h> /* The physical memory is layed out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -169,7 +169,7 @@ exception_marker: /* KVM trampoline code needs to be close to the interrupt handlers */ #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#include "../kvm/book3s_64_rmhandlers.S" +#include "../kvm/book3s_rmhandlers.S" #endif _GLOBAL(generic_secondary_thread_init) @@ -219,7 +219,8 @@ generic_secondary_common_init: * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. */ - LOAD_REG_ADDR(r13, paca) /* Get base vaddr of paca array */ + LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ + ld r13,0(r13) /* Get base vaddr of paca array */ li r5,0 /* logical cpu id */ 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ @@ -536,7 +537,8 @@ _GLOBAL(pmac_secondary_start) mtmsrd r3 /* RI on */ /* Set up a paca value for this processor. */ - LOAD_REG_ADDR(r4,paca) /* Get base vaddr of paca array */ + LOAD_REG_ADDR(r4,paca) /* Load paca pointer */ + ld r4,0(r4) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ @@ -615,6 +617,17 @@ _GLOBAL(start_secondary_prolog) std r3,0(r1) /* Zero the stack frame pointer */ bl .start_secondary b . 
+/* + * Reset stack pointer and call start_secondary + * to continue with online operation when woken up + * from cede in cpu offline. + */ +_GLOBAL(start_secondary_resume) + ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */ + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary + b . #endif /* diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3ef743fa5d7c..1f1a04b5c2a4 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -71,9 +71,6 @@ _ENTRY(_start); * in the first level table, but that would require many changes to the * Linux page directory/table functions that I don't want to do right now. * - * I used to use SPRG2 for a temporary register in the TLB handler, but it - * has since been put to other uses. I now use a hack to save a register - * and the CCR at memory location 0.....Someday I'll fix this..... * -- Dan */ .globl __start @@ -302,8 +299,13 @@ InstructionTLBMiss: DO_8xx_CPU6(0x3f80, r3) mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ mfcr r10 +#ifdef CONFIG_8xx_CPU6 stw r10, 0(r0) stw r11, 4(r0) +#else + mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG2, r11 +#endif mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 addi r11, r10, 0x1000 @@ -318,12 +320,16 @@ InstructionTLBMiss: /* If we are faulting a kernel address, we have to use the * kernel page tables. */ +#ifdef CONFIG_MODULES + /* Only modules will cause ITLB Misses as we always + * pin the first 8MB of kernel memory */ andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ beq 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l rlwimi r10, r11, 0, 2, 19 3: +#endif lwz r11, 0(r10) /* Get the level 1 entry */ rlwinm. 
r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -339,31 +345,35 @@ InstructionTLBMiss: mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ lwz r10, 0(r11) /* Get the pte */ +#ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT bne- cr0, 2f - - /* Clear PP lsb, 0x400 */ - rlwinm r10, r10, 0, 22, 20 - +#endif /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 22 and 28 must be clear. + * Software indicator bits 21 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ li r11, 0x00f0 - rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ - mfspr r10, SPRN_M_TW /* Restore registers */ + /* Restore registers */ +#ifndef CONFIG_8xx_CPU6 + mfspr r10, SPRN_DAR + mtcr r10 + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_SPRG2 +#else lwz r11, 0(r0) mtcr r11 lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif + mfspr r10, SPRN_M_TW rfi 2: mfspr r11, SPRN_SRR1 @@ -373,13 +383,20 @@ InstructionTLBMiss: rlwinm r11, r11, 0, 0xffff mtspr SPRN_SRR1, r11 - mfspr r10, SPRN_M_TW /* Restore registers */ + /* Restore registers */ +#ifndef CONFIG_8xx_CPU6 + mfspr r10, SPRN_DAR + mtcr r10 + li r11, 0x00f0 + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_SPRG2 +#else lwz r11, 0(r0) mtcr r11 lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif + mfspr r10, SPRN_M_TW b InstructionAccess . 
= 0x1200 @@ -390,8 +407,13 @@ DataStoreTLBMiss: DO_8xx_CPU6(0x3f80, r3) mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ mfcr r10 +#ifdef CONFIG_8xx_CPU6 stw r10, 0(r0) stw r11, 4(r0) +#else + mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG2, r11 +#endif mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the @@ -438,15 +460,14 @@ DataStoreTLBMiss: * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); * r10 = (r10 & ~PRESENT) | r11; */ +#ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT - +#endif /* Honour kernel RO, User NA */ /* 0x200 == Extended encoding, bit 22 */ - /* r11 = (r10 & _PAGE_USER) >> 2 */ - rlwinm r11, r10, 32-2, 0x200 - or r10, r11, r10 + rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */ /* r11 = (r10 & _PAGE_RW) >> 1 */ rlwinm r11, r10, 32-1, 0x200 or r10, r11, r10 @@ -460,18 +481,24 @@ DataStoreTLBMiss: * of the MMU. */ 2: li r11, 0x00f0 - mtspr SPRN_DAR,r11 /* Tag DAR */ rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x3d80, r3) mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - mfspr r10, SPRN_M_TW /* Restore registers */ + /* Restore registers */ +#ifndef CONFIG_8xx_CPU6 + mfspr r10, SPRN_DAR + mtcr r10 + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r11, SPRN_SPRG2 +#else + mtspr SPRN_DAR, r11 /* Tag DAR */ lwz r11, 0(r0) mtcr r11 lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif + mfspr r10, SPRN_M_TW rfi /* This is an instruction TLB error on the MPC8xx. This could be due @@ -683,9 +710,6 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 - li r3,0 - /* XXX What is that for ? 
SPRG2 appears otherwise unused on 8xx */ - mtspr SPRN_SPRG2,r3 /* 0 => r1 has kernel sp */ /* stack */ lis r1,init_thread_union@ha diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 50504ae39cb7..a0bf158c8b47 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -1,6 +1,7 @@ #ifndef __HEAD_BOOKE_H__ #define __HEAD_BOOKE_H__ +#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */ /* * Macros used for common Book-e exception handling */ @@ -48,6 +49,9 @@ stw r10,0(r11); \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ stw r0,GPR0(r11); \ + lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \ + addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ + stw r10, 8(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 7f4bd7f3b6af..4faeba247854 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -94,204 +94,10 @@ _ENTRY(_start); */ _ENTRY(__early_start) -/* 1. Find the index of the entry we're executing in */ - bl invstr /* Find our address */ -invstr: mflr r6 /* Make it accessible */ - mfmsr r7 - rlwinm r4,r7,27,31,31 /* extract MSR[IS] */ - mfspr r7, SPRN_PID0 - slwi r7,r7,16 - or r7,r7,r4 - mtspr SPRN_MAS6,r7 - tlbsx 0,r6 /* search MSR[IS], SPID=PID0 */ - mfspr r7,SPRN_MAS1 - andis. r7,r7,MAS1_VALID@h - bne match_TLB - - mfspr r7,SPRN_MMUCFG - rlwinm r7,r7,21,28,31 /* extract MMUCFG[NPIDS] */ - cmpwi r7,3 - bne match_TLB /* skip if NPIDS != 3 */ - - mfspr r7,SPRN_PID1 - slwi r7,r7,16 - or r7,r7,r4 - mtspr SPRN_MAS6,r7 - tlbsx 0,r6 /* search MSR[IS], SPID=PID1 */ - mfspr r7,SPRN_MAS1 - andis. 
r7,r7,MAS1_VALID@h - bne match_TLB - mfspr r7, SPRN_PID2 - slwi r7,r7,16 - or r7,r7,r4 - mtspr SPRN_MAS6,r7 - tlbsx 0,r6 /* Fall through, we had to match */ - -match_TLB: - mfspr r7,SPRN_MAS0 - rlwinm r3,r7,16,20,31 /* Extract MAS0(Entry) */ - - mfspr r7,SPRN_MAS1 /* Insure IPROT set */ - oris r7,r7,MAS1_IPROT@h - mtspr SPRN_MAS1,r7 - tlbwe - -/* 2. Invalidate all entries except the entry we're executing in */ - mfspr r9,SPRN_TLB1CFG - andi. r9,r9,0xfff - li r6,0 /* Set Entry counter to 0 */ -1: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ - rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */ - mtspr SPRN_MAS0,r7 - tlbre - mfspr r7,SPRN_MAS1 - rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */ - cmpw r3,r6 - beq skpinv /* Dont update the current execution TLB */ - mtspr SPRN_MAS1,r7 - tlbwe - isync -skpinv: addi r6,r6,1 /* Increment */ - cmpw r6,r9 /* Are we done? */ - bne 1b /* If not, repeat */ - - /* Invalidate TLB0 */ - li r6,0x04 - tlbivax 0,r6 - TLBSYNC - /* Invalidate TLB1 */ - li r6,0x0c - tlbivax 0,r6 - TLBSYNC - -/* 3. Setup a temp mapping and jump to it */ - andi. 
r5, r3, 0x1 /* Find an entry not used and is non-zero */ - addi r5, r5, 0x1 - lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ - rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ - mtspr SPRN_MAS0,r7 - tlbre - - /* grab and fixup the RPN */ - mfspr r6,SPRN_MAS1 /* extract MAS1[SIZE] */ - rlwinm r6,r6,25,27,31 - li r8,-1 - addi r6,r6,10 - slw r6,r8,r6 /* convert to mask */ - - bl 1f /* Find our address */ -1: mflr r7 - - mfspr r8,SPRN_MAS3 -#ifdef CONFIG_PHYS_64BIT - mfspr r23,SPRN_MAS7 -#endif - and r8,r6,r8 - subfic r9,r6,-4096 - and r9,r9,r7 - - or r25,r8,r9 - ori r8,r25,(MAS3_SX|MAS3_SW|MAS3_SR) - - /* Just modify the entry ID and EPN for the temp mapping */ - lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ - rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */ - mtspr SPRN_MAS0,r7 - xori r6,r4,1 /* Setup TMP mapping in the other Address space */ - slwi r6,r6,12 - oris r6,r6,(MAS1_VALID|MAS1_IPROT)@h - ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_4K))@l - mtspr SPRN_MAS1,r6 - mfspr r6,SPRN_MAS2 - li r7,0 /* temp EPN = 0 */ - rlwimi r7,r6,0,20,31 - mtspr SPRN_MAS2,r7 - mtspr SPRN_MAS3,r8 - tlbwe - - xori r6,r4,1 - slwi r6,r6,5 /* setup new context with other address space */ - bl 1f /* Find our address */ -1: mflr r9 - rlwimi r7,r9,0,20,31 - addi r7,r7,24 - mtspr SPRN_SRR0,r7 - mtspr SPRN_SRR1,r6 - rfi -/* 4. Clear out PIDs & Search info */ - li r6,0 - mtspr SPRN_MAS6,r6 - mtspr SPRN_PID0,r6 - - mfspr r7,SPRN_MMUCFG - rlwinm r7,r7,21,28,31 /* extract MMUCFG[NPIDS] */ - cmpwi r7,3 - bne 2f /* skip if NPIDS != 3 */ - - mtspr SPRN_PID1,r6 - mtspr SPRN_PID2,r6 - -/* 5. 
Invalidate mapping we started in */ -2: - lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ - rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ - mtspr SPRN_MAS0,r7 - tlbre - mfspr r6,SPRN_MAS1 - rlwinm r6,r6,0,2,0 /* clear IPROT */ - mtspr SPRN_MAS1,r6 - tlbwe - /* Invalidate TLB1 */ - li r9,0x0c - tlbivax 0,r9 - TLBSYNC - -/* The mapping only needs to be cache-coherent on SMP */ -#ifdef CONFIG_SMP -#define M_IF_SMP MAS2_M -#else -#define M_IF_SMP 0 -#endif - -/* 6. Setup KERNELBASE mapping in TLB1[0] */ - lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */ - mtspr SPRN_MAS0,r6 - lis r6,(MAS1_VALID|MAS1_IPROT)@h - ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l - mtspr SPRN_MAS1,r6 - lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@h - ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@l - mtspr SPRN_MAS2,r6 - mtspr SPRN_MAS3,r8 - tlbwe - -/* 7. Jump to KERNELBASE mapping */ - lis r6,(KERNELBASE & ~0xfff)@h - ori r6,r6,(KERNELBASE & ~0xfff)@l - lis r7,MSR_KERNEL@h - ori r7,r7,MSR_KERNEL@l - bl 1f /* Find our address */ -1: mflr r9 - rlwimi r6,r9,0,20,31 - addi r6,r6,(2f - 1b) - mtspr SPRN_SRR0,r6 - mtspr SPRN_SRR1,r7 - rfi /* start execution out of TLB1[0] entry */ - -/* 8. 
Clear out the temp mapping */ -2: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ - rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */ - mtspr SPRN_MAS0,r7 - tlbre - mfspr r8,SPRN_MAS1 - rlwinm r8,r8,0,2,0 /* clear IPROT */ - mtspr SPRN_MAS1,r8 - tlbwe - /* Invalidate TLB1 */ - li r9,0x0c - tlbivax 0,r9 - TLBSYNC +#define ENTRY_MAPPING_BOOT_SETUP +#include "fsl_booke_entry_mapping.S" +#undef ENTRY_MAPPING_BOOT_SETUP /* Establish the interrupt vector offsets */ SET_IVOR(0, CriticalInput); @@ -639,6 +445,13 @@ interrupt_base: rlwinm r12,r12,0,16,1 mtspr SPRN_MAS1,r12 + /* Make up the required permissions for kernel code */ +#ifdef CONFIG_PTE_64BIT + li r13,_PAGE_PRESENT | _PAGE_BAP_SX + oris r13,r13,_PAGE_ACCESSED@h +#else + li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC +#endif b 4f /* Get the PGD for the current thread */ @@ -646,15 +459,15 @@ interrupt_base: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) -4: - /* Make up the required permissions */ + /* Make up the required permissions for user code */ #ifdef CONFIG_PTE_64BIT - li r13,_PAGE_PRESENT | _PAGE_EXEC + li r13,_PAGE_PRESENT | _PAGE_BAP_UX oris r13,r13,_PAGE_ACCESSED@h #else li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC #endif +4: FIND_PTE andc. 
r13,r13,r11 /* Check permission */ @@ -747,9 +560,6 @@ finish_tlb_load: #else rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ #endif -#ifdef CONFIG_SMP - ori r12, r12, MAS2_M -#endif mtspr SPRN_MAS2, r12 #ifdef CONFIG_PTE_64BIT @@ -887,13 +697,17 @@ KernelSPE: lwz r3,_MSR(r1) oris r3,r3,MSR_SPE@h stw r3,_MSR(r1) /* enable use of SPE after return */ +#ifdef CONFIG_PRINTK lis r3,87f@h ori r3,r3,87f@l mr r4,r2 /* current */ lwz r5,_NIP(r1) bl printk +#endif b ret_from_except +#ifdef CONFIG_PRINTK 87: .string "SPE used in kernel (task=%p, pc=%x) \n" +#endif .align 4,0 #endif /* CONFIG_SPE */ diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index a4c8b38b0ba1..21266abfbda6 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -42,6 +42,7 @@ #include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/of.h> +#include <linux/slab.h> #include <linux/of_platform.h> #include <asm/ibmebus.h> #include <asm/abs_addr.h> @@ -139,14 +140,14 @@ static struct dma_map_ops ibmebus_dma_ops = { static int ibmebus_match_path(struct device *dev, void *data) { - struct device_node *dn = to_of_device(dev)->node; + struct device_node *dn = to_of_device(dev)->dev.of_node; return (dn->full_name && (strcasecmp((char *)data, dn->full_name) == 0)); } static int ibmebus_match_node(struct device *dev, void *data) { - return to_of_device(dev)->node == data; + return to_of_device(dev)->dev.of_node == data; } static int ibmebus_create_device(struct device_node *dn) @@ -201,7 +202,7 @@ static int ibmebus_create_devices(const struct of_device_id *matches) int ibmebus_register_driver(struct of_platform_driver *drv) { /* If the driver uses devices that ibmebus doesn't know, add them */ - ibmebus_create_devices(drv->match_table); + ibmebus_create_devices(drv->driver.of_match_table); return of_register_driver(drv, &ibmebus_bus_type); } diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 
5547ae6e6b0b..d5839179ec77 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -42,26 +42,10 @@ #define DBG(...) -#ifdef CONFIG_IOMMU_VMERGE -static int novmerge = 0; -#else -static int novmerge = 1; -#endif - -static int protect4gb = 1; +static int novmerge; static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); -static int __init setup_protect4gb(char *str) -{ - if (strcmp(str, "on") == 0) - protect4gb = 1; - else if (strcmp(str, "off") == 0) - protect4gb = 0; - - return 1; -} - static int __init setup_iommu(char *str) { if (!strcmp(str, "novmerge")) @@ -71,7 +55,6 @@ static int __init setup_iommu(char *str) return 1; } -__setup("protect4gb=", setup_protect4gb); __setup("iommu=", setup_iommu); static unsigned long iommu_range_alloc(struct device *dev, diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 9040330b0530..77be3d058a65 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,7 +53,6 @@ #include <linux/bootmem.h> #include <linux/pci.h> #include <linux/debugfs.h> -#include <linux/perf_event.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -73,8 +72,10 @@ #define CREATE_TRACE_POINTS #include <asm/trace.h> +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + int __irq_offset_value; -static int ppc_spurious_interrupts; #ifdef CONFIG_PPC32 EXPORT_SYMBOL(__irq_offset_value); @@ -143,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en) } #endif /* CONFIG_PPC_STD_MMU_64 */ - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); - } - /* * if (get_paca()->hard_enabled) return; * But again we need to take care that gcc gets hard_enabled directly @@ -180,30 +176,64 @@ notrace void raw_local_irq_restore(unsigned long en) EXPORT_SYMBOL(raw_local_irq_restore); #endif /* CONFIG_PPC64 */ +static int show_other_interrupts(struct seq_file *p, int prec) +{ + int j; + +#if defined(CONFIG_PPC32) 
&& defined(CONFIG_TAU_INT) + if (tau_initialized) { + seq_printf(p, "%*s: ", prec, "TAU"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", tau_interrupts(j)); + seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); + } +#endif /* CONFIG_PPC32 && CONFIG_TAU_INT */ + + seq_printf(p, "%*s: ", prec, "LOC"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs); + seq_printf(p, " Local timer interrupts\n"); + + seq_printf(p, "%*s: ", prec, "SPU"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); + seq_printf(p, " Spurious interrupts\n"); + + seq_printf(p, "%*s: ", prec, "CNT"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); + seq_printf(p, " Performance monitoring interrupts\n"); + + seq_printf(p, "%*s: ", prec, "MCE"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); + seq_printf(p, " Machine check exceptions\n"); + + return 0; +} + int show_interrupts(struct seq_file *p, void *v) { - int i = *(loff_t *)v, j; + unsigned long flags, any_count = 0; + int i = *(loff_t *) v, j, prec; struct irqaction *action; struct irq_desc *desc; - unsigned long flags; + if (i > nr_irqs) + return 0; + + for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) + j *= 10; + + if (i == nr_irqs) + return show_other_interrupts(p, prec); + + /* print header */ if (i == 0) { - seq_puts(p, " "); + seq_printf(p, "%*s", prec + 8, ""); for_each_online_cpu(j) - seq_printf(p, "CPU%d ", j); + seq_printf(p, "CPU%-8d", j); seq_putc(p, '\n'); - } else if (i == nr_irqs) { -#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) - if (tau_initialized){ - seq_puts(p, "TAU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", tau_interrupts(j)); - seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); - } -#endif /* CONFIG_PPC32 && CONFIG_TAU_INT*/ - seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts); - - return 0; } desc = irq_to_desc(i); @@ -211,69 +241,85 @@ int 
show_interrupts(struct seq_file *p, void *v) return 0; raw_spin_lock_irqsave(&desc->lock, flags); - + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); action = desc->action; - if (!action || !action->handler) - goto skip; + if (!action && !any_count) + goto out; - seq_printf(p, "%3d: ", i); -#ifdef CONFIG_SMP + seq_printf(p, "%*d: ", prec, i); for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#else - seq_printf(p, "%10u ", kstat_irqs(i)); -#endif /* CONFIG_SMP */ if (desc->chip) - seq_printf(p, " %s ", desc->chip->name); + seq_printf(p, " %-16s", desc->chip->name); else - seq_puts(p, " None "); + seq_printf(p, " %-16s", "None"); + seq_printf(p, " %-8s", (desc->status & IRQ_LEVEL) ? "Level" : "Edge"); - seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge "); - seq_printf(p, " %s", action->name); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } - for (action = action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); - -skip: +out: raw_spin_unlock_irqrestore(&desc->lock, flags); - return 0; } +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = per_cpu(irq_stat, cpu).timer_irqs; + + sum += per_cpu(irq_stat, cpu).pmu_irqs; + sum += per_cpu(irq_stat, cpu).mce_exceptions; + sum += per_cpu(irq_stat, cpu).spurious_irqs; + + return sum; +} + #ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(cpumask_t map) +void fixup_irqs(const struct cpumask *map) { struct irq_desc *desc; unsigned int irq; static int warned; + cpumask_var_t mask; - for_each_irq(irq) { - cpumask_t mask; + alloc_cpumask_var(&mask, GFP_KERNEL); + for_each_irq(irq) { desc = irq_to_desc(irq); - if (desc && desc->status & IRQ_PER_CPU) + if (!desc) + continue; + + if (desc->status & IRQ_PER_CPU) continue; - cpumask_and(&mask, desc->affinity, &map); - if (any_online_cpu(mask) == NR_CPUS) { + cpumask_and(mask, 
desc->affinity, map); + if (cpumask_any(mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); - mask = map; + cpumask_copy(mask, map); } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, &mask); + desc->chip->set_affinity(irq, mask); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } + free_cpumask_var(mask); + local_irq_enable(); mdelay(1); local_irq_disable(); } #endif -#ifdef CONFIG_IRQSTACKS static inline void handle_one_irq(unsigned int irq) { struct thread_info *curtp, *irqtp; @@ -314,12 +360,6 @@ static inline void handle_one_irq(unsigned int irq) if (irqtp->flags) set_bits(irqtp->flags, &curtp->flags); } -#else -static inline void handle_one_irq(unsigned int irq) -{ - generic_handle_irq(irq); -} -#endif static inline void check_stack_overflow(void) { @@ -353,8 +393,7 @@ void do_IRQ(struct pt_regs *regs) if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) handle_one_irq(irq); else if (irq != NO_IRQ_IGNORE) - /* That's not SMP safe ... but who cares ? 
*/ - ppc_spurious_interrupts++; + __get_cpu_var(irq_stat).spurious_irqs++; irq_exit(); set_irq_regs(old_regs); @@ -412,7 +451,6 @@ void exc_lvl_ctx_init(void) } #endif -#ifdef CONFIG_IRQSTACKS struct thread_info *softirq_ctx[NR_CPUS] __read_mostly; struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly; @@ -449,10 +487,6 @@ static inline void do_softirq_onstack(void) irqtp->task = NULL; } -#else -#define do_softirq_onstack() __do_softirq() -#endif /* CONFIG_IRQSTACKS */ - void do_softirq(void) { unsigned long flags; @@ -474,7 +508,7 @@ void do_softirq(void) */ static LIST_HEAD(irq_hosts); -static DEFINE_SPINLOCK(irq_big_lock); +static DEFINE_RAW_SPINLOCK(irq_big_lock); static unsigned int revmap_trees_allocated; static DEFINE_MUTEX(revmap_trees_mutex); struct irq_map_entry irq_map[NR_IRQS]; @@ -520,14 +554,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, if (host->ops->match == NULL) host->ops->match = default_irq_host_match; - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); /* If it's a legacy controller, check for duplicates and * mark it as allocated (we use irq 0 host pointer for that */ if (revmap_type == IRQ_HOST_MAP_LEGACY) { if (irq_map[0].host != NULL) { - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); /* If we are early boot, we can't free the structure, * too bad... * this will be fixed once slab is made available early @@ -541,7 +575,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, } list_add(&host->link, &irq_hosts); - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); /* Additional setups per revmap type */ switch(revmap_type) { @@ -592,13 +626,13 @@ struct irq_host *irq_find_host(struct device_node *node) * the absence of a device node. This isn't a problem so far * yet though... 
*/ - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); list_for_each_entry(h, &irq_hosts, link) if (h->ops->match(h, node)) { found = h; break; } - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); return found; } EXPORT_SYMBOL_GPL(irq_find_host); @@ -967,7 +1001,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS)) return NO_IRQ; - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); /* Use hint for 1 interrupt if any */ if (count == 1 && hint >= NUM_ISA_INTERRUPTS && @@ -991,7 +1025,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, } } if (found == NO_IRQ) { - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); return NO_IRQ; } hint_found: @@ -1000,7 +1034,7 @@ unsigned int irq_alloc_virt(struct irq_host *host, smp_wmb(); irq_map[i].host = host; } - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); return found; } @@ -1012,7 +1046,7 @@ void irq_free_virt(unsigned int virq, unsigned int count) WARN_ON (virq < NUM_ISA_INTERRUPTS); WARN_ON (count == 0 || (virq + count) > irq_virq_count); - spin_lock_irqsave(&irq_big_lock, flags); + raw_spin_lock_irqsave(&irq_big_lock, flags); for (i = virq; i < (virq + count); i++) { struct irq_host *host; @@ -1025,7 +1059,7 @@ void irq_free_virt(unsigned int virq, unsigned int count) smp_wmb(); irq_map[i].host = NULL; } - spin_unlock_irqrestore(&irq_big_lock, flags); + raw_spin_unlock_irqrestore(&irq_big_lock, flags); } int arch_early_irq_init(void) diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index b6bd1eaa1c24..82a7b228c81a 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -20,6 +20,7 @@ #include <linux/smp.h> #include <linux/signal.h> #include <linux/ptrace.h> +#include <linux/kdebug.h> #include 
<asm/current.h> #include <asm/processor.h> #include <asm/machdep.h> @@ -115,7 +116,8 @@ void kgdb_roundup_cpus(unsigned long flags) /* KGDB functions to use existing PowerPC64 hooks. */ static int kgdb_debugger(struct pt_regs *regs) { - return kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); + return !kgdb_handle_exception(1, computeSignal(TRAP(regs)), + DIE_OOPS, regs); } static int kgdb_handle_breakpoint(struct pt_regs *regs) @@ -123,7 +125,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) if (user_mode(regs)) return 0; - if (kgdb_handle_exception(0, SIGTRAP, 0, regs) != 0) + if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0) return 0; if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) @@ -309,6 +311,11 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); } +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->nip = pc; +} + /* * This function does PowerPC specific procesing for interfacing to gdb. 
*/ @@ -333,7 +340,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, atomic_set(&kgdb_cpu_doing_single_step, -1); /* set the trace bit if we're stepping */ if (remcom_in_buffer[0] == 's') { -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); linux_regs->msr |= MSR_DE; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index c9329786073b..bc47352deb1f 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -31,12 +31,13 @@ #include <linux/preempt.h> #include <linux/module.h> #include <linux/kdebug.h> +#include <linux/slab.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/uaccess.h> #include <asm/system.h> -#ifdef CONFIG_BOOKE +#ifdef CONFIG_PPC_ADV_DEBUG_REGS #define MSR_SINGLESTEP (MSR_DE) #else #define MSR_SINGLESTEP (MSR_SE) @@ -110,9 +111,12 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) * like Decrementer or External Interrupt */ regs->msr &= ~MSR_EE; regs->msr |= MSR_SINGLESTEP; -#ifdef CONFIG_BOOKE +#ifdef CONFIG_PPC_ADV_DEBUG_REGS regs->msr &= ~MSR_CE; mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); +#ifdef CONFIG_PPC_47x + isync(); +#endif #endif /* @@ -374,17 +378,6 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * single-stepped a copy of the instruction. The address of this * copy is p->ainsn.insn. 
*/ -static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) -{ - int ret; - unsigned int insn = *p->ainsn.insn; - - regs->nip = (unsigned long)p->addr; - ret = emulate_step(regs, insn); - if (ret == 0) - regs->nip = (unsigned long)p->addr + 4; -} - static int __kprobes post_kprobe_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); @@ -402,7 +395,8 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) cur->post_handler(cur, regs, 0); } - resume_execution(cur, regs); + /* Adjust nip to after the single-stepped instruction */ + regs->nip = (unsigned long)cur->addr + 4; regs->msr |= kcb->kprobe_saved_msr; /*Restore back the original saved kprobes variables and continue. */ diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 9ddfaef1a184..035ada5443ee 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -469,7 +469,7 @@ static int __init serial_dev_init(void) return -ENODEV; /* - * Before we register the platfrom serial devices, we need + * Before we register the platform serial devices, we need * to fixup their interrupts and their IO ports. 
*/ DBG("Fixing serial ports interrupts and IO ports ...\n"); diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 79a00bb9c64c..50362b6ef6e9 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -24,6 +24,7 @@ #include <linux/proc_fs.h> #include <linux/init.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/iseries/hv_lp_config.h> #include <asm/lppaca.h> @@ -37,7 +38,7 @@ #include <asm/vio.h> #include <asm/mmu.h> -#define MODULE_VERS "1.8" +#define MODULE_VERS "1.9" #define MODULE_NAME "lparcfg" /* #define LPARCFG_DEBUG */ @@ -359,7 +360,7 @@ static void parse_system_parameter_string(struct seq_file *m) unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); if (!local_buffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d \n", + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", __FILE__, __func__, __LINE__); return; } @@ -383,13 +384,13 @@ static void parse_system_parameter_string(struct seq_file *m) int idx, w_idx; char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); if (!workbuffer) { - printk(KERN_ERR "%s %s kmalloc failure at line %d \n", + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", __FILE__, __func__, __LINE__); kfree(local_buffer); return; } #ifdef LPARCFG_DEBUG - printk(KERN_INFO "success calling get-system-parameter \n"); + printk(KERN_INFO "success calling get-system-parameter\n"); #endif splpar_strlen = local_buffer[0] * 256 + local_buffer[1]; local_buffer += 2; /* step over strlen value */ @@ -440,7 +441,7 @@ static int lparcfg_count_active_processors(void) while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { #ifdef LPARCFG_DEBUG - printk(KERN_ERR "cpus_dn %p \n", cpus_dn); + printk(KERN_ERR "cpus_dn %p\n", cpus_dn); #endif count++; } @@ -486,6 +487,14 @@ static void splpar_dispatch_data(struct seq_file *m) seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions); } +static void parse_em_data(struct 
seq_file *m) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) + seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); +} + static int pseries_lparcfg_data(struct seq_file *m, void *v) { int partition_potential_processors; @@ -540,6 +549,8 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "slb_size=%d\n", mmu_slb_size); + parse_em_data(m); + return 0; } @@ -725,7 +736,7 @@ static int lparcfg_data(struct seq_file *m, void *v) const unsigned int *lp_index_ptr; unsigned int lp_index = 0; - seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS); + seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS); rootdn = of_find_node_by_path("/"); if (rootdn) { diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index bb3d893a8353..89f005116aac 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -12,7 +12,7 @@ #include <linux/kexec.h> #include <linux/reboot.h> #include <linux/threads.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/of.h> #include <asm/machdep.h> #include <asm/prom.h> @@ -66,11 +66,11 @@ void __init reserve_crashkernel(void) unsigned long long crash_size, crash_base; int ret; - /* this is necessary because of lmb_phys_mem_size() */ - lmb_analyze(); + /* this is necessary because of memblock_phys_mem_size() */ + memblock_analyze(); /* use common parsing */ - ret = parse_crashkernel(boot_command_line, lmb_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; @@ -133,9 +133,9 @@ void __init reserve_crashkernel(void) "for crashkernel (System RAM: %ldMB)\n", (unsigned long)(crash_size >> 20), (unsigned long)(crashk_res.start >> 20), - (unsigned long)(lmb_phys_mem_size() >> 20)); + (unsigned long)(memblock_phys_mem_size() >> 20)); - lmb_reserve(crashk_res.start, 
crash_size); + memblock_reserve(crashk_res.start, crash_size); } int overlaps_crashkernel(unsigned long start, unsigned long size) diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 040bd1de8d99..ed31a29c4ff7 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -155,66 +155,70 @@ void kexec_copy_flush(struct kimage *image) #ifdef CONFIG_SMP -/* FIXME: we should schedule this function to be called on all cpus based - * on calling the interrupts, but we would like to call it off irq level - * so that the interrupt controller is clean. - */ +static int kexec_all_irq_disabled = 0; + static void kexec_smp_down(void *arg) { + local_irq_disable(); + mb(); /* make sure our irqs are disabled before we say they are */ + get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; + while(kexec_all_irq_disabled == 0) + cpu_relax(); + mb(); /* make sure all irqs are disabled before this */ + /* + * Now every CPU has IRQs off, we can clear out any pending + * IPIs and be sure that no more will come in after this. 
+ */ if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 1); - local_irq_disable(); kexec_smp_wait(); /* NOTREACHED */ } -static void kexec_prepare_cpus(void) +static void kexec_prepare_cpus_wait(int wait_state) { int my_cpu, i, notified=-1; - smp_call_function(kexec_smp_down, NULL, /* wait */0); my_cpu = get_cpu(); - - /* check the others cpus are now down (via paca hw cpu id == -1) */ - for (i=0; i < NR_CPUS; i++) { + /* Make sure each CPU has atleast made it to the state we need */ + for_each_online_cpu(i) { if (i == my_cpu) continue; - while (paca[i].hw_cpu_id != -1) { + while (paca[i].kexec_state < wait_state) { barrier(); - if (!cpu_possible(i)) { - printk("kexec: cpu %d hw_cpu_id %d is not" - " possible, ignoring\n", - i, paca[i].hw_cpu_id); - break; - } - if (!cpu_online(i)) { - /* Fixme: this can be spinning in - * pSeries_secondary_wait with a paca - * waiting for it to go online. - */ - printk("kexec: cpu %d hw_cpu_id %d is not" - " online, ignoring\n", - i, paca[i].hw_cpu_id); - break; - } if (i != notified) { printk( "kexec: waiting for cpu %d (physical" - " %d) to go down\n", - i, paca[i].hw_cpu_id); + " %d) to enter %i state\n", + i, paca[i].hw_cpu_id, wait_state); notified = i; } } } + mb(); +} + +static void kexec_prepare_cpus(void) +{ + + smp_call_function(kexec_smp_down, NULL, /* wait */0); + local_irq_disable(); + mb(); /* make sure IRQs are disabled before we say they are */ + get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; + + kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF); + /* we are sure every CPU has IRQs off at this point */ + kexec_all_irq_disabled = 1; /* after we tell the others to go down */ if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(0, 0); - put_cpu(); + /* Before removing MMU mapings make sure all CPUs have entered real mode */ + kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE); - local_irq_disable(); + put_cpu(); } #else /* ! 
SMP */ diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 2d29752cbe16..22e507c8a556 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -127,3 +127,29 @@ _GLOBAL(__setup_cpu_power7) _GLOBAL(__restore_cpu_power7) /* place holder */ blr + +/* + * Get a minimal set of registers for our caller's nth caller. + * r3 = regs pointer, r5 = n. + * + * We only get R1 (stack pointer), NIP (next instruction pointer) + * and LR (link register). These are all we can get in the + * general case without doing complicated stack unwinding, but + * fortunately they are enough to do a stack backtrace, which + * is all we need them for. + */ +_GLOBAL(perf_arch_fetch_caller_regs) + mr r6,r1 + cmpwi r5,0 + mflr r4 + ble 2f + mtctr r5 +1: PPC_LL r6,0(r6) + bdnz 1b + PPC_LL r4,PPC_LR_STKOFF(r6) +2: PPC_LL r7,0(r6) + PPC_LL r7,PPC_LR_STKOFF(r7) + PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3) + PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3) + blr diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 8649f536f8df..6bbd7a604d24 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -33,7 +33,6 @@ .text -#ifdef CONFIG_IRQSTACKS _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) @@ -56,7 +55,6 @@ _GLOBAL(call_handle_irq) lwz r0,4(r1) mtlr r0 blr -#endif /* CONFIG_IRQSTACKS */ /* * This returns the high 64 bits of the product of two 64-bit numbers. @@ -441,7 +439,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) addi r3,r3,L1_CACHE_BYTES bdnz 0b sync -#ifndef CONFIG_44x +#ifdef CONFIG_44x /* We don't flush the icache on 44x. Those have a virtual icache * and we don't have access to the virtual address here (it's * not the page vaddr but where it's mapped in user space). 
The @@ -449,15 +447,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) * a change in the address space occurs, before returning to * user space */ +BEGIN_MMU_FTR_SECTION + blr +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) +#endif /* CONFIG_44x */ mtctr r4 1: icbi 0,r6 addi r6,r6,L1_CACHE_BYTES bdnz 1b sync isync -#endif /* CONFIG_44x */ blr +#ifndef CONFIG_BOOKE /* * Flush a particular page from the data cache to RAM, identified * by its physical address. We turn off the MMU so we can just use @@ -490,6 +492,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mtmsr r10 /* restore DR */ isync blr +#endif /* CONFIG_BOOKE */ /* * Clear pages using the dcbz instruction, which doesn't cause any @@ -706,6 +709,22 @@ relocate_new_kernel: /* r4 = reboot_code_buffer */ /* r5 = start_address */ +#ifdef CONFIG_FSL_BOOKE + + mr r29, r3 + mr r30, r4 + mr r31, r5 + +#define ENTRY_MAPPING_KEXEC_SETUP +#include "fsl_booke_entry_mapping.S" +#undef ENTRY_MAPPING_KEXEC_SETUP + + mr r3, r29 + mr r4, r30 + mr r5, r31 + + li r0, 0 +#else li r0, 0 /* @@ -722,6 +741,7 @@ relocate_new_kernel: rfi 1: +#endif /* from this point address translation is turned off */ /* and interrupts are disabled */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index a5cf9c1356a6..e5144906a56d 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -24,10 +24,10 @@ #include <asm/asm-offsets.h> #include <asm/cputable.h> #include <asm/thread_info.h> +#include <asm/kexec.h> .text -#ifdef CONFIG_IRQSTACKS _GLOBAL(call_do_softirq) mflr r0 std r0,16(r1) @@ -51,7 +51,6 @@ _GLOBAL(call_handle_irq) ld r0,16(r1) mtlr r0 blr -#endif /* CONFIG_IRQSTACKS */ .section ".toc","aw" PPC64_CACHES: @@ -471,6 +470,10 @@ _GLOBAL(kexec_wait) 1: mflr r5 addi r5,r5,kexec_flag-1b + li r4,KEXEC_STATE_REAL_MODE + stb r4,PACAKEXECSTATE(r13) + SYNC + 99: HMT_LOW #ifdef CONFIG_KEXEC /* use no memory without kexec */ lwz r4,0(r5) @@ -494,14 +497,11 @@ kexec_flag: * note: this is a 
terminal routine, it does not save lr * * get phys id from paca - * set paca id to -1 to say we got here * switch to real mode * join other cpus in kexec_wait(phys_id) */ _GLOBAL(kexec_smp_wait) lhz r3,PACAHWCPUID(r13) - li r4,-1 - sth r4,PACAHWCPUID(r13) /* let others know we left */ bl real_mode b .kexec_wait diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index ad461e735aec..9cf197f01e94 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -338,8 +338,8 @@ static int __init nvram_create_os_partition(void) rc = nvram_write_header(new_part); if (rc <= 0) { - printk(KERN_ERR "nvram_create_os_partition: nvram_write_header \ - failed (%d)\n", rc); + printk(KERN_ERR "nvram_create_os_partition: nvram_write_header " + "failed (%d)\n", rc); return rc; } @@ -349,7 +349,7 @@ static int __init nvram_create_os_partition(void) rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index); if (rc <= 0) { printk(KERN_ERR "nvram_create_os_partition: nvram_write " - "failed (%d)\n", rc); + "failed (%d)\n", rc); return rc; } diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c index a359cb08e900..df78e0236a02 100644 --- a/arch/powerpc/kernel/of_device.c +++ b/arch/powerpc/kernel/of_device.c @@ -13,7 +13,7 @@ static void of_device_make_bus_id(struct of_device *dev) { static atomic_t bus_no_reg_magic; - struct device_node *node = dev->node; + struct device_node *node = dev->dev.of_node; const u32 *reg; u64 addr; int magic; @@ -69,11 +69,10 @@ struct of_device *of_device_alloc(struct device_node *np, if (!dev) return NULL; - dev->node = of_node_get(np); - dev->dev.dma_mask = &dev->dma_mask; + dev->dev.of_node = of_node_get(np); + dev->dev.dma_mask = &dev->archdata.dma_mask; dev->dev.parent = parent; dev->dev.release = of_release_dev; - dev->dev.archdata.of_node = np; if (bus_id) dev_set_name(&dev->dev, "%s", bus_id); @@ -95,17 +94,17 @@ int of_device_uevent(struct device *dev, struct 
kobj_uevent_env *env) ofdev = to_of_device(dev); - if (add_uevent_var(env, "OF_NAME=%s", ofdev->node->name)) + if (add_uevent_var(env, "OF_NAME=%s", ofdev->dev.of_node->name)) return -ENOMEM; - if (add_uevent_var(env, "OF_TYPE=%s", ofdev->node->type)) + if (add_uevent_var(env, "OF_TYPE=%s", ofdev->dev.of_node->type)) return -ENOMEM; /* Since the compatible field can contain pretty much anything * it's not really legal to split it out with commas. We split it * up using a number of environment variables instead. */ - compat = of_get_property(ofdev->node, "compatible", &cplen); + compat = of_get_property(ofdev->dev.of_node, "compatible", &cplen); while (compat && *compat && cplen > 0) { if (add_uevent_var(env, "OF_COMPATIBLE_%d=%s", seen, compat)) return -ENOMEM; diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 1a4fc0d11a03..487a98851ba6 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -17,7 +17,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mod_devicetable.h> -#include <linux/slab.h> #include <linux/pci.h> #include <linux/of.h> #include <linux/of_device.h> @@ -75,7 +74,7 @@ struct of_device* of_platform_device_create(struct device_node *np, if (!dev) return NULL; - dev->dma_mask = 0xffffffffUL; + dev->archdata.dma_mask = 0xffffffffUL; dev->dev.coherent_dma_mask = DMA_BIT_MASK(32); dev->dev.bus = &of_platform_bus_type; @@ -196,7 +195,7 @@ EXPORT_SYMBOL(of_platform_bus_probe); static int of_dev_node_match(struct device *dev, void *data) { - return to_of_device(dev)->node == data; + return to_of_device(dev)->dev.of_node == data; } struct of_device *of_find_device_by_node(struct device_node *np) @@ -214,7 +213,7 @@ EXPORT_SYMBOL(of_find_device_by_node); static int of_dev_phandle_match(struct device *dev, void *data) { phandle *ph = data; - return to_of_device(dev)->node->linux_phandle == *ph; + return to_of_device(dev)->dev.of_node->phandle == *ph; } struct of_device 
*of_find_device_by_phandle(phandle ph) @@ -247,10 +246,10 @@ static int __devinit of_pci_phb_probe(struct of_device *dev, if (ppc_md.pci_setup_phb == NULL) return -ENODEV; - printk(KERN_INFO "Setting up PCI bus %s\n", dev->node->full_name); + pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name); /* Alloc and setup PHB data structure */ - phb = pcibios_alloc_controller(dev->node); + phb = pcibios_alloc_controller(dev->dev.of_node); if (!phb) return -ENODEV; @@ -264,19 +263,19 @@ static int __devinit of_pci_phb_probe(struct of_device *dev, } /* Process "ranges" property */ - pci_process_bridge_OF_ranges(phb, dev->node, 0); + pci_process_bridge_OF_ranges(phb, dev->dev.of_node, 0); /* Init pci_dn data structures */ pci_devs_phb_init_dynamic(phb); /* Register devices with EEH */ #ifdef CONFIG_EEH - if (dev->node->child) - eeh_add_device_tree_early(dev->node); + if (dev->dev.of_node->child) + eeh_add_device_tree_early(dev->dev.of_node); #endif /* CONFIG_EEH */ /* Scan the bus */ - pcibios_scan_phb(phb, dev->node); + pcibios_scan_phb(phb, dev->dev.of_node); if (phb->bus == NULL) return -ENXIO; @@ -307,10 +306,11 @@ static struct of_device_id of_pci_phb_ids[] = { }; static struct of_platform_driver of_pci_phb_driver = { - .match_table = of_pci_phb_ids, .probe = of_pci_phb_probe, .driver = { .name = "of-pci", + .owner = THIS_MODULE, + .of_match_table = of_pci_phb_ids, }, }; diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d16b1ea55d44..139a773853f4 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -9,11 +9,16 @@ #include <linux/threads.h> #include <linux/module.h> +#include <linux/memblock.h> +#include <asm/firmware.h> #include <asm/lppaca.h> #include <asm/paca.h> #include <asm/sections.h> #include <asm/pgtable.h> +#include <asm/iseries/lpar_map.h> +#include <asm/iseries/hv_types.h> +#include <asm/kexec.h> /* This symbol is provided by the linker - let it fill in the paca * field correctly */ @@ -70,37 +75,83 @@ 
struct slb_shadow slb_shadow[] __cacheline_aligned = { * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -struct paca_struct paca[NR_CPUS]; +struct paca_struct *paca; EXPORT_SYMBOL(paca); -void __init initialise_pacas(void) -{ - int cpu; +struct paca_struct boot_paca; - /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB - * of the TOC can be addressed using a single machine instruction. - */ +void __init initialise_paca(struct paca_struct *new_paca, int cpu) +{ + /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB + * of the TOC can be addressed using a single machine instruction. + */ unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; - /* Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - struct paca_struct *new_paca = &paca[cpu]; - #ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lppaca_ptr = &lppaca[cpu]; #else - new_paca->kernel_pgd = swapper_pg_dir; + new_paca->kernel_pgd = swapper_pg_dir; #endif - new_paca->lock_token = 0x8000; - new_paca->paca_index = cpu; - new_paca->kernel_toc = kernel_toc; - new_paca->kernelbase = (unsigned long) _stext; - new_paca->kernel_msr = MSR_KERNEL; - new_paca->hw_cpu_id = 0xffff; - new_paca->__current = &init_task; + new_paca->lock_token = 0x8000; + new_paca->paca_index = cpu; + new_paca->kernel_toc = kernel_toc; + new_paca->kernelbase = (unsigned long) _stext; + new_paca->kernel_msr = MSR_KERNEL; + new_paca->hw_cpu_id = 0xffff; + new_paca->kexec_state = KEXEC_STATE_NONE; + new_paca->__current = &init_task; #ifdef CONFIG_PPC_STD_MMU_64 - new_paca->slb_shadow_ptr = &slb_shadow[cpu]; + new_paca->slb_shadow_ptr = &slb_shadow[cpu]; #endif /* CONFIG_PPC_STD_MMU_64 */ +} + +static int __initdata paca_size; + +void __init allocate_pacas(void) +{ + int nr_cpus, cpu, limit; + + /* + * We can't take SLB misses on the paca, and we want to access them + * in real mode, so 
allocate them within the RMA and also within + * the first segment. On iSeries they must be within the area mapped + * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. + */ + limit = min(0x10000000ULL, memblock.rmo_size); + if (firmware_has_feature(FW_FEATURE_ISERIES)) + limit = min(limit, HvPagesToMap * HVPAGESIZE); + + nr_cpus = NR_CPUS; + /* On iSeries we know we can never have more than 64 cpus */ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + nr_cpus = min(64, nr_cpus); + + paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); + + paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); + memset(paca, 0, paca_size); + + printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", + paca_size, nr_cpus, paca); + + /* Can't use for_each_*_cpu, as they aren't functional yet */ + for (cpu = 0; cpu < nr_cpus; cpu++) + initialise_paca(&paca[cpu], cpu); +} + +void __init free_unused_pacas(void) +{ + int new_size; + + new_size = PAGE_ALIGN(sizeof(struct paca_struct) * num_possible_cpus()); + + if (new_size >= paca_size) + return; + + memblock_free(__pa(paca) + new_size, paca_size - new_size); + + printk(KERN_DEBUG "Freed %u bytes for unused pacas\n", + paca_size - new_size); - } + paca_size = new_size; } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index cadbed679fbb..5b38f6ae2b29 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -26,6 +26,7 @@ #include <linux/syscalls.h> #include <linux/irq.h> #include <linux/vmalloc.h> +#include <linux/slab.h> #include <asm/processor.h> #include <asm/io.h> @@ -63,21 +64,6 @@ struct dma_map_ops *get_pci_dma_ops(void) } EXPORT_SYMBOL(get_pci_dma_ops); -int pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - return dma_set_mask(&dev->dev, mask); -} - -int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ - int rc; - - rc = dma_set_mask(&dev->dev, mask); - dev->dev.coherent_dma_mask = dev->dma_mask; - - return rc; -} - struct 
pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; @@ -1047,10 +1033,8 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) struct pci_dev *dev = bus->self; - for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) { - if ((res = bus->resource[i]) == NULL) - continue; - if (!res->flags) + pci_bus_for_each_resource(bus, res, i) { + if (!res || !res->flags) continue; if (i >= 3 && bus->self->transparent) continue; @@ -1113,8 +1097,8 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) if (dev->is_added) continue; - /* Setup OF node pointer in archdata */ - sd->of_node = pci_device_to_OF_node(dev); + /* Setup OF node pointer in the device */ + dev->dev.of_node = pci_device_to_OF_node(dev); /* Fixup NUMA node as it may not be setup yet by the generic * code and is needed by the DMA init @@ -1181,21 +1165,20 @@ static int skip_isa_ioresource_align(struct pci_dev *dev) * but we want to try to avoid allocating at 0x2900-0x2bff * which might have be mirrored at 0x0100-0x03ff.. 
*/ -void pcibios_align_resource(void *data, struct resource *res, +resource_size_t pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { struct pci_dev *dev = data; + resource_size_t start = res->start; if (res->flags & IORESOURCE_IO) { - resource_size_t start = res->start; - if (skip_isa_ioresource_align(dev)) - return; - if (start & 0x300) { + return start; + if (start & 0x300) start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } } + + return start; } EXPORT_SYMBOL(pcibios_align_resource); @@ -1278,9 +1261,8 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) pr_debug("PCI: Allocating bus resources for %04x:%02x...\n", pci_domain_nr(bus), bus->number); - for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) { - if ((res = bus->resource[i]) == NULL || !res->flags - || res->start > res->end || res->parent) + pci_bus_for_each_resource(bus, res, i) { + if (!res || !res->flags || res->start > res->end || res->parent) continue; if (bus->parent == NULL) pr = (res->flags & IORESOURCE_IO) ? @@ -1327,6 +1309,7 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) printk(KERN_WARNING "PCI: Cannot allocate resource region " "%d of PCI bridge %d, will remap\n", i, bus->number); clear_resource: + res->start = res->end = 0; res->flags = 0; } diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index c13668cf36d9..e7db5b48004a 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -14,6 +14,7 @@ #include <linux/irq.h> #include <linux/list.h> #include <linux/of.h> +#include <linux/slab.h> #include <asm/processor.h> #include <asm/io.h> diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index ccf56ac92de5..d43fc65749c1 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -224,7 +224,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, * G5 machines... 
So when something asks for bus 0 io base * (bus 0 is HT root), we return the AGP one instead. */ - if (in_bus == 0 && machine_is_compatible("MacRISC4")) { + if (in_bus == 0 && of_machine_is_compatible("MacRISC4")) { struct device_node *agp; agp = of_find_compatible_node(NULL, NULL, "u3-agp"); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index d5e36e5dc7c2..d56b35ee7f74 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -23,6 +23,7 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/gfp.h> #include <asm/io.h> #include <asm/prom.h> diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 7311fdfb9bf8..6ddb795f83e8 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -123,6 +123,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, { struct pci_dev *dev; const char *type; + struct pci_slot *slot; dev = alloc_pci_dev(); if (!dev) @@ -140,6 +141,11 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->devfn = devfn; dev->multifunction = 0; /* maybe a lie? 
*/ dev->needs_freset = 0; /* pcie fundamental reset required */ + set_pcie_port_type(dev); + + list_for_each_entry(slot, &dev->bus->slots, list) + if (PCI_SLOT(dev->devfn) == slot->number) + dev->slot = slot; dev->vendor = get_int_prop(node, "vendor-id", 0xffff); dev->device = get_int_prop(node, "device-id", 0xffff); @@ -160,10 +166,14 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->error_state = pci_channel_io_normal; dev->dma_mask = 0xffffffff; + /* Early fixups, before probing the BARs */ + pci_fixup_device(pci_fixup_early, dev); + if (!strcmp(type, "pci") || !strcmp(type, "pciex")) { /* a PCI-PCI bridge */ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE; dev->rom_base_reg = PCI_ROM_ADDRESS1; + set_pcie_hotplug_bridge(dev); } else if (!strcmp(type, "cardbus")) { dev->hdr_type = PCI_HEADER_TYPE_CARDBUS; } else { @@ -294,12 +304,14 @@ static void __devinit __of_scan_bus(struct device_node *node, int reglen, devfn; struct pci_dev *dev; - pr_debug("of_scan_bus(%s) bus no %d... \n", + pr_debug("of_scan_bus(%s) bus no %d...\n", node->full_name, bus->number); /* Scan direct children */ for_each_child_of_node(node, child) { pr_debug(" * %s\n", child->full_name); + if (!of_device_is_available(child)) + continue; reg = of_get_property(child, "reg", &reglen); if (reg == NULL || reglen < 20) continue; diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index a3c11cac3d71..95ad9dad298e 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -495,9 +495,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) entry->nr = 0; - if (current->pid == 0) /* idle task? 
*/ - return entry; - if (!user_mode(regs)) { perf_callchain_kernel(regs, entry); if (current->mm) diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 1eb85fbf53a5..5c14ffe51258 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -35,6 +35,9 @@ struct cpu_hw_events { u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + + unsigned int group_flag; + int n_txn_start; }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); @@ -718,66 +721,6 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static void event_sched_in(struct perf_event *event, int cpu) -{ - event->state = PERF_EVENT_STATE_ACTIVE; - event->oncpu = cpu; - event->tstamp_running += event->ctx->time - event->tstamp_stopped; - if (is_software_event(event)) - event->pmu->enable(event); -} - -/* - * Called to enable a whole group of events. - * Returns 1 if the group was enabled, or -EAGAIN if it could not be. - * Assumes the caller has disabled interrupts and has - * frozen the PMU with hw_perf_save_disable. 
- */ -int hw_perf_group_sched_in(struct perf_event *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx, int cpu) -{ - struct cpu_hw_events *cpuhw; - long i, n, n0; - struct perf_event *sub; - - if (!ppmu) - return 0; - cpuhw = &__get_cpu_var(cpu_hw_events); - n0 = cpuhw->n_events; - n = collect_events(group_leader, ppmu->n_counter - n0, - &cpuhw->event[n0], &cpuhw->events[n0], - &cpuhw->flags[n0]); - if (n < 0) - return -EAGAIN; - if (check_excludes(cpuhw->event, cpuhw->flags, n0, n)) - return -EAGAIN; - i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); - if (i < 0) - return -EAGAIN; - cpuhw->n_events = n0 + n; - cpuhw->n_added += n; - - /* - * OK, this group can go on; update event states etc., - * and enable any software events - */ - for (i = n0; i < n0 + n; ++i) - cpuhw->event[i]->hw.config = cpuhw->events[i]; - cpuctx->active_oncpu += n; - n = 1; - event_sched_in(group_leader, cpu); - list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { - if (sub->state != PERF_EVENT_STATE_OFF) { - event_sched_in(sub, cpu); - ++n; - } - } - ctx->nr_active += n; - - return 1; -} - /* * Add a event to the PMU. 
* If all events are not already frozen, then we disable and @@ -805,12 +748,22 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->event[n0] = event; cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + + /* + * If group events scheduling transaction was started, + * skip the schedulability test here, it will be peformed + * at commit time(->commit_txn) as a whole + */ + if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED) + goto nocheck; + if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) goto out; if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) goto out; - event->hw.config = cpuhw->events[n0]; + +nocheck: ++cpuhw->n_events; ++cpuhw->n_added; @@ -838,8 +791,11 @@ static void power_pmu_disable(struct perf_event *event) cpuhw = &__get_cpu_var(cpu_hw_events); for (i = 0; i < cpuhw->n_events; ++i) { if (event == cpuhw->event[i]) { - while (++i < cpuhw->n_events) + while (++i < cpuhw->n_events) { cpuhw->event[i-1] = cpuhw->event[i]; + cpuhw->events[i-1] = cpuhw->events[i]; + cpuhw->flags[i-1] = cpuhw->flags[i]; + } --cpuhw->n_events; ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); if (event->hw.idx) { @@ -896,11 +852,65 @@ static void power_pmu_unthrottle(struct perf_event *event) local_irq_restore(flags); } +/* + * Start group events scheduling transaction + * Set the flag to make pmu::enable() not perform the + * schedulability test, it will be performed at commit time + */ +void power_pmu_start_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; + cpuhw->n_txn_start = cpuhw->n_events; +} + +/* + * Stop group events scheduling transaction + * Clear the flag and pmu::enable() will perform the + * schedulability test. 
+ */ +void power_pmu_cancel_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; +} + +/* + * Commit group events scheduling transaction + * Perform the group schedulability test as a whole + * Return 0 if success + */ +int power_pmu_commit_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw; + long i, n; + + if (!ppmu) + return -EAGAIN; + cpuhw = &__get_cpu_var(cpu_hw_events); + n = cpuhw->n_events; + if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) + return -EAGAIN; + i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n); + if (i < 0) + return -EAGAIN; + + for (i = cpuhw->n_txn_start; i < n; ++i) + cpuhw->event[i]->hw.config = cpuhw->events[i]; + + return 0; +} + struct pmu power_pmu = { .enable = power_pmu_enable, .disable = power_pmu_disable, .read = power_pmu_read, .unthrottle = power_pmu_unthrottle, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, }; /* @@ -1164,10 +1174,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * Finally record data if requested. 
*/ if (record) { - struct perf_sample_data data = { - .addr = ~0ULL, - .period = event->hw.last_period, - }; + struct perf_sample_data data; + + perf_sample_data_init(&data, ~0ULL); + data.period = event->hw.last_period; if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); @@ -1287,7 +1297,7 @@ static void perf_event_interrupt(struct pt_regs *regs) irq_exit(); } -void hw_perf_event_setup(int cpu) +static void power_pmu_setup(int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -1297,6 +1307,23 @@ void hw_perf_event_setup(int cpu) cpuhw->mmcr[0] = MMCR0_FC; } +static int __cpuinit +power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + power_pmu_setup(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + int register_power_pmu(struct power_pmu *pmu) { if (ppmu) @@ -1314,5 +1341,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_cpu_notifier(power_pmu_notifier); + return 0; } diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c new file mode 100644 index 000000000000..babcceecd2ea --- /dev/null +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -0,0 +1,654 @@ +/* + * Performance event support - Freescale Embedded Performance Monitor + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> +#include <asm/reg_fsl_emb.h> +#include <asm/pmc.h> +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/ptrace.h> + +struct cpu_hw_events { + int n_events; + int disabled; + u8 pmcs_enabled; + struct perf_event *event[MAX_HWEVENTS]; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +static struct fsl_emb_pmu *ppmu; + +/* Number of perf_events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. + */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ +#ifdef __powerpc64__ + return !regs->softe; +#else + return 0; +#endif +} + +static void perf_event_interrupt(struct pt_regs *regs); + +/* + * Read one performance monitor counter (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 0: + val = mfpmr(PMRN_PMC0); + break; + case 1: + val = mfpmr(PMRN_PMC1); + break; + case 2: + val = mfpmr(PMRN_PMC2); + break; + case 3: + val = mfpmr(PMRN_PMC3); + break; + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. 
+ */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMC0, val); + break; + case 1: + mtpmr(PMRN_PMC1, val); + break; + case 2: + mtpmr(PMRN_PMC2, val); + break; + case 3: + mtpmr(PMRN_PMC3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } + + isync(); +} + +/* + * Write one local control A register + */ +static void write_pmlca(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMLCA0, val); + break; + case 1: + mtpmr(PMRN_PMLCA1, val); + break; + case 2: + mtpmr(PMRN_PMLCA2, val); + break; + case 3: + mtpmr(PMRN_PMLCA3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMLCA%d\n", idx); + } + + isync(); +} + +/* + * Write one local control B register + */ +static void write_pmlcb(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMLCB0, val); + break; + case 1: + mtpmr(PMRN_PMLCB1, val); + break; + case 2: + mtpmr(PMRN_PMLCB2, val); + break; + case 3: + mtpmr(PMRN_PMLCB3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMLCB%d\n", idx); + } + + isync(); +} + +static void fsl_emb_pmu_read(struct perf_event *event) +{ + s64 val, delta, prev; + + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&event->hw.prev_count); + barrier(); + val = read_pmc(event->hw.idx); + } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + /* The counters are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + atomic64_sub(delta, &event->hw.period_left); +} + +/* + * Disable all events to prevent PMU interrupts and to allow + * events to be added or removed. 
+ */ +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!cpuhw->disabled) { + cpuhw->disabled = 1; + + /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + ppc_enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + + if (atomic_read(&num_events)) { + /* + * Set the 'freeze all counters' bit, and disable + * interrupts. The barrier is to make sure the + * mtpmr has been executed and the PMU has frozen + * the events before we return. + */ + + mtpmr(PMRN_PMGC0, PMGC0_FAC); + isync(); + } + } + local_irq_restore(flags); +} + +/* + * Re-enable all events if disable == 0. + * If we were previously disabled and events were added, then + * put the new config on the PMU. + */ +void hw_perf_enable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + if (!cpuhw->disabled) + goto out; + + cpuhw->disabled = 0; + ppc_set_pmu_inuse(cpuhw->n_events != 0); + + if (cpuhw->n_events > 0) { + mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); + isync(); + } + + out: + local_irq_restore(flags); +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *ctrs[]) +{ + int n = 0; + struct perf_event *event; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + ctrs[n] = group; + n++; + } + list_for_each_entry(event, &group->sibling_list, group_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + ctrs[n] = event; + n++; + } + } + return n; +} + +/* perf must be disabled, context locked on entry */ +static int fsl_emb_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + int ret = -EAGAIN; + int num_counters = ppmu->n_counter; + u64 val; + int i; + + cpuhw = &get_cpu_var(cpu_hw_events); + + if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) + 
num_counters = ppmu->n_restricted; + + /* + * Allocate counters from top-down, so that restricted-capable + * counters are kept free as long as possible. + */ + for (i = num_counters - 1; i >= 0; i--) { + if (cpuhw->event[i]) + continue; + + break; + } + + if (i < 0) + goto out; + + event->hw.idx = i; + cpuhw->event[i] = event; + ++cpuhw->n_events; + + val = 0; + if (event->hw.sample_period) { + s64 left = atomic64_read(&event->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + atomic64_set(&event->hw.prev_count, val); + write_pmc(i, val); + perf_event_update_userpage(event); + + write_pmlcb(i, event->hw.config >> 32); + write_pmlca(i, event->hw.config_base); + + ret = 0; + out: + put_cpu_var(cpu_hw_events); + return ret; +} + +/* perf must be disabled, context locked on entry */ +static void fsl_emb_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + int i = event->hw.idx; + + if (i < 0) + goto out; + + fsl_emb_pmu_read(event); + + cpuhw = &get_cpu_var(cpu_hw_events); + + WARN_ON(event != cpuhw->event[event->hw.idx]); + + write_pmlca(i, 0); + write_pmlcb(i, 0); + write_pmc(i, 0); + + cpuhw->event[i] = NULL; + event->hw.idx = -1; + + /* + * TODO: if at least one restricted event exists, and we + * just freed up a non-restricted-capable counter, and + * there is a restricted-capable counter occupied by + * a non-restricted event, migrate that event to the + * vacated counter. + */ + + cpuhw->n_events--; + + out: + put_cpu_var(cpu_hw_events); +} + +/* + * Re-enable interrupts on a event after they were throttled + * because they were coming too fast. + * + * Context is locked on entry, but perf is not disabled. 
+ */ +static void fsl_emb_pmu_unthrottle(struct perf_event *event) +{ + s64 val, left; + unsigned long flags; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + local_irq_save(flags); + perf_disable(); + fsl_emb_pmu_read(event); + left = event->hw.sample_period; + event->hw.last_period = left; + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + perf_enable(); + local_irq_restore(flags); +} + +static struct pmu fsl_emb_pmu = { + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + +/* + * Release the PMU if this is the last perf_event. + */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* + * Translate a generic cache event_id config to a raw event_id code. 
+ */ +static int hw_perf_cache_event(u64 config, u64 *eventp) +{ + unsigned long type, op, result; + int ev; + + if (!ppmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*ppmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *eventp = ev; + return 0; +} + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + u64 ev; + struct perf_event *events[MAX_HWEVENTS]; + int n; + int err; + int num_restricted; + int i; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + ev = event->attr.config; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) + return ERR_PTR(-EOPNOTSUPP); + ev = ppmu->generic_events[ev]; + break; + + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(event->attr.config, &ev); + if (err) + return ERR_PTR(err); + break; + + case PERF_TYPE_RAW: + ev = event->attr.config; + break; + + default: + return ERR_PTR(-EINVAL); + } + + event->hw.config = ppmu->xlate_event(ev); + if (!(event->hw.config & FSL_EMB_EVENT_VALID)) + return ERR_PTR(-EINVAL); + + /* + * If this is in a group, check if it can go on with all the + * other hardware events in the group. We assume the event + * hasn't been linked into its leader's sibling list at this point. 
+ */ + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + ppmu->n_counter - 1, events); + if (n < 0) + return ERR_PTR(-EINVAL); + } + + if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { + num_restricted = 0; + for (i = 0; i < n; i++) { + if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED) + num_restricted++; + } + + if (num_restricted >= ppmu->n_restricted) + return ERR_PTR(-EINVAL); + } + + event->hw.idx = -1; + + event->hw.config_base = PMLCA_CE | PMLCA_FCM1 | + (u32)((ev << 16) & PMLCA_EVENT_MASK); + + if (event->attr.exclude_user) + event->hw.config_base |= PMLCA_FCU; + if (event->attr.exclude_kernel) + event->hw.config_base |= PMLCA_FCS; + if (event->attr.exclude_idle) + return ERR_PTR(-ENOTSUPP); + + event->hw.last_period = event->hw.sample_period; + atomic64_set(&event->hw.period_left, event->hw.last_period); + + /* + * See if we need to reserve the PMU. + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && + reserve_pmc_hardware(perf_event_interrupt)) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + + mtpmr(PMRN_PMGC0, PMGC0_FAC); + isync(); + } + event->destroy = hw_perf_event_destroy; + + if (err) + return ERR_PTR(err); + return &fsl_emb_pmu; +} + +/* + * A counter has overflowed; update its count and record + * things if requested. Note that interrupts are hard-disabled + * here so there is no possibility of being interrupted. 
+ */ +static void record_and_restart(struct perf_event *event, unsigned long val, + struct pt_regs *regs, int nmi) +{ + u64 period = event->hw.sample_period; + s64 prev, delta, left; + int record = 0; + + /* we don't have to worry about interrupts here */ + prev = atomic64_read(&event->hw.prev_count); + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + + /* + * See if the total period for this event has expired, + * and update for the next period. + */ + val = 0; + left = atomic64_read(&event->hw.period_left) - delta; + if (period) { + if (left <= 0) { + left += period; + if (left <= 0) + left = period; + record = 1; + } + if (left < 0x80000000LL) + val = 0x80000000LL - left; + } + + /* + * Finally record data if requested. + */ + if (record) { + struct perf_sample_data data; + + perf_sample_data_init(&data, 0); + + if (perf_event_overflow(event, nmi, &data, regs)) { + /* + * Interrupts are coming too fast - throttle them + * by setting the event to 0, so it will be + * at least 2^30 cycles until the next interrupt + * (assuming each event counts at most 2 counts + * per cycle). + */ + val = 0; + left = ~0ULL >> 1; + } + } + + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); +} + +static void perf_event_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + unsigned long val; + int found = 0; + int nmi; + + nmi = perf_intr_is_nmi(regs); + if (nmi) + nmi_enter(); + else + irq_enter(); + + for (i = 0; i < ppmu->n_counter; ++i) { + event = cpuhw->event[i]; + + val = read_pmc(i); + if ((int)val < 0) { + if (event) { + /* event has overflowed */ + found = 1; + record_and_restart(event, val, regs, nmi); + } else { + /* + * Disabled counter is negative, + * reset it just in case. 
+ */ + write_pmc(i, 0); + } + } + } + + /* PMM will keep counters frozen until we return from the interrupt. */ + mtmsr(mfmsr() | MSR_PMM); + mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); + isync(); + + if (nmi) + nmi_exit(); + else + irq_exit(); +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + memset(cpuhw, 0, sizeof(*cpuhw)); +} + +int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) +{ + if (ppmu) + return -EBUSY; /* something's already registered */ + + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); + + return 0; +} diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index 0516e2d3e02e..461499b43cff 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -37,7 +37,7 @@ static void dummy_perf(struct pt_regs *regs) } -static DEFINE_SPINLOCK(pmc_owner_lock); +static DEFINE_RAW_SPINLOCK(pmc_owner_lock); static void *pmc_owner_caller; /* mostly for debugging */ perf_irq_t perf_irq = dummy_perf; @@ -45,7 +45,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) { int err = 0; - spin_lock(&pmc_owner_lock); + raw_spin_lock(&pmc_owner_lock); if (pmc_owner_caller) { printk(KERN_WARNING "reserve_pmc_hardware: " @@ -59,21 +59,21 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) perf_irq = new_perf_irq ? new_perf_irq : dummy_perf; out: - spin_unlock(&pmc_owner_lock); + raw_spin_unlock(&pmc_owner_lock); return err; } EXPORT_SYMBOL_GPL(reserve_pmc_hardware); void release_pmc_hardware(void) { - spin_lock(&pmc_owner_lock); + raw_spin_lock(&pmc_owner_lock); WARN_ON(! 
pmc_owner_caller); pmc_owner_caller = NULL; perf_irq = dummy_perf; - spin_unlock(&pmc_owner_lock); + raw_spin_unlock(&pmc_owner_lock); } EXPORT_SYMBOL_GPL(release_pmc_hardware); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 425451453e96..3b4dcc82a4c1 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -101,12 +101,17 @@ EXPORT_SYMBOL(pci_dram_offset); EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(kernel_thread); +#ifdef CONFIG_PPC_FPU +EXPORT_SYMBOL_GPL(cvt_df); +EXPORT_SYMBOL_GPL(cvt_fd); +#endif EXPORT_SYMBOL(giveup_fpu); #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX EXPORT_SYMBOL(giveup_vsx); +EXPORT_SYMBOL_GPL(__giveup_vsx); #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE EXPORT_SYMBOL(giveup_spe); diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index 1ed3b8d7981e..c8ae3714e79b 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -19,7 +19,6 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/proc_fs.h> -#include <linux/slab.h> #include <linux/kernel.h> #include <asm/machdep.h> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c930ac38e59f..773424df828a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -245,6 +245,24 @@ void discard_lazy_cpu_state(void) } #endif /* CONFIG_SMP */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +void do_send_trap(struct pt_regs *regs, unsigned long address, + unsigned long error_code, int signal_code, int breakpt) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = breakpt; /* breakpoint or watchpoint id */ + info.si_code = signal_code; + info.si_addr = (void __user *)address; + force_sig_info(SIGTRAP, &info, current); 
+} +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ void do_dabr(struct pt_regs *regs, unsigned long address, unsigned long error_code) { @@ -257,12 +275,6 @@ void do_dabr(struct pt_regs *regs, unsigned long address, if (debugger_dabr_match(regs)) return; - /* Clear the DAC and struct entries. One shot trigger */ -#if defined(CONFIG_BOOKE) - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W - | DBCR0_IDM)); -#endif - /* Clear the DABR */ set_dabr(0); @@ -273,9 +285,82 @@ void do_dabr(struct pt_regs *regs, unsigned long address, info.si_addr = (void __user *)address; force_sig_info(SIGTRAP, &info, current); } +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ static DEFINE_PER_CPU(unsigned long, current_dabr); +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +/* + * Set the debug registers back to their default "safe" values. + */ +static void set_debug_reg_defaults(struct thread_struct *thread) +{ + thread->iac1 = thread->iac2 = 0; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + thread->iac3 = thread->iac4 = 0; +#endif + thread->dac1 = thread->dac2 = 0; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + thread->dvc1 = thread->dvc2 = 0; +#endif + thread->dbcr0 = 0; +#ifdef CONFIG_BOOKE + /* + * Force User/Supervisor bits to b11 (user-only MSR[PR]=1) + */ + thread->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | \ + DBCR1_IAC3US | DBCR1_IAC4US; + /* + * Force Data Address Compare User/Supervisor bits to be User-only + * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0. 
+ */ + thread->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; +#else + thread->dbcr1 = 0; +#endif +} + +static void prime_debug_regs(struct thread_struct *thread) +{ + mtspr(SPRN_IAC1, thread->iac1); + mtspr(SPRN_IAC2, thread->iac2); +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + mtspr(SPRN_IAC3, thread->iac3); + mtspr(SPRN_IAC4, thread->iac4); +#endif + mtspr(SPRN_DAC1, thread->dac1); + mtspr(SPRN_DAC2, thread->dac2); +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + mtspr(SPRN_DVC1, thread->dvc1); + mtspr(SPRN_DVC2, thread->dvc2); +#endif + mtspr(SPRN_DBCR0, thread->dbcr0); + mtspr(SPRN_DBCR1, thread->dbcr1); +#ifdef CONFIG_BOOKE + mtspr(SPRN_DBCR2, thread->dbcr2); +#endif +} +/* + * Unless neither the old or new thread are making use of the + * debug registers, set the debug registers from the values + * stored in the new thread. + */ +static void switch_booke_debug_regs(struct thread_struct *new_thread) +{ + if ((current->thread.dbcr0 & DBCR0_IDM) + || (new_thread->dbcr0 & DBCR0_IDM)) + prime_debug_regs(new_thread); +} +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ +static void set_debug_reg_defaults(struct thread_struct *thread) +{ + if (thread->dabr) { + thread->dabr = 0; + set_dabr(0); + } +} +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + int set_dabr(unsigned long dabr) { __get_cpu_var(current_dabr) = dabr; @@ -284,8 +369,11 @@ int set_dabr(unsigned long dabr) return ppc_md.set_dabr(dabr); /* XXX should we have a CPU_FTR_HAS_DABR ? 
*/ -#if defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DAC1, dabr); +#ifdef CONFIG_PPC_47x + isync(); +#endif #elif defined(CONFIG_PPC_BOOK3S) mtspr(SPRN_DABR, dabr); #endif @@ -371,10 +459,8 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_SMP */ -#if defined(CONFIG_BOOKE) - /* If new thread DAC (HW breakpoint) is the same then leave it */ - if (new->thread.dabr) - set_dabr(new->thread.dabr); +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + switch_booke_debug_regs(&new->thread); #else if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) set_dabr(new->thread.dabr); @@ -514,7 +600,7 @@ void show_regs(struct pt_regs * regs) printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); if (trap == 0x300 || trap == 0x600) -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); #else printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); @@ -554,28 +640,9 @@ void exit_thread(void) void flush_thread(void) { -#ifdef CONFIG_PPC64 - struct thread_info *t = current_thread_info(); - - if (test_ti_thread_flag(t, TIF_ABI_PENDING)) { - clear_ti_thread_flag(t, TIF_ABI_PENDING); - if (test_ti_thread_flag(t, TIF_32BIT)) - clear_ti_thread_flag(t, TIF_32BIT); - else - set_ti_thread_flag(t, TIF_32BIT); - } -#endif - discard_lazy_cpu_state(); - if (current->thread.dabr) { - current->thread.dabr = 0; - set_dabr(0); - -#if defined(CONFIG_BOOKE) - current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W); -#endif - } + set_debug_reg_defaults(¤t->thread); } void @@ -938,7 +1005,6 @@ out: return error; } -#ifdef CONFIG_IRQSTACKS static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, unsigned long nbytes) { @@ -963,10 +1029,6 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, return 0; } -#else -#define valid_irq_stack(sp, p, nb) 0 -#endif /* CONFIG_IRQSTACKS */ - int validate_sp(unsigned long sp, 
struct task_struct *p, unsigned long nbytes) { diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4ec300862466..fed9bf6187d1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -31,7 +31,7 @@ #include <linux/kexec.h> #include <linux/debugfs.h> #include <linux/irq.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -43,6 +43,7 @@ #include <asm/smp.h> #include <asm/system.h> #include <asm/mmu.h> +#include <asm/paca.h> #include <asm/pgtable.h> #include <asm/pci.h> #include <asm/iommu.h> @@ -61,365 +62,12 @@ #define DBG(fmt...) #endif - -static int __initdata dt_root_addr_cells; -static int __initdata dt_root_size_cells; - #ifdef CONFIG_PPC64 int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; #endif -typedef u32 cell_t; - -#if 0 -static struct boot_param_header *initial_boot_params __initdata; -#else -struct boot_param_header *initial_boot_params; -#endif - -extern struct device_node *allnodes; /* temporary while merging */ - -extern rwlock_t devtree_lock; /* temporary while merging */ - -/* export that to outside world */ -struct device_node *of_chosen; - -static inline char *find_flat_dt_string(u32 offset) -{ - return ((char *)initial_boot_params) + - initial_boot_params->off_dt_strings + offset; -} - -/** - * This function is used to scan the flattened device-tree, it is - * used to extract the memory informations at boot before we can - * unflatten the tree - */ -int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data) -{ - unsigned long p = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - int rc = 0; - int depth = -1; - - do { - u32 tag = *((u32 *)p); - char *pathp; - - p += 4; - if (tag == OF_DT_END_NODE) { - depth --; - continue; - } - if (tag == OF_DT_NOP) - continue; - if (tag == OF_DT_END) - break; - if (tag == 
OF_DT_PROP) { - u32 sz = *((u32 *)p); - p += 8; - if (initial_boot_params->version < 0x10) - p = _ALIGN(p, sz >= 8 ? 8 : 4); - p += sz; - p = _ALIGN(p, 4); - continue; - } - if (tag != OF_DT_BEGIN_NODE) { - printk(KERN_WARNING "Invalid tag %x scanning flattened" - " device tree !\n", tag); - return -EINVAL; - } - depth++; - pathp = (char *)p; - p = _ALIGN(p + strlen(pathp) + 1, 4); - if ((*pathp) == '/') { - char *lp, *np; - for (lp = NULL, np = pathp; *np; np++) - if ((*np) == '/') - lp = np+1; - if (lp != NULL) - pathp = lp; - } - rc = it(p, pathp, depth, data); - if (rc != 0) - break; - } while(1); - - return rc; -} - -unsigned long __init of_get_flat_dt_root(void) -{ - unsigned long p = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - - while(*((u32 *)p) == OF_DT_NOP) - p += 4; - BUG_ON (*((u32 *)p) != OF_DT_BEGIN_NODE); - p += 4; - return _ALIGN(p + strlen((char *)p) + 1, 4); -} - -/** - * This function can be used within scan_flattened_dt callback to get - * access to properties - */ -void* __init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size) -{ - unsigned long p = node; - - do { - u32 tag = *((u32 *)p); - u32 sz, noff; - const char *nstr; - - p += 4; - if (tag == OF_DT_NOP) - continue; - if (tag != OF_DT_PROP) - return NULL; - - sz = *((u32 *)p); - noff = *((u32 *)(p + 4)); - p += 8; - if (initial_boot_params->version < 0x10) - p = _ALIGN(p, sz >= 8 ? 
8 : 4); - - nstr = find_flat_dt_string(noff); - if (nstr == NULL) { - printk(KERN_WARNING "Can't find property index" - " name !\n"); - return NULL; - } - if (strcmp(name, nstr) == 0) { - if (size) - *size = sz; - return (void *)p; - } - p += sz; - p = _ALIGN(p, 4); - } while(1); -} - -int __init of_flat_dt_is_compatible(unsigned long node, const char *compat) -{ - const char* cp; - unsigned long cplen, l; - - cp = of_get_flat_dt_prop(node, "compatible", &cplen); - if (cp == NULL) - return 0; - while (cplen > 0) { - if (strncasecmp(cp, compat, strlen(compat)) == 0) - return 1; - l = strlen(cp) + 1; - cp += l; - cplen -= l; - } - - return 0; -} - -static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, - unsigned long align) -{ - void *res; - - *mem = _ALIGN(*mem, align); - res = (void *)*mem; - *mem += size; - - return res; -} - -static unsigned long __init unflatten_dt_node(unsigned long mem, - unsigned long *p, - struct device_node *dad, - struct device_node ***allnextpp, - unsigned long fpsize) -{ - struct device_node *np; - struct property *pp, **prev_pp = NULL; - char *pathp; - u32 tag; - unsigned int l, allocl; - int has_name = 0; - int new_format = 0; - - tag = *((u32 *)(*p)); - if (tag != OF_DT_BEGIN_NODE) { - printk("Weird tag at start of node: %x\n", tag); - return mem; - } - *p += 4; - pathp = (char *)*p; - l = allocl = strlen(pathp) + 1; - *p = _ALIGN(*p + l, 4); - - /* version 0x10 has a more compact unit name here instead of the full - * path. we accumulate the full path size using "fpsize", we'll rebuild - * it later. We detect this because the first character of the name is - * not '/'. - */ - if ((*pathp) != '/') { - new_format = 1; - if (fpsize == 0) { - /* root node: special case. fpsize accounts for path - * plus terminating zero. 
root node only has '/', so - * fpsize should be 2, but we want to avoid the first - * level nodes to have two '/' so we use fpsize 1 here - */ - fpsize = 1; - allocl = 2; - } else { - /* account for '/' and path size minus terminal 0 - * already in 'l' - */ - fpsize += l; - allocl = fpsize; - } - } - - - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, - __alignof__(struct device_node)); - if (allnextpp) { - memset(np, 0, sizeof(*np)); - np->full_name = ((char*)np) + sizeof(struct device_node); - if (new_format) { - char *p = np->full_name; - /* rebuild full path for new format */ - if (dad && dad->parent) { - strcpy(p, dad->full_name); -#ifdef DEBUG - if ((strlen(p) + l + 1) != allocl) { - DBG("%s: p: %d, l: %d, a: %d\n", - pathp, (int)strlen(p), l, allocl); - } -#endif - p += strlen(p); - } - *(p++) = '/'; - memcpy(p, pathp, l); - } else - memcpy(np->full_name, pathp, l); - prev_pp = &np->properties; - **allnextpp = np; - *allnextpp = &np->allnext; - if (dad != NULL) { - np->parent = dad; - /* we temporarily use the next field as `last_child'*/ - if (dad->next == 0) - dad->child = np; - else - dad->next->sibling = np; - dad->next = np; - } - kref_init(&np->kref); - } - while(1) { - u32 sz, noff; - char *pname; - - tag = *((u32 *)(*p)); - if (tag == OF_DT_NOP) { - *p += 4; - continue; - } - if (tag != OF_DT_PROP) - break; - *p += 4; - sz = *((u32 *)(*p)); - noff = *((u32 *)((*p) + 4)); - *p += 8; - if (initial_boot_params->version < 0x10) - *p = _ALIGN(*p, sz >= 8 ? 
8 : 4); - - pname = find_flat_dt_string(noff); - if (pname == NULL) { - printk("Can't find property name in list !\n"); - break; - } - if (strcmp(pname, "name") == 0) - has_name = 1; - l = strlen(pname) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property), - __alignof__(struct property)); - if (allnextpp) { - if (strcmp(pname, "linux,phandle") == 0) { - np->node = *((u32 *)*p); - if (np->linux_phandle == 0) - np->linux_phandle = np->node; - } - if (strcmp(pname, "ibm,phandle") == 0) - np->linux_phandle = *((u32 *)*p); - pp->name = pname; - pp->length = sz; - pp->value = (void *)*p; - *prev_pp = pp; - prev_pp = &pp->next; - } - *p = _ALIGN((*p) + sz, 4); - } - /* with version 0x10 we may not have the name property, recreate - * it here from the unit name if absent - */ - if (!has_name) { - char *p = pathp, *ps = pathp, *pa = NULL; - int sz; - - while (*p) { - if ((*p) == '@') - pa = p; - if ((*p) == '/') - ps = p + 1; - p++; - } - if (pa < ps) - pa = p; - sz = (pa - ps) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, - __alignof__(struct property)); - if (allnextpp) { - pp->name = "name"; - pp->length = sz; - pp->value = pp + 1; - *prev_pp = pp; - prev_pp = &pp->next; - memcpy(pp->value, ps, sz - 1); - ((char *)pp->value)[sz - 1] = 0; - DBG("fixed up name for %s -> %s\n", pathp, - (char *)pp->value); - } - } - if (allnextpp) { - *prev_pp = NULL; - np->name = of_get_property(np, "name", NULL); - np->type = of_get_property(np, "device_type", NULL); - - if (!np->name) - np->name = "<NULL>"; - if (!np->type) - np->type = "<NULL>"; - } - while (tag == OF_DT_BEGIN_NODE) { - mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); - tag = *((u32 *)(*p)); - } - if (tag != OF_DT_END_NODE) { - printk("Weird tag at end of node: %x\n", tag); - return mem; - } - *p += 4; - return mem; -} - static int __init early_parse_mem(char *p) { if (!p) @@ -446,11 +94,11 @@ static void __init move_device_tree(void) DBG("-> move_device_tree\n"); start = 
__pa(initial_boot_params); - size = initial_boot_params->totalsize; + size = be32_to_cpu(initial_boot_params->totalsize); if ((memory_limit && (start + size) > memory_limit) || overlaps_crashkernel(start, size)) { - p = __va(lmb_alloc_base(size, PAGE_SIZE, lmb.rmo_size)); + p = __va(memblock_alloc_base(size, PAGE_SIZE, memblock.rmo_size)); memcpy(p, initial_boot_params, size); initial_boot_params = (struct boot_param_header *)p; DBG("Moved device tree to 0x%p\n", p); @@ -459,54 +107,6 @@ static void __init move_device_tree(void) DBG("<- move_device_tree\n"); } -/** - * unflattens the device-tree passed by the firmware, creating the - * tree of struct device_node. It also fills the "name" and "type" - * pointers of the nodes so the normal device-tree walking functions - * can be used (this used to be done by finish_device_tree) - */ -void __init unflatten_device_tree(void) -{ - unsigned long start, mem, size; - struct device_node **allnextp = &allnodes; - - DBG(" -> unflatten_device_tree()\n"); - - /* First pass, scan for size */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - size = unflatten_dt_node(0, &start, NULL, NULL, 0); - size = (size | 3) + 1; - - DBG(" size is %lx, allocating...\n", size); - - /* Allocate memory for the expanded device tree */ - mem = lmb_alloc(size + 4, __alignof__(struct device_node)); - mem = (unsigned long) __va(mem); - - ((u32 *)mem)[size / 4] = 0xdeadbeef; - - DBG(" unflattening %lx...\n", mem); - - /* Second pass, do actual unflattening */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - unflatten_dt_node(mem, &start, NULL, &allnextp, 0); - if (*((u32 *)start) != OF_DT_END) - printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start)); - if (((u32 *)mem)[size / 4] != 0xdeadbeef) - printk(KERN_WARNING "End of tree marker overwritten: %08x\n", - ((u32 *)mem)[size / 4] ); - *allnextp = NULL; - - /* Get pointer to OF "/chosen" node for use 
everywhere */ - of_chosen = of_find_node_by_path("/chosen"); - if (of_chosen == NULL) - of_chosen = of_find_node_by_path("/chosen@0"); - - DBG(" <- unflatten_device_tree()\n"); -} - /* * ibm,pa-features is a per-cpu property that contains a string of * attribute descriptors, each of which has a 2 byte header plus up @@ -763,48 +363,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -#ifdef CONFIG_BLK_DEV_INITRD -static void __init early_init_dt_check_for_initrd(unsigned long node) -{ - unsigned long l; - u32 *prop; - - DBG("Looking for initrd properties... "); - - prop = of_get_flat_dt_prop(node, "linux,initrd-start", &l); - if (prop) { - initrd_start = (unsigned long)__va(of_read_ulong(prop, l/4)); - - prop = of_get_flat_dt_prop(node, "linux,initrd-end", &l); - if (prop) { - initrd_end = (unsigned long) - __va(of_read_ulong(prop, l/4)); - initrd_below_start_ok = 1; - } else { - initrd_start = 0; - } - } - - DBG("initrd_start=0x%lx initrd_end=0x%lx\n", initrd_start, initrd_end); -} -#else -static inline void early_init_dt_check_for_initrd(unsigned long node) -{ -} -#endif /* CONFIG_BLK_DEV_INITRD */ - -static int __init early_init_dt_scan_chosen(unsigned long node, - const char *uname, int depth, void *data) +void __init early_init_dt_scan_chosen_arch(unsigned long node) { unsigned long *lprop; - unsigned long l; - char *p; - - DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); - - if (depth != 1 || - (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) - return 0; #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ @@ -815,17 +376,17 @@ static int __init early_init_dt_scan_chosen(unsigned long node, #endif /* mem=x on the command line is the preferred mechanism */ - lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL); - if (lprop) - memory_limit = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL); + if (lprop) + memory_limit = *lprop; #ifdef CONFIG_PPC64 - lprop = 
of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); - if (lprop) - tce_alloc_start = *lprop; - lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); - if (lprop) - tce_alloc_end = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + if (lprop) + tce_alloc_start = *lprop; + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + if (lprop) + tce_alloc_end = *lprop; #endif #ifdef CONFIG_KEXEC @@ -837,51 +398,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, if (lprop) crashk_res.end = crashk_res.start + *lprop - 1; #endif - - early_init_dt_check_for_initrd(node); - - /* Retreive command line */ - p = of_get_flat_dt_prop(node, "bootargs", &l); - if (p != NULL && l > 0) - strlcpy(cmd_line, p, min((int)l, COMMAND_LINE_SIZE)); - -#ifdef CONFIG_CMDLINE - if (p == NULL || l == 0 || (l == 1 && (*p) == 0)) - strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif /* CONFIG_CMDLINE */ - - DBG("Command line is: %s\n", cmd_line); - - /* break now */ - return 1; -} - -static int __init early_init_dt_scan_root(unsigned long node, - const char *uname, int depth, void *data) -{ - u32 *prop; - - if (depth != 0) - return 0; - - prop = of_get_flat_dt_prop(node, "#size-cells", NULL); - dt_root_size_cells = (prop == NULL) ? 1 : *prop; - DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - - prop = of_get_flat_dt_prop(node, "#address-cells", NULL); - dt_root_addr_cells = (prop == NULL) ? 
2 : *prop; - DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); - - /* break now */ - return 1; -} - -static u64 __init dt_mem_next_cell(int s, cell_t **cellp) -{ - cell_t *p = *cellp; - - *cellp = p + s; - return of_read_number(p, s); } #ifdef CONFIG_PPC_PSERIES @@ -893,22 +409,22 @@ static u64 __init dt_mem_next_cell(int s, cell_t **cellp) */ static int __init early_init_dt_scan_drconf_memory(unsigned long node) { - cell_t *dm, *ls, *usm; + __be32 *dm, *ls, *usm; unsigned long l, n, flags; - u64 base, size, lmb_size; + u64 base, size, memblock_size; unsigned int is_kexec_kdump = 0, rngs; ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l); - if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t)) + if (ls == NULL || l < dt_root_size_cells * sizeof(__be32)) return 0; - lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls); + memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls); dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l); - if (dm == NULL || l < sizeof(cell_t)) + if (dm == NULL || l < sizeof(__be32)) return 0; n = *dm++; /* number of entries */ - if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(cell_t)) + if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32)) return 0; /* check if this is a kexec/kdump kernel. */ @@ -926,11 +442,11 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) or if the block is not assigned to this partition (0x8) */ if ((flags & 0x80) || !(flags & 0x8)) continue; - size = lmb_size; + size = memblock_size; rngs = 1; if (is_kexec_kdump) { /* - * For each lmb in ibm,dynamic-memory, a corresponding + * For each memblock in ibm,dynamic-memory, a corresponding * entry in linux,drconf-usable-memory property contains * a counter 'p' followed by 'p' (base, size) duple. 
* Now read the counter from @@ -953,75 +469,57 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node) if ((base + size) > 0x80000000ul) size = 0x80000000ul - base; } - lmb_add(base, size); + memblock_add(base, size); } while (--rngs); } - lmb_dump_all(); + memblock_dump_all(); return 0; } #else #define early_init_dt_scan_drconf_memory(node) 0 #endif /* CONFIG_PPC_PSERIES */ -static int __init early_init_dt_scan_memory(unsigned long node, - const char *uname, int depth, void *data) +static int __init early_init_dt_scan_memory_ppc(unsigned long node, + const char *uname, + int depth, void *data) { - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - cell_t *reg, *endp; - unsigned long l; - - /* Look for the ibm,dynamic-reconfiguration-memory node */ if (depth == 1 && strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) return early_init_dt_scan_drconf_memory(node); + + return early_init_dt_scan_memory(node, uname, depth, data); +} - /* We are scanning "memory" nodes only */ - if (type == NULL) { - /* - * The longtrail doesn't have a device_type on the - * /memory node, so look for the node called /memory@0. 
- */ - if (depth != 1 || strcmp(uname, "memory@0") != 0) - return 0; - } else if (strcmp(type, "memory") != 0) - return 0; - - reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l); - if (reg == NULL) - reg = of_get_flat_dt_prop(node, "reg", &l); - if (reg == NULL) - return 0; - - endp = reg + (l / sizeof(cell_t)); - - DBG("memory scan node %s, reg size %ld, data: %x %x %x %x,\n", - uname, l, reg[0], reg[1], reg[2], reg[3]); - - while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { - u64 base, size; +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ +#if defined(CONFIG_PPC64) + if (iommu_is_off) { + if (base >= 0x80000000ul) + return; + if ((base + size) > 0x80000000ul) + size = 0x80000000ul - base; + } +#endif - base = dt_mem_next_cell(dt_root_addr_cells, ®); - size = dt_mem_next_cell(dt_root_size_cells, ®); + memblock_add(base, size); - if (size == 0) - continue; - DBG(" - %llx , %llx\n", (unsigned long long)base, - (unsigned long long)size); -#ifdef CONFIG_PPC64 - if (iommu_is_off) { - if (base >= 0x80000000ul) - continue; - if ((base + size) > 0x80000000ul) - size = 0x80000000ul - base; - } -#endif - lmb_add(base, size); + memstart_addr = min((u64)memstart_addr, base); +} - memstart_addr = min((u64)memstart_addr, base); - } +u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +{ + return memblock_alloc(size, align); +} - return 0; +#ifdef CONFIG_BLK_DEV_INITRD +void __init early_init_dt_setup_initrd_arch(unsigned long start, + unsigned long end) +{ + initrd_start = (unsigned long)__va(start); + initrd_end = (unsigned long)__va(end); + initrd_below_start_ok = 1; } +#endif static void __init early_reserve_mem(void) { @@ -1036,12 +534,12 @@ static void __init early_reserve_mem(void) /* before we do anything, lets reserve the dt blob */ self_base = __pa((unsigned long)initial_boot_params); self_size = initial_boot_params->totalsize; - lmb_reserve(self_base, self_size); + memblock_reserve(self_base, self_size); #ifdef 
CONFIG_BLK_DEV_INITRD /* then reserve the initrd, if any */ if (initrd_start && (initrd_end > initrd_start)) - lmb_reserve(__pa(initrd_start), initrd_end - initrd_start); + memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); #endif /* CONFIG_BLK_DEV_INITRD */ #ifdef CONFIG_PPC32 @@ -1062,7 +560,7 @@ static void __init early_reserve_mem(void) if (base_32 == self_base && size_32 == self_size) continue; DBG("reserving: %x -> %x\n", base_32, size_32); - lmb_reserve(base_32, size_32); + memblock_reserve(base_32, size_32); } return; } @@ -1073,7 +571,7 @@ static void __init early_reserve_mem(void) if (size == 0) break; DBG("reserving: %llx -> %llx\n", base, size); - lmb_reserve(base, size); + memblock_reserve(base, size); } } @@ -1096,7 +594,7 @@ static inline unsigned long phyp_dump_calculate_reserve_size(void) return phyp_dump_info->reserve_bootvar; /* divide by 20 to get 5% of value */ - tmp = lmb_end_of_DRAM(); + tmp = memblock_end_of_DRAM(); do_div(tmp, 20); /* round it down in multiples of 256 */ @@ -1135,11 +633,11 @@ static void __init phyp_dump_reserve_mem(void) if (phyp_dump_info->phyp_dump_is_active) { /* Reserve *everything* above RMR.Area freed by userland tools*/ base = variable_reserve_size; - size = lmb_end_of_DRAM() - base; + size = memblock_end_of_DRAM() - base; /* XXX crashed_ram_end is wrong, since it may be beyond * the memory_limit, it will need to be adjusted. 
*/ - lmb_reserve(base, size); + memblock_reserve(base, size); phyp_dump_info->init_reserve_start = base; phyp_dump_info->init_reserve_size = size; @@ -1147,8 +645,8 @@ static void __init phyp_dump_reserve_mem(void) size = phyp_dump_info->cpu_state_size + phyp_dump_info->hpte_region_size + variable_reserve_size; - base = lmb_end_of_DRAM() - size; - lmb_reserve(base, size); + base = memblock_end_of_DRAM() - size; + memblock_reserve(base, size); phyp_dump_info->init_reserve_start = base; phyp_dump_info->init_reserve_size = size; } @@ -1183,20 +681,20 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_chosen, NULL); - /* Scan memory nodes and rebuild LMBs */ - lmb_init(); + /* Scan memory nodes and rebuild MEMBLOCKs */ + memblock_init(); of_scan_flat_dt(early_init_dt_scan_root, NULL); - of_scan_flat_dt(early_init_dt_scan_memory, NULL); + of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); /* Save command line for /proc/cmdline and then parse parameters */ strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); parse_early_param(); - /* Reserve LMB regions used by kernel, initrd, dt, etc... */ - lmb_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); + /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ + memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); /* If relocatable, reserve first 32k for interrupt vectors etc. */ if (PHYSICAL_START > MEMORY_START) - lmb_reserve(MEMORY_START, 0x8000); + memblock_reserve(MEMORY_START, 0x8000); reserve_kdump_trampoline(); reserve_crashkernel(); early_reserve_mem(); @@ -1208,22 +706,24 @@ void __init early_init_devtree(void *params) /* Ensure that total memory size is page-aligned, because * otherwise mark_bootmem() gets upset. 
*/ - lmb_analyze(); - memsize = lmb_phys_mem_size(); + memblock_analyze(); + memsize = memblock_phys_mem_size(); if ((memsize & PAGE_MASK) != memsize) limit = memsize & PAGE_MASK; } - lmb_enforce_memory_limit(limit); + memblock_enforce_memory_limit(limit); - lmb_analyze(); - lmb_dump_all(); + memblock_analyze(); + memblock_dump_all(); - DBG("Phys. mem: %llx\n", lmb_phys_mem_size()); + DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ move_device_tree(); + allocate_pacas(); + DBG("Scanning CPUs ...\n"); /* Retreive CPU related informations from the flat tree @@ -1234,25 +734,6 @@ void __init early_init_devtree(void *params) DBG(" <- early_init_devtree()\n"); } - -/** - * Indicates whether the root node has a given value in its - * compatible property. - */ -int machine_is_compatible(const char *compat) -{ - struct device_node *root; - int rc = 0; - - root = of_find_node_by_path("/"); - if (root) { - rc = of_device_is_compatible(root, compat); - of_node_put(root); - } - return rc; -} -EXPORT_SYMBOL(machine_is_compatible); - /******* * * New implementation of the OF "find" APIs, return a refcounted @@ -1265,27 +746,6 @@ EXPORT_SYMBOL(machine_is_compatible); *******/ /** - * of_find_node_by_phandle - Find a node given a phandle - * @handle: phandle of the node to find - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. 
- */ -struct device_node *of_find_node_by_phandle(phandle handle) -{ - struct device_node *np; - - read_lock(&devtree_lock); - for (np = allnodes; np != 0; np = np->allnext) - if (np->linux_phandle == handle) - break; - of_node_get(np); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_node_by_phandle); - -/** * of_find_next_cache_node - Find a node's subsidiary cache * @np: node of type "cpu" or "cache" * @@ -1316,138 +776,6 @@ struct device_node *of_find_next_cache_node(struct device_node *np) return NULL; } -/** - * of_node_get - Increment refcount of a node - * @node: Node to inc refcount, NULL is supported to - * simplify writing of callers - * - * Returns node. - */ -struct device_node *of_node_get(struct device_node *node) -{ - if (node) - kref_get(&node->kref); - return node; -} -EXPORT_SYMBOL(of_node_get); - -static inline struct device_node * kref_to_device_node(struct kref *kref) -{ - return container_of(kref, struct device_node, kref); -} - -/** - * of_node_release - release a dynamically allocated node - * @kref: kref element of the node to be released - * - * In of_node_put() this function is passed to kref_put() - * as the destructor. - */ -static void of_node_release(struct kref *kref) -{ - struct device_node *node = kref_to_device_node(kref); - struct property *prop = node->properties; - - /* We should never be releasing nodes that haven't been detached. 
*/ - if (!of_node_check_flag(node, OF_DETACHED)) { - printk("WARNING: Bad of_node_put() on %s\n", node->full_name); - dump_stack(); - kref_init(&node->kref); - return; - } - - if (!of_node_check_flag(node, OF_DYNAMIC)) - return; - - while (prop) { - struct property *next = prop->next; - kfree(prop->name); - kfree(prop->value); - kfree(prop); - prop = next; - - if (!prop) { - prop = node->deadprops; - node->deadprops = NULL; - } - } - kfree(node->full_name); - kfree(node->data); - kfree(node); -} - -/** - * of_node_put - Decrement refcount of a node - * @node: Node to dec refcount, NULL is supported to - * simplify writing of callers - * - */ -void of_node_put(struct device_node *node) -{ - if (node) - kref_put(&node->kref, of_node_release); -} -EXPORT_SYMBOL(of_node_put); - -/* - * Plug a device node into the tree and global list. - */ -void of_attach_node(struct device_node *np) -{ - unsigned long flags; - - write_lock_irqsave(&devtree_lock, flags); - np->sibling = np->parent->child; - np->allnext = allnodes; - np->parent->child = np; - allnodes = np; - write_unlock_irqrestore(&devtree_lock, flags); -} - -/* - * "Unplug" a node from the device tree. The caller must hold - * a reference to the node. The memory associated with the node - * is not freed until its refcount goes to zero. 
- */ -void of_detach_node(struct device_node *np) -{ - struct device_node *parent; - unsigned long flags; - - write_lock_irqsave(&devtree_lock, flags); - - parent = np->parent; - if (!parent) - goto out_unlock; - - if (allnodes == np) - allnodes = np->allnext; - else { - struct device_node *prev; - for (prev = allnodes; - prev->allnext != np; - prev = prev->allnext) - ; - prev->allnext = np->allnext; - } - - if (parent->child == np) - parent->child = np->sibling; - else { - struct device_node *prevsib; - for (prevsib = np->parent->child; - prevsib->sibling != np; - prevsib = prevsib->sibling) - ; - prevsib->sibling = np->sibling; - } - - of_node_set_flag(np, OF_DETACHED); - -out_unlock: - write_unlock_irqrestore(&devtree_lock, flags); -} - #ifdef CONFIG_PPC_PSERIES /* * Fix up the uninitialized fields in a new device node: @@ -1479,9 +807,9 @@ static int of_finish_dynamic_node(struct device_node *node) if (machine_is(powermac)) return -ENODEV; - /* fix up new node's linux_phandle field */ + /* fix up new node's phandle field */ if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL))) - node->linux_phandle = *ibm_phandle; + node->phandle = *ibm_phandle; out: of_node_put(parent); @@ -1520,120 +848,6 @@ static int __init prom_reconfig_setup(void) __initcall(prom_reconfig_setup); #endif -/* - * Add a property to a node - */ -int prom_add_property(struct device_node* np, struct property* prop) -{ - struct property **next; - unsigned long flags; - - prop->next = NULL; - write_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (strcmp(prop->name, (*next)->name) == 0) { - /* duplicate ! 
don't insert it */ - write_unlock_irqrestore(&devtree_lock, flags); - return -1; - } - next = &(*next)->next; - } - *next = prop; - write_unlock_irqrestore(&devtree_lock, flags); - -#ifdef CONFIG_PROC_DEVICETREE - /* try to add to proc as well if it was initialized */ - if (np->pde) - proc_device_tree_add_prop(np->pde, prop); -#endif /* CONFIG_PROC_DEVICETREE */ - - return 0; -} - -/* - * Remove a property from a node. Note that we don't actually - * remove it, since we have given out who-knows-how-many pointers - * to the data using get-property. Instead we just move the property - * to the "dead properties" list, so it won't be found any more. - */ -int prom_remove_property(struct device_node *np, struct property *prop) -{ - struct property **next; - unsigned long flags; - int found = 0; - - write_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (*next == prop) { - /* found the node */ - *next = prop->next; - prop->next = np->deadprops; - np->deadprops = prop; - found = 1; - break; - } - next = &(*next)->next; - } - write_unlock_irqrestore(&devtree_lock, flags); - - if (!found) - return -ENODEV; - -#ifdef CONFIG_PROC_DEVICETREE - /* try to remove the proc node as well */ - if (np->pde) - proc_device_tree_remove_prop(np->pde, prop); -#endif /* CONFIG_PROC_DEVICETREE */ - - return 0; -} - -/* - * Update a property in a node. Note that we don't actually - * remove it, since we have given out who-knows-how-many pointers - * to the data using get-property. 
Instead we just move the property - * to the "dead properties" list, and add the new property to the - * property list - */ -int prom_update_property(struct device_node *np, - struct property *newprop, - struct property *oldprop) -{ - struct property **next; - unsigned long flags; - int found = 0; - - write_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (*next == oldprop) { - /* found the node */ - newprop->next = oldprop->next; - *next = newprop; - oldprop->next = np->deadprops; - np->deadprops = oldprop; - found = 1; - break; - } - next = &(*next)->next; - } - write_unlock_irqrestore(&devtree_lock, flags); - - if (!found) - return -ENODEV; - -#ifdef CONFIG_PROC_DEVICETREE - /* try to add to proc as well if it was initialized */ - if (np->pde) - proc_device_tree_update_prop(np->pde, newprop, oldprop); -#endif /* CONFIG_PROC_DEVICETREE */ - - return 0; -} - - /* Find the device node for a given logical cpu number, also returns the cpu * local thread number (index in ibm,interrupt-server#s) if relevant and * asked for (non NULL) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index bafac2e41ae1..3b6f8ae9b8cc 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -653,6 +653,10 @@ static void __init early_cmdline_parse(void) #else #define OV5_CMO 0x00 #endif +#define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */ + +/* Option Vector 6: IBM PAPR hints */ +#define OV6_LINUX 0x02 /* Linux is our OS */ /* * The architecture vector has an array of PVR mask/value pairs, @@ -665,7 +669,7 @@ static unsigned char ibm_architecture_vec[] = { W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */ - 5 - 1, /* 5 option vectors */ + 6 - 1, /* 6 option vectors */ /* option vector 1: processor architectures supported */ 3 - 2, /* length */ @@ -697,12 +701,29 @@ 
static unsigned char ibm_architecture_vec[] = { 0, /* don't halt */ /* option vector 5: PAPR/OF options */ - 5 - 2, /* length */ + 13 - 2, /* length */ 0, /* don't ignore, don't halt */ OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_DONATE_DEDICATE_CPU | OV5_MSI, 0, OV5_CMO, + OV5_TYPE1_AFFINITY, + 0, + 0, + 0, + /* WARNING: The offset of the "number of cores" field below + * must match by the macro below. Update the definition if + * the structure layout changes. + */ +#define IBM_ARCH_VEC_NRCORES_OFFSET 100 + W(NR_CPUS), /* number of cores supported */ + + /* option vector 6: IBM PAPR hints */ + 4 - 2, /* length */ + 0, + 0, + OV6_LINUX, + }; /* Old method - ELF header with PT_NOTE sections */ @@ -792,13 +813,70 @@ static struct fake_elf { } }; +static int __init prom_count_smt_threads(void) +{ + phandle node; + char type[64]; + unsigned int plen; + + /* Pick up th first CPU node we can find */ + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + prom_getprop(node, "device_type", type, sizeof(type)); + + if (strcmp(type, RELOC("cpu"))) + continue; + /* + * There is an entry for each smt thread, each entry being + * 4 bytes long. All cpus should have the same number of + * smt threads, so return after finding the first. + */ + plen = prom_getproplen(node, "ibm,ppc-interrupt-server#s"); + if (plen == PROM_ERROR) + break; + plen >>= 2; + prom_debug("Found 0x%x smt threads per core\n", (unsigned long)plen); + + /* Sanity check */ + if (plen < 1 || plen > 64) { + prom_printf("Threads per core 0x%x out of bounds, assuming 1\n", + (unsigned long)plen); + return 1; + } + return plen; + } + prom_debug("No threads found, assuming 1 per core\n"); + + return 1; + +} + + static void __init prom_send_capabilities(void) { ihandle elfloader, root; prom_arg_t ret; + u32 *cores; root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { + /* We need to tell the FW about the number of cores we support. 
+ * + * To do that, we count the number of threads on the first core + * (we assume this is the same for all cores) and use it to + * divide NR_CPUS. + */ + cores = (u32 *)PTRRELOC(&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]); + if (*cores != NR_CPUS) { + prom_printf("WARNING ! " + "ibm_architecture_vec structure inconsistent: 0x%x !\n", + *cores); + } else { + *cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); + prom_printf("Max number of cores passed to firmware: 0x%x\n", + (unsigned long)*cores); + } + /* try calling the ibm,client-architecture-support method */ prom_printf("Calling ibm,client-architecture-support..."); if (call_prom_ret("call-method", 3, 2, &ret, diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 1ac136b128f0..9f82f4937892 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -52,12 +52,18 @@ do if [ "${UNDEF:0:9}" = "_restgpr_" ]; then OK=1 fi + if [ "${UNDEF:0:10}" = "_restgpr0_" ]; then + OK=1 + fi if [ "${UNDEF:0:11}" = "_rest32gpr_" ]; then OK=1 fi if [ "${UNDEF:0:9}" = "_savegpr_" ]; then OK=1 fi + if [ "${UNDEF:0:10}" = "_savegpr0_" ]; then + OK=1 + fi if [ "${UNDEF:0:11}" = "_save32gpr_" ]; then OK=1 fi diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index ef149880c145..7a0c0199ea28 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -39,6 +39,109 @@ #include <asm/system.h> /* + * The parameter save area on the stack is used to store arguments being passed + * to callee function and is located at fixed offset from stack pointer. 
+ */ +#ifdef CONFIG_PPC32 +#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */ +#else /* CONFIG_PPC32 */ +#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */ +#endif + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define STR(s) #s /* convert to string */ +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define GPR_OFFSET_NAME(num) \ + {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + GPR_OFFSET_NAME(31), + REG_OFFSET_NAME(nip), + REG_OFFSET_NAME(msr), + REG_OFFSET_NAME(ctr), + REG_OFFSET_NAME(link), + REG_OFFSET_NAME(xer), + REG_OFFSET_NAME(ccr), +#ifdef CONFIG_PPC64 + REG_OFFSET_NAME(softe), +#else + REG_OFFSET_NAME(mq), +#endif + REG_OFFSET_NAME(trap), + REG_OFFSET_NAME(dar), + REG_OFFSET_NAME(dsisr), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. 
If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +/* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. */ @@ -46,7 +149,7 @@ /* * Set of msr bits that gdb can change on behalf of a process. */ -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS #define MSR_DEBUGCHANGE 0 #else #define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) @@ -703,7 +806,7 @@ void user_enable_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) { -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS task->thread.dbcr0 &= ~DBCR0_BT; task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC; regs->msr |= MSR_DE; @@ -720,7 +823,7 @@ void user_enable_block_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) { -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS task->thread.dbcr0 &= ~DBCR0_IC; task->thread.dbcr0 = DBCR0_IDM | DBCR0_BT; regs->msr |= MSR_DE; @@ -737,17 +840,25 @@ void user_disable_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) { -#if defined(CONFIG_BOOKE) - /* If DAC don't clear DBCRO_IDM or MSR_DE */ - if 
(task->thread.dabr) - task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_BT); - else { - task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_BT | DBCR0_IDM); +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * The logic to disable single stepping should be as + * simple as turning off the Instruction Complete flag. + * And, after doing so, if all debug flags are off, turn + * off DBCR0(IDM) and MSR(DE) .... Torez + */ + task->thread.dbcr0 &= ~DBCR0_IC; + /* + * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set. + */ + if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, + task->thread.dbcr1)) { + /* + * All debug events were off..... + */ + task->thread.dbcr0 &= ~DBCR0_IDM; regs->msr &= ~MSR_DE; } -#elif defined(CONFIG_40x) - task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_BT | DBCR0_IDM); - regs->msr &= ~MSR_DE; #else regs->msr &= ~(MSR_SE | MSR_BE); #endif @@ -769,8 +880,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, if ((data & ~0x7UL) >= TASK_SIZE) return -EIO; -#ifndef CONFIG_BOOKE - +#ifndef CONFIG_PPC_ADV_DEBUG_REGS /* For processors using DABR (i.e. 970), the bottom 3 bits are flags. * It was assumed, on previous implementations, that 3 bits were * passed together with the data address, fitting the design of the @@ -789,21 +899,22 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Move contents to the DABR register */ task->thread.dabr = data; - -#endif -#if defined(CONFIG_BOOKE) - +#else /* CONFIG_PPC_ADV_DEBUG_REGS */ /* As described above, it was assumed 3 bits were passed with the data * address, but we will assume only the mode bits will be passed * as to not cause alignment restrictions for DAC-based processors. 
*/ /* DAC's hold the whole address without any mode flags */ - task->thread.dabr = data & ~0x3UL; - - if (task->thread.dabr == 0) { - task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM); - task->thread.regs->msr &= ~MSR_DE; + task->thread.dac1 = data & ~0x3UL; + + if (task->thread.dac1 == 0) { + dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); + if (!DBCR_ACTIVE_EVENTS(task->thread.dbcr0, + task->thread.dbcr1)) { + task->thread.regs->msr &= ~MSR_DE; + task->thread.dbcr0 &= ~DBCR0_IDM; + } return 0; } @@ -814,17 +925,17 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */ - task->thread.dbcr0 = DBCR0_IDM; + task->thread.dbcr0 |= DBCR0_IDM; /* Check for write and read flags and set DBCR0 accordingly */ + dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W); if (data & 0x1UL) - task->thread.dbcr0 |= DBSR_DAC1R; + dbcr_dac(task) |= DBCR_DAC1R; if (data & 0x2UL) - task->thread.dbcr0 |= DBSR_DAC1W; - + dbcr_dac(task) |= DBCR_DAC1W; task->thread.regs->msr |= MSR_DE; -#endif +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ return 0; } @@ -839,6 +950,394 @@ void ptrace_disable(struct task_struct *child) user_disable_single_step(child); } +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +static long set_intruction_bp(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + int slot; + int slot1_in_use = ((child->thread.dbcr0 & DBCR0_IAC1) != 0); + int slot2_in_use = ((child->thread.dbcr0 & DBCR0_IAC2) != 0); + int slot3_in_use = ((child->thread.dbcr0 & DBCR0_IAC3) != 0); + int slot4_in_use = ((child->thread.dbcr0 & DBCR0_IAC4) != 0); + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + slot2_in_use = 1; + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + slot4_in_use = 1; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) { + + /* Make sure range is valid. 
*/ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + + /* We need a pair of IAC regsisters */ + if ((!slot1_in_use) && (!slot2_in_use)) { + slot = 1; + child->thread.iac1 = bp_info->addr; + child->thread.iac2 = bp_info->addr2; + child->thread.dbcr0 |= DBCR0_IAC1; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC12X; + else + dbcr_iac_range(child) |= DBCR_IAC12I; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if ((!slot3_in_use) && (!slot4_in_use)) { + slot = 3; + child->thread.iac3 = bp_info->addr; + child->thread.iac4 = bp_info->addr2; + child->thread.dbcr0 |= DBCR0_IAC3; + if (bp_info->addr_mode == + PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + dbcr_iac_range(child) |= DBCR_IAC34X; + else + dbcr_iac_range(child) |= DBCR_IAC34I; +#endif + } else + return -ENOSPC; + } else { + /* We only need one. If possible leave a pair free in + * case a range is needed later + */ + if (!slot1_in_use) { + /* + * Don't use iac1 if iac1-iac2 are free and either + * iac3 or iac4 (but not both) are free + */ + if (slot2_in_use || (slot3_in_use == slot4_in_use)) { + slot = 1; + child->thread.iac1 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC1; + goto out; + } + } + if (!slot2_in_use) { + slot = 2; + child->thread.iac2 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC2; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + } else if (!slot3_in_use) { + slot = 3; + child->thread.iac3 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC3; + } else if (!slot4_in_use) { + slot = 4; + child->thread.iac4 = bp_info->addr; + child->thread.dbcr0 |= DBCR0_IAC4; +#endif + } else + return -ENOSPC; + } +out: + child->thread.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot; +} + +static int del_instruction_bp(struct task_struct *child, int slot) +{ + switch (slot) { + case 1: + if ((child->thread.dbcr0 & DBCR0_IAC1) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) { + /* address range - clear slots 1 & 2 */ + 
child->thread.iac2 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC12MODE; + } + child->thread.iac1 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC1; + break; + case 2: + if ((child->thread.dbcr0 & DBCR0_IAC2) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC12MODE) + /* used in a range */ + return -EINVAL; + child->thread.iac2 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case 3: + if ((child->thread.dbcr0 & DBCR0_IAC3) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) { + /* address range - clear slots 3 & 4 */ + child->thread.iac4 = 0; + dbcr_iac_range(child) &= ~DBCR_IAC34MODE; + } + child->thread.iac3 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC3; + break; + case 4: + if ((child->thread.dbcr0 & DBCR0_IAC4) == 0) + return -ENOENT; + + if (dbcr_iac_range(child) & DBCR_IAC34MODE) + /* Used in a range */ + return -EINVAL; + child->thread.iac4 = 0; + child->thread.dbcr0 &= ~DBCR0_IAC4; + break; +#endif + default: + return -EINVAL; + } + return 0; +} + +static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) +{ + int byte_enable = + (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT) + & 0xf; + int condition_mode = + bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE; + int slot; + + if (byte_enable && (condition_mode == 0)) + return -EINVAL; + + if (bp_info->addr >= TASK_SIZE) + return -EIO; + + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) { + slot = 1; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC1R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC1W; + child->thread.dac1 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.dvc1 = + (unsigned long)bp_info->condition_value; + child->thread.dbcr2 |= + ((byte_enable << DBCR2_DVC1BE_SHIFT) | + (condition_mode << DBCR2_DVC1M_SHIFT)); + } +#endif +#ifdef 
CONFIG_PPC_ADV_DEBUG_DAC_RANGE + } else if (child->thread.dbcr2 & DBCR2_DAC12MODE) { + /* Both dac1 and dac2 are part of a range */ + return -ENOSPC; +#endif + } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) { + slot = 2; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dbcr_dac(child) |= DBCR_DAC2R; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dbcr_dac(child) |= DBCR_DAC2W; + child->thread.dac2 = (unsigned long)bp_info->addr; +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + if (byte_enable) { + child->thread.dvc2 = + (unsigned long)bp_info->condition_value; + child->thread.dbcr2 |= + ((byte_enable << DBCR2_DVC2BE_SHIFT) | + (condition_mode << DBCR2_DVC2M_SHIFT)); + } +#endif + } else + return -ENOSPC; + child->thread.dbcr0 |= DBCR0_IDM; + child->thread.regs->msr |= MSR_DE; + + return slot + 4; +} + +static int del_dac(struct task_struct *child, int slot) +{ + if (slot == 1) { + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) + return -ENOENT; + + child->thread.dac1 = 0; + dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.dbcr2 & DBCR2_DAC12MODE) { + child->thread.dac2 = 0; + child->thread.dbcr2 &= ~DBCR2_DAC12MODE; + } + child->thread.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.dvc1 = 0; +#endif + } else if (slot == 2) { + if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) + return -ENOENT; + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + if (child->thread.dbcr2 & DBCR2_DAC12MODE) + /* Part of a range */ + return -EINVAL; + child->thread.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE); +#endif +#if CONFIG_PPC_ADV_DEBUG_DVCS > 0 + child->thread.dvc2 = 0; +#endif + child->thread.dac2 = 0; + dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W); + } else + return -EINVAL; + + return 0; +} +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE +static int set_dac_range(struct task_struct *child, + struct 
ppc_hw_breakpoint *bp_info) +{ + int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK; + + /* We don't allow range watchpoints to be used with DVC */ + if (bp_info->condition_mode) + return -EINVAL; + + /* + * Best effort to verify the address range. The user/supervisor bits + * prevent trapping in kernel space, but let's fail on an obvious bad + * range. The simple test on the mask is not fool-proof, and any + * exclusive range will spill over into kernel space. + */ + if (bp_info->addr >= TASK_SIZE) + return -EIO; + if (mode == PPC_BREAKPOINT_MODE_MASK) { + /* + * dac2 is a bitmask. Don't allow a mask that makes a + * kernel space address from a valid dac1 value + */ + if (~((unsigned long)bp_info->addr2) >= TASK_SIZE) + return -EIO; + } else { + /* + * For range breakpoints, addr2 must also be a valid address + */ + if (bp_info->addr2 >= TASK_SIZE) + return -EIO; + } + + if (child->thread.dbcr0 & + (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)) + return -ENOSPC; + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + child->thread.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM); + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + child->thread.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM); + child->thread.dac1 = bp_info->addr; + child->thread.dac2 = bp_info->addr2; + if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE) + child->thread.dbcr2 |= DBCR2_DAC12M; + else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE) + child->thread.dbcr2 |= DBCR2_DAC12MX; + else /* PPC_BREAKPOINT_MODE_MASK */ + child->thread.dbcr2 |= DBCR2_DAC12MM; + child->thread.regs->msr |= MSR_DE; + + return 5; +} +#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */ + +static long ppc_set_hwdebug(struct task_struct *child, + struct ppc_hw_breakpoint *bp_info) +{ + if (bp_info->version != 1) + return -ENOTSUPP; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + /* + * Check for invalid flags and combinations + */ + if ((bp_info->trigger_type == 0) || + (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE | + 
PPC_BREAKPOINT_TRIGGER_RW)) || + (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) || + (bp_info->condition_mode & + ~(PPC_BREAKPOINT_CONDITION_MODE | + PPC_BREAKPOINT_CONDITION_BE_ALL))) + return -EINVAL; +#if CONFIG_PPC_ADV_DEBUG_DVCS == 0 + if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) + return -EINVAL; +#endif + + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) { + if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) || + (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) + return -EINVAL; + return set_intruction_bp(child, bp_info); + } + if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT) + return set_dac(child, bp_info); + +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + return set_dac_range(child, bp_info); +#else + return -EINVAL; +#endif +#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */ + /* + * We only support one data breakpoint + */ + if (((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0) || + ((bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0) || + (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_WRITE) || + (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) || + (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) + return -EINVAL; + + if (child->thread.dabr) + return -ENOSPC; + + if ((unsigned long)bp_info->addr >= TASK_SIZE) + return -EIO; + + child->thread.dabr = (unsigned long)bp_info->addr; + + return 1; +#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */ +} + +static long ppc_del_hwdebug(struct task_struct *child, long addr, long data) +{ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + int rc; + + if (data <= 4) + rc = del_instruction_bp(child, (int)data); + else + rc = del_dac(child, (int)data - 4); + + if (!rc) { + if (!DBCR_ACTIVE_EVENTS(child->thread.dbcr0, + child->thread.dbcr1)) { + child->thread.dbcr0 &= ~DBCR0_IDM; + child->thread.regs->msr &= ~MSR_DE; + } + } + return rc; +#else + if (data != 1) + return -EINVAL; + if (child->thread.dabr == 0) + return -ENOENT; + + child->thread.dabr = 0; + + return 0; +#endif 
+} + /* * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, * we mark them as obsolete now, they will be removed in a future version @@ -932,13 +1431,77 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } + case PPC_PTRACE_GETHWDBGINFO: { + struct ppc_debug_info dbginfo; + + dbginfo.version = 1; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS; + dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS; + dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS; + dbginfo.data_bp_alignment = 4; + dbginfo.sizeof_condition = 4; + dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE | + PPC_DEBUG_FEATURE_INSN_BP_MASK; +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + dbginfo.features |= + PPC_DEBUG_FEATURE_DATA_BP_RANGE | + PPC_DEBUG_FEATURE_DATA_BP_MASK; +#endif +#else /* !CONFIG_PPC_ADV_DEBUG_REGS */ + dbginfo.num_instruction_bps = 0; + dbginfo.num_data_bps = 1; + dbginfo.num_condition_regs = 0; +#ifdef CONFIG_PPC64 + dbginfo.data_bp_alignment = 8; +#else + dbginfo.data_bp_alignment = 4; +#endif + dbginfo.sizeof_condition = 0; + dbginfo.features = 0; +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ + + if (!access_ok(VERIFY_WRITE, data, + sizeof(struct ppc_debug_info))) + return -EFAULT; + ret = __copy_to_user((struct ppc_debug_info __user *)data, + &dbginfo, sizeof(struct ppc_debug_info)) ? + -EFAULT : 0; + break; + } + + case PPC_PTRACE_SETHWDEBUG: { + struct ppc_hw_breakpoint bp_info; + + if (!access_ok(VERIFY_READ, data, + sizeof(struct ppc_hw_breakpoint))) + return -EFAULT; + ret = __copy_from_user(&bp_info, + (struct ppc_hw_breakpoint __user *)data, + sizeof(struct ppc_hw_breakpoint)) ? 
+ -EFAULT : 0; + if (!ret) + ret = ppc_set_hwdebug(child, &bp_info); + break; + } + + case PPC_PTRACE_DELHWDEBUG: { + ret = ppc_del_hwdebug(child, addr, data); + break; + } + case PTRACE_GET_DEBUGREG: { ret = -EINVAL; /* We only support one DABR and no IABRS at the moment */ if (addr > 0) break; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + ret = put_user(child->thread.dac1, + (unsigned long __user *)data); +#else ret = put_user(child->thread.dabr, (unsigned long __user *)data); +#endif break; } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index fd0d29493fd6..d0516dbee762 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -22,7 +22,8 @@ #include <linux/smp.h> #include <linux/completion.h> #include <linux/cpumask.h> -#include <linux/lmb.h> +#include <linux/memblock.h> +#include <linux/slab.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -690,10 +691,14 @@ void rtas_os_term(char *str) { int status; - if (panic_timeout) - return; - - if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term")) + /* + * Firmware with the ibm,extended-os-term property is guaranteed + * to always return from an ibm,os-term call. Earlier versions without + * this property may terminate the partition which we want to avoid + * since it interferes with panic_timeout. 
+ */ + if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") || + RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term")) return; snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); @@ -704,8 +709,7 @@ void rtas_os_term(char *str) } while (rtas_busy_delay(status)); if (status != 0) - printk(KERN_EMERG "ibm,os-term call failed %d\n", - status); + printk(KERN_EMERG "ibm,os-term call failed %d\n", status); } static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; @@ -930,11 +934,11 @@ void __init rtas_initialize(void) */ #ifdef CONFIG_PPC64 if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { - rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX); + rtas_region = min(memblock.rmo_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } #endif - rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region); + rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region); #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index a85117d5c9a4..67a84d8f118d 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/proc_fs.h> #include <asm/delay.h> #include <asm/uaccess.h> @@ -93,12 +94,8 @@ struct flash_block_list { struct flash_block_list *next; struct flash_block blocks[FLASH_BLOCKS_PER_NODE]; }; -struct flash_block_list_header { /* just the header of flash_block_list */ - unsigned long num_blocks; - struct flash_block_list *next; -}; -static struct flash_block_list_header rtas_firmware_flash_list = {0, NULL}; +static struct flash_block_list *rtas_firmware_flash_list; /* Use slab cache to guarantee 4k alignment */ static struct kmem_cache *flash_block_cache = NULL; @@ -107,13 +104,14 @@ static struct kmem_cache *flash_block_cache = NULL; /* Local 
copy of the flash block list. * We only allow one open of the flash proc file and create this - * list as we go. This list will be put in the - * rtas_firmware_flash_list var once it is fully read. + * list as we go. The rtas_firmware_flash_list varable will be + * set once the data is fully read. * * For convenience as we build the list we use virtual addrs, * we do not fill in the version number, and the length field * is treated as the number of entries currently in the block - * (i.e. not a byte count). This is all fixed on release. + * (i.e. not a byte count). This is all fixed when calling + * the flash routine. */ /* Status int must be first member of struct */ @@ -200,16 +198,16 @@ static int rtas_flash_release(struct inode *inode, struct file *file) if (uf->flist) { /* File was opened in write mode for a new flash attempt */ /* Clear saved list */ - if (rtas_firmware_flash_list.next) { - free_flash_list(rtas_firmware_flash_list.next); - rtas_firmware_flash_list.next = NULL; + if (rtas_firmware_flash_list) { + free_flash_list(rtas_firmware_flash_list); + rtas_firmware_flash_list = NULL; } if (uf->status != FLASH_AUTH) uf->status = flash_list_valid(uf->flist); if (uf->status == FLASH_IMG_READY) - rtas_firmware_flash_list.next = uf->flist; + rtas_firmware_flash_list = uf->flist; else free_flash_list(uf->flist); @@ -592,7 +590,7 @@ static void rtas_flash_firmware(int reboot_type) unsigned long rtas_block_list; int i, status, update_token; - if (rtas_firmware_flash_list.next == NULL) + if (rtas_firmware_flash_list == NULL) return; /* nothing to do */ if (reboot_type != SYS_RESTART) { @@ -609,20 +607,25 @@ static void rtas_flash_firmware(int reboot_type) return; } - /* NOTE: the "first" block list is a global var with no data - * blocks in the kernel data segment. We do this because - * we want to ensure this block_list addr is under 4GB. 
+ /* + * NOTE: the "first" block must be under 4GB, so we create + * an entry with no data blocks in the reserved buffer in + * the kernel data segment. */ - rtas_firmware_flash_list.num_blocks = 0; - flist = (struct flash_block_list *)&rtas_firmware_flash_list; + spin_lock(&rtas_data_buf_lock); + flist = (struct flash_block_list *)&rtas_data_buf[0]; + flist->num_blocks = 0; + flist->next = rtas_firmware_flash_list; rtas_block_list = virt_to_abs(flist); if (rtas_block_list >= 4UL*1024*1024*1024) { printk(KERN_ALERT "FLASH: kernel bug...flash list header addr above 4GB\n"); + spin_unlock(&rtas_data_buf_lock); return; } printk(KERN_ALERT "FLASH: preparing saved firmware image for flash\n"); /* Update the block_list in place. */ + rtas_firmware_flash_list = NULL; /* too hard to backout on error */ image_size = 0; for (f = flist; f; f = next) { /* Translate data addrs to absolute */ @@ -663,6 +666,7 @@ static void rtas_flash_firmware(int reboot_type) printk(KERN_ALERT "FLASH: unknown flash return code %d\n", status); break; } + spin_unlock(&rtas_data_buf_lock); } static void remove_flash_pde(struct proc_dir_entry *dp) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 2e4832ab2108..638883e23e3a 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -20,6 +20,7 @@ #include <linux/spinlock.h> #include <linux/cpu.h> #include <linux/workqueue.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -410,9 +411,9 @@ static void rtas_event_scan(struct work_struct *w) get_online_cpus(); - cpu = next_cpu(smp_processor_id(), cpu_online_map); - if (cpu == NR_CPUS) { - cpu = first_cpu(cpu_online_map); + cpu = cpumask_next(smp_processor_id(), cpu_online_mask); + if (cpu >= nr_cpu_ids) { + cpu = cpumask_first(cpu_online_mask); if (first_pass) { first_pass = 0; @@ -465,8 +466,8 @@ static void start_event_scan(void) /* Retreive errors from nvram if any */ retreive_nvram_error_log(); - 
schedule_delayed_work_on(first_cpu(cpu_online_map), &event_scan_work, - event_scan_delay); + schedule_delayed_work_on(cpumask_first(cpu_online_mask), + &event_scan_work, event_scan_delay); } static int __init rtas_init(void) @@ -489,6 +490,12 @@ static int __init rtas_init(void) return -ENODEV; } + if (!rtas_event_scan_rate) { + /* Broken firmware: take a rate of zero to mean don't scan */ + printk(KERN_DEBUG "rtasd: scan rate is 0, not scanning\n"); + return 0; + } + /* Make room for the sequence number */ rtas_error_log_max = rtas_get_error_log_max(); rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 03dd6a248198..b7e6c7e193ae 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,9 +33,10 @@ #include <linux/serial_8250.h> #include <linux/debugfs.h> #include <linux/percpu.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <linux/of_platform.h> #include <asm/io.h> +#include <asm/paca.h> #include <asm/prom.h> #include <asm/processor.h> #include <asm/vdso_datapage.h> @@ -160,45 +161,44 @@ extern u32 cpu_temp_both(unsigned long cpu); DEFINE_PER_CPU(unsigned int, cpu_pvr); #endif -static int show_cpuinfo(struct seq_file *m, void *v) +static void show_cpuinfo_summary(struct seq_file *m) { - unsigned long cpu_id = (unsigned long)v - 1; - unsigned int pvr; - unsigned short maj; - unsigned short min; - - if (cpu_id == NR_CPUS) { - struct device_node *root; - const char *model = NULL; + struct device_node *root; + const char *model = NULL; #if defined(CONFIG_SMP) && defined(CONFIG_PPC32) - unsigned long bogosum = 0; - int i; - for_each_online_cpu(i) - bogosum += loops_per_jiffy; - seq_printf(m, "total bogomips\t: %lu.%02lu\n", - bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); + unsigned long bogosum = 0; + int i; + for_each_online_cpu(i) + bogosum += loops_per_jiffy; + seq_printf(m, "total bogomips\t: %lu.%02lu\n", + 
bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); #endif /* CONFIG_SMP && CONFIG_PPC32 */ - seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); - if (ppc_md.name) - seq_printf(m, "platform\t: %s\n", ppc_md.name); - root = of_find_node_by_path("/"); - if (root) - model = of_get_property(root, "model", NULL); - if (model) - seq_printf(m, "model\t\t: %s\n", model); - of_node_put(root); - - if (ppc_md.show_cpuinfo != NULL) - ppc_md.show_cpuinfo(m); + seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); + if (ppc_md.name) + seq_printf(m, "platform\t: %s\n", ppc_md.name); + root = of_find_node_by_path("/"); + if (root) + model = of_get_property(root, "model", NULL); + if (model) + seq_printf(m, "model\t\t: %s\n", model); + of_node_put(root); + + if (ppc_md.show_cpuinfo != NULL) + ppc_md.show_cpuinfo(m); #ifdef CONFIG_PPC32 - /* Display the amount of memory */ - seq_printf(m, "Memory\t\t: %d MB\n", - (unsigned int)(total_memory / (1024 * 1024))); + /* Display the amount of memory */ + seq_printf(m, "Memory\t\t: %d MB\n", + (unsigned int)(total_memory / (1024 * 1024))); #endif +} - return 0; - } +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long cpu_id = (unsigned long)v - 1; + unsigned int pvr; + unsigned short maj; + unsigned short min; /* We only show online cpus: disable preempt (overzealous, I * knew) to prevent cpu going down. */ @@ -307,19 +307,28 @@ static int show_cpuinfo(struct seq_file *m, void *v) #endif preempt_enable(); + + /* If this is the last cpu, print the summary */ + if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) + show_cpuinfo_summary(m); + return 0; } static void *c_start(struct seq_file *m, loff_t *pos) { - unsigned long i = *pos; - - return i <= NR_CPUS ? 
(void *)(i + 1) : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = cpumask_first(cpu_online_mask); + else + *pos = cpumask_next(*pos - 1, cpu_online_mask); + if ((*pos) < nr_cpu_ids) + return (void *)(unsigned long)(*pos + 1); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + (*pos)++; return c_start(m, pos); } @@ -385,14 +394,14 @@ static void __init cpu_init_thread_core_maps(int tpc) /** * setup_cpu_maps - initialize the following cpu maps: - * cpu_possible_map - * cpu_present_map + * cpu_possible_mask + * cpu_present_mask * * Having the possible map set up early allows us to restrict allocations * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. * * We do not initialize the online map here; cpus set their own bits in - * cpu_online_map as they come up. + * cpu_online_mask as they come up. * * This function is valid only for Open Firmware systems. finish_device_tree * must be called before using this. 
@@ -493,6 +502,8 @@ void __init smp_setup_cpu_maps(void) * here will have to be reworked */ cpu_init_thread_core_maps(nthreads); + + free_unused_pacas(); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index b152de3e64d4..a10ffc85ada7 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -16,7 +16,7 @@ #include <linux/root_dev.h> #include <linux/cpu.h> #include <linux/console.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/io.h> #include <asm/prom.h> @@ -39,7 +39,6 @@ #include <asm/serial.h> #include <asm/udbg.h> #include <asm/mmu_context.h> -#include <asm/swiotlb.h> #include "setup.h" @@ -242,23 +241,19 @@ int __init ppc_init(void) arch_initcall(ppc_init); -#ifdef CONFIG_IRQSTACKS static void __init irqstack_early_init(void) { unsigned int i; /* interrupt stacks must be in lowmem, we get that for free on ppc32 - * as the lmb is limited to lowmem by LMB_REAL_LIMIT */ + * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); hardirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); } } -#else -#define irqstack_early_init() -#endif #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) static void __init exc_lvl_early_init(void) @@ -266,15 +261,15 @@ static void __init exc_lvl_early_init(void) unsigned int i; /* interrupt stacks must be in lowmem, we get that for free on ppc32 - * as the lmb is limited to lowmem by LMB_REAL_LIMIT */ + * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ for_each_possible_cpu(i) { critirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); #ifdef CONFIG_BOOKE dbgirq_ctx[i] = (struct thread_info *) - 
__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); mcheckirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); #endif } } @@ -343,11 +338,6 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); -#ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) - swiotlb_init(1); -#endif - paging_init(); /* Initialize the MMU context management stuff */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6568406b2a30..d135f93cb0f6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -34,7 +34,7 @@ #include <linux/bootmem.h> #include <linux/pci.h> #include <linux/lockdep.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -61,7 +61,6 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/swiotlb.h> #include <asm/mmu_context.h> #include "setup.h" @@ -144,9 +143,9 @@ early_param("smt-enabled", early_smt_enabled); #endif /* CONFIG_SMP */ /* Put the paca pointer into r13 and SPRG_PACA */ -void __init setup_paca(int cpu) +static void __init setup_paca(struct paca_struct *new_paca) { - local_paca = &paca[cpu]; + local_paca = new_paca; mtspr(SPRN_SPRG_PACA, local_paca); #ifdef CONFIG_PPC_BOOK3E mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); @@ -159,7 +158,7 @@ void __init setup_paca(int cpu) * the CPU that ignores the top 2 bits of the address in real * mode so we can access kernel globals normally provided we * only toy with things in the RMO region. From here, we do - * some early parsing of the device-tree to setup out LMB + * some early parsing of the device-tree to setup out MEMBLOCK * data structures, and allocate & initialize the hash table * and segment tables so we can start running with translation * enabled. 
@@ -176,14 +175,12 @@ void __init early_setup(unsigned long dt_ptr) { /* -------- printk is _NOT_ safe to use here ! ------- */ - /* Fill in any unititialised pacas */ - initialise_pacas(); - /* Identify CPU type */ identify_cpu(0, mfspr(SPRN_PVR)); /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ - setup_paca(0); + initialise_paca(&boot_paca, 0); + setup_paca(&boot_paca); /* Initialize lockdep early or else spinlocks will blow */ lockdep_init(); @@ -203,7 +200,7 @@ void __init early_setup(unsigned long dt_ptr) early_init_devtree(__va(dt_ptr)); /* Now we know the logical id of our boot cpu, setup the paca. */ - setup_paca(boot_cpuid); + setup_paca(&paca[boot_cpuid]); /* Fix up paca fields required for the boot cpu */ get_paca()->cpu_start = 1; @@ -407,7 +404,7 @@ void __init setup_system(void) printk("-----------------------------------------------------\n"); printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - printk("physicalMemorySize = 0x%llx\n", lmb_phys_mem_size()); + printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size()); if (ppc64_caches.dline_size != 0x80) printk("ppc64_caches.dcache_line_size = 0x%x\n", ppc64_caches.dline_size); @@ -427,9 +424,17 @@ void __init setup_system(void) DBG(" <- setup_system()\n"); } -#ifdef CONFIG_IRQSTACKS +static u64 slb0_limit(void) +{ + if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { + return 1UL << SID_SHIFT_1T; + } + return 1UL << SID_SHIFT; +} + static void __init irqstack_early_init(void) { + u64 limit = slb0_limit(); unsigned int i; /* @@ -438,16 +443,13 @@ static void __init irqstack_early_init(void) */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); + __va(memblock_alloc_base(THREAD_SIZE, + THREAD_SIZE, limit)); hardirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); + __va(memblock_alloc_base(THREAD_SIZE, + THREAD_SIZE, limit)); } } -#else -#define 
irqstack_early_init() -#endif #ifdef CONFIG_PPC_BOOK3E static void __init exc_lvl_early_init(void) @@ -456,11 +458,11 @@ static void __init exc_lvl_early_init(void) for_each_possible_cpu(i) { critirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); dbgirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); mcheckirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); } } #else @@ -473,7 +475,7 @@ static void __init exc_lvl_early_init(void) */ static void __init emergency_stack_init(void) { - unsigned long limit; + u64 limit; unsigned int i; /* @@ -485,11 +487,11 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(0x10000000ULL, lmb.rmo_size); + limit = min(slb0_limit(), memblock.rmo_size); for_each_possible_cpu(i) { unsigned long sp; - sp = lmb_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); + sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); sp += THREAD_SIZE; paca[i].emergency_sp = __va(sp); } @@ -543,11 +545,6 @@ void __init setup_arch(char **cmdline_p) if (ppc_md.setup_arch) ppc_md.setup_arch(); -#ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) - swiotlb_init(1); -#endif - paging_init(); /* Initialize the MMU context management stuff */ @@ -581,12 +578,6 @@ void ppc64_boot_msg(unsigned int src, const char *msg) printk("[boot]%04x %s\n", src, msg); } -void cpu_die(void) -{ - if (ppc_md.cpu_die) - ppc_md.cpu_die(); -} - #ifdef CONFIG_SMP #define PCPU_DYN_SIZE () diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 00b5078da9a3..a0afb555a7c9 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -140,17 +140,15 @@ static int do_signal_pending(sigset_t *oldset, struct 
pt_regs *regs) return 0; /* no signals delivered */ } +#ifndef CONFIG_PPC_ADV_DEBUG_REGS /* * Reenable the DABR before delivering the signal to * user space. The DABR will have been cleared if it * triggered inside the kernel. */ - if (current->thread.dabr) { + if (current->thread.dabr) set_dabr(current->thread.dabr); -#if defined(CONFIG_BOOKE) - mtspr(SPRN_DBCR0, current->thread.dbcr0); #endif - } if (is32) { if (ka.sa.sa_flags & SA_SIGINFO) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d670429a1608..266610119f66 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1078,7 +1078,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, int i; unsigned char tmp; unsigned long new_msr = regs->msr; -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS unsigned long new_dbcr0 = current->thread.dbcr0; #endif @@ -1087,13 +1087,17 @@ int sys_debug_setcontext(struct ucontext __user *ctx, return -EFAULT; switch (op.dbg_type) { case SIG_DBG_SINGLE_STEPPING: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS if (op.dbg_value) { new_msr |= MSR_DE; new_dbcr0 |= (DBCR0_IDM | DBCR0_IC); } else { - new_msr &= ~MSR_DE; - new_dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); + new_dbcr0 &= ~DBCR0_IC; + if (!DBCR_ACTIVE_EVENTS(new_dbcr0, + current->thread.dbcr1)) { + new_msr &= ~MSR_DE; + new_dbcr0 &= ~DBCR0_IDM; + } } #else if (op.dbg_value) @@ -1103,7 +1107,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, #endif break; case SIG_DBG_BRANCH_TRACING: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS return -EINVAL; #else if (op.dbg_value) @@ -1124,7 +1128,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx, failure is a problem, anyway, and it's very unlikely unless the user is really doing something wrong. 
*/ regs->msr = new_msr; -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS current->thread.dbcr0 = new_dbcr0; #endif diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c index a5e54526403d..03e45c4a9ef1 100644 --- a/arch/powerpc/kernel/smp-tbsync.c +++ b/arch/powerpc/kernel/smp-tbsync.c @@ -10,6 +10,7 @@ #include <linux/smp.h> #include <linux/unistd.h> #include <linux/init.h> +#include <linux/slab.h> #include <asm/atomic.h> #include <asm/smp.h> #include <asm/time.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index c2ee14498077..5c196d1086d9 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,8 +59,8 @@ struct thread_info *secondary_ti; -DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; -DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; +DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); @@ -271,6 +271,16 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_store_cpu_info(boot_cpuid); cpu_callin_map[boot_cpuid] = 1; + for_each_possible_cpu(cpu) { + zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + } + + cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); + cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); + if (smp_ops) if (smp_ops->probe) max_cpus = smp_ops->probe(); @@ -289,10 +299,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) void __devinit smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); - - set_cpu_online(boot_cpuid, true); - cpu_set(boot_cpuid, per_cpu(cpu_sibling_map, boot_cpuid)); - cpu_set(boot_cpuid, per_cpu(cpu_core_map, boot_cpuid)); #ifdef CONFIG_PPC64 paca[boot_cpuid].__current = current; #endif @@ -313,7 +319,7 @@ int 
generic_cpu_disable(void) set_cpu_online(cpu, false); #ifdef CONFIG_PPC64 vdso_data->processorCount--; - fixup_irqs(cpu_online_map); + fixup_irqs(cpu_online_mask); #endif return 0; } @@ -333,7 +339,7 @@ int generic_cpu_enable(unsigned int cpu) cpu_relax(); #ifdef CONFIG_PPC64 - fixup_irqs(cpu_online_map); + fixup_irqs(cpu_online_mask); /* counter the irq disable in fixup_irqs */ local_irq_enable(); #endif @@ -462,7 +468,7 @@ out: return id; } -/* Must be called when no change can occur to cpu_present_map, +/* Must be called when no change can occur to cpu_present_mask, * i.e. during cpu online or offline. */ static struct device_node *cpu_to_l2cache(int cpu) @@ -495,6 +501,14 @@ int __devinit start_secondary(void *unused) current->active_mm = &init_mm; smp_store_cpu_info(cpu); + +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + /* Clear any pending timer interrupts */ + mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); + + /* Enable decrementer interrupt */ + mtspr(SPRN_TCR, TCR_DIE); +#endif set_dec(tb_ticks_per_jiffy); preempt_disable(); cpu_callin_map[cpu] = 1; @@ -517,15 +531,15 @@ int __devinit start_secondary(void *unused) for (i = 0; i < threads_per_core; i++) { if (cpu_is_offline(base + i)) continue; - cpu_set(cpu, per_cpu(cpu_sibling_map, base + i)); - cpu_set(base + i, per_cpu(cpu_sibling_map, cpu)); + cpumask_set_cpu(cpu, cpu_sibling_mask(base + i)); + cpumask_set_cpu(base + i, cpu_sibling_mask(cpu)); /* cpu_core_map should be a superset of * cpu_sibling_map even if we don't have cache * information, so update the former here, too. 
*/ - cpu_set(cpu, per_cpu(cpu_core_map, base +i)); - cpu_set(base + i, per_cpu(cpu_core_map, cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(base + i)); + cpumask_set_cpu(base + i, cpu_core_mask(cpu)); } l2_cache = cpu_to_l2cache(cpu); for_each_online_cpu(i) { @@ -533,8 +547,8 @@ int __devinit start_secondary(void *unused) if (!np) continue; if (np == l2_cache) { - cpu_set(cpu, per_cpu(cpu_core_map, i)); - cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(i)); + cpumask_set_cpu(i, cpu_core_mask(cpu)); } of_node_put(np); } @@ -554,19 +568,22 @@ int setup_profiling_timer(unsigned int multiplier) void __init smp_cpus_done(unsigned int max_cpus) { - cpumask_t old_mask; + cpumask_var_t old_mask; /* We want the setup_cpu() here to be called from CPU 0, but our * init thread may have been "borrowed" by another CPU in the meantime * se we pin us down to CPU 0 for a short while */ - old_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid)); + alloc_cpumask_var(&old_mask, GFP_NOWAIT); + cpumask_copy(old_mask, &current->cpus_allowed); + set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) smp_ops->setup_cpu(boot_cpuid); - set_cpus_allowed(current, old_mask); + set_cpus_allowed_ptr(current, old_mask); + + free_cpumask_var(old_mask); snapshot_timebases(); @@ -591,10 +608,10 @@ int __cpu_disable(void) /* Update sibling maps */ base = cpu_first_thread_in_core(cpu); for (i = 0; i < threads_per_core; i++) { - cpu_clear(cpu, per_cpu(cpu_sibling_map, base + i)); - cpu_clear(base + i, per_cpu(cpu_sibling_map, cpu)); - cpu_clear(cpu, per_cpu(cpu_core_map, base +i)); - cpu_clear(base + i, per_cpu(cpu_core_map, cpu)); + cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i)); + cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu)); + cpumask_clear_cpu(cpu, cpu_core_mask(base + i)); + cpumask_clear_cpu(base + i, cpu_core_mask(cpu)); } l2_cache = cpu_to_l2cache(cpu); @@ -603,8 +620,8 @@ int __cpu_disable(void) if 
(!np) continue; if (np == l2_cache) { - cpu_clear(cpu, per_cpu(cpu_core_map, i)); - cpu_clear(i, per_cpu(cpu_core_map, cpu)); + cpumask_clear_cpu(cpu, cpu_core_mask(i)); + cpumask_clear_cpu(i, cpu_core_mask(cpu)); } of_node_put(np); } @@ -631,4 +648,10 @@ void cpu_hotplug_driver_unlock() { mutex_unlock(&powerpc_cpu_hotplug_driver_mutex); } + +void cpu_die(void) +{ + if (ppc_md.cpu_die) + ppc_md.cpu_die(); +} #endif diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c index 23c8c5e7dc4d..af0e8290b4fc 100644 --- a/arch/powerpc/kernel/softemu8xx.c +++ b/arch/powerpc/kernel/softemu8xx.c @@ -21,7 +21,6 @@ #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/interrupt.h> diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S new file mode 100644 index 000000000000..11a39307dd71 --- /dev/null +++ b/arch/powerpc/kernel/swsusp_booke.S @@ -0,0 +1,193 @@ +/* + * Based on swsusp_32.S, modified for FSL BookE by + * Anton Vorontsov <avorontsov@ru.mvista.com> + * Copyright (c) 2009-2010 MontaVista Software, LLC. + */ + +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/mmu.h> + +/* + * Structure for storing CPU registers on the save area. 
+ */ +#define SL_SP 0 +#define SL_PC 4 +#define SL_MSR 8 +#define SL_TCR 0xc +#define SL_SPRG0 0x10 +#define SL_SPRG1 0x14 +#define SL_SPRG2 0x18 +#define SL_SPRG3 0x1c +#define SL_SPRG4 0x20 +#define SL_SPRG5 0x24 +#define SL_SPRG6 0x28 +#define SL_SPRG7 0x2c +#define SL_TBU 0x30 +#define SL_TBL 0x34 +#define SL_R2 0x38 +#define SL_CR 0x3c +#define SL_LR 0x40 +#define SL_R12 0x44 /* r12 to r31 */ +#define SL_SIZE (SL_R12 + 80) + + .section .data + .align 5 + +_GLOBAL(swsusp_save_area) + .space SL_SIZE + + + .section .text + .align 5 + +_GLOBAL(swsusp_arch_suspend) + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + + mflr r0 + stw r0,SL_LR(r11) + mfcr r0 + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) + + /* Save MSR & TCR */ + mfmsr r4 + stw r4,SL_MSR(r11) + mfspr r4,SPRN_TCR + stw r4,SL_TCR(r11) + + /* Get a stable timebase and save it */ +1: mfspr r4,SPRN_TBRU + stw r4,SL_TBU(r11) + mfspr r5,SPRN_TBRL + stw r5,SL_TBL(r11) + mfspr r3,SPRN_TBRU + cmpw r3,r4 + bne 1b + + /* Save SPRGs */ + mfsprg r4,0 + stw r4,SL_SPRG0(r11) + mfsprg r4,1 + stw r4,SL_SPRG1(r11) + mfsprg r4,2 + stw r4,SL_SPRG2(r11) + mfsprg r4,3 + stw r4,SL_SPRG3(r11) + mfsprg r4,4 + stw r4,SL_SPRG4(r11) + mfsprg r4,5 + stw r4,SL_SPRG5(r11) + mfsprg r4,6 + stw r4,SL_SPRG6(r11) + mfsprg r4,7 + stw r4,SL_SPRG7(r11) + + /* Call the low level suspend stuff (we should probably have made + * a stackframe... + */ + bl swsusp_save + + /* Restore LR from the save area */ + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + lwz r0,SL_LR(r11) + mtlr r0 + + blr + +_GLOBAL(swsusp_arch_resume) + sync + + /* Load ptr the list of pages to copy in r3 */ + lis r11,(restore_pblist)@h + ori r11,r11,restore_pblist@l + lwz r3,0(r11) + + /* Copy the pages. 
This is a very basic implementation, to + * be replaced by something more cache efficient */ +1: + li r0,256 + mtctr r0 + lwz r5,pbe_address(r3) /* source */ + lwz r6,pbe_orig_address(r3) /* destination */ +2: + lwz r8,0(r5) + lwz r9,4(r5) + lwz r10,8(r5) + lwz r11,12(r5) + addi r5,r5,16 + stw r8,0(r6) + stw r9,4(r6) + stw r10,8(r6) + stw r11,12(r6) + addi r6,r6,16 + bdnz 2b + lwz r3,pbe_next(r3) + cmpwi 0,r3,0 + bne 1b + + bl flush_dcache_L1 + bl flush_instruction_cache + + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + + lwz r4,SL_SPRG0(r11) + mtsprg 0,r4 + lwz r4,SL_SPRG1(r11) + mtsprg 1,r4 + lwz r4,SL_SPRG2(r11) + mtsprg 2,r4 + lwz r4,SL_SPRG3(r11) + mtsprg 3,r4 + lwz r4,SL_SPRG4(r11) + mtsprg 4,r4 + lwz r4,SL_SPRG5(r11) + mtsprg 5,r4 + lwz r4,SL_SPRG6(r11) + mtsprg 6,r4 + lwz r4,SL_SPRG7(r11) + mtsprg 7,r4 + + /* restore the MSR */ + lwz r3,SL_MSR(r11) + mtmsr r3 + + /* Restore TB */ + li r3,0 + mtspr SPRN_TBWL,r3 + lwz r3,SL_TBU(r11) + lwz r4,SL_TBL(r11) + mtspr SPRN_TBWU,r3 + mtspr SPRN_TBWL,r4 + + /* Restore TCR and clear any pending bits in TSR. 
*/ + lwz r4,SL_TCR(r11) + mtspr SPRN_TCR,r4 + lis r4, (TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS)@h + mtspr SPRN_TSR,r4 + + /* Kick decrementer */ + li r0,1 + mtdec r0 + + /* Restore the callee-saved registers and return */ + lwz r0,SL_CR(r11) + mtcr r0 + lwz r2,SL_R2(r11) + lmw r12,SL_R12(r11) + lwz r1,SL_SP(r11) + lwz r0,SL_LR(r11) + mtlr r0 + + li r3,0 + blr diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index c5a4732bcc48..19471a1cef1a 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -41,6 +41,7 @@ #include <linux/ptrace.h> #include <linux/elf.h> #include <linux/ipc.h> +#include <linux/slab.h> #include <asm/ptrace.h> #include <asm/types.h> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 3370e62e43d4..f2496f2faecc 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -42,100 +42,6 @@ #include <asm/time.h> #include <asm/unistd.h> -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ -int sys_ipc(uint call, int first, unsigned long second, long third, - void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - ret = -ENOSYS; - switch (call) { - case SEMOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - break; - case SEMTIMEDOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) fifth); - break; - case SEMGET: - ret = sys_semget (first, (int)second, third); - break; - case SEMCTL: { - union semun fourth; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = get_user(fourth.__pad, (void __user * __user *)ptr))) - break; - ret = sys_semctl(first, (int)second, third, fourth); - break; - } - case MSGSND: - ret = sys_msgsnd(first, (struct msgbuf __user *)ptr, - (size_t)second, third); - break; - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp)) ? 
-EFAULT : 0)) - break; - ret = sys_msgrcv(first, tmp.msgp, (size_t) second, - tmp.msgtyp, third); - break; - } - default: - ret = sys_msgrcv (first, (struct msgbuf __user *) ptr, - (size_t)second, fifth, third); - break; - } - break; - case MSGGET: - ret = sys_msgget((key_t)first, (int)second); - break; - case MSGCTL: - ret = sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - break; - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, (int)second, &raddr); - if (ret) - break; - ret = put_user(raddr, (ulong __user *) third); - break; - } - case SHMDT: - ret = sys_shmdt((char __user *)ptr); - break; - case SHMGET: - ret = sys_shmget(first, (size_t)second, third); - break; - case SHMCTL: - ret = sys_shmctl(first, (int)second, - (struct shmid_ds __user *)ptr); - break; - } - - return ret; -} - static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) @@ -210,76 +116,6 @@ long ppc64_personality(unsigned long personality) } #endif -#ifdef CONFIG_PPC64 -#define OVERRIDE_MACHINE (personality(current->personality) == PER_LINUX32) -#else -#define OVERRIDE_MACHINE 0 -#endif - -static inline int override_machine(char __user *mach) -{ - if (OVERRIDE_MACHINE) { - /* change ppc64 to ppc */ - if (__put_user(0, mach+3) || __put_user(0, mach+4)) - return -EFAULT; - } - return 0; -} - -long ppc_newuname(struct new_utsname __user * name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_uname(struct old_utsname __user *name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_olduname(struct oldold_utsname __user *name) -{ - int 
error; - - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; - - down_read(&uts_sem); - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= override_machine(name->machine); - up_read(&uts_sem); - - return error? -EFAULT: 0; -} - long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low) { diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index e235e52dc4fe..c0d8c2006bf4 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -35,7 +35,7 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); #ifdef CONFIG_PPC64 /* Time in microseconds we delay before sleeping in the idle loop */ -DEFINE_PER_CPU(unsigned long, smt_snooze_delay) = { 100 }; +DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; static ssize_t store_smt_snooze_delay(struct sys_device *dev, struct sysdev_attribute *attr, @@ -44,9 +44,9 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, { struct cpu *cpu = container_of(dev, struct cpu, sysdev); ssize_t ret; - unsigned long snooze; + long snooze; - ret = sscanf(buf, "%lu", &snooze); + ret = sscanf(buf, "%ld", &snooze); if (ret != 1) return -EINVAL; @@ -61,53 +61,23 @@ static ssize_t show_smt_snooze_delay(struct sys_device *dev, { struct cpu *cpu = container_of(dev, struct cpu, sysdev); - return sprintf(buf, "%lu\n", per_cpu(smt_snooze_delay, 
cpu->sysdev.id)); + return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->sysdev.id)); } static SYSDEV_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, store_smt_snooze_delay); -/* Only parse OF options if the matching cmdline option was not specified */ -static int smt_snooze_cmdline; - -static int __init smt_setup(void) -{ - struct device_node *options; - const unsigned int *val; - unsigned int cpu; - - if (!cpu_has_feature(CPU_FTR_SMT)) - return -ENODEV; - - options = of_find_node_by_path("/options"); - if (!options) - return -ENODEV; - - val = of_get_property(options, "ibm,smt-snooze-delay", NULL); - if (!smt_snooze_cmdline && val) { - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = *val; - } - - of_node_put(options); - return 0; -} -__initcall(smt_setup); - static int __init setup_smt_snooze_delay(char *str) { unsigned int cpu; - int snooze; + long snooze; if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - smt_snooze_cmdline = 1; - - if (get_option(&str, &snooze)) { - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - } + snooze = simple_strtol(str, NULL, 10); + for_each_possible_cpu(cpu) + per_cpu(smt_snooze_delay, cpu) = snooze; return 1; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 6c6093d67f30..0441bbdadbd1 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -265,8 +265,8 @@ void account_system_vtime(struct task_struct *tsk) account_system_time(tsk, 0, delta, deltascaled); else account_idle_time(delta); - per_cpu(cputime_last_delta, smp_processor_id()) = delta; - per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; + __get_cpu_var(cputime_last_delta) = delta; + __get_cpu_var(cputime_scaled_last_delta) = deltascaled; local_irq_restore(flags); } EXPORT_SYMBOL_GPL(account_system_vtime); @@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) 
-DEFINE_PER_CPU(u8, perf_event_pending); +#ifdef CONFIG_PERF_EVENTS -void set_perf_event_pending(void) +/* + * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... + */ +#ifdef CONFIG_PPC64 +static inline unsigned long test_perf_event_pending(void) { - get_cpu_var(perf_event_pending) = 1; - set_dec(1); - put_cpu_var(perf_event_pending); + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, perf_event_pending))); + return x; } +static inline void set_perf_event_pending_flag(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (1), + "i" (offsetof(struct paca_struct, perf_event_pending))); +} + +static inline void clear_perf_event_pending(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (0), + "i" (offsetof(struct paca_struct, perf_event_pending))); +} + +#else /* 32-bit */ + +DEFINE_PER_CPU(u8, perf_event_pending); + +#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 #define test_perf_event_pending() __get_cpu_var(perf_event_pending) #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 -#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ +#endif /* 32 vs 64 bit */ + +void set_perf_event_pending(void) +{ + preempt_disable(); + set_perf_event_pending_flag(); + set_dec(1); + preempt_enable(); +} + +#else /* CONFIG_PERF_EVENTS */ #define test_perf_event_pending() 0 #define clear_perf_event_pending() -#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ +#endif /* CONFIG_PERF_EVENTS */ /* * For iSeries shared processors, we have to let the hypervisor @@ -575,15 +610,13 @@ void timer_interrupt(struct pt_regs * regs) trace_timer_interrupt_entry(regs); + __get_cpu_var(irq_stat).timer_irqs++; + /* Ensure a positive value is written to the decrementer, or else * some CPUs will continuue to take decrementer exceptions */ set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); - } if 
(atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif @@ -602,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs) calculate_steal_time(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); + } + #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES)) get_lppaca()->int_dword.fields.decr_int = 0; @@ -935,8 +973,8 @@ static void register_decrementer_clockevent(int cpu) *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); - printk(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", - dec->name, dec->mult, dec->shift, cpu); + printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", + dec->name, dec->mult, dec->shift, cpu); clockevents_register_device(dec); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d069ff8a7e03..25fc33984c2b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1,5 +1,6 @@ /* * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Copyright 2007-2010 Freescale Semiconductor, Inc. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -21,7 +22,6 @@ #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/interrupt.h> #include <linux/init.h> @@ -60,13 +60,13 @@ #endif #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) -int (*__debugger)(struct pt_regs *regs); -int (*__debugger_ipi)(struct pt_regs *regs); -int (*__debugger_bpt)(struct pt_regs *regs); -int (*__debugger_sstep)(struct pt_regs *regs); -int (*__debugger_iabr_match)(struct pt_regs *regs); -int (*__debugger_dabr_match)(struct pt_regs *regs); -int (*__debugger_fault_handler)(struct pt_regs *regs); +int (*__debugger)(struct pt_regs *regs) __read_mostly; +int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly; +int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly; +int (*__debugger_sstep)(struct pt_regs *regs) __read_mostly; +int (*__debugger_iabr_match)(struct pt_regs *regs) __read_mostly; +int (*__debugger_dabr_match)(struct pt_regs *regs) __read_mostly; +int (*__debugger_fault_handler)(struct pt_regs *regs) __read_mostly; EXPORT_SYMBOL(__debugger); EXPORT_SYMBOL(__debugger_ipi); @@ -102,11 +102,11 @@ static inline void pmac_backlight_unblank(void) { } int die(const char *str, struct pt_regs *regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED(die.lock), .lock_owner = -1, .lock_owner_depth = 0 }; @@ -120,7 +120,7 @@ int die(const char *str, struct pt_regs *regs, long err) if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + raw_spin_lock_irqsave(&die.lock, flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); @@ -146,6 +146,11 @@ int die(const char *str, struct pt_regs *regs, 
long err) #endif printk("%s\n", ppc_md.name ? ppc_md.name : ""); + sysfs_printk_last_file(); + if (notify_die(DIE_OOPS, str, regs, err, 255, + SIGSEGV) == NOTIFY_STOP) + return 1; + print_modules(); show_regs(regs); } else { @@ -155,7 +160,7 @@ int die(const char *str, struct pt_regs *regs, long err) bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + raw_spin_unlock_irqrestore(&die.lock, flags); if (kexec_should_crash(current) || kexec_sr_activated(smp_processor_id())) @@ -294,14 +299,14 @@ static inline int check_io_access(struct pt_regs *regs) return 0; } -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS /* On 4xx, the reason for the machine check or program exception is in the ESR. */ #define get_reason(regs) ((regs)->dsisr) #ifndef CONFIG_FSL_BOOKE #define get_mc_reason(regs) ((regs)->dsisr) #else -#define get_mc_reason(regs) (mfspr(SPRN_MCSR) & MCSR_MASK) +#define get_mc_reason(regs) (mfspr(SPRN_MCSR)) #endif #define REASON_FP ESR_FP #define REASON_ILLEGAL (ESR_PIL | ESR_PUO) @@ -376,7 +381,132 @@ int machine_check_440A(struct pt_regs *regs) } return 0; } + +int machine_check_47x(struct pt_regs *regs) +{ + unsigned long reason = get_mc_reason(regs); + u32 mcsr; + + printk(KERN_ERR "Machine check in kernel mode.\n"); + if (reason & ESR_IMCP) { + printk(KERN_ERR + "Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + return 0; + } + mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk(KERN_ERR "Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk(KERN_ERR "Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk(KERN_ERR "Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk(KERN_ERR "TLB Parity Error\n"); + if (mcsr & MCSR_ICP) { + flush_instruction_cache(); + printk(KERN_ERR "I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk(KERN_ERR "D-Cache Search Parity Error\n"); + if (mcsr & PPC47x_MCSR_GPR) + 
printk(KERN_ERR "GPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_FPR) + printk(KERN_ERR "FPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_IPR) + printk(KERN_ERR "Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + + return 0; +} #elif defined(CONFIG_E500) +int machine_check_e500mc(struct pt_regs *regs) +{ + unsigned long mcsr = mfspr(SPRN_MCSR); + unsigned long reason = mcsr; + int recoverable = 1; + + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + + if (reason & MCSR_ICPERR) { + printk("Instruction Cache Parity Error\n"); + + /* + * This is recoverable by invalidating the i-cache. + */ + mtspr(SPRN_L1CSR1, mfspr(SPRN_L1CSR1) | L1CSR1_ICFI); + while (mfspr(SPRN_L1CSR1) & L1CSR1_ICFI) + ; + + /* + * This will generally be accompanied by an instruction + * fetch error report -- only treat MCSR_IF as fatal + * if it wasn't due to an L1 parity error. 
+ */ + reason &= ~MCSR_IF; + } + + if (reason & MCSR_DCPERR_MC) { + printk("Data Cache Parity Error\n"); + recoverable = 0; + } + + if (reason & MCSR_L2MMU_MHIT) { + printk("Hit on multiple TLB entries\n"); + recoverable = 0; + } + + if (reason & MCSR_NMI) + printk("Non-maskable interrupt\n"); + + if (reason & MCSR_IF) { + printk("Instruction Fetch Error Report\n"); + recoverable = 0; + } + + if (reason & MCSR_LD) { + printk("Load Error Report\n"); + recoverable = 0; + } + + if (reason & MCSR_ST) { + printk("Store Error Report\n"); + recoverable = 0; + } + + if (reason & MCSR_LDG) { + printk("Guarded Load Error Report\n"); + recoverable = 0; + } + + if (reason & MCSR_TLBSYNC) + printk("Simultaneous tlbsync operations\n"); + + if (reason & MCSR_BSL2_ERR) { + printk("Level 2 Cache Error\n"); + recoverable = 0; + } + + if (reason & MCSR_MAV) { + u64 addr; + + addr = mfspr(SPRN_MCAR); + addr |= (u64)mfspr(SPRN_MCARU) << 32; + + printk("Machine Check %s Address: %#llx\n", + reason & MCSR_MEA ? "Effective" : "Physical", addr); + } + + mtspr(SPRN_MCSR, mcsr); + return mfspr(SPRN_MCSR) == 0 && recoverable; +} + int machine_check_e500(struct pt_regs *regs) { unsigned long reason = get_mc_reason(regs); @@ -478,6 +608,8 @@ void machine_check_exception(struct pt_regs *regs) { int recover = 0; + __get_cpu_var(irq_stat).mce_exceptions++; + /* See if any machine dependent calls. In theory, we would want * to call the CPU first, and call the ppc_md. one if the CPU * one returns a positive number. 
However there is existing code @@ -809,12 +941,15 @@ void __kprobes program_check_exception(struct pt_regs *regs) return; } if (reason & REASON_TRAP) { + /* Debugger is first in line to stop recursive faults in + * rcu_lock, notify_die, or atomic_notifier_call_chain */ + if (debugger_bpt(regs)) + return; + /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) return; - if (debugger_bpt(regs)) - return; if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) { @@ -960,6 +1095,8 @@ void vsx_unavailable_exception(struct pt_regs *regs) void performance_monitor_exception(struct pt_regs *regs) { + __get_cpu_var(irq_stat).pmu_irqs++; + perf_irq(regs); } @@ -1024,10 +1161,69 @@ void SoftwareEmulation(struct pt_regs *regs) } #endif /* CONFIG_8xx */ -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_PPC_ADV_DEBUG_REGS +static void handle_debug(struct pt_regs *regs, unsigned long debug_status) +{ + int changed = 0; + /* + * Determine the cause of the debug event, clear the + * event flags and send a trap to the handler. 
Torez + */ + if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { + dbcr_dac(current) &= ~(DBCR_DAC1R | DBCR_DAC1W); +#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE + current->thread.dbcr2 &= ~DBCR2_DAC12MODE; +#endif + do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT, + 5); + changed |= 0x01; + } else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) { + dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W); + do_send_trap(regs, mfspr(SPRN_DAC2), debug_status, TRAP_HWBKPT, + 6); + changed |= 0x01; + } else if (debug_status & DBSR_IAC1) { + current->thread.dbcr0 &= ~DBCR0_IAC1; + dbcr_iac_range(current) &= ~DBCR_IAC12MODE; + do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT, + 1); + changed |= 0x01; + } else if (debug_status & DBSR_IAC2) { + current->thread.dbcr0 &= ~DBCR0_IAC2; + do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT, + 2); + changed |= 0x01; + } else if (debug_status & DBSR_IAC3) { + current->thread.dbcr0 &= ~DBCR0_IAC3; + dbcr_iac_range(current) &= ~DBCR_IAC34MODE; + do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT, + 3); + changed |= 0x01; + } else if (debug_status & DBSR_IAC4) { + current->thread.dbcr0 &= ~DBCR0_IAC4; + do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT, + 4); + changed |= 0x01; + } + /* + * At the point this routine was called, the MSR(DE) was turned off. + * Check all other debug flags and see if that bit needs to be turned + * back on or not. + */ + if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, current->thread.dbcr1)) + regs->msr |= MSR_DE; + else + /* Make sure the IDM flag is off */ + current->thread.dbcr0 &= ~DBCR0_IDM; + + if (changed & 0x01) + mtspr(SPRN_DBCR0, current->thread.dbcr0); +} void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) { + current->thread.dbsr = debug_status; + /* Hack alert: On BookE, Branch Taken stops on the branch itself, while * on server, it stops on the target of the branch. 
In order to simulate * the server behaviour, we thus restart right away with a single step @@ -1071,29 +1267,23 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) if (debugger_sstep(regs)) return; - if (user_mode(regs)) - current->thread.dbcr0 &= ~(DBCR0_IC); - - _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); - } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) { - regs->msr &= ~MSR_DE; - if (user_mode(regs)) { - current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | - DBCR0_IDM); - } else { - /* Disable DAC interupts */ - mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | - DBSR_DAC1W | DBCR0_IDM)); - - /* Clear the DAC event */ - mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W)); + current->thread.dbcr0 &= ~DBCR0_IC; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + if (DBCR_ACTIVE_EVENTS(current->thread.dbcr0, + current->thread.dbcr1)) + regs->msr |= MSR_DE; + else + /* Make sure the IDM bit is off */ + current->thread.dbcr0 &= ~DBCR0_IDM; +#endif } - /* Setup and send the trap to the handler */ - do_dabr(regs, mfspr(SPRN_DAC1), debug_status); - } + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); + } else + handle_debug(regs, debug_status); } -#endif /* CONFIG_4xx || CONFIG_BOOKE */ +#endif /* CONFIG_PPC_ADV_DEBUG_REGS */ #if !defined(CONFIG_TAU_INT) void TAUException(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d84d19224a95..13002fe206e7 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -22,7 +22,7 @@ #include <linux/elf.h> #include <linux/security.h> #include <linux/bootmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -734,7 +734,7 @@ static int __init vdso_init(void) vdso_data->platform = machine_is(iseries) ? 
0x200 : 0x100; if (firmware_has_feature(FW_FEATURE_LPAR)) vdso_data->platform |= 1; - vdso_data->physicalMemorySize = lmb_phys_mem_size(); + vdso_data->physicalMemorySize = memblock_phys_mem_size(); vdso_data->dcache_size = ppc64_caches.dsize; vdso_data->dcache_line_size = ppc64_caches.dline_size; vdso_data->icache_size = ppc64_caches.isize; diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 77f64218abf3..00b9436f7652 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/device.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/console.h> #include <linux/module.h> #include <linux/mm.h> @@ -644,8 +645,10 @@ void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) found = 1; break; } - if (!found) + if (!found) { + spin_unlock_irqrestore(&vio_cmo.lock, flags); return; + } /* Increase/decrease in desired device entitlement */ if (desired >= viodev->cmo.desired) { @@ -704,7 +707,7 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) * Check to see that device has a DMA window and configure * entitlement for the device. 
*/ - if (of_get_property(viodev->dev.archdata.of_node, + if (of_get_property(viodev->dev.of_node, "ibm,my-dma-window", NULL)) { /* Check that the driver is CMO enabled and get desired DMA */ if (!viodrv->get_desired_dma) { @@ -957,9 +960,12 @@ viodev_cmo_rd_attr(allocated); static ssize_t name_show(struct device *, struct device_attribute *, char *); static ssize_t devspec_show(struct device *, struct device_attribute *, char *); +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf); static struct device_attribute vio_cmo_dev_attrs[] = { __ATTR_RO(name), __ATTR_RO(devspec), + __ATTR_RO(modalias), __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH, viodev_cmo_desired_show, viodev_cmo_desired_set), __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL), @@ -1048,7 +1054,7 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) if (firmware_has_feature(FW_FEATURE_ISERIES)) return vio_build_iommu_table_iseries(dev); - dma_window = of_get_property(dev->dev.archdata.of_node, + dma_window = of_get_property(dev->dev.of_node, "ibm,my-dma-window", NULL); if (!dma_window) return NULL; @@ -1057,7 +1063,7 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) if (tbl == NULL) return NULL; - of_parse_dma_window(dev->dev.archdata.of_node, dma_window, + of_parse_dma_window(dev->dev.of_node, dma_window, &tbl->it_index, &offset, &size); /* TCE table size - measured in tce entries */ @@ -1085,7 +1091,7 @@ static const struct vio_device_id *vio_match_device( { while (ids->type[0] != '\0') { if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) && - of_device_is_compatible(dev->dev.archdata.of_node, + of_device_is_compatible(dev->dev.of_node, ids->compat)) return ids; ids++; @@ -1178,7 +1184,7 @@ EXPORT_SYMBOL(vio_unregister_driver); static void __devinit vio_dev_release(struct device *dev) { /* XXX should free TCE table */ - of_node_put(dev->archdata.of_node); + of_node_put(dev->of_node); 
kfree(to_vio_dev(dev)); } @@ -1229,7 +1235,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) if (unit_address != NULL) viodev->unit_address = *unit_address; } - viodev->dev.archdata.of_node = of_node_get(of_node); + viodev->dev.of_node = of_node_get(of_node); if (firmware_has_feature(FW_FEATURE_CMO)) vio_cmo_set_dma_ops(viodev); @@ -1314,14 +1320,32 @@ static ssize_t name_show(struct device *dev, static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct device_node *of_node = dev->archdata.of_node; + struct device_node *of_node = dev->of_node; return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none"); } +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + struct device_node *dn; + const char *cp; + + dn = dev->of_node; + if (!dn) + return -ENODEV; + cp = of_get_property(dn, "compatible", NULL); + if (!cp) + return -ENODEV; + + return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp); +} + static struct device_attribute vio_dev_attrs[] = { __ATTR_RO(name), __ATTR_RO(devspec), + __ATTR_RO(modalias), __ATTR_NULL }; @@ -1346,7 +1370,7 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env) struct device_node *dn; const char *cp; - dn = dev->archdata.of_node; + dn = dev->of_node; if (!dn) return -ENODEV; cp = of_get_property(dn, "compatible", NULL); @@ -1364,6 +1388,7 @@ static struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, + .pm = GENERIC_SUBSYS_PM_OPS, }; /** @@ -1377,7 +1402,7 @@ static struct bus_type vio_bus_type = { */ const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length) { - return of_get_property(vdev->dev.archdata.of_node, which, length); + return of_get_property(vdev->dev.of_node, which, length); } EXPORT_SYMBOL(vio_get_attribute); |