diff options
Diffstat (limited to 'arch/powerpc/kernel')
33 files changed, 1208 insertions, 458 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9b6146056e48..9bf2be123093 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -54,6 +54,13 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif +KCSAN_SANITIZE_early_32.o := n +KCSAN_SANITIZE_early_64.o := n +KCSAN_SANITIZE_cputable.o := n +KCSAN_SANITIZE_btext.o := n +KCSAN_SANITIZE_paca.o := n +KCSAN_SANITIZE_setup_64.o := n + #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET # Remove stack protector to avoid triggering unneeded stack canary # checks due to randomize_kstack_offset. @@ -177,12 +184,15 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n KCOV_INSTRUMENT_prom_init.o := n +KCSAN_SANITIZE_prom_init.o := n UBSAN_SANITIZE_prom_init.o := n GCOV_PROFILE_kprobes.o := n KCOV_INSTRUMENT_kprobes.o := n +KCSAN_SANITIZE_kprobes.o := n UBSAN_SANITIZE_kprobes.o := n GCOV_PROFILE_kprobes-ftrace.o := n KCOV_INSTRUMENT_kprobes-ftrace.o := n +KCSAN_SANITIZE_kprobes-ftrace.o := n UBSAN_SANITIZE_kprobes-ftrace.o := n GCOV_PROFILE_syscall_64.o := n KCOV_INSTRUMENT_syscall_64.o := n diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f55c6fb34a3a..5712dd846263 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -27,7 +27,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) ppc_msgsync(); - if (should_hard_irq_enable()) + if (should_hard_irq_enable(regs)) do_hard_irq_enable(); kvmppc_clear_host_ipi(smp_processor_id()); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index f279295179bd..438568a472d0 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -1065,10 +1065,10 @@ recover_failed: eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ - eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_set_irq_state(pe, false); eeh_pe_report("error_detected(permanent failure)", pe, eeh_report_failure, NULL); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); /* Mark the PE to be removed permanently */ eeh_pe_state_mark(pe, EEH_PE_REMOVED); @@ -1185,10 +1185,10 @@ void eeh_handle_special_event(void) /* Notify all devices to be down */ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_pe_report( "error_detected(permanent failure)", pe, eeh_report_failure, NULL); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 69a912550577..033116e465d0 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -21,7 +21,13 @@ _GLOBAL(epapr_ev_idle) ori r4, r4,_TLF_NAPPING /* so when we take an exception */ PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */ +#ifdef CONFIG_BOOKE_OR_40x wrteei 1 +#else + mfmsr r4 + ori r4, r4, MSR_EE + mtmsr r4 +#endif idle_loop: LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE)) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 7558ba4eb864..1febb56ebaeb 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -160,12 +160,8 @@ __secondary_hold: std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0) sync - li r26,0 -#ifdef CONFIG_PPC_BOOK3E_64 - tovirt(r26,r26) -#endif /* All secondary cpus wait here until told to start. */ -100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26) +100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(0) cmpdi 0,r12,0 beq 100b @@ -475,8 +471,31 @@ SYM_FUNC_START_LOCAL(__mmu_off) rfid b . /* prevent speculative execution */ SYM_FUNC_END(__mmu_off) -#endif +SYM_FUNC_START_LOCAL(start_initialization_book3s) + mflr r25 + + /* Setup some critical 970 SPRs before switching MMU off */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 /* 970 */ + beq 1f + cmpwi r0,0x3c /* 970FX */ + beq 1f + cmpwi r0,0x44 /* 970MP */ + beq 1f + cmpwi r0,0x45 /* 970GX */ + bne 2f +1: bl __cpu_preinit_ppc970 +2: + + /* Switch off MMU if not already off */ + bl __mmu_off + + mtlr r25 + blr +SYM_FUNC_END(start_initialization_book3s) +#endif /* * Here is our main kernel entry point. We support currently 2 kind of entries @@ -523,26 +542,10 @@ __start_initialization_multiplatform: #ifdef CONFIG_PPC_BOOK3E_64 bl start_initialization_book3e - b __after_prom_start #else - /* Setup some critical 970 SPRs before switching MMU off */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 /* 970 */ - beq 1f - cmpwi r0,0x3c /* 970FX */ - beq 1f - cmpwi r0,0x44 /* 970MP */ - beq 1f - cmpwi r0,0x45 /* 970GX */ - bne 2f -1: bl __cpu_preinit_ppc970 -2: - - /* Switch off MMU if not already off */ - bl __mmu_off - b __after_prom_start + bl start_initialization_book3s #endif /* CONFIG_PPC_BOOK3E_64 */ + b __after_prom_start __REF __boot_from_prom: diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index d438ca74e96c..fdbee1093e2b 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -864,7 +864,7 @@ _GLOBAL(load_up_spe) * SPE unavailable trap from kernel - print a message, but let * the task use SPE in the kernel until it returns to user mode. */ -KernelSPE: +SYM_FUNC_START_LOCAL(KernelSPE) lwz r3,_MSR(r1) oris r3,r3,MSR_SPE@h stw r3,_MSR(r1) /* enable use of SPE after return */ @@ -881,6 +881,7 @@ KernelSPE: #endif .align 4,0 +SYM_FUNC_END(KernelSPE) #endif /* CONFIG_SPE */ /* diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 77cd4c5a2d63..b9a725abc596 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -51,10 +51,9 @@ void arch_cpu_idle(void) * Some power_save functions return with * interrupts enabled, some don't. */ - if (irqs_disabled()) - raw_local_irq_enable(); + if (!irqs_disabled()) + raw_local_irq_disable(); } else { - raw_local_irq_enable(); /* * Go into low thread priority and possibly * low power mode. diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index fc6631a80527..0ec1581619db 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -50,16 +50,18 @@ static inline bool exit_must_hard_disable(void) */ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) { + bool must_hard_disable = (exit_must_hard_disable() || !restartable); + /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); - if (exit_must_hard_disable() || !restartable) + if (must_hard_disable) __hard_EE_RI_disable(); #ifdef CONFIG_PPC64 /* This pattern matches prep_irq_for_idle */ if (unlikely(lazy_irq_pending_nocheck())) { - if (exit_must_hard_disable() || !restartable) { + if (must_hard_disable) { local_paca->irq_happened |= PACA_IRQ_HARD_DIS; __hard_RI_enable(); } diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index caebe1431596..ee95937bdaf1 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -67,11 +67,9 @@ static void iommu_debugfs_add(struct iommu_table *tbl) static void iommu_debugfs_del(struct iommu_table *tbl) { char name[10]; - struct dentry *liobn_entry; sprintf(name, "%08lx", tbl->it_index); - liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); - debugfs_remove(liobn_entry); + debugfs_lookup_and_remove(name, iommu_debugfs_dir); } #else static void iommu_debugfs_add(struct iommu_table *tbl){} diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c5b9ce887483..c9535f2760b5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -238,7 +238,7 @@ static void __do_irq(struct pt_regs *regs, unsigned long oldsp) irq = static_call(ppc_get_irq)(); /* We can hard enable interrupts now to allow perf interrupts */ - if (should_hard_irq_enable()) + if (should_hard_irq_enable(regs)) do_hard_irq_enable(); /* And finally process it */ diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index eb2b380e52a0..c788c55512ed 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -70,22 +70,19 @@ int distribute_irqs = 1; static inline void next_interrupt(struct pt_regs *regs) { - /* - * Softirq processing can enable/disable irqs, which will leave - * MSR[EE] enabled and the soft mask set to IRQS_DISABLED. Fix - * this up. - */ - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - else - irq_soft_mask_set(IRQS_ALL_DISABLED); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)); + WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + } /* * We are responding to the next interrupt, so interrupt-off * latencies should be reset here. */ + lockdep_hardirq_exit(); trace_hardirqs_on(); trace_hardirqs_off(); + lockdep_hardirq_enter(); } static inline bool irq_happened_test_and_clear(u8 irq) @@ -97,22 +94,11 @@ static inline bool irq_happened_test_and_clear(u8 irq) return false; } -void replay_soft_interrupts(void) +static __no_kcsan void __replay_soft_interrupts(void) { struct pt_regs regs; /* - * Be careful here, calling these interrupt handlers can cause - * softirqs to be raised, which they may run when calling irq_exit, - * which will cause local_irq_enable() to be run, which can then - * recurse into this function. Don't keep any state across - * interrupt handler calls which may change underneath us. - * - * Softirqs can not be disabled over replay to stop this recursion - * because interrupts taken in idle code may require RCU softirq - * to run in the irq RCU tracking context. This is a hard problem - * to fix without changes to the softirq or idle layer. - * * We use local_paca rather than get_paca() to avoid all the * debug_smp_processor_id() business in this low level function. */ @@ -120,13 +106,20 @@ void replay_soft_interrupts(void) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { WARN_ON_ONCE(mfmsr() & MSR_EE); WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)); + WARN_ON(local_paca->irq_happened & PACA_IRQ_REPLAYING); } + /* + * PACA_IRQ_REPLAYING prevents interrupt handlers from enabling + * MSR[EE] to get PMIs, which can result in more IRQs becoming + * pending. + */ + local_paca->irq_happened |= PACA_IRQ_REPLAYING; + ppc_save_regs(®s); regs.softe = IRQS_ENABLED; regs.msr |= MSR_EE; -again: /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. @@ -175,17 +168,18 @@ again: next_interrupt(®s); } - /* - * Softirq processing can enable and disable interrupts, which can - * result in new irqs becoming pending. Must keep looping until we - * have cleared out all pending interrupts. - */ - if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) - goto again; + local_paca->irq_happened &= ~PACA_IRQ_REPLAYING; +} + +__no_kcsan void replay_soft_interrupts(void) +{ + irq_enter(); /* See comment in arch_local_irq_restore */ + __replay_soft_interrupts(); + irq_exit(); } #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) -static inline void replay_soft_interrupts_irqrestore(void) +static inline __no_kcsan void replay_soft_interrupts_irqrestore(void) { unsigned long kuap_state = get_kuap(); @@ -200,16 +194,16 @@ static inline void replay_soft_interrupts_irqrestore(void) if (kuap_state != AMR_KUAP_BLOCKED) set_kuap(AMR_KUAP_BLOCKED); - replay_soft_interrupts(); + __replay_soft_interrupts(); if (kuap_state != AMR_KUAP_BLOCKED) set_kuap(kuap_state); } #else -#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() +#define replay_soft_interrupts_irqrestore() __replay_soft_interrupts() #endif -notrace void arch_local_irq_restore(unsigned long mask) +notrace __no_kcsan void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; @@ -219,9 +213,13 @@ notrace void arch_local_irq_restore(unsigned long mask) return; } - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(in_nmi() || in_hardirq()); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON_ONCE(in_nmi()); + WARN_ON_ONCE(in_hardirq()); + WARN_ON_ONCE(local_paca->irq_happened & PACA_IRQ_REPLAYING); + } +again: /* * After the stb, interrupts are unmasked and there are no interrupts * pending replay. The restart sequence makes this atomic with @@ -248,6 +246,12 @@ notrace void arch_local_irq_restore(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!(mfmsr() & MSR_EE)); + /* + * If we came here from the replay below, we might have a preempt + * pending (due to preempt_enable_no_resched()). Have to check now. + */ + preempt_check_resched(); + return; happened: @@ -261,6 +265,7 @@ happened: irq_soft_mask_set(IRQS_ENABLED); local_paca->irq_happened = 0; __hard_irq_enable(); + preempt_check_resched(); return; } @@ -296,12 +301,38 @@ happened: irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); + /* + * Now enter interrupt context. The interrupt handlers themselves + * also call irq_enter/exit (which is okay, they can nest). But call + * it here now to hold off softirqs until the below irq_exit(). If + * we allowed replayed handlers to run softirqs, that enables irqs, + * which must replay interrupts, which recurses in here and makes + * things more complicated. The recursion is limited to 2, and it can + * be made to work, but it's complicated. + * + * local_bh_disable can not be used here because interrupts taken in + * idle are not in the right context (RCU, tick, etc) to run softirqs + * so irq_enter must be called. + */ + irq_enter(); + replay_soft_interrupts_irqrestore(); + irq_exit(); + + if (unlikely(local_paca->irq_happened != PACA_IRQ_HARD_DIS)) { + /* + * The softirq processing in irq_exit() may enable interrupts + * temporarily, which can result in MSR[EE] being enabled and + * more irqs becoming pending. Go around again if that happens. + */ + trace_hardirqs_on(); + preempt_enable_no_resched(); + goto again; + } + trace_hardirqs_on(); irq_soft_mask_set(IRQS_ENABLED); - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS); local_paca->irq_happened = 0; __hard_irq_enable(); preempt_enable(); diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 6c5d30fba766..219f28637a3e 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -131,6 +131,13 @@ void save_mce_event(struct pt_regs *regs, long handled, if (mce->error_type == MCE_ERROR_TYPE_UE) mce->u.ue_error.ignore_event = mce_err->ignore_event; + /* + * Raise irq work, So that we don't miss to log the error for + * unrecoverable errors. + */ + if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED) + mce_irq_work_queue(); + if (!addr) return; @@ -233,9 +240,6 @@ static void machine_check_ue_event(struct machine_check_event *evt) } memcpy(&local_paca->mce_info->mce_ue_event_queue[index], evt, sizeof(*evt)); - - /* Queue work to process this event later. */ - mce_irq_work_queue(); } /* diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ff045644f13f..2ac78d207f77 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -502,9 +502,10 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, static int restore_r2(const char *name, u32 *instruction, struct module *me) { u32 *prev_insn = instruction - 1; + u32 insn_val = *instruction; if (is_mprofile_ftrace_call(name)) - return 1; + return 0; /* * Make sure the branch isn't a sibling call. Sibling calls aren't @@ -512,19 +513,25 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) * restore afterwards. */ if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) - return 1; + return 0; - if (*instruction != PPC_RAW_NOP()) { - pr_err("%s: Expected nop after call, got %08x at %pS\n", - me->name, *instruction, instruction); + /* + * For livepatch, the restore r2 instruction might have already been + * written previously, if the referenced symbol is in a previously + * unloaded module which is now being loaded again. In that case, skip + * the warning and the instruction write. + */ + if (insn_val == PPC_INST_LD_TOC) return 0; + + if (insn_val != PPC_RAW_NOP()) { + pr_err("%s: Expected nop after call, got %08x at %pS\n", + me->name, insn_val, instruction); + return -ENOEXEC; } /* ld r2,R2_STACK_OFFSET(r1) */ - if (patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC))) - return 0; - - return 1; + return patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC)); } int apply_relocate_add(Elf64_Shdr *sechdrs, @@ -648,8 +655,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, strtab + sym->st_name); if (!value) return -ENOENT; - if (!restore_r2(strtab + sym->st_name, - (u32 *)location + 1, me)) + if (restore_r2(strtab + sym->st_name, + (u32 *)location + 1, me)) return -ENOEXEC; } else value += local_entry_offset(sym); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 855b59892c5c..ce0c8623e563 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -62,7 +62,7 @@ fixup_cpc710_pci64(struct pci_dev* dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64); -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP static u8* pci_to_OF_bus_map; static int pci_bus_count; @@ -152,6 +152,7 @@ pcibios_make_OF_bus_map(void) } #endif } +#endif // CONFIG_PPC_PCI_OF_BUS_MAP #ifdef CONFIG_PPC_PMAC @@ -160,7 +161,9 @@ pcibios_make_OF_bus_map(void) */ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) { +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP struct pci_dev *dev = NULL; +#endif const __be32 *reg; int size; @@ -175,6 +178,9 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) *bus = (be32_to_cpup(®[0]) >> 16) & 0xff; *devfn = (be32_to_cpup(®[0]) >> 8) & 0xff; +#ifndef CONFIG_PPC_PCI_OF_BUS_MAP + return 0; +#else /* Ok, here we need some tweak. If we have already renumbered * all busses, we can't rely on the OF bus number any more. * the pci_to_OF_bus_map is not enough as several PCI busses @@ -192,11 +198,12 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) } return -ENODEV; +#endif // CONFIG_PPC_PCI_OF_BUS_MAP } EXPORT_SYMBOL(pci_device_from_OF_node); #endif -#ifdef CONFIG_PPC_CHRP +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP /* We create the "pci-OF-bus-map" property now so it appears in the * /proc device tree */ @@ -221,9 +228,7 @@ pci_create_OF_bus_map(void) of_node_put(dn); } } -#endif - -#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) */ +#endif // CONFIG_PPC_PCI_OF_BUS_MAP void pcibios_setup_phb_io_space(struct pci_controller *hose) { @@ -273,6 +278,7 @@ static int __init pcibios_init(void) } #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP pci_bus_count = next_busno; /* OpenFirmware based machines need a map of OF bus @@ -282,6 +288,7 @@ static int __init pcibios_init(void) if (pci_assign_all_buses) pcibios_make_OF_bus_map(); #endif +#endif /* Call common code to handle resource allocation */ pcibios_resource_survey(); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 0c7cfb9fab04..fd42059ae2a5 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -132,7 +132,7 @@ void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size) * address decoding but I'd rather not deal with those outside of the * reserved 64K legacy region. */ - area = __get_vm_area_caller(size, 0, PHB_IO_BASE, PHB_IO_END, + area = __get_vm_area_caller(size, VM_IOREMAP, PHB_IO_BASE, PHB_IO_END, __builtin_return_address(0)); if (!area) return NULL; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c22cc234672f..4b29ac5ddac6 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1405,8 +1405,7 @@ static void show_instructions(struct pt_regs *regs) for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; - if (!__kernel_text_address(pc) || - get_kernel_nofault(instr, (const void *)pc)) { + if (get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); } else { if (nip == pc) @@ -2118,6 +2117,9 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, unsigned long stack_page; unsigned long cpu = task_cpu(p); + if (!hardirq_ctx[cpu] || !softirq_ctx[cpu]) + return 0; + stack_page = (unsigned long)hardirq_ctx[cpu]; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; @@ -2139,6 +2141,14 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, if (!paca_ptrs) return 0; + if (!paca_ptrs[cpu]->emergency_sp) + return 0; + +# ifdef CONFIG_PPC_BOOK3S_64 + if (!paca_ptrs[cpu]->nmi_emergency_sp || !paca_ptrs[cpu]->mc_emergency_sp) + return 0; +#endif + stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 4f1c920aa13e..9d9ee4e9e1a1 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -56,6 +56,7 @@ #include <asm/drmem.h> #include <asm/ultravisor.h> #include <asm/prom.h> +#include <asm/plpks.h> #include <mm/mmu_decl.h> @@ -370,8 +371,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node, be32_to_cpu(intserv[found_thread])); boot_cpuid = found; - // Pass the boot CPU's hard CPU id back to our caller - *((u32 *)data) = be32_to_cpu(intserv[found_thread]); + if (IS_ENABLED(CONFIG_PPC64)) + boot_cpu_hwid = be32_to_cpu(intserv[found_thread]); /* * PAPR defines "logical" PVR values for cpus that @@ -755,7 +756,6 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { - u32 boot_cpu_hwid; phys_addr_t limit; DBG(" -> early_init_devtree(%px)\n", params); @@ -851,7 +851,7 @@ void __init early_init_devtree(void *params) /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ - of_scan_flat_dt(early_init_dt_scan_cpus, &boot_cpu_hwid); + of_scan_flat_dt(early_init_dt_scan_cpus, NULL); if (boot_cpuid < 0) { printk("Failed to identify boot CPU !\n"); BUG(); @@ -868,11 +868,6 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); - // NB. paca is not installed until later in early_setup() - allocate_paca_ptrs(); - allocate_paca(boot_cpuid); - set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); - #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); @@ -893,6 +888,9 @@ void __init early_init_devtree(void *params) powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; #endif + /* If kexec left a PLPKS password in the DT, get it and clear it */ + plpks_early_init_devtree(); + tm_init(); DBG(" <- early_init_devtree()\n"); diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 311890d71c4c..5a319863f289 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -51,11 +51,10 @@ do # a leading . on the name, so strip it off here. UNDEF="${UNDEF#.}" - if [ $KBUILD_VERBOSE ]; then - if [ $KBUILD_VERBOSE -ne 0 ]; then - echo "Checking prom_init.o symbol '$UNDEF'" - fi - fi + case "$KBUILD_VERBOSE" in + *1*) + echo "Checking prom_init.o symbol '$UNDEF'" ;; + esac OK=0 for WHITE in $WHITELIST diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 081b2b741a8c..9454b8395b6a 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -287,9 +287,9 @@ static ssize_t ppc_rtas_poweron_write(struct file *file, rtc_time64_to_tm(nowtime, &tm); - error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_FOR_POWER_ON), 7, 1, NULL, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); if (error) printk(KERN_WARNING "error: setting poweron time returned: %s\n", ppc_rtas_process_error(error)); @@ -350,9 +350,9 @@ static ssize_t ppc_rtas_clock_write(struct file *file, return error; rtc_time64_to_tm(nowtime, &tm); - error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec, 0); + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0); if (error) printk(KERN_WARNING "error: setting the clock returned: %s\n", ppc_rtas_process_error(error)); @@ -362,7 +362,7 @@ static ssize_t ppc_rtas_clock_write(struct file *file, static int ppc_rtas_clock_show(struct seq_file *m, void *v) { int ret[8]; - int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + int error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); if (error) { printk(KERN_WARNING "error: reading the clock returned: %s\n", @@ -385,7 +385,7 @@ static int ppc_rtas_sensors_show(struct seq_file *m, void *v) { int i,j; int state, error; - int get_sensor_state = rtas_token("get-sensor-state"); + int get_sensor_state = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n"); seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n"); @@ -708,8 +708,8 @@ static ssize_t ppc_rtas_tone_freq_write(struct file *file, return error; rtas_tone_frequency = freq; /* save it for later */ - error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, - TONE_FREQUENCY, 0, freq); + error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL, + TONE_FREQUENCY, 0, freq); if (error) printk(KERN_WARNING "error: setting tone frequency returned: %s\n", ppc_rtas_process_error(error)); @@ -736,8 +736,8 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file, volume = 100; rtas_tone_volume = volume; /* save it for later */ - error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, - TONE_VOLUME, 0, volume); + error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL, + TONE_VOLUME, 0, volume); if (error) printk(KERN_WARNING "error: setting tone volume returned: %s\n", ppc_rtas_process_error(error)); diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c index 5a31d1829bca..6996214532bd 100644 --- a/arch/powerpc/kernel/rtas-rtc.c +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -21,7 +21,7 @@ time64_t __init rtas_get_boot_time(void) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); wait_time = rtas_busy_delay_time(error); if (wait_time) { @@ -53,7 +53,7 @@ void rtas_get_rtc_time(struct rtc_time *rtc_tm) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); wait_time = rtas_busy_delay_time(error); if (wait_time) { @@ -90,7 +90,7 @@ int rtas_set_rtc_time(struct rtc_time *tm) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL, tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec, 0); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index deded51a7978..31175b34856a 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -9,10 +9,12 @@ #define pr_fmt(fmt) "rtas: " fmt +#include <linux/bsearch.h> #include <linux/capability.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/init.h> +#include <linux/kconfig.h> #include <linux/kernel.h> #include <linux/memblock.h> #include <linux/of.h> @@ -26,6 +28,7 @@ #include <linux/syscalls.h> #include <linux/types.h> #include <linux/uaccess.h> +#include <linux/xarray.h> #include <asm/delay.h> #include <asm/firmware.h> @@ -33,43 +36,604 @@ #include <asm/machdep.h> #include <asm/mmu.h> #include <asm/page.h> +#include <asm/rtas-work-area.h> #include <asm/rtas.h> #include <asm/time.h> +#include <asm/trace.h> #include <asm/udbg.h> +struct rtas_filter { + /* Indexes into the args buffer, -1 if not used */ + const int buf_idx1; + const int size_idx1; + const int buf_idx2; + const int size_idx2; + /* + * Assumed buffer size per the spec if the function does not + * have a size parameter, e.g. ibm,errinjct. 0 if unused. + */ + const int fixed_size; +}; + +/** + * struct rtas_function - Descriptor for RTAS functions. + * + * @token: Value of @name if it exists under the /rtas node. + * @name: Function name. + * @filter: If non-NULL, invoking this function via the rtas syscall is + * generally allowed, and @filter describes constraints on the + * arguments. See also @banned_for_syscall_on_le. + * @banned_for_syscall_on_le: Set when call via sys_rtas is generally allowed + * but specifically restricted on ppc64le. Such + * functions are believed to have no users on + * ppc64le, and we want to keep it that way. It does + * not make sense for this to be set when @filter + * is false. + */ +struct rtas_function { + s32 token; + const bool banned_for_syscall_on_le:1; + const char * const name; + const struct rtas_filter *filter; +}; + +static struct rtas_function rtas_function_table[] __ro_after_init = { + [RTAS_FNIDX__CHECK_EXCEPTION] = { + .name = "check-exception", + }, + [RTAS_FNIDX__DISPLAY_CHARACTER] = { + .name = "display-character", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__EVENT_SCAN] = { + .name = "event-scan", + }, + [RTAS_FNIDX__FREEZE_TIME_BASE] = { + .name = "freeze-time-base", + }, + [RTAS_FNIDX__GET_POWER_LEVEL] = { + .name = "get-power-level", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__GET_SENSOR_STATE] = { + .name = "get-sensor-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__GET_TERM_CHAR] = { + .name = "get-term-char", + }, + [RTAS_FNIDX__GET_TIME_OF_DAY] = { + .name = "get-time-of-day", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE] = { + .name = "ibm,activate-firmware", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_CBE_START_PTCAL] = { + .name = "ibm,cbe-start-ptcal", + }, + [RTAS_FNIDX__IBM_CBE_STOP_PTCAL] = { + .name = "ibm,cbe-stop-ptcal", + }, + [RTAS_FNIDX__IBM_CHANGE_MSI] = { + .name = "ibm,change-msi", + }, + [RTAS_FNIDX__IBM_CLOSE_ERRINJCT] = { + .name = "ibm,close-errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_CONFIGURE_BRIDGE] = { + .name = "ibm,configure-bridge", + }, + [RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR] = { + .name = "ibm,configure-connector", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = 1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP] = { + .name = "ibm,configure-kernel-dump", + }, + [RTAS_FNIDX__IBM_CONFIGURE_PE] = { + .name = "ibm,configure-pe", + }, + [RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW] = { + .name = "ibm,create-pe-dma-window", + }, + [RTAS_FNIDX__IBM_DISPLAY_MESSAGE] = { + .name = "ibm,display-message", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_ERRINJCT] = { + .name = "ibm,errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 1024, + }, + }, + [RTAS_FNIDX__IBM_EXTI2C] = { + .name = "ibm,exti2c", + }, + [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO] = { + .name = "ibm,get-config-addr-info", + }, + [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2] = { + .name = "ibm,get-config-addr-info2", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE] = { + .name = "ibm,get-dynamic-sensor-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_INDICES] = { + .name = "ibm,get-indices", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = 3, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = { + .name = "ibm,get-rio-topology", + }, + [RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER] = { + .name = "ibm,get-system-parameter", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = 2, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_VPD] = { + .name = "ibm,get-vpd", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = 1, .size_idx2 = 2, + }, + }, + [RTAS_FNIDX__IBM_GET_XIVE] = { + .name = "ibm,get-xive", + }, + [RTAS_FNIDX__IBM_INT_OFF] = { + .name = "ibm,int-off", + }, + [RTAS_FNIDX__IBM_INT_ON] = { + .name = "ibm,int-on", + }, + [RTAS_FNIDX__IBM_IO_QUIESCE_ACK] = { + .name = "ibm,io-quiesce-ack", + }, + [RTAS_FNIDX__IBM_LPAR_PERFTOOLS] = { + .name = "ibm,lpar-perftools", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = 3, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = { + .name = "ibm,manage-flash-image", + }, + [RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION] = { + .name = "ibm,manage-storage-preservation", + }, + [RTAS_FNIDX__IBM_NMI_INTERLOCK] = { + .name = "ibm,nmi-interlock", + }, + [RTAS_FNIDX__IBM_NMI_REGISTER] = { + .name = "ibm,nmi-register", + }, + [RTAS_FNIDX__IBM_OPEN_ERRINJCT] = { + .name = "ibm,open-errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE] = { + .name = "ibm,open-sriov-allow-unfreeze", + }, + [RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER] = { + .name = "ibm,open-sriov-map-pe-number", + }, + [RTAS_FNIDX__IBM_OS_TERM] = { + .name = "ibm,os-term", + }, + [RTAS_FNIDX__IBM_PARTNER_CONTROL] = { + .name = "ibm,partner-control", + }, + [RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION] = { + .name = "ibm,physical-attestation", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = 1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_PLATFORM_DUMP] = { + .name = "ibm,platform-dump", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 4, .size_idx1 = 5, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_POWER_OFF_UPS] = { + .name = "ibm,power-off-ups", + }, + [RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER] = { + .name = "ibm,query-interrupt-source-number", + }, + [RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW] = { + .name = "ibm,query-pe-dma-window", + }, + [RTAS_FNIDX__IBM_READ_PCI_CONFIG] = { + .name = "ibm,read-pci-config", + }, + [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE] = { + .name = "ibm,read-slot-reset-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2] = { + .name = "ibm,read-slot-reset-state2", + }, + [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = { + .name = "ibm,remove-pe-dma-window", + }, + [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = { + .name = "ibm,reset-pe-dma-windows", + }, + [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = { + .name = "ibm,scan-log-dump", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = 1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR] = { + .name = "ibm,set-dynamic-indicator", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_EEH_OPTION] = { + .name = "ibm,set-eeh-option", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_SLOT_RESET] = { + .name = "ibm,set-slot-reset", + }, + [RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER] = { + .name = "ibm,set-system-parameter", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_XIVE] = { + .name = "ibm,set-xive", + }, + [RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL] = { + .name = "ibm,slot-error-detail", + }, + [RTAS_FNIDX__IBM_SUSPEND_ME] = { + .name = "ibm,suspend-me", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_TUNE_DMA_PARMS] = { + .name = "ibm,tune-dma-parms", + }, + [RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT] = { + .name = "ibm,update-flash-64-and-reboot", + }, + [RTAS_FNIDX__IBM_UPDATE_NODES] = { + .name = "ibm,update-nodes", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_UPDATE_PROPERTIES] = { + .name = "ibm,update-properties", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE] = { + .name = "ibm,validate-flash-image", + }, + [RTAS_FNIDX__IBM_WRITE_PCI_CONFIG] = { + .name = "ibm,write-pci-config", + }, + [RTAS_FNIDX__NVRAM_FETCH] = { + .name = "nvram-fetch", + }, + [RTAS_FNIDX__NVRAM_STORE] = { + .name = "nvram-store", + }, + [RTAS_FNIDX__POWER_OFF] = { + .name = "power-off", + }, + [RTAS_FNIDX__PUT_TERM_CHAR] = { + .name = "put-term-char", + }, + [RTAS_FNIDX__QUERY_CPU_STOPPED_STATE] = { + .name = "query-cpu-stopped-state", + }, + [RTAS_FNIDX__READ_PCI_CONFIG] = { + .name = "read-pci-config", + }, + [RTAS_FNIDX__RTAS_LAST_ERROR] = { + .name = "rtas-last-error", + }, + [RTAS_FNIDX__SET_INDICATOR] = { + .name = "set-indicator", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_POWER_LEVEL] = { + .name = "set-power-level", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_TIME_FOR_POWER_ON] = { + .name = "set-time-for-power-on", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_TIME_OF_DAY] = { + .name = "set-time-of-day", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__START_CPU] = { + .name = "start-cpu", + }, + [RTAS_FNIDX__STOP_SELF] = { + .name = "stop-self", + }, + [RTAS_FNIDX__SYSTEM_REBOOT] = { + .name = "system-reboot", + }, + [RTAS_FNIDX__THAW_TIME_BASE] = { + .name = "thaw-time-base", + }, + [RTAS_FNIDX__WRITE_PCI_CONFIG] = { + .name = "write-pci-config", + }, +}; + +/** + * rtas_function_token() - RTAS function token lookup. + * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN. + * + * Context: Any context. + * Return: the token value for the function if implemented by this platform, + * otherwise RTAS_UNKNOWN_SERVICE. + */ +s32 rtas_function_token(const rtas_fn_handle_t handle) +{ + const size_t index = handle.index; + const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table); + + if (WARN_ONCE(out_of_bounds, "invalid function index %zu", index)) + return RTAS_UNKNOWN_SERVICE; + /* + * Various drivers attempt token lookups on non-RTAS + * platforms. + */ + if (!rtas.dev) + return RTAS_UNKNOWN_SERVICE; + + return rtas_function_table[index].token; +} +EXPORT_SYMBOL_GPL(rtas_function_token); + +static int rtas_function_cmp(const void *a, const void *b) +{ + const struct rtas_function *f1 = a; + const struct rtas_function *f2 = b; + + return strcmp(f1->name, f2->name); +} + +/* + * Boot-time initialization of the function table needs the lookup to + * return a non-const-qualified object. Use rtas_name_to_function() + * in all other contexts. + */ +static struct rtas_function *__rtas_name_to_function(const char *name) +{ + const struct rtas_function key = { + .name = name, + }; + struct rtas_function *found; + + found = bsearch(&key, rtas_function_table, ARRAY_SIZE(rtas_function_table), + sizeof(rtas_function_table[0]), rtas_function_cmp); + + return found; +} + +static const struct rtas_function *rtas_name_to_function(const char *name) +{ + return __rtas_name_to_function(name); +} + +static DEFINE_XARRAY(rtas_token_to_function_xarray); + +static int __init rtas_token_to_function_xarray_init(void) +{ + int err = 0; + + for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) { + const struct rtas_function *func = &rtas_function_table[i]; + const s32 token = func->token; + + if (token == RTAS_UNKNOWN_SERVICE) + continue; + + err = xa_err(xa_store(&rtas_token_to_function_xarray, + token, (void *)func, GFP_KERNEL)); + if (err) + break; + } + + return err; +} +arch_initcall(rtas_token_to_function_xarray_init); + +static const struct rtas_function *rtas_token_to_function(s32 token) +{ + const struct rtas_function *func; + + if (WARN_ONCE(token < 0, "invalid token %d", token)) + return NULL; + + func = xa_load(&rtas_token_to_function_xarray, token); + + if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token)) + return NULL; + + return func; +} + /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); -static inline void do_enter_rtas(unsigned long args) +static void __do_enter_rtas(struct rtas_args *args) +{ + enter_rtas(__pa(args)); + srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ +} + +static void __do_enter_rtas_trace(struct rtas_args *args) { - unsigned long msr; + const char *name = NULL; + /* + * If the tracepoints that consume the function name aren't + * active, avoid the lookup. + */ + if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) { + const s32 token = be32_to_cpu(args->token); + const struct rtas_function *func = rtas_token_to_function(token); + + name = func->name; + } + + trace_rtas_input(args, name); + trace_rtas_ll_entry(args); + + __do_enter_rtas(args); + + trace_rtas_ll_exit(args); + trace_rtas_output(args, name); +} +static void do_enter_rtas(struct rtas_args *args) +{ + const unsigned long msr = mfmsr(); + /* + * Situations where we want to skip any active tracepoints for + * safety reasons: + * + * 1. The last code executed on an offline CPU as it stops, + * i.e. we're about to call stop-self. The tracepoints' + * function name lookup uses xarray, which uses RCU, which + * isn't valid to call on an offline CPU. Any events + * emitted on an offline CPU will be discarded anyway. + * + * 2. In real mode, as when invoking ibm,nmi-interlock from + * the pseries MCE handler. We cannot count on trace + * buffers or the entries in rtas_token_to_function_xarray + * to be contained in the RMO. + */ + const unsigned long mask = MSR_IR | MSR_DR; + const bool can_trace = likely(cpu_online(raw_smp_processor_id()) && + (msr & mask) == mask); /* * Make sure MSR[RI] is currently enabled as it will be forced later * in enter_rtas. */ - msr = mfmsr(); BUG_ON(!(msr & MSR_RI)); BUG_ON(!irqs_disabled()); hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */ - enter_rtas(args); - - srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ + if (can_trace) + __do_enter_rtas_trace(args); + else + __do_enter_rtas(args); } -struct rtas_t rtas = { - .lock = __ARCH_SPIN_LOCK_UNLOCKED -}; -EXPORT_SYMBOL(rtas); +struct rtas_t rtas; + +/* + * Nearly all RTAS calls need to be serialized. All uses of the + * default rtas_args block must hold rtas_lock. + * + * Exceptions to the RTAS serialization requirement (e.g. stop-self) + * must use a separate rtas_args structure. + */ +static DEFINE_RAW_SPINLOCK(rtas_lock); +static struct rtas_args rtas_args; DEFINE_SPINLOCK(rtas_data_buf_lock); -EXPORT_SYMBOL(rtas_data_buf_lock); +EXPORT_SYMBOL_GPL(rtas_data_buf_lock); -char rtas_data_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; -EXPORT_SYMBOL(rtas_data_buf); +char rtas_data_buf[RTAS_DATA_BUF_SIZE] __aligned(SZ_4K); +EXPORT_SYMBOL_GPL(rtas_data_buf); unsigned long rtas_rmo_buf; @@ -78,29 +642,7 @@ unsigned long rtas_rmo_buf; * This is done like this so rtas_flash can be a module. */ void (*rtas_flash_term_hook)(int); -EXPORT_SYMBOL(rtas_flash_term_hook); - -/* RTAS use home made raw locking instead of spin_lock_irqsave - * because those can be called from within really nasty contexts - * such as having the timebase stopped which would lockup with - * normal locks and spinlock debugging enabled - */ -static unsigned long lock_rtas(void) -{ - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - arch_spin_lock(&rtas.lock); - return flags; -} - -static void unlock_rtas(unsigned long flags) -{ - arch_spin_unlock(&rtas.lock); - local_irq_restore(flags); - preempt_enable(); -} +EXPORT_SYMBOL_GPL(rtas_flash_term_hook); /* * call_rtas_display_status and call_rtas_display_status_delay @@ -109,14 +651,14 @@ static void unlock_rtas(unsigned long flags) */ static void call_rtas_display_status(unsigned char c) { - unsigned long s; + unsigned long flags; if (!rtas.base) return; - s = lock_rtas(); - rtas_call_unlocked(&rtas.args, 10, 1, 1, NULL, c); - unlock_rtas(s); + raw_spin_lock_irqsave(&rtas_lock, flags); + rtas_call_unlocked(&rtas_args, 10, 1, 1, NULL, c); + raw_spin_unlock_irqrestore(&rtas_lock, flags); } static void call_rtas_display_status_delay(char c) @@ -240,8 +782,8 @@ void rtas_progress(char *s, unsigned short hex) "ibm,display-truncation-length", NULL); of_node_put(root); } - display_character = rtas_token("display-character"); - set_indicator = rtas_token("set-indicator"); + display_character = rtas_function_token(RTAS_FN_DISPLAY_CHARACTER); + set_indicator = rtas_function_token(RTAS_FN_SET_INDICATOR); } if (display_character == RTAS_UNKNOWN_SERVICE) { @@ -326,23 +868,38 @@ void rtas_progress(char *s, unsigned short hex) spin_unlock(&progress_lock); } -EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */ +EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */ int rtas_token(const char *service) { + const struct rtas_function *func; const __be32 *tokp; + if (rtas.dev == NULL) return RTAS_UNKNOWN_SERVICE; + + func = rtas_name_to_function(service); + if (func) + return func->token; + /* + * The caller is looking up a name that is not known to be an + * RTAS function. Either it's a function that needs to be + * added to the table, or they're misusing rtas_token() to + * access non-function properties of the /rtas node. Warn and + * fall back to the legacy behavior. + */ + WARN_ONCE(1, "unknown function `%s`, should it be added to rtas_function_table?\n", + service); + tokp = of_get_property(rtas.dev, service, NULL); return tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE; } -EXPORT_SYMBOL(rtas_token); +EXPORT_SYMBOL_GPL(rtas_token); int rtas_service_present(const char *service) { return rtas_token(service) != RTAS_UNKNOWN_SERVICE; } -EXPORT_SYMBOL(rtas_service_present); #ifdef CONFIG_RTAS_ERROR_LOGGING @@ -357,7 +914,6 @@ int rtas_get_error_log_max(void) { return rtas_error_log_max; } -EXPORT_SYMBOL(rtas_get_error_log_max); static void __init init_error_log_max(void) { @@ -381,39 +937,39 @@ static void __init init_error_log_max(void) static char rtas_err_buf[RTAS_ERROR_LOG_MAX]; -static int rtas_last_error_token; /** Return a copy of the detailed error text associated with the * most recent failed call to rtas. Because the error text * might go stale if there are any other intervening rtas calls, * this routine must be called atomically with whatever produced - * the error (i.e. with rtas.lock still held from the previous call). + * the error (i.e. with rtas_lock still held from the previous call). */ static char *__fetch_rtas_last_error(char *altbuf) { + const s32 token = rtas_function_token(RTAS_FN_RTAS_LAST_ERROR); struct rtas_args err_args, save_args; u32 bufsz; char *buf = NULL; - if (rtas_last_error_token == -1) + if (token == -1) return NULL; bufsz = rtas_get_error_log_max(); - err_args.token = cpu_to_be32(rtas_last_error_token); + err_args.token = cpu_to_be32(token); err_args.nargs = cpu_to_be32(2); err_args.nret = cpu_to_be32(1); err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf)); err_args.args[1] = cpu_to_be32(bufsz); err_args.args[2] = 0; - save_args = rtas.args; - rtas.args = err_args; + save_args = rtas_args; + rtas_args = err_args; - do_enter_rtas(__pa(&rtas.args)); + do_enter_rtas(&rtas_args); - err_args = rtas.args; - rtas.args = save_args; + err_args = rtas_args; + rtas_args = save_args; /* Log the error in the unlikely case that there was one. */ if (unlikely(err_args.args[2] == 0)) { @@ -457,7 +1013,7 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, for (i = 0; i < nret; ++i) args->rets[i] = 0; - do_enter_rtas(__pa(args)); + do_enter_rtas(args); } void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) @@ -469,8 +1025,11 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, va_end(list); } -static int ibm_open_errinjct_token; -static int ibm_errinjct_token; +static bool token_is_restricted_errinjct(s32 token) +{ + return token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT) || + token == rtas_function_token(RTAS_FN_IBM_ERRINJCT); +} /** * rtas_call() - Invoke an RTAS firmware function. @@ -481,7 +1040,7 @@ static int ibm_errinjct_token; * @....: List of @nargs input parameters. * * Invokes the RTAS function indicated by @token, which the caller - * should obtain via rtas_token(). + * should obtain via rtas_function_token(). * * The @nargs and @nret arguments must match the number of input and * output parameters specified for the RTAS function. @@ -534,15 +1093,15 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) { va_list list; int i; - unsigned long s; - struct rtas_args *rtas_args; + unsigned long flags; + struct rtas_args *args; char *buff_copy = NULL; int ret; if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) return -1; - if (token == ibm_open_errinjct_token || token == ibm_errinjct_token) { + if (token_is_restricted_errinjct(token)) { /* * It would be nicer to not discard the error value * from security_locked_down(), but callers expect an @@ -557,26 +1116,25 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) return -1; } - s = lock_rtas(); - + raw_spin_lock_irqsave(&rtas_lock, flags); /* We use the global rtas args buffer */ - rtas_args = &rtas.args; + args = &rtas_args; va_start(list, outputs); - va_rtas_call_unlocked(rtas_args, token, nargs, nret, list); + va_rtas_call_unlocked(args, token, nargs, nret, list); va_end(list); /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ - if (be32_to_cpu(rtas_args->rets[0]) == -1) + if (be32_to_cpu(args->rets[0]) == -1) buff_copy = __fetch_rtas_last_error(NULL); if (nret > 1 && outputs != NULL) for (i = 0; i < nret-1; ++i) - outputs[i] = be32_to_cpu(rtas_args->rets[i+1]); - ret = (nret > 0)? be32_to_cpu(rtas_args->rets[0]): 0; + outputs[i] = be32_to_cpu(args->rets[i + 1]); + ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0; - unlock_rtas(s); + raw_spin_unlock_irqrestore(&rtas_lock, flags); if (buff_copy) { log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); @@ -585,7 +1143,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) } return ret; } -EXPORT_SYMBOL(rtas_call); +EXPORT_SYMBOL_GPL(rtas_call); /** * rtas_busy_delay_time() - From an RTAS status value, calculate the @@ -623,7 +1181,47 @@ unsigned int rtas_busy_delay_time(int status) return ms; } -EXPORT_SYMBOL(rtas_busy_delay_time); + +/* + * Early boot fallback for rtas_busy_delay(). + */ +static bool __init rtas_busy_delay_early(int status) +{ + static size_t successive_ext_delays __initdata; + bool retry; + + switch (status) { + case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: + /* + * In the unlikely case that we receive an extended + * delay status in early boot, the OS is probably not + * the cause, and there's nothing we can do to clear + * the condition. Best we can do is delay for a bit + * and hope it's transient. Lie to the caller if it + * seems like we're stuck in a retry loop. + */ + mdelay(1); + retry = true; + successive_ext_delays += 1; + if (successive_ext_delays > 1000) { + pr_err("too many extended delays, giving up\n"); + dump_stack(); + retry = false; + successive_ext_delays = 0; + } + break; + case RTAS_BUSY: + retry = true; + successive_ext_delays = 0; + break; + default: + retry = false; + successive_ext_delays = 0; + break; + } + + return retry; +} /** * rtas_busy_delay() - helper for RTAS busy and extended delay statuses @@ -643,11 +1241,17 @@ EXPORT_SYMBOL(rtas_busy_delay_time); * * false - @status is not @RTAS_BUSY nor an extended delay hint. The * caller is responsible for handling @status. */ -bool rtas_busy_delay(int status) +bool __ref rtas_busy_delay(int status) { unsigned int ms; bool ret; + /* + * Can't do timed sleeps before timekeeping is up. + */ + if (system_state < SYSTEM_SCHEDULING) + return rtas_busy_delay_early(status); + switch (status) { case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: ret = true; @@ -697,7 +1301,7 @@ bool rtas_busy_delay(int status) return ret; } -EXPORT_SYMBOL(rtas_busy_delay); +EXPORT_SYMBOL_GPL(rtas_busy_delay); static int rtas_error_rc(int rtas_rc) { @@ -729,7 +1333,7 @@ static int rtas_error_rc(int rtas_rc) int rtas_get_power_level(int powerdomain, int *level) { - int token = rtas_token("get-power-level"); + int token = rtas_function_token(RTAS_FN_GET_POWER_LEVEL); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -742,11 +1346,11 @@ int rtas_get_power_level(int powerdomain, int *level) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_get_power_level); +EXPORT_SYMBOL_GPL(rtas_get_power_level); int rtas_set_power_level(int powerdomain, int level, int *setlevel) { - int token = rtas_token("set-power-level"); + int token = rtas_function_token(RTAS_FN_SET_POWER_LEVEL); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -760,11 +1364,11 @@ int rtas_set_power_level(int powerdomain, int level, int *setlevel) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_set_power_level); +EXPORT_SYMBOL_GPL(rtas_set_power_level); int rtas_get_sensor(int sensor, int index, int *state) { - int token = rtas_token("get-sensor-state"); + int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -778,11 +1382,11 @@ int rtas_get_sensor(int sensor, int index, int *state) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_get_sensor); +EXPORT_SYMBOL_GPL(rtas_get_sensor); int rtas_get_sensor_fast(int sensor, int index, int *state) { - int token = rtas_token("get-sensor-state"); + int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -821,11 +1425,10 @@ bool rtas_indicator_present(int token, int *maxindex) return false; } -EXPORT_SYMBOL(rtas_indicator_present); int rtas_set_indicator(int indicator, int index, int new_value) { - int token = rtas_token("set-indicator"); + int token = rtas_function_token(RTAS_FN_SET_INDICATOR); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -839,15 +1442,15 @@ int rtas_set_indicator(int indicator, int index, int new_value) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_set_indicator); +EXPORT_SYMBOL_GPL(rtas_set_indicator); /* * Ignoring RTAS extended delay */ int rtas_set_indicator_fast(int indicator, int index, int new_value) { + int token = rtas_function_token(RTAS_FN_SET_INDICATOR); int rc; - int token = rtas_token("set-indicator"); if (token == RTAS_UNKNOWN_SERVICE) return -ENOENT; @@ -889,10 +1492,11 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) */ int rtas_ibm_suspend_me(int *fw_status) { + int token = rtas_function_token(RTAS_FN_IBM_SUSPEND_ME); int fwrc; int ret; - fwrc = rtas_call(rtas_token("ibm,suspend-me"), 0, 1, NULL); + fwrc = rtas_call(token, 0, 1, NULL); switch (fwrc) { case 0: @@ -925,7 +1529,7 @@ void __noreturn rtas_restart(char *cmd) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); pr_emerg("system-reboot returned %d\n", - rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); + rtas_call(rtas_function_token(RTAS_FN_SYSTEM_REBOOT), 0, 1, NULL)); for (;;); } @@ -935,7 +1539,7 @@ void rtas_power_off(void) rtas_flash_term_hook(SYS_POWER_OFF); /* allow power on only with power button press */ pr_emerg("power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1)); for (;;); } @@ -945,16 +1549,17 @@ void __noreturn rtas_halt(void) rtas_flash_term_hook(SYS_HALT); /* allow power on only with power button press */ pr_emerg("power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1)); for (;;); } /* Must be in the RMO region, so we place it here */ static char rtas_os_term_buf[2048]; -static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE; +static bool ibm_extended_os_term; void rtas_os_term(char *str) { + s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM); int status; /* @@ -963,7 +1568,8 @@ void rtas_os_term(char *str) * this property may terminate the partition which we want to avoid * since it interferes with panic_timeout. */ - if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE) + + if (token == RTAS_UNKNOWN_SERVICE || !ibm_extended_os_term) return; snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); @@ -974,8 +1580,7 @@ void rtas_os_term(char *str) * schedules. */ do { - status = rtas_call(ibm_os_term_token, 1, 1, NULL, - __pa(rtas_os_term_buf)); + status = rtas_call(token, 1, 1, NULL, __pa(rtas_os_term_buf)); } while (rtas_busy_delay_time(status)); if (status != 0) @@ -995,10 +1600,9 @@ void rtas_os_term(char *str) */ void rtas_activate_firmware(void) { - int token; + int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE); int fwrc; - token = rtas_token("ibm,activate-firmware"); if (token == RTAS_UNKNOWN_SERVICE) { pr_notice("ibm,activate-firmware method unavailable\n"); return; @@ -1063,56 +1667,12 @@ noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log * * Accordingly, we filter RTAS requests to check that the call is * permitted, and that provided pointers fall within the RMO buffer. - * The rtas_filters list contains an entry for each permitted call, - * with the indexes of the parameters which are expected to contain - * addresses and sizes of buffers allocated inside the RMO buffer. + * If a function is allowed to be invoked via the syscall, then its + * entry in the rtas_functions table points to a rtas_filter that + * describes its constraints, with the indexes of the parameters which + * are expected to contain addresses and sizes of buffers allocated + * inside the RMO buffer. */ -struct rtas_filter { - const char *name; - int token; - /* Indexes into the args buffer, -1 if not used */ - int buf_idx1; - int size_idx1; - int buf_idx2; - int size_idx2; - - int fixed_size; -}; - -static struct rtas_filter rtas_filters[] __ro_after_init = { - { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, - { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ - { "display-character", -1, -1, -1, -1, -1 }, - { "ibm,display-message", -1, 0, -1, -1, -1 }, - { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, - { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, - { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, - { "ibm,get-indices", -1, 2, 3, -1, -1 }, - { "get-power-level", -1, -1, -1, -1, -1 }, - { "get-sensor-state", -1, -1, -1, -1, -1 }, - { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, - { "get-time-of-day", -1, -1, -1, -1, -1 }, - { "ibm,get-vpd", -1, 0, -1, 1, 2 }, - { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, - { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */ - { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, - { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, - { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, - { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, - { "set-indicator", -1, -1, -1, -1, -1 }, - { "set-power-level", -1, -1, -1, -1, -1 }, - { "set-time-for-power-on", -1, -1, -1, -1, -1 }, - { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, - { "set-time-of-day", -1, -1, -1, -1, -1 }, -#ifdef CONFIG_CPU_BIG_ENDIAN - { "ibm,suspend-me", -1, -1, -1, -1, -1 }, - { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, - { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, -#endif - { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, -}; static bool in_rmo_buf(u32 base, u32 end) { @@ -1126,63 +1686,75 @@ static bool in_rmo_buf(u32 base, u32 end) static bool block_rtas_call(int token, int nargs, struct rtas_args *args) { - int i; - - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { - struct rtas_filter *f = &rtas_filters[i]; - u32 base, size, end; + const struct rtas_function *func; + const struct rtas_filter *f; + const bool is_platform_dump = token == rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP); + const bool is_config_conn = token == rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR); + u32 base, size, end; - if (token != f->token) - continue; - - if (f->buf_idx1 != -1) { - base = be32_to_cpu(args->args[f->buf_idx1]); - if (f->size_idx1 != -1) - size = be32_to_cpu(args->args[f->size_idx1]); - else if (f->fixed_size) - size = f->fixed_size; - else - size = 1; - - end = base + size - 1; + /* + * If this token doesn't correspond to a function the kernel + * understands, you're not allowed to call it. + */ + func = rtas_token_to_function(token); + if (!func) + goto err; + /* + * And only functions with filters attached are allowed. + */ + f = func->filter; + if (!f) + goto err; + /* + * And some functions aren't allowed on LE. + */ + if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) && func->banned_for_syscall_on_le) + goto err; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; - /* - * Special case for ibm,platform-dump - NULL buffer - * address is used to indicate end of dump processing - */ - if (!strcmp(f->name, "ibm,platform-dump") && - base == 0) - return false; + end = base + size - 1; - if (!in_rmo_buf(base, end)) - goto err; - } + /* + * Special case for ibm,platform-dump - NULL buffer + * address is used to indicate end of dump processing + */ + if (is_platform_dump && base == 0) + return false; - if (f->buf_idx2 != -1) { - base = be32_to_cpu(args->args[f->buf_idx2]); - if (f->size_idx2 != -1) - size = be32_to_cpu(args->args[f->size_idx2]); - else if (f->fixed_size) - size = f->fixed_size; - else - size = 1; - end = base + size - 1; + if (!in_rmo_buf(base, end)) + goto err; + } - /* - * Special case for ibm,configure-connector where the - * address can be 0 - */ - if (!strcmp(f->name, "ibm,configure-connector") && - base == 0) - return false; + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; - if (!in_rmo_buf(base, end)) - goto err; - } + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (is_config_conn && base == 0) + return false; - return false; + if (!in_rmo_buf(base, end)) + goto err; } + return false; err: pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", @@ -1190,14 +1762,6 @@ err: return true; } -static void __init rtas_syscall_filter_init(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) - rtas_filters[i].token = rtas_token(rtas_filters[i].name); -} - /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { @@ -1238,7 +1802,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) if (block_rtas_call(token, nargs, &args)) return -EINVAL; - if (token == ibm_open_errinjct_token || token == ibm_errinjct_token) { + if (token_is_restricted_errinjct(token)) { int err; err = security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION); @@ -1247,7 +1811,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) } /* Need to handle ibm,suspend_me call specially */ - if (token == rtas_token("ibm,suspend-me")) { + if (token == rtas_function_token(RTAS_FN_IBM_SUSPEND_ME)) { /* * rtas_ibm_suspend_me assumes the streamid handle is in cpu @@ -1268,18 +1832,18 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) buff_copy = get_errorlog_buffer(); - flags = lock_rtas(); + raw_spin_lock_irqsave(&rtas_lock, flags); - rtas.args = args; - do_enter_rtas(__pa(&rtas.args)); - args = rtas.args; + rtas_args = args; + do_enter_rtas(&rtas_args); + args = rtas_args; /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ if (be32_to_cpu(args.rets[0]) == -1) errbuf = __fetch_rtas_last_error(buff_copy); - unlock_rtas(flags); + raw_spin_unlock_irqrestore(&rtas_lock, flags); if (buff_copy) { if (errbuf) @@ -1297,6 +1861,54 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) return 0; } +static void __init rtas_function_table_init(void) +{ + struct property *prop; + + for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) { + struct rtas_function *curr = &rtas_function_table[i]; + struct rtas_function *prior; + int cmp; + + curr->token = RTAS_UNKNOWN_SERVICE; + + if (i == 0) + continue; + /* + * Ensure table is sorted correctly for binary search + * on function names. + */ + prior = &rtas_function_table[i - 1]; + + cmp = strcmp(prior->name, curr->name); + if (cmp < 0) + continue; + + if (cmp == 0) { + pr_err("'%s' has duplicate function table entries\n", + curr->name); + } else { + pr_err("function table unsorted: '%s' wrongly precedes '%s'\n", + prior->name, curr->name); + } + } + + for_each_property_of_node(rtas.dev, prop) { + struct rtas_function *func; + + if (prop->length != sizeof(u32)) + continue; + + func = __rtas_name_to_function(prop->name); + if (!func) + continue; + + func->token = be32_to_cpup((__be32 *)prop->value); + + pr_debug("function %s has token %u\n", func->name, func->token); + } +} + /* * Call early during boot, before mem init, to retrieve the RTAS * information from the device-tree and allocate the RMO buffer for userland @@ -1330,12 +1942,14 @@ void __init rtas_initialize(void) init_error_log_max(); + /* Must be called before any function token lookups */ + rtas_function_table_init(); + /* - * Discover these now to avoid device tree lookups in the + * Discover this now to avoid a device tree lookup in the * panic path. */ - if (of_property_read_bool(rtas.dev, "ibm,extended-os-term")) - ibm_os_term_token = rtas_token("ibm,os-term"); + ibm_extended_os_term = of_property_read_bool(rtas.dev, "ibm,extended-os-term"); /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any @@ -1350,12 +1964,7 @@ void __init rtas_initialize(void) panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n", PAGE_SIZE, &rtas_region); -#ifdef CONFIG_RTAS_ERROR_LOGGING - rtas_last_error_token = rtas_token("rtas-last-error"); -#endif - ibm_open_errinjct_token = rtas_token("ibm,open-errinjct"); - ibm_errinjct_token = rtas_token("ibm,errinjct"); - rtas_syscall_filter_init(); + rtas_work_area_reserve_arena(rtas_region); } int __init early_init_dt_scan_rtas(unsigned long node, @@ -1401,23 +2010,22 @@ int __init early_init_dt_scan_rtas(unsigned long node, return 1; } -static arch_spinlock_t timebase_lock; +static DEFINE_RAW_SPINLOCK(timebase_lock); static u64 timebase = 0; void rtas_give_timebase(void) { unsigned long flags; - local_irq_save(flags); + raw_spin_lock_irqsave(&timebase_lock, flags); hard_irq_disable(); - arch_spin_lock(&timebase_lock); - rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); + rtas_call(rtas_function_token(RTAS_FN_FREEZE_TIME_BASE), 0, 1, NULL); timebase = get_tb(); - arch_spin_unlock(&timebase_lock); + raw_spin_unlock(&timebase_lock); while (timebase) barrier(); - rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); + rtas_call(rtas_function_token(RTAS_FN_THAW_TIME_BASE), 0, 1, NULL); local_irq_restore(flags); } @@ -1425,8 +2033,8 @@ void rtas_take_timebase(void) { while (!timebase) barrier(); - arch_spin_lock(&timebase_lock); + raw_spin_lock(&timebase_lock); set_tb(timebase >> 32, timebase & 0xffffffff); timebase = 0; - arch_spin_unlock(&timebase_lock); + raw_spin_unlock(&timebase_lock); } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index bc817a5619d6..4caf5e3079eb 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -376,7 +376,7 @@ static void manage_flash(struct rtas_manage_flash_t *args_buf, unsigned int op) s32 rc; do { - rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1, 1, + rc = rtas_call(rtas_function_token(RTAS_FN_IBM_MANAGE_FLASH_IMAGE), 1, 1, NULL, op); } while (rtas_busy_delay(rc)); @@ -444,7 +444,7 @@ error: */ static void validate_flash(struct rtas_validate_flash_t *args_buf) { - int token = rtas_token("ibm,validate-flash-image"); + int token = rtas_function_token(RTAS_FN_IBM_VALIDATE_FLASH_IMAGE); int update_results; s32 rc; @@ -570,7 +570,7 @@ static void rtas_flash_firmware(int reboot_type) return; } - update_token = rtas_token("ibm,update-flash-64-and-reboot"); + update_token = rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT); if (update_token == RTAS_UNKNOWN_SERVICE) { printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot " "is not available -- not a service partition?\n"); @@ -653,7 +653,7 @@ static void rtas_flash_firmware(int reboot_type) */ struct rtas_flash_file { const char *filename; - const char *rtas_call_name; + const rtas_fn_handle_t handle; int *status; const struct proc_ops ops; }; @@ -661,7 +661,7 @@ struct rtas_flash_file { static const struct rtas_flash_file rtas_flash_files[] = { { .filename = "powerpc/rtas/" FIRMWARE_FLASH_NAME, - .rtas_call_name = "ibm,update-flash-64-and-reboot", + .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT, .status = &rtas_update_flash_data.status, .ops.proc_read = rtas_flash_read_msg, .ops.proc_write = rtas_flash_write, @@ -670,7 +670,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" FIRMWARE_UPDATE_NAME, - .rtas_call_name = "ibm,update-flash-64-and-reboot", + .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT, .status = &rtas_update_flash_data.status, .ops.proc_read = rtas_flash_read_num, .ops.proc_write = rtas_flash_write, @@ -679,7 +679,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" VALIDATE_FLASH_NAME, - .rtas_call_name = "ibm,validate-flash-image", + .handle = RTAS_FN_IBM_VALIDATE_FLASH_IMAGE, .status = &rtas_validate_flash_data.status, .ops.proc_read = validate_flash_read, .ops.proc_write = validate_flash_write, @@ -688,7 +688,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" MANAGE_FLASH_NAME, - .rtas_call_name = "ibm,manage-flash-image", + .handle = RTAS_FN_IBM_MANAGE_FLASH_IMAGE, .status = &rtas_manage_flash_data.status, .ops.proc_read = manage_flash_read, .ops.proc_write = manage_flash_write, @@ -700,8 +700,7 @@ static int __init rtas_flash_init(void) { int i; - if (rtas_token("ibm,update-flash-64-and-reboot") == - RTAS_UNKNOWN_SERVICE) { + if (rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT) == RTAS_UNKNOWN_SERVICE) { pr_info("rtas_flash: no firmware flash support\n"); return -EINVAL; } @@ -730,7 +729,7 @@ static int __init rtas_flash_init(void) * This code assumes that the status int is the first member of the * struct */ - token = rtas_token(f->rtas_call_name); + token = rtas_function_token(f->handle); if (token == RTAS_UNKNOWN_SERVICE) *f->status = FLASH_AUTH; else diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 5a2f5ea3b054..e1fdc7473b72 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -191,10 +191,10 @@ static void python_countermeasures(struct device_node *dev) void __init init_pci_config_tokens(void) { - read_pci_config = rtas_token("read-pci-config"); - write_pci_config = rtas_token("write-pci-config"); - ibm_read_pci_config = rtas_token("ibm,read-pci-config"); - ibm_write_pci_config = rtas_token("ibm,write-pci-config"); + read_pci_config = rtas_function_token(RTAS_FN_READ_PCI_CONFIG); + write_pci_config = rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG); + ibm_read_pci_config = rtas_function_token(RTAS_FN_IBM_READ_PCI_CONFIG); + ibm_write_pci_config = rtas_function_token(RTAS_FN_IBM_WRITE_PCI_CONFIG); } unsigned long get_phb_buid(struct device_node *phb) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index cc56ac6ba4b0..9bba469239fc 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -506,7 +506,7 @@ static int __init rtas_event_scan_init(void) return 0; /* No RTAS */ - event_scan = rtas_token("event-scan"); + event_scan = rtas_function_token(RTAS_FN_EVENT_SCAN); if (event_scan == RTAS_UNKNOWN_SERVICE) { printk(KERN_INFO "rtasd: No event-scan on system\n"); return -ENODEV; diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c index 6a29777d6a2d..19172a2804f0 100644 --- a/arch/powerpc/kernel/secvar-ops.c +++ b/arch/powerpc/kernel/secvar-ops.c @@ -8,10 +8,16 @@ #include <linux/cache.h> #include <asm/secvar.h> +#include <asm/bug.h> -const struct secvar_operations *secvar_ops __ro_after_init; +const struct secvar_operations *secvar_ops __ro_after_init = NULL; -void set_secvar_ops(const struct secvar_operations *ops) +int set_secvar_ops(const struct secvar_operations *ops) { + if (WARN_ON_ONCE(secvar_ops)) + return -EBUSY; + secvar_ops = ops; + + return 0; } diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index 1ee4640a2641..eb3c053f323f 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -21,56 +21,48 @@ static struct kset *secvar_kset; static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - ssize_t rc = 0; - struct device_node *node; - const char *format; - - node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) { - rc = -ENODEV; - goto out; - } - - rc = of_property_read_string(node, "format", &format); - if (rc) - goto out; - - rc = sprintf(buf, "%s\n", format); + char tmp[32]; + ssize_t len = secvar_ops->format(tmp, sizeof(tmp)); -out: - of_node_put(node); + if (len > 0) + return sysfs_emit(buf, "%s\n", tmp); + else if (len < 0) + pr_err("Error %zd reading format string\n", len); + else + pr_err("Got empty format string from backend\n"); - return rc; + return -EIO; } static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - uint64_t dsize; + u64 dsize; int rc; rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); if (rc) { - pr_err("Error retrieving %s variable size %d\n", kobj->name, - rc); + if (rc != -ENOENT) + pr_err("Error retrieving %s variable size %d\n", kobj->name, rc); return rc; } - return sprintf(buf, "%llu\n", dsize); + return sysfs_emit(buf, "%llu\n", dsize); } static ssize_t data_read(struct file *filep, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) { - uint64_t dsize; char *data; + u64 dsize; int rc; rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); if (rc) { - pr_err("Error getting %s variable size %d\n", kobj->name, rc); + if (rc != -ENOENT) + pr_err("Error getting %s variable size %d\n", kobj->name, rc); return rc; } pr_debug("dsize is %llu\n", dsize); @@ -141,34 +133,58 @@ static struct kobj_type secvar_ktype = { static int update_kobj_size(void) { - struct device_node *node; u64 varsize; - int rc = 0; - - node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) { - rc = -ENODEV; - goto out; - } + int rc = secvar_ops->max_size(&varsize); - rc = of_property_read_u64(node, "max-var-size", &varsize); if (rc) - goto out; + return rc; data_attr.size = varsize; update_attr.size = varsize; -out: - of_node_put(node); + return 0; +} - return rc; +static int secvar_sysfs_config(struct kobject *kobj) +{ + struct attribute_group config_group = { + .name = "config", + .attrs = (struct attribute **)secvar_ops->config_attrs, + }; + + if (secvar_ops->config_attrs) + return sysfs_create_group(kobj, &config_group); + + return 0; +} + +static int add_var(const char *name) +{ + struct kobject *kobj; + int rc; + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) + return -ENOMEM; + + kobject_init(kobj, &secvar_ktype); + + rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); + if (rc) { + pr_warn("kobject_add error %d for attribute: %s\n", rc, + name); + kobject_put(kobj); + return rc; + } + + kobject_uevent(kobj, KOBJ_ADD); + return 0; } static int secvar_sysfs_load(void) { + u64 namesize = 0; char *name; - uint64_t namesize = 0; - struct kobject *kobj; int rc; name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL); @@ -179,73 +195,99 @@ static int secvar_sysfs_load(void) rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE); if (rc) { if (rc != -ENOENT) - pr_err("error getting secvar from firmware %d\n", - rc); - break; - } + pr_err("error getting secvar from firmware %d\n", rc); + else + rc = 0; - kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); - if (!kobj) { - rc = -ENOMEM; break; } - kobject_init(kobj, &secvar_ktype); - - rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); - if (rc) { - pr_warn("kobject_add error %d for attribute: %s\n", rc, - name); - kobject_put(kobj); - kobj = NULL; - } - - if (kobj) - kobject_uevent(kobj, KOBJ_ADD); - + rc = add_var(name); } while (!rc); kfree(name); return rc; } +static int secvar_sysfs_load_static(void) +{ + const char * const *name_ptr = secvar_ops->var_names; + int rc; + + while (*name_ptr) { + rc = add_var(*name_ptr); + if (rc) + return rc; + name_ptr++; + } + + return 0; +} + static int secvar_sysfs_init(void) { + u64 max_size; int rc; if (!secvar_ops) { - pr_warn("secvar: failed to retrieve secvar operations.\n"); + pr_warn("Failed to retrieve secvar operations\n"); return -ENODEV; } secvar_kobj = kobject_create_and_add("secvar", firmware_kobj); if (!secvar_kobj) { - pr_err("secvar: Failed to create firmware kobj\n"); + pr_err("Failed to create firmware kobj\n"); return -ENOMEM; } rc = sysfs_create_file(secvar_kobj, &format_attr.attr); if (rc) { - kobject_put(secvar_kobj); - return -ENOMEM; + pr_err("Failed to create format object\n"); + rc = -ENOMEM; + goto err; } secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj); if (!secvar_kset) { - pr_err("secvar: sysfs kobject registration failed.\n"); - kobject_put(secvar_kobj); - return -ENOMEM; + pr_err("sysfs kobject registration failed\n"); + rc = -ENOMEM; + goto err; } rc = update_kobj_size(); if (rc) { pr_err("Cannot read the size of the attribute\n"); - return rc; + goto err; + } + + rc = secvar_sysfs_config(secvar_kobj); + if (rc) { + pr_err("Failed to create config directory\n"); + goto err; } - secvar_sysfs_load(); + if (secvar_ops->get_next) + rc = secvar_sysfs_load(); + else + rc = secvar_sysfs_load_static(); + + if (rc) { + pr_err("Failed to create variable attributes\n"); + goto err; + } + + // Due to sysfs limitations, we will only ever get a write buffer of + // up to 1 page in size. Print a warning if this is potentially going + // to cause problems, so that the user is aware. + secvar_ops->max_size(&max_size); + if (max_size > PAGE_SIZE) + pr_warn_ratelimited("PAGE_SIZE (%lu) is smaller than maximum object size (%llu), writes are limited to PAGE_SIZE\n", + PAGE_SIZE, max_size); return 0; +err: + kobject_put(secvar_kobj); + return rc; } late_initcall(secvar_sysfs_init); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9b10e57040c6..e77734e5a127 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -87,6 +87,10 @@ EXPORT_SYMBOL(machine_id); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +#ifdef CONFIG_PPC64 +int boot_cpu_hwid = -1; +#endif + /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a0dee7354fe6..b2e0d3ce4261 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -385,17 +385,21 @@ void __init early_setup(unsigned long dt_ptr) /* * Do early initialization using the flattened device * tree, such as retrieving the physical memory map or - * calculating/retrieving the hash table size. + * calculating/retrieving the hash table size, discover + * boot_cpuid and boot_cpu_hwid. */ early_init_devtree(__va(dt_ptr)); - /* Now we know the logical id of our boot cpu, setup the paca. */ - if (boot_cpuid != 0) { - /* Poison paca_ptrs[0] again if it's not the boot cpu */ - memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0])); - } + allocate_paca_ptrs(); + allocate_paca(boot_cpuid); + set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); fixup_boot_paca(paca_ptrs[boot_cpuid]); setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */ + // smp_processor_id() now reports boot_cpuid + +#ifdef CONFIG_SMP + task_thread_info(current)->cpu = boot_cpuid; // fix task_cpu(current) +#endif /* * Configure exception handlers. This include setting up trampolines diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index d68de3618741..9d8665910350 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -356,7 +356,7 @@ void vtime_flush(struct task_struct *tsk) } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -void __delay(unsigned long loops) +void __no_kcsan __delay(unsigned long loops) { unsigned long start; @@ -377,7 +377,7 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -void udelay(unsigned long usecs) +void __no_kcsan udelay(unsigned long usecs) { __delay(tb_ticks_per_usec * usecs); } @@ -515,7 +515,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) } /* Conditionally hard-enable interrupts. */ - if (should_hard_irq_enable()) { + if (should_hard_irq_enable(regs)) { /* * Ensure a positive value is written to the decrementer, or * else some CPUs will continue to take decrementer exceptions. diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index af8527538fe4..b16a9f9c0b35 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -23,4 +23,5 @@ obj-$(CONFIG_PPC32) += $(obj32-y) # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_ftrace.o := n KCOV_INSTRUMENT_ftrace.o := n +KCSAN_SANITIZE_ftrace.o := n UBSAN_SANITIZE_ftrace.o := n diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 507f8228f983..7a2ff9010f17 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -120,10 +120,8 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) mmap_read_lock(mm); for_each_vma(vmi, vma) { - unsigned long size = vma->vm_end - vma->vm_start; - if (vma_is_special_mapping(vma, &vvar_spec)) - zap_page_range(vma, vma->vm_start, size); + zap_vma_pages(vma); } mmap_read_unlock(mm); diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 6a977b0d8ffc..66f723f53be2 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -16,6 +16,11 @@ ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc + # This flag is supported by clang for 64-bit but not 32-bit so it will cause + # an unused command line flag warning for this file. + ifdef CONFIG_CC_IS_CLANG + CFLAGS_REMOVE_vgettimeofday-32.o += -fno-stack-clash-protection + endif CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector) @@ -46,15 +51,20 @@ GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n KASAN_SANITIZE := n +KCSAN_SANITIZE := n -ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both -ccflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld) +ccflags-y := -fno-common -fno-builtin +ldflags-y := -Wl,--hash-style=both -nostdlib -shared -z noexecstack +ldflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld) +# Filter flags that clang will warn are unused for linking +ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CFLAGS)) -CC32FLAGS := -Wl,-soname=linux-vdso32.so.1 -m32 -AS32FLAGS := -D__VDSO32__ -s +CC32FLAGS := -m32 +LD32FLAGS := -Wl,-soname=linux-vdso32.so.1 +AS32FLAGS := -D__VDSO32__ -CC64FLAGS := -Wl,-soname=linux-vdso64.so.1 -AS64FLAGS := -D__VDSO64__ -s +LD64FLAGS := -Wl,-soname=linux-vdso64.so.1 +AS64FLAGS := -D__VDSO64__ targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc @@ -92,15 +102,15 @@ include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE # actual build commands quiet_cmd_vdso32ld_and_check = VDSO32L $@ - cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check) + cmd_vdso32ld_and_check = $(VDSOCC) $(ldflags-y) $(CC32FLAGS) $(LD32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< quiet_cmd_vdso64ld_and_check = VDSO64L $@ - cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check) + cmd_vdso64ld_and_check = $(VDSOCC) $(ldflags-y) $(LD64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) quiet_cmd_vdso64as = VDSO64A $@ - cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< + cmd_vdso64as = $(VDSOCC) $(a_flags) $(AS64FLAGS) -c -o $@ $< OBJECT_FILES_NON_STANDARD := y diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8c3862b4c259..f128c7cf9c1d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -8,6 +8,7 @@ #define BSS_FIRST_SECTIONS *(.bss.prominit) #define EMITS_PT_NOTE #define RO_EXCEPTION_TABLE_ALIGN 0 +#define RUNTIME_DISCARD_EXIT #define SOFT_MASK_TABLE(align) \ . = ALIGN(align); \ @@ -111,7 +112,6 @@ SECTIONS #endif NOINSTR_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT @@ -410,9 +410,12 @@ SECTIONS DISCARDS /DISCARD/ : { *(*.EMB.apuinfo) - *(.glink .iplt .plt .rela* .comment) + *(.glink .iplt .plt) *(.gnu.version*) *(.gnu.attributes) *(.eh_frame) +#ifndef CONFIG_RELOCATABLE + *(.rela*) +#endif } } |