diff options
Diffstat (limited to 'arch')
216 files changed, 2701 insertions, 1035 deletions
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 317ff773e1ca..b18fcb606908 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -11,6 +11,7 @@ #define _ASM_ARC_HUGEPAGE_H #include <linux/types.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> static inline pte_t pmd_pte(pmd_t pmd) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index e94ca72b974e..ee22d40afef4 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -37,6 +37,7 @@ #include <asm/page.h> #include <asm/mmu.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <linux/const.h> diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index e22089fb44dc..a3f0b3d50089 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -209,6 +209,7 @@ #define HSR_EC_IABT_HYP (0x21) #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) +#define HSR_EC_MAX (0x3f) #define HSR_WFI_IS_WFE (_AC(1, UL) << 0) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index cc495d799c67..31ee468ce667 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -30,7 +30,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED #define KVM_USER_MEM_SLOTS 32 -#define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG #define KVM_HALT_POLL_NS_DEFAULT 500000 diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index a8d656d9aec7..1c462381c225 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -20,6 +20,7 @@ #else +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> #include <asm/memory.h> #include <asm/pgtable-hwdef.h> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c9a2103faeb9..96dba7cd8be7 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -221,6 +221,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_MSI_DEVID: if (!kvm) r = -EINVAL; diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 4e40d1955e35..96af65a30d78 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -79,7 +79,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x\n", + hsr); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... HSR_EC_MAX] = kvm_handle_unknown_ec, [HSR_EC_WFI] = kvm_handle_wfx, [HSR_EC_CP15_32] = kvm_handle_cp15_32, [HSR_EC_CP15_64] = kvm_handle_cp15_64, @@ -98,13 +110,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x\n", - (unsigned int)kvm_vcpu_get_hsr(vcpu)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 3c2cb5d5adfa..0bb0e9c6376c 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -411,3 +411,4 @@ 394 common pkey_mprotect sys_pkey_mprotect 395 common pkey_alloc sys_pkey_alloc 396 common pkey_free sys_pkey_free +397 common statx sys_statx diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index ce18c91b50a1..f0325d96b97a 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -198,6 +198,8 @@ static const struct dma_map_ops xen_swiotlb_dma_ops = { .unmap_page = xen_swiotlb_unmap_page, .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, + .mmap = xen_swiotlb_dma_mmap, + .get_sgtable = xen_swiotlb_get_sgtable, }; int __init xen_mm_init(void) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a39029b5414e..3741859765cf 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -508,6 +508,16 @@ config QCOM_FALKOR_ERRATUM_1009 If unsure, say Y. +config QCOM_QDF2400_ERRATUM_0065 + bool "QDF2400 E0065: Incorrect GITS_TYPER.ITT_Entry_size" + default y + help + On Qualcomm Datacenter Technologies QDF2400 SoC, ITS hardware reports + ITE size incorrectly. The GITS_TYPER.ITT_Entry_size field should have + been indicated as 16Bytes (0xf), not 8Bytes (0x7). + + If unsure, say Y. + endmenu @@ -1063,6 +1073,10 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC +config KEYS_COMPAT + def_bool y + depends on COMPAT && KEYS + endmenu menu "Power management options" diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index fca2f02cde68..cc6bd559af85 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -92,6 +92,10 @@ config DEBUG_EFI the kernel that are only useful when using a debug build of the UEFI firmware +config ARM64_RELOC_TEST + depends on m + tristate "Relocation testing module" + source "drivers/hwtracing/coresight/Kconfig" endmenu diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h index 561190d15881..0bfe1df12b19 100644 --- a/arch/arm64/include/asm/bug.h +++ b/arch/arm64/include/asm/bug.h @@ -20,9 +20,6 @@ #include <asm/brk-imm.h> -#ifdef CONFIG_GENERIC_BUG -#define HAVE_ARCH_BUG - #ifdef CONFIG_DEBUG_BUGVERBOSE #define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) #define __BUGVERBOSE_LOCATION(file, line) \ @@ -36,28 +33,36 @@ #define _BUGVERBOSE_LOCATION(file, line) #endif -#define _BUG_FLAGS(flags) __BUG_FLAGS(flags) +#ifdef CONFIG_GENERIC_BUG -#define __BUG_FLAGS(flags) asm volatile ( \ +#define __BUG_ENTRY(flags) \ ".pushsection __bug_table,\"a\"\n\t" \ ".align 2\n\t" \ "0: .long 1f - 0b\n\t" \ _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ ".short " #flags "\n\t" \ ".popsection\n" \ - \ - "1: brk %[imm]" \ - :: [imm] "i" (BUG_BRK_IMM) \ -) + "1: " +#else +#define __BUG_ENTRY(flags) "" +#endif + +#define __BUG_FLAGS(flags) \ + asm volatile ( \ + __BUG_ENTRY(flags) \ + "brk %[imm]" :: [imm] "i" (BUG_BRK_IMM) \ + ); -#define BUG() do { \ - _BUG_FLAGS(0); \ - unreachable(); \ + +#define BUG() do { \ + __BUG_FLAGS(0); \ + unreachable(); \ } while (0) -#define __WARN_TAINT(taint) _BUG_FLAGS(BUGFLAG_TAINT(taint)) +#define __WARN_TAINT(taint) \ + __BUG_FLAGS(BUGFLAG_TAINT(taint)) -#endif /* ! CONFIG_GENERIC_BUG */ +#define HAVE_ARCH_BUG #include <asm-generic/bug.h> diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 5082b30bc2c0..ea9bb4e0e9bb 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -16,7 +16,18 @@ #ifndef __ASM_CACHE_H #define __ASM_CACHE_H -#include <asm/cachetype.h> +#include <asm/cputype.h> + +#define CTR_L1IP_SHIFT 14 +#define CTR_L1IP_MASK 3 +#define CTR_CWG_SHIFT 24 +#define CTR_CWG_MASK 15 + +#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) + +#define ICACHE_POLICY_VPIPT 0 +#define ICACHE_POLICY_VIPT 2 +#define ICACHE_POLICY_PIPT 3 #define L1_CACHE_SHIFT 7 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) @@ -32,6 +43,31 @@ #ifndef __ASSEMBLY__ +#include <linux/bitops.h> + +#define ICACHEF_ALIASING 0 +#define ICACHEF_VPIPT 1 +extern unsigned long __icache_flags; + +/* + * Whilst the D-side always behaves as PIPT on AArch64, aliasing is + * permitted in the I-cache. + */ +static inline int icache_is_aliasing(void) +{ + return test_bit(ICACHEF_ALIASING, &__icache_flags); +} + +static inline int icache_is_vpipt(void) +{ + return test_bit(ICACHEF_VPIPT, &__icache_flags); +} + +static inline u32 cache_type_cwg(void) +{ + return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; +} + #define __read_mostly __attribute__((__section__(".data..read_mostly"))) static inline int cache_line_size(void) diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h deleted file mode 100644 index f5588692f1d4..000000000000 --- a/arch/arm64/include/asm/cachetype.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_CACHETYPE_H -#define __ASM_CACHETYPE_H - -#include <asm/cputype.h> - -#define CTR_L1IP_SHIFT 14 -#define CTR_L1IP_MASK 3 -#define CTR_CWG_SHIFT 24 -#define CTR_CWG_MASK 15 - -#define ICACHE_POLICY_RESERVED 0 -#define ICACHE_POLICY_AIVIVT 1 -#define ICACHE_POLICY_VIPT 2 -#define ICACHE_POLICY_PIPT 3 - -#ifndef __ASSEMBLY__ - -#include <linux/bitops.h> - -#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) - -#define ICACHEF_ALIASING 0 -#define ICACHEF_AIVIVT 1 - -extern unsigned long __icache_flags; - -/* - * NumSets, bits[27:13] - (Number of sets in cache) - 1 - * Associativity, bits[12:3] - (Associativity of cache) - 1 - * LineSize, bits[2:0] - (Log2(Number of words in cache line)) - 2 - */ -#define CCSIDR_EL1_WRITE_THROUGH BIT(31) -#define CCSIDR_EL1_WRITE_BACK BIT(30) -#define CCSIDR_EL1_READ_ALLOCATE BIT(29) -#define CCSIDR_EL1_WRITE_ALLOCATE BIT(28) -#define CCSIDR_EL1_LINESIZE_MASK 0x7 -#define CCSIDR_EL1_LINESIZE(x) ((x) & CCSIDR_EL1_LINESIZE_MASK) -#define CCSIDR_EL1_ASSOCIATIVITY_SHIFT 3 -#define CCSIDR_EL1_ASSOCIATIVITY_MASK 0x3ff -#define CCSIDR_EL1_ASSOCIATIVITY(x) \ - (((x) >> CCSIDR_EL1_ASSOCIATIVITY_SHIFT) & CCSIDR_EL1_ASSOCIATIVITY_MASK) -#define CCSIDR_EL1_NUMSETS_SHIFT 13 -#define CCSIDR_EL1_NUMSETS_MASK 0x7fff -#define CCSIDR_EL1_NUMSETS(x) \ - (((x) >> CCSIDR_EL1_NUMSETS_SHIFT) & CCSIDR_EL1_NUMSETS_MASK) - -#define CACHE_LINESIZE(x) (16 << CCSIDR_EL1_LINESIZE(x)) -#define CACHE_NUMSETS(x) (CCSIDR_EL1_NUMSETS(x) + 1) -#define CACHE_ASSOCIATIVITY(x) (CCSIDR_EL1_ASSOCIATIVITY(x) + 1) - -extern u64 __attribute_const__ cache_get_ccsidr(u64 csselr); - -/* Helpers for Level 1 Instruction cache csselr = 1L */ -static inline int icache_get_linesize(void) -{ - return CACHE_LINESIZE(cache_get_ccsidr(1L)); -} - -static inline int icache_get_numsets(void) -{ - return CACHE_NUMSETS(cache_get_ccsidr(1L)); -} - -/* - * Whilst the D-side always behaves as PIPT on AArch64, aliasing is - * permitted in the I-cache. - */ -static inline int icache_is_aliasing(void) -{ - return test_bit(ICACHEF_ALIASING, &__icache_flags); -} - -static inline int icache_is_aivivt(void) -{ - return test_bit(ICACHEF_AIVIVT, &__icache_flags); -} - -static inline u32 cache_type_cwg(void) -{ - return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; -} - -#endif /* __ASSEMBLY__ */ - -#endif /* __ASM_CACHETYPE_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 05310ad8c5ab..e7f84a7b4465 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -226,7 +226,7 @@ void update_cpu_errata_workarounds(void); void __init enable_errata_workarounds(void); void verify_local_cpu_errata_workarounds(void); -u64 read_system_reg(u32 id); +u64 read_sanitised_ftr_reg(u32 id); static inline bool cpu_supports_mixed_endian_el0(void) { @@ -240,7 +240,7 @@ static inline bool system_supports_32bit_el0(void) static inline bool system_supports_mixed_endian_el0(void) { - return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); + return id_aa64mmfr0_mixed_endian_el0(read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1)); } static inline bool system_supports_fpsimd(void) @@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void) static inline bool system_uses_ttbr0_pan(void) { return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && - !cpus_have_cap(ARM64_HAS_PAN); + !cpus_have_const_cap(ARM64_HAS_PAN); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index b6b167ac082b..41770766d964 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -149,7 +149,7 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task) /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { - u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1); + u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_BRPS_SHIFT); @@ -158,7 +158,7 @@ static inline int get_num_brps(void) /* Determine number of WRP registers available. */ static inline int get_num_wrps(void) { - u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1); + u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); return 1 + cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_WRPS_SHIFT); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f21fd3894370..e7705e7bb07b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -30,8 +30,7 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#define KVM_USER_MEM_SLOTS 32 -#define KVM_PRIVATE_MEM_SLOTS 4 +#define KVM_USER_MEM_SLOTS 512 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HALT_POLL_NS_DEFAULT 500000 diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index ed1246014901..2bc6ffa7b89b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -108,7 +108,7 @@ alternative_else_nop_endif #else #include <asm/pgalloc.h> -#include <asm/cachetype.h> +#include <asm/cache.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> @@ -242,12 +242,13 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_flush_dcache_to_poc(va, size); - if (!icache_is_aliasing()) { /* PIPT */ - flush_icache_range((unsigned long)va, - (unsigned long)va + size); - } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ + if (icache_is_aliasing()) { /* any kind of VIPT cache */ __flush_icache_all(); + } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { + /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ + flush_icache_range((unsigned long)va, + (unsigned long)va + size); } } @@ -307,7 +308,7 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, static inline unsigned int kvm_get_vmid_bits(void) { - int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1); + int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 47619411f0ff..5468c834b072 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -37,5 +37,6 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); +extern void mark_linear_text_alias_ro(void); #endif diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 06ff7fd9e81f..e12af6754634 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -17,7 +17,6 @@ #define __ASM_MODULE_H #include <asm-generic/module.h> -#include <asm/memory.h> #define MODULE_ARCH_VERMAGIC "aarch64" @@ -33,10 +32,6 @@ u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, Elf64_Sym *sym); #ifdef CONFIG_RANDOMIZE_BASE -#ifdef CONFIG_MODVERSIONS -#define ARCH_RELOCATES_KCRCTAB -#define reloc_start (kimage_vaddr - KIMAGE_VADDR) -#endif extern u64 module_alloc_base; #else #define module_alloc_base ((u64)_etext - MODULES_VSIZE) diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 69b2fd41503c..345a072b5856 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t; #define __pgprot(x) ((pgprot_t) { (x) } ) #if CONFIG_PGTABLE_LEVELS == 2 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #elif CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> +#elif CONFIG_PGTABLE_LEVELS == 4 +#include <asm-generic/5level-fixup.h> #endif #endif /* __ASM_PGTABLE_TYPES_H */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0eef6064bf3b..c213fdbd056c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -74,6 +74,16 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) +#define pte_cont_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK; \ + (__boundary - 1 < (end) - 1) ? __boundary : (end); \ +}) + +#define pmd_cont_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + CONT_PMD_SIZE) & CONT_PMD_MASK; \ + (__boundary - 1 < (end) - 1) ? __boundary : (end); \ +}) + #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) #else diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index c97b8bd2acba..9428b93fefb2 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -50,6 +50,7 @@ extern phys_addr_t arm64_dma_phys_limit; #define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) struct debug_info { +#ifdef CONFIG_HAVE_HW_BREAKPOINT /* Have we suspended stepping by a debugger? */ int suspended_step; /* Allow breakpoints and watchpoints to be disabled for this thread. */ @@ -58,6 +59,7 @@ struct debug_info { /* Hardware breakpoints pinned to this task. */ struct perf_event *hbp_break[ARM_MAX_BRP]; struct perf_event *hbp_watch[ARM_MAX_WRP]; +#endif }; struct cpu_context { diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 4e7e7067afdb..941267caa39c 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -24,6 +24,8 @@ extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; extern char __idmap_text_start[], __idmap_text_end[]; +extern char __initdata_begin[], __initdata_end[]; +extern char __inittext_begin[], __inittext_end[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 128eae8cc97e..15c142ce991c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -304,6 +304,11 @@ #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_AES_SHIFT 4 +/* id_aa64isar1 */ +#define ID_AA64ISAR1_LRCPC_SHIFT 20 +#define ID_AA64ISAR1_FCMA_SHIFT 16 +#define ID_AA64ISAR1_JSCVT_SHIFT 12 + /* id_aa64pfr0 */ #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_ASIMD_SHIFT 20 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 61c263cba272..4e187ce2a811 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -32,5 +32,8 @@ #define HWCAP_ASIMDHP (1 << 10) #define HWCAP_CPUID (1 << 11) #define HWCAP_ASIMDRDM (1 << 12) +#define HWCAP_JSCVT (1 << 13) +#define HWCAP_FCMA (1 << 14) +#define HWCAP_LRCPC (1 << 15) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1606c6b2a280..aaaf06b117e3 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -50,6 +50,8 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ cpu-reset.o +arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o +arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 06d650f61da7..8840c109c5d6 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -105,11 +105,11 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) return insn; } -static void __apply_alternatives(void *alt_region) +static void __apply_alternatives(void *alt_region, bool use_linear_alias) { struct alt_instr *alt; struct alt_region *region = alt_region; - u32 *origptr, *replptr; + u32 *origptr, *replptr, *updptr; for (alt = region->begin; alt < region->end; alt++) { u32 insn; @@ -124,11 +124,12 @@ static void __apply_alternatives(void *alt_region) origptr = ALT_ORIG_PTR(alt); replptr = ALT_REPL_PTR(alt); + updptr = use_linear_alias ? (u32 *)lm_alias(origptr) : origptr; nr_inst = alt->alt_len / sizeof(insn); for (i = 0; i < nr_inst; i++) { insn = get_alt_insn(alt, origptr + i, replptr + i); - *(origptr + i) = cpu_to_le32(insn); + updptr[i] = cpu_to_le32(insn); } flush_icache_range((uintptr_t)origptr, @@ -155,7 +156,7 @@ static int __apply_alternatives_multi_stop(void *unused) isb(); } else { BUG_ON(patched); - __apply_alternatives(®ion); + __apply_alternatives(®ion, true); /* Barriers provided by the cache flushing */ WRITE_ONCE(patched, 1); } @@ -176,5 +177,5 @@ void apply_alternatives(void *start, size_t length) .end = start + length, }; - __apply_alternatives(®ion); + __apply_alternatives(®ion, false); } diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 3f2250fc391b..380f2e2fbed5 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,15 +17,9 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/bitops.h> #include <linux/cacheinfo.h> -#include <linux/cpu.h> -#include <linux/compiler.h> #include <linux/of.h> -#include <asm/cachetype.h> -#include <asm/processor.h> - #define MAX_CACHE_LEVEL 7 /* Max 7 level supported */ /* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ #define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) @@ -43,43 +37,11 @@ static inline enum cache_type get_cache_type(int level) return CLIDR_CTYPE(clidr, level); } -/* - * Cache Size Selection Register(CSSELR) selects which Cache Size ID - * Register(CCSIDR) is accessible by specifying the required cache - * level and the cache type. We need to ensure that no one else changes - * CSSELR by calling this in non-preemtible context - */ -u64 __attribute_const__ cache_get_ccsidr(u64 csselr) -{ - u64 ccsidr; - - WARN_ON(preemptible()); - - write_sysreg(csselr, csselr_el1); - isb(); - ccsidr = read_sysreg(ccsidr_el1); - - return ccsidr; -} - static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, unsigned int level) { - bool is_icache = type & CACHE_TYPE_INST; - u64 tmp = cache_get_ccsidr((level - 1) << 1 | is_icache); - this_leaf->level = level; this_leaf->type = type; - this_leaf->coherency_line_size = CACHE_LINESIZE(tmp); - this_leaf->number_of_sets = CACHE_NUMSETS(tmp); - this_leaf->ways_of_associativity = CACHE_ASSOCIATIVITY(tmp); - this_leaf->size = this_leaf->number_of_sets * - this_leaf->coherency_line_size * this_leaf->ways_of_associativity; - this_leaf->attributes = - ((tmp & CCSIDR_EL1_WRITE_THROUGH) ? CACHE_WRITE_THROUGH : 0) | - ((tmp & CCSIDR_EL1_WRITE_BACK) ? CACHE_WRITE_BACK : 0) | - ((tmp & CCSIDR_EL1_READ_ALLOCATE) ? CACHE_READ_ALLOCATE : 0) | - ((tmp & CCSIDR_EL1_WRITE_ALLOCATE) ? CACHE_WRITE_ALLOCATE : 0); } static int __init_cache_level(unsigned int cpu) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index abda8e861865..3e0d4f1ab5da 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -97,6 +97,13 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), @@ -153,9 +160,9 @@ static const struct arm64_ftr_bits ftr_ctr[] = { /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. - * If we have differing I-cache policies, report it as the weakest - AIVIVT. + * If we have differing I-cache policies, report it as the weakest - VIPT. */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; @@ -314,7 +321,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), - ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -585,7 +592,7 @@ void update_cpu_features(int cpu, * If we have AArch32, we care about 32-bit features for compat. * If the system doesn't support AArch32, don't update them. */ - if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) && + if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, @@ -636,7 +643,7 @@ void update_cpu_features(int cpu, "Unsupported CPU feature variation.\n"); } -u64 read_system_reg(u32 id) +u64 read_sanitised_ftr_reg(u32 id) { struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); @@ -649,10 +656,10 @@ u64 read_system_reg(u32 id) case r: return read_sysreg_s(r) /* - * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + * __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated. * Read the system register on the current CPU */ -static u64 __raw_read_system_reg(u32 sys_id) +static u64 __read_sysreg_by_encoding(u32 sys_id) { switch (sys_id) { read_sysreg_case(SYS_ID_PFR0_EL1); @@ -709,9 +716,9 @@ has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); if (scope == SCOPE_SYSTEM) - val = read_system_reg(entry->sys_reg); + val = read_sanitised_ftr_reg(entry->sys_reg); else - val = __raw_read_system_reg(entry->sys_reg); + val = __read_sysreg_by_encoding(entry->sys_reg); return feature_matches(val, entry); } @@ -761,7 +768,7 @@ static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused) { - u64 pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); return cpuid_feature_extract_signed_field(pfr0, ID_AA64PFR0_FP_SHIFT) < 0; @@ -888,6 +895,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), {}, }; diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 75a0f8acef66..fd691087dc9a 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu) } /** - * cpu_suspend() - function to enter a low-power idle state + * arm_cpuidle_suspend() - function to enter a low-power idle state * @arg: argument to pass to CPU suspend operations * * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 5b22c687f02a..68b1f364c515 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -15,7 +15,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <asm/arch_timer.h> -#include <asm/cachetype.h> +#include <asm/cache.h> #include <asm/cpu.h> #include <asm/cputype.h> #include <asm/cpufeature.h> @@ -43,10 +43,10 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; static char *icache_policy_str[] = { - [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", - [ICACHE_POLICY_AIVIVT] = "AIVIVT", - [ICACHE_POLICY_VIPT] = "VIPT", - [ICACHE_POLICY_PIPT] = "PIPT", + [0 ... ICACHE_POLICY_PIPT] = "RESERVED/UNKNOWN", + [ICACHE_POLICY_VIPT] = "VIPT", + [ICACHE_POLICY_PIPT] = "PIPT", + [ICACHE_POLICY_VPIPT] = "VPIPT", }; unsigned long __icache_flags; @@ -65,6 +65,9 @@ static const char *const hwcap_str[] = { "asimdhp", "cpuid", "asimdrdm", + "jscvt", + "fcma", + "lrcpc", NULL }; @@ -289,20 +292,18 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) unsigned int cpu = smp_processor_id(); u32 l1ip = CTR_L1IP(info->reg_ctr); - if (l1ip != ICACHE_POLICY_PIPT) { - /* - * VIPT caches are non-aliasing if the VA always equals the PA - * in all bit positions that are covered by the index. This is - * the case if the size of a way (# of sets * line size) does - * not exceed PAGE_SIZE. - */ - u32 waysize = icache_get_numsets() * icache_get_linesize(); - - if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE) - set_bit(ICACHEF_ALIASING, &__icache_flags); + switch (l1ip) { + case ICACHE_POLICY_PIPT: + break; + case ICACHE_POLICY_VPIPT: + set_bit(ICACHEF_VPIPT, &__icache_flags); + break; + default: + /* Fallthrough */ + case ICACHE_POLICY_VIPT: + /* Assume aliasing */ + set_bit(ICACHEF_ALIASING, &__icache_flags); } - if (l1ip == ICACHE_POLICY_AIVIVT) - set_bit(ICACHEF_AIVIVT, &__icache_flags); pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 32913567da08..d618e25c3de1 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -36,7 +36,7 @@ /* Determine debug architecture. */ u8 debug_monitors_arch(void) { - return cpuid_feature_extract_unsigned_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + return cpuid_feature_extract_unsigned_field(read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1), ID_AA64DFR0_DEBUGVER_SHIFT); } diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S new file mode 100644 index 000000000000..613fc3000677 --- /dev/null +++ b/arch/arm64/kernel/efi-header.S @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2013 - 2017 Linaro, Ltd. + * Copyright (C) 2013, 2014 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/pe.h> +#include <linux/sizes.h> + + .macro __EFI_PE_HEADER + .long PE_MAGIC +coff_header: + .short IMAGE_FILE_MACHINE_ARM64 // Machine + .short section_count // NumberOfSections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 0 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_EXECUTABLE_IMAGE | \ + IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics + +optional_header: + .short PE_OPT_MAGIC_PE32PLUS // PE32+ format + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long __initdata_begin - efi_header_end // SizeOfCode + .long __pecoff_data_size // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long __efistub_entry - _head // AddressOfEntryPoint + .long efi_header_end - _head // BaseOfCode + +extra_header_fields: + .quad 0 // ImageBase + .long SZ_4K // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short 0 // MajorImageVersion + .short 0 // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _end - _head // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long efi_header_end - _head // SizeOfHeaders + .long 0 // CheckSum + .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long (section_table - .) / 8 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + +#ifdef CONFIG_DEBUG_EFI + .long efi_debug_table - _head // DebugTable + .long efi_debug_table_size +#endif + + // Section table +section_table: + .ascii ".text\0\0\0" + .long __initdata_begin - efi_header_end // VirtualSize + .long efi_header_end - _head // VirtualAddress + .long __initdata_begin - efi_header_end // SizeOfRawData + .long efi_header_end - _head // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE // Characteristics + + .ascii ".data\0\0\0" + .long __pecoff_data_size // VirtualSize + .long __initdata_begin - _head // VirtualAddress + .long __pecoff_data_rawsize // SizeOfRawData + .long __initdata_begin - _head // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE // Characteristics + + .set section_count, (. - section_table) / 40 + +#ifdef CONFIG_DEBUG_EFI + /* + * The debug table is referenced via its Relative Virtual Address (RVA), + * which is only defined for those parts of the image that are covered + * by a section declaration. Since this header is not covered by any + * section, the debug table must be emitted elsewhere. So stick it in + * the .init.rodata section instead. + * + * Note that the EFI debug entry itself may legally have a zero RVA, + * which means we can simply put it right after the section headers. + */ + __INITRODATA + + .align 2 +efi_debug_table: + // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY + .long 0 // Characteristics + .long 0 // TimeDateStamp + .short 0 // MajorVersion + .short 0 // MinorVersion + .long IMAGE_DEBUG_TYPE_CODEVIEW // Type + .long efi_debug_entry_size // SizeOfData + .long 0 // RVA + .long efi_debug_entry - _head // FileOffset + + .set efi_debug_table_size, . - efi_debug_table + .previous + +efi_debug_entry: + // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY + .ascii "NB10" // Signature + .long 0 // Unknown + .long 0 // Unknown2 + .long 0 // Unknown3 + + .asciz VMLINUX_PATH + + .set efi_debug_entry_size, . - efi_debug_entry +#endif + + /* + * EFI will load .text onwards at the 4k section alignment + * described in the PE/COFF header. To ensure that instruction + * sequences using an adrp and a :lo12: immediate will function + * correctly at this alignment, we must ensure that .text is + * placed at a 4k boundary in the Image to begin with. + */ + .align 12 +efi_header_end: + .endm diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 95ae40ac3f40..973df7de7bf8 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -42,6 +42,8 @@ #include <asm/thread_info.h> #include <asm/virt.h> +#include "efi-header.S" + #define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) #if (TEXT_OFFSET & 0xfff) != 0 @@ -89,166 +91,14 @@ _head: .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved - .byte 0x41 // Magic number, "ARM\x64" - .byte 0x52 - .byte 0x4d - .byte 0x64 + .ascii "ARM\x64" // Magic number #ifdef CONFIG_EFI .long pe_header - _head // Offset to the PE header. -#else - .word 0 // reserved -#endif -#ifdef CONFIG_EFI - .align 3 pe_header: - .ascii "PE" - .short 0 -coff_header: - .short 0xaa64 // AArch64 - .short 2 // nr_sections - .long 0 // TimeDateStamp - .long 0 // PointerToSymbolTable - .long 1 // NumberOfSymbols - .short section_table - optional_header // SizeOfOptionalHeader - .short 0x206 // Characteristics. - // IMAGE_FILE_DEBUG_STRIPPED | - // IMAGE_FILE_EXECUTABLE_IMAGE | - // IMAGE_FILE_LINE_NUMS_STRIPPED -optional_header: - .short 0x20b // PE32+ format - .byte 0x02 // MajorLinkerVersion - .byte 0x14 // MinorLinkerVersion - .long _end - efi_header_end // SizeOfCode - .long 0 // SizeOfInitializedData - .long 0 // SizeOfUninitializedData - .long __efistub_entry - _head // AddressOfEntryPoint - .long efi_header_end - _head // BaseOfCode - -extra_header_fields: - .quad 0 // ImageBase - .long 0x1000 // SectionAlignment - .long PECOFF_FILE_ALIGNMENT // FileAlignment - .short 0 // MajorOperatingSystemVersion - .short 0 // MinorOperatingSystemVersion - .short 0 // MajorImageVersion - .short 0 // MinorImageVersion - .short 0 // MajorSubsystemVersion - .short 0 // MinorSubsystemVersion - .long 0 // Win32VersionValue - - .long _end - _head // SizeOfImage - - // Everything before the kernel image is considered part of the header - .long efi_header_end - _head // SizeOfHeaders - .long 0 // CheckSum - .short 0xa // Subsystem (EFI application) - .short 0 // DllCharacteristics - .quad 0 // SizeOfStackReserve - .quad 0 // SizeOfStackCommit - .quad 0 // SizeOfHeapReserve - .quad 0 // SizeOfHeapCommit - .long 0 // LoaderFlags - .long (section_table - .) / 8 // NumberOfRvaAndSizes - - .quad 0 // ExportTable - .quad 0 // ImportTable - .quad 0 // ResourceTable - .quad 0 // ExceptionTable - .quad 0 // CertificationTable - .quad 0 // BaseRelocationTable - -#ifdef CONFIG_DEBUG_EFI - .long efi_debug_table - _head // DebugTable - .long efi_debug_table_size -#endif - - // Section table -section_table: - - /* - * The EFI application loader requires a relocation section - * because EFI applications must be relocatable. This is a - * dummy section as far as we are concerned. - */ - .ascii ".reloc" - .byte 0 - .byte 0 // end of 0 padding of section name - .long 0 - .long 0 - .long 0 // SizeOfRawData - .long 0 // PointerToRawData - .long 0 // PointerToRelocations - .long 0 // PointerToLineNumbers - .short 0 // NumberOfRelocations - .short 0 // NumberOfLineNumbers - .long 0x42100040 // Characteristics (section flags) - - - .ascii ".text" - .byte 0 - .byte 0 - .byte 0 // end of 0 padding of section name - .long _end - efi_header_end // VirtualSize - .long efi_header_end - _head // VirtualAddress - .long _edata - efi_header_end // SizeOfRawData - .long efi_header_end - _head // PointerToRawData - - .long 0 // PointerToRelocations (0 for executables) - .long 0 // PointerToLineNumbers (0 for executables) - .short 0 // NumberOfRelocations (0 for executables) - .short 0 // NumberOfLineNumbers (0 for executables) - .long 0xe0500020 // Characteristics (section flags) - -#ifdef CONFIG_DEBUG_EFI - /* - * The debug table is referenced via its Relative Virtual Address (RVA), - * which is only defined for those parts of the image that are covered - * by a section declaration. Since this header is not covered by any - * section, the debug table must be emitted elsewhere. So stick it in - * the .init.rodata section instead. - * - * Note that the EFI debug entry itself may legally have a zero RVA, - * which means we can simply put it right after the section headers. - */ - __INITRODATA - - .align 2 -efi_debug_table: - // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY - .long 0 // Characteristics - .long 0 // TimeDateStamp - .short 0 // MajorVersion - .short 0 // MinorVersion - .long 2 // Type == EFI_IMAGE_DEBUG_TYPE_CODEVIEW - .long efi_debug_entry_size // SizeOfData - .long 0 // RVA - .long efi_debug_entry - _head // FileOffset - - .set efi_debug_table_size, . - efi_debug_table - .previous - -efi_debug_entry: - // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY - .ascii "NB10" // Signature - .long 0 // Unknown - .long 0 // Unknown2 - .long 0 // Unknown3 - - .asciz VMLINUX_PATH - - .set efi_debug_entry_size, . - efi_debug_entry -#endif - - /* - * EFI will load .text onwards at the 4k section alignment - * described in the PE/COFF header. To ensure that instruction - * sequences using an adrp and a :lo12: immediate will function - * correctly at this alignment, we must ensure that .text is - * placed at a 4k boundary in the Image to begin with. - */ - .align 12 -efi_header_end: + __EFI_PE_HEADER +#else + .long 0 // reserved #endif __INIT @@ -534,13 +384,8 @@ ENTRY(kimage_vaddr) ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 - b.ne 1f - mrs x0, sctlr_el2 -CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 -CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 - msr sctlr_el2, x0 - b 2f -1: mrs x0, sctlr_el1 + b.eq 1f + mrs x0, sctlr_el1 CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 msr sctlr_el1, x0 @@ -548,7 +393,11 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 isb ret -2: +1: mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + #ifdef CONFIG_ARM64_VHE /* * Check for VHE being present. For the rest of the EL2 setup, @@ -612,26 +461,6 @@ set_hcr: msr vpidr_el2, x0 msr vmpidr_el2, x1 - /* - * When VHE is not in use, early init of EL2 and EL1 needs to be - * done here. - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. - */ - cbnz x2, 1f - - /* sctlr_el1 */ - mov x0, #0x0800 // Set/clear RES{1,0} bits -CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems -CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems - msr sctlr_el1, x0 - - /* Coprocessor traps. */ - mov x0, #0x33ff - msr cptr_el2, x0 // Disable copro. traps to EL2 -1: - #ifdef CONFIG_COMPAT msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif @@ -668,6 +497,23 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems ret install_el2_stub: + /* + * When VHE is not in use, early init of EL2 and EL1 needs to be + * done here. + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ + /* sctlr_el1 */ + mov x0, #0x0800 // Set/clear RES{1,0} bits +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems + msr sctlr_el1, x0 + + /* Coprocessor traps. */ + mov x0, #0x33ff + msr cptr_el2, x0 // Disable copro. traps to EL2 + /* Hypervisor stub */ adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 2a07aae5b8a2..c5c45942fb6e 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) return 0; } -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return NOTIFY_DONE; -} - static void __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p, *cur_kprobe; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 043d373b8369..ae2a835898d7 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -205,12 +205,10 @@ void __show_regs(struct pt_regs *regs) pr_cont("\n"); } - printk("\n"); } void show_regs(struct pt_regs * regs) { - printk("\n"); __show_regs(regs); } diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c new file mode 100644 index 000000000000..c124752a8bd3 --- /dev/null +++ b/arch/arm64/kernel/reloc_test_core.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> + +int sym64_rel; + +#define SYM64_ABS_VAL 0xffff880000cccccc +#define SYM32_ABS_VAL 0xf800cccc +#define SYM16_ABS_VAL 0xf8cc + +#define __SET_ABS(name, val) asm(".globl " #name "; .set "#name ", " #val) +#define SET_ABS(name, val) __SET_ABS(name, val) + +SET_ABS(sym64_abs, SYM64_ABS_VAL); +SET_ABS(sym32_abs, SYM32_ABS_VAL); +SET_ABS(sym16_abs, SYM16_ABS_VAL); + +asmlinkage u64 absolute_data64(void); +asmlinkage u64 absolute_data32(void); +asmlinkage u64 absolute_data16(void); +asmlinkage u64 signed_movw(void); +asmlinkage u64 unsigned_movw(void); +asmlinkage u64 relative_adrp(void); +asmlinkage u64 relative_adr(void); +asmlinkage u64 relative_data64(void); +asmlinkage u64 relative_data32(void); +asmlinkage u64 relative_data16(void); + +static struct { + char name[32]; + u64 (*f)(void); + u64 expect; +} const funcs[] = { + { "R_AARCH64_ABS64", absolute_data64, UL(SYM64_ABS_VAL) }, + { "R_AARCH64_ABS32", absolute_data32, UL(SYM32_ABS_VAL) }, + { "R_AARCH64_ABS16", absolute_data16, UL(SYM16_ABS_VAL) }, + { "R_AARCH64_MOVW_SABS_Gn", signed_movw, UL(SYM64_ABS_VAL) }, + { "R_AARCH64_MOVW_UABS_Gn", unsigned_movw, UL(SYM64_ABS_VAL) }, +#ifndef CONFIG_ARM64_ERRATUM_843419 + { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp, (u64)&sym64_rel }, +#endif + { "R_AARCH64_ADR_PREL_LO21", relative_adr, (u64)&sym64_rel }, + { "R_AARCH64_PREL64", relative_data64, (u64)&sym64_rel }, + { "R_AARCH64_PREL32", relative_data32, (u64)&sym64_rel }, + { "R_AARCH64_PREL16", relative_data16, (u64)&sym64_rel }, +}; + +static int reloc_test_init(void) +{ + int i; + + pr_info("Relocation test:\n"); + pr_info("-------------------------------------------------------\n"); + + for (i = 0; i < ARRAY_SIZE(funcs); i++) { + u64 ret = funcs[i].f(); + + pr_info("%-31s 0x%016llx %s\n", funcs[i].name, ret, + ret == funcs[i].expect ? "pass" : "fail"); + if (ret != funcs[i].expect) + pr_err("Relocation failed, expected 0x%016llx, not 0x%016llx\n", + funcs[i].expect, ret); + } + return 0; +} + +static void reloc_test_exit(void) +{ +} + +module_init(reloc_test_init); +module_exit(reloc_test_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S new file mode 100644 index 000000000000..e1edcefeb02d --- /dev/null +++ b/arch/arm64/kernel/reloc_test_syms.S @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/linkage.h> + +ENTRY(absolute_data64) + ldr x0, 0f + ret +0: .quad sym64_abs +ENDPROC(absolute_data64) + +ENTRY(absolute_data32) + ldr w0, 0f + ret +0: .long sym32_abs +ENDPROC(absolute_data32) + +ENTRY(absolute_data16) + adr x0, 0f + ldrh w0, [x0] + ret +0: .short sym16_abs, 0 +ENDPROC(absolute_data16) + +ENTRY(signed_movw) + movz x0, #:abs_g2_s:sym64_abs + movk x0, #:abs_g1_nc:sym64_abs + movk x0, #:abs_g0_nc:sym64_abs + ret +ENDPROC(signed_movw) + +ENTRY(unsigned_movw) + movz x0, #:abs_g3:sym64_abs + movk x0, #:abs_g2_nc:sym64_abs + movk x0, #:abs_g1_nc:sym64_abs + movk x0, #:abs_g0_nc:sym64_abs + ret +ENDPROC(unsigned_movw) + +#ifndef CONFIG_ARM64_ERRATUM_843419 + +ENTRY(relative_adrp) + adrp x0, sym64_rel + add x0, x0, #:lo12:sym64_rel + ret +ENDPROC(relative_adrp) + +#endif + +ENTRY(relative_adr) + adr x0, sym64_rel + ret +ENDPROC(relative_adr) + +ENTRY(relative_data64) + adr x1, 0f + ldr x0, [x1] + add x0, x0, x1 + ret +0: .quad sym64_rel - . +ENDPROC(relative_data64) + +ENTRY(relative_data32) + adr x1, 0f + ldr w0, [x1] + add x0, x0, x1 + ret +0: .long sym64_rel - . +ENDPROC(relative_data32) + +ENTRY(relative_data16) + adr x1, 0f + ldrsh w0, [x1] + add x0, x0, x1 + ret +0: .short sym64_rel - ., 0 +ENDPROC(relative_data16) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ef1caae02110..d4739552da28 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -434,6 +434,7 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_cpu_features(); hyp_mode_check(); apply_alternatives_all(); + mark_linear_text_alias_ro(); } void __init smp_prepare_boot_cpu(void) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index b8deffa9e1bf..987a00ee446c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -143,12 +143,27 @@ SECTIONS . = ALIGN(SEGMENT_ALIGN); __init_begin = .; + __inittext_begin = .; INIT_TEXT_SECTION(8) .exit.text : { ARM_EXIT_KEEP(EXIT_TEXT) } + . = ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .altinstr_replacement : { + *(.altinstr_replacement) + } + + . = ALIGN(PAGE_SIZE); + __inittext_end = .; + __initdata_begin = .; + .init.data : { INIT_DATA INIT_SETUP(16) @@ -164,15 +179,6 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) - . = ALIGN(4); - .altinstructions : { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : { - *(.altinstr_replacement) - } .rela : ALIGN(8) { *(.rela .rela*) } @@ -181,6 +187,7 @@ SECTIONS __rela_size = SIZEOF(.rela); . = ALIGN(SEGMENT_ALIGN); + __initdata_end = .; __init_end = .; _data = .; @@ -206,6 +213,7 @@ SECTIONS } PECOFF_EDATA_PADDING + __pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin); _edata = .; BSS_SECTION(0, 0, 0) @@ -221,6 +229,7 @@ SECTIONS . += RESERVED_TTBR0_SIZE; #endif + __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; STABS_DEBUG diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 1bfe30dfbfe7..fa1b18e364fc 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -135,7 +135,19 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + kvm_pr_unimpl("Unknown exception class: hsr: %#08x -- %s\n", + hsr, esr_get_class_string(hsr)); + + kvm_inject_undefined(vcpu); + return 1; +} + static exit_handle_fn arm_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = kvm_handle_unknown_ec, [ESR_ELx_EC_WFx] = kvm_handle_wfx, [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, @@ -162,13 +174,6 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) u32 hsr = kvm_vcpu_get_hsr(vcpu); u8 hsr_ec = ESR_ELx_EC(hsr); - if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || - !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x -- %s\n", - hsr, esr_get_class_string(hsr)); - BUG(); - } - return arm_exit_handlers[hsr_ec]; } diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index e8e7ba2bc11f..73464a96c365 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -18,14 +18,62 @@ #include <asm/kvm_hyp.h> #include <asm/tlbflush.h> +static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) +{ + u64 val; + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. + */ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) +{ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); +} + +static hyp_alternate_select(__tlb_switch_to_guest, + __tlb_switch_to_guest_nvhe, + __tlb_switch_to_guest_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2. + */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); +} + +static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__tlb_switch_to_host, + __tlb_switch_to_host_nvhe, + __tlb_switch_to_host_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { dsb(ishst); /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); /* * We could do so much better if we had the VA as well. @@ -46,7 +94,29 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + /* + * If the host is running at EL1 and we have a VPIPT I-cache, + * then we must perform I-cache maintenance at EL2 in order for + * it to have an effect on the guest. Since the guest cannot hit + * I-cache lines allocated with a different VMID, we don't need + * to worry about junk out of guest reset (we nuke the I-cache on + * VMID rollover), but we do need to be careful when remapping + * executable pages for the same guest. This can happen when KSM + * takes a CoW fault on an executable page, copies the page into + * a page that was previously mapped in the guest and then needs + * to invalidate the guest view of the I-cache for that page + * from EL1. To solve this, we invalidate the entire I-cache when + * unmapping a page from a guest if we have a VPIPT I-cache but + * the host is running at EL1. As above, we could do better if + * we had the VA. + * + * The moral of this story is: if you have a VPIPT I-cache, then + * you should be running with VHE enabled. + */ + if (!has_vhe() && icache_is_vpipt()) + __flush_icache_all(); + + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -55,14 +125,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalls12e1is); dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -70,14 +139,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); /* Switch to requested VMID */ - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalle1); dsb(nsh); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_flush_vm_context(void) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index d9e9697de1b2..561badf93de8 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -60,7 +60,7 @@ static bool cpu_has_32bit_el1(void) { u64 pfr0; - pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); + pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); return !!(pfr0 & 0x20); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0e26f8c2b56f..26b0e77878b5 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1183,8 +1183,8 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1); - u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1); + u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); + u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT); p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 68634c630cdd..ab9f5f0fb2c7 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -119,9 +119,6 @@ static void flush_context(unsigned int cpu) /* Queue a TLB invalidate and flush the I-cache if necessary. */ cpumask_setall(&tlb_flush_pending); - - if (icache_is_aivivt()) - __flush_icache_all(); } static bool check_update_reserved_asid(u64 asid, u64 newasid) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 81cdb2e844ed..f7b54019ef55 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -584,20 +584,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, */ gfp |= __GFP_ZERO; - if (gfpflags_allow_blocking(gfp)) { - struct page **pages; - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); - - pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, - handle, flush_page); - if (!pages) - return NULL; - - addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, - __builtin_return_address(0)); - if (!addr) - iommu_dma_free(dev, pages, iosize, handle); - } else { + if (!gfpflags_allow_blocking(gfp)) { struct page *page; /* * In atomic context we can't remap anything, so we'll only @@ -621,6 +608,45 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, __free_from_pool(addr, size); addr = NULL; } + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + struct page *page; + + page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, + get_order(size), gfp); + if (!page) + return NULL; + + *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); + if (iommu_dma_mapping_error(dev, *handle)) { + dma_release_from_contiguous(dev, page, + size >> PAGE_SHIFT); + return NULL; + } + if (!coherent) + __dma_flush_area(page_to_virt(page), iosize); + + addr = dma_common_contiguous_remap(page, size, VM_USERMAP, + prot, + __builtin_return_address(0)); + if (!addr) { + iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); + dma_release_from_contiguous(dev, page, + size >> PAGE_SHIFT); + } + } else { + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + struct page **pages; + + pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, + handle, flush_page); + if (!pages) + return NULL; + + addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, + __builtin_return_address(0)); + if (!addr) + iommu_dma_free(dev, pages, iosize, handle); } return addr; } @@ -632,7 +658,8 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, size = PAGE_ALIGN(size); /* - * @cpu_addr will be one of 3 things depending on how it was allocated: + * @cpu_addr will be one of 4 things depending on how it was allocated: + * - A remapped array of pages for contiguous allocations. * - A remapped array of pages from iommu_dma_alloc(), for all * non-atomic allocations. * - A non-cacheable alias from the atomic pool, for atomic @@ -644,6 +671,12 @@ static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, if (__in_atomic_pool(cpu_addr, size)) { iommu_dma_unmap_page(dev, handle, iosize, 0, 0); __free_from_pool(cpu_addr, size); + } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + struct page *page = vmalloc_to_page(cpu_addr); + + iommu_dma_unmap_page(dev, handle, iosize, 0, attrs); + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); + dma_common_free_remap(cpu_addr, size, VM_USERMAP); } else if (is_vmalloc_addr(cpu_addr)){ struct vm_struct *area = find_vm_area(cpu_addr); diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 554a2558c12e..21a8d828cbf4 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -22,7 +22,7 @@ #include <linux/pagemap.h> #include <asm/cacheflush.h> -#include <asm/cachetype.h> +#include <asm/cache.h> #include <asm/tlbflush.h> void sync_icache_aliases(void *kaddr, unsigned long len) @@ -65,8 +65,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) if (!test_and_set_bit(PG_dcache_clean, &page->flags)) sync_icache_aliases(page_address(page), PAGE_SIZE << compound_order(page)); - else if (icache_is_aivivt()) - __flush_icache_all(); } /* diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 55d1e9205543..687a358a3733 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -162,7 +162,7 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); vmemmap_populate(kimg_shadow_start, kimg_shadow_end, - pfn_to_nid(virt_to_pfn(_text))); + pfn_to_nid(virt_to_pfn(lm_alias(_text)))); /* * vmemmap_populate() has populated the shadow region that covers the diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index d28dbcf596b6..91502e36e6d9 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -43,6 +43,9 @@ #include <asm/mmu_context.h> #include <asm/ptdump.h> +#define NO_BLOCK_MAPPINGS BIT(0) +#define NO_CONT_MAPPINGS BIT(1) + u64 idmap_t0sz = TCR_T0SZ(VA_BITS); u64 kimage_voffset __ro_after_init; @@ -103,33 +106,27 @@ static bool pgattr_change_is_safe(u64 old, u64 new) */ static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE; - return old == 0 || new == 0 || ((old ^ new) & ~mask) == 0; + /* creating or taking down mappings is always safe */ + if (old == 0 || new == 0) + return true; + + /* live contiguous mappings may not be manipulated at all */ + if ((old | new) & PTE_CONT) + return false; + + return ((old ^ new) & ~mask) == 0; } -static void alloc_init_pte(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long pfn, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) +static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot) { pte_t *pte; - BUG_ON(pmd_sect(*pmd)); - if (pmd_none(*pmd)) { - phys_addr_t pte_phys; - BUG_ON(!pgtable_alloc); - pte_phys = pgtable_alloc(); - pte = pte_set_fixmap(pte_phys); - __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); - pte_clear_fixmap(); - } - BUG_ON(pmd_bad(*pmd)); - pte = pte_set_fixmap_offset(pmd, addr); do { pte_t old_pte = *pte; - set_pte(pte, pfn_pte(pfn, prot)); - pfn++; + set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot)); /* * After the PTE entry has been populated once, we @@ -137,32 +134,51 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, */ BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte))); + phys += PAGE_SIZE; } while (pte++, addr += PAGE_SIZE, addr != end); pte_clear_fixmap(); } -static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), - bool page_mappings_only) +static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), + int flags) { - pmd_t *pmd; unsigned long next; - /* - * Check for initial section mappings in the pgd/pud and remove them. - */ - BUG_ON(pud_sect(*pud)); - if (pud_none(*pud)) { - phys_addr_t pmd_phys; + BUG_ON(pmd_sect(*pmd)); + if (pmd_none(*pmd)) { + phys_addr_t pte_phys; BUG_ON(!pgtable_alloc); - pmd_phys = pgtable_alloc(); - pmd = pmd_set_fixmap(pmd_phys); - __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); - pmd_clear_fixmap(); + pte_phys = pgtable_alloc(); + __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); } - BUG_ON(pud_bad(*pud)); + BUG_ON(pmd_bad(*pmd)); + + do { + pgprot_t __prot = prot; + + next = pte_cont_addr_end(addr, end); + + /* use a contiguous mapping if the range is suitably aligned */ + if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) && + (flags & NO_CONT_MAPPINGS) == 0) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + + init_pte(pmd, addr, next, phys, __prot); + + phys += next - addr; + } while (addr = next, addr != end); +} + +static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), int flags) +{ + unsigned long next; + pmd_t *pmd; pmd = pmd_set_fixmap_offset(pud, addr); do { @@ -172,7 +188,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && - !page_mappings_only) { + (flags & NO_BLOCK_MAPPINGS) == 0) { pmd_set_huge(pmd, phys, prot); /* @@ -182,8 +198,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), pmd_val(*pmd))); } else { - alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, pgtable_alloc); + alloc_init_cont_pte(pmd, addr, next, phys, prot, + pgtable_alloc, flags); BUG_ON(pmd_val(old_pmd) != 0 && pmd_val(old_pmd) != pmd_val(*pmd)); @@ -194,6 +210,41 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, pmd_clear_fixmap(); } +static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), int flags) +{ + unsigned long next; + + /* + * Check for initial section mappings in the pgd/pud. + */ + BUG_ON(pud_sect(*pud)); + if (pud_none(*pud)) { + phys_addr_t pmd_phys; + BUG_ON(!pgtable_alloc); + pmd_phys = pgtable_alloc(); + __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); + } + BUG_ON(pud_bad(*pud)); + + do { + pgprot_t __prot = prot; + + next = pmd_cont_addr_end(addr, end); + + /* use a contiguous mapping if the range is suitably aligned */ + if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) && + (flags & NO_CONT_MAPPINGS) == 0) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + + init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags); + + phys += next - addr; + } while (addr = next, addr != end); +} + static inline bool use_1G_block(unsigned long addr, unsigned long next, unsigned long phys) { @@ -209,7 +260,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), - bool page_mappings_only) + int flags) { pud_t *pud; unsigned long next; @@ -231,7 +282,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys) && !page_mappings_only) { + if (use_1G_block(addr, next, phys) && + (flags & NO_BLOCK_MAPPINGS) == 0) { pud_set_huge(pud, phys, prot); /* @@ -241,8 +293,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), pud_val(*pud))); } else { - alloc_init_pmd(pud, addr, next, phys, prot, - pgtable_alloc, page_mappings_only); + alloc_init_cont_pmd(pud, addr, next, phys, prot, + pgtable_alloc, flags); BUG_ON(pud_val(old_pud) != 0 && pud_val(old_pud) != pud_val(*pud)); @@ -257,7 +309,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), - bool page_mappings_only) + int flags) { unsigned long addr, length, end, next; pgd_t *pgd = pgd_offset_raw(pgdir, virt); @@ -277,7 +329,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, do { next = pgd_addr_end(addr, end); alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, - page_mappings_only); + flags); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -306,36 +358,49 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, false); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only) { + int flags = 0; + BUG_ON(mm == &init_mm); + if (page_mappings_only) + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - pgd_pgtable_alloc, page_mappings_only); + pgd_pgtable_alloc, flags); } -static void create_mapping_late(phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) +static void update_mapping_prot(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { - pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - NULL, debug_pagealloc_enabled()); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, + NO_CONT_MAPPINGS); + + /* flush the TLBs after updating live kernel mappings */ + flush_tlb_kernel_range(virt, virt + size); } static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { phys_addr_t kernel_start = __pa_symbol(_text); phys_addr_t kernel_end = __pa_symbol(__init_begin); + int flags = 0; + + if (debug_pagealloc_enabled()) + flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* * Take care not to create a writable alias for the @@ -346,8 +411,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end if (end < kernel_start || start >= kernel_end) { __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, PAGE_KERNEL, - early_pgtable_alloc, - debug_pagealloc_enabled()); + early_pgtable_alloc, flags); return; } @@ -359,24 +423,36 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end __create_pgd_mapping(pgd, start, __phys_to_virt(start), kernel_start - start, PAGE_KERNEL, - early_pgtable_alloc, - debug_pagealloc_enabled()); + early_pgtable_alloc, flags); if (kernel_end < end) __create_pgd_mapping(pgd, kernel_end, __phys_to_virt(kernel_end), end - kernel_end, PAGE_KERNEL, - early_pgtable_alloc, - debug_pagealloc_enabled()); + early_pgtable_alloc, flags); /* - * Map the linear alias of the [_text, __init_begin) interval as - * read-only/non-executable. This makes the contents of the - * region accessible to subsystems such as hibernate, but - * protects it from inadvertent modification or execution. + * Map the linear alias of the [_text, __init_begin) interval + * as non-executable now, and remove the write permission in + * mark_linear_text_alias_ro() below (which will be called after + * alternative patching has completed). This makes the contents + * of the region accessible to subsystems such as hibernate, + * but protects it from inadvertent modification or execution. + * Note that contiguous mappings cannot be remapped in this way, + * so we should avoid them here. */ __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), - kernel_end - kernel_start, PAGE_KERNEL_RO, - early_pgtable_alloc, debug_pagealloc_enabled()); + kernel_end - kernel_start, PAGE_KERNEL, + early_pgtable_alloc, NO_CONT_MAPPINGS); +} + +void __init mark_linear_text_alias_ro(void) +{ + /* + * Remove the write permissions from the linear alias of .text/.rodata + */ + update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text), + (unsigned long)__init_begin - (unsigned long)_text, + PAGE_KERNEL_RO); } static void __init map_mem(pgd_t *pgd) @@ -401,25 +477,20 @@ void mark_rodata_ro(void) { unsigned long section_size; - section_size = (unsigned long)_etext - (unsigned long)_text; - create_mapping_late(__pa_symbol(_text), (unsigned long)_text, - section_size, PAGE_KERNEL_ROX); /* * mark .rodata as read only. Use __init_begin rather than __end_rodata * to cover NOTES and EXCEPTION_TABLE. */ section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; - create_mapping_late(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, + update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, section_size, PAGE_KERNEL_RO); - /* flush the TLBs after updating live kernel mappings */ - flush_tlb_all(); - debug_checkwx(); } static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, - pgprot_t prot, struct vm_struct *vma) + pgprot_t prot, struct vm_struct *vma, + int flags) { phys_addr_t pa_start = __pa_symbol(va_start); unsigned long size = va_end - va_start; @@ -428,7 +499,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, BUG_ON(!PAGE_ALIGNED(size)); __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, - early_pgtable_alloc, debug_pagealloc_enabled()); + early_pgtable_alloc, flags); vma->addr = va_start; vma->phys_addr = pa_start; @@ -439,18 +510,39 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, vm_area_add_early(vma); } +static int __init parse_rodata(char *arg) +{ + return strtobool(arg, &rodata_enabled); +} +early_param("rodata", parse_rodata); + /* * Create fine-grained mappings for the kernel. */ static void __init map_kernel(pgd_t *pgd) { - static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data; + static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext, + vmlinux_initdata, vmlinux_data; - map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); - map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata); - map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, - &vmlinux_init); - map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); + /* + * External debuggers may need to write directly to the text + * mapping to install SW breakpoints. Allow this (only) when + * explicitly requested with rodata=off. + */ + pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + + /* + * Only rodata will be remapped with different permissions later on, + * all other segments are allowed to use contiguous mappings. + */ + map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0); + map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL, + &vmlinux_rodata, NO_CONT_MAPPINGS); + map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot, + &vmlinux_inittext, 0); + map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL, + &vmlinux_initdata, 0); + map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0); if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) { /* diff --git a/arch/avr32/include/asm/pgtable-2level.h b/arch/avr32/include/asm/pgtable-2level.h index 425dd567b5b9..d5b1c63993ec 100644 --- a/arch/avr32/include/asm/pgtable-2level.h +++ b/arch/avr32/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H #define __ASM_AVR32_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* diff --git a/arch/avr32/oprofile/backtrace.c b/arch/avr32/oprofile/backtrace.c index 75d9ad6f99cf..29cf2f191bfd 100644 --- a/arch/avr32/oprofile/backtrace.c +++ b/arch/avr32/oprofile/backtrace.c @@ -14,7 +14,7 @@ */ #include <linux/oprofile.h> -#include <linux/sched.h> +#include <linux/ptrace.h> #include <linux/uaccess.h> /* The first two words of each frame on the stack look like this if we have diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index ae6903d7fdbe..14970f11bbf2 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2086,7 +2086,7 @@ static void cryptocop_job_queue_close(void) dma_in_cfg.en = regk_dma_no; REG_WR(dma, IN_DMA_INST, rw_cfg, dma_in_cfg); - /* Disble the cryptocop. */ + /* Disable the cryptocop. */ rw_cfg = REG_RD(strcop, regi_strcop, rw_cfg); rw_cfg.en = 0; REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg); diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index 2a3210ba4c72..fa3a73004cc5 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _CRIS_PGTABLE_H #include <asm/page.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index a0513d463a1f..ab6e7e961b54 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -16,6 +16,7 @@ #ifndef _ASM_PGTABLE_H #define _ASM_PGTABLE_H +#include <asm-generic/5level-fixup.h> #include <asm/mem-layout.h> #include <asm/setup.h> #include <asm/processor.h> diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h index 8341db67821d..7d265d28ba5e 100644 --- a/arch/h8300/include/asm/pgtable.h +++ b/arch/h8300/include/asm/pgtable.h @@ -1,5 +1,6 @@ #ifndef _H8300_PGTABLE_H #define _H8300_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> #include <asm-generic/pgtable.h> #define pgtable_cache_init() do { } while (0) diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c index fe3b5673baba..f5ff3b794c85 100644 --- a/arch/h8300/kernel/ptrace_h.c +++ b/arch/h8300/kernel/ptrace_h.c @@ -9,7 +9,7 @@ */ #include <linux/linkage.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <asm/ptrace.h> #define BREAKINST 0x5730 /* trapa #3 */ diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 49eab8136ec3..24a9177fb897 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -26,6 +26,7 @@ */ #include <linux/swap.h> #include <asm/page.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* A handy thing to have if one has the RAM. Declared in head.S */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 384794e665fc..6cc22c8d8923 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr; #if CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> #endif +#include <asm-generic/5level-fixup.h> #include <asm-generic/pgtable.h> #endif /* _ASM_IA64_PGTABLE_H */ diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index ffa3a3a2ecad..0c151e5af079 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _METAG_PGTABLE_H #include <asm/pgtable-bits.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index fd850879854d..d506bb0893f9 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t; # else /* CONFIG_MMU */ typedef struct { unsigned long ste[64]; } pmd_t; typedef struct { pmd_t pue[1]; } pud_t; -typedef struct { pud_t pge[1]; } pgd_t; +typedef struct { pud_t p4e[1]; } p4d_t; +typedef struct { p4d_t pge[1]; } pgd_t; # endif /* CONFIG_MMU */ # define pte_val(x) ((x).pte) diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c index a5b427909b5c..036d56cc4591 100644 --- a/arch/mips/cavium-octeon/cpu.c +++ b/arch/mips/cavium-octeon/cpu.c @@ -10,7 +10,9 @@ #include <linux/irqflags.h> #include <linux/notifier.h> #include <linux/prefetch.h> +#include <linux/ptrace.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <asm/cop2.h> #include <asm/current.h> diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c index 4d22365844af..cfb4a146cf17 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c @@ -9,6 +9,7 @@ #include <asm/cop2.h> #include <linux/export.h> #include <linux/interrupt.h> +#include <linux/sched/task_stack.h> #include "octeon-crypto.h" diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4b94b7fbafa3..3de786545ded 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -12,6 +12,7 @@ #include <linux/kernel_stat.h> #include <linux/sched.h> #include <linux/sched/hotplug.h> +#include <linux/sched/task_stack.h> #include <linux/init.h> #include <linux/export.h> diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 321752bcbab6..f94455f964ec 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> +#include <linux/ptrace.h> #include <linux/thread_info.h> #include <linux/bitops.h> diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index d21f3da7bdb6..6f94bed571c4 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -16,6 +16,7 @@ #include <asm/cachectl.h> #include <asm/fixmap.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> extern int temp_tlb_entry; diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 514cbc0a6a67..130a2a6c1531 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -17,6 +17,7 @@ #include <asm/cachectl.h> #include <asm/fixmap.h> +#define __ARCH_USE_5LEVEL_HACK #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) #include <asm-generic/pgtable-nopmd.h> #else diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 3daa2cae50b0..1b070a76fcdd 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/sched/hotplug.h> +#include <linux/sched/task_stack.h> #include <linux/mm.h> #include <linux/delay.h> #include <linux/smp.h> diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index e077ea3e11fb..e398cbc3d776 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -23,6 +23,7 @@ #include <linux/interrupt.h> #include <linux/irqchip/mips-gic.h> #include <linux/compiler.h> +#include <linux/sched/task_stack.h> #include <linux/smp.h> #include <linux/atomic.h> diff --git a/arch/mips/loongson64/loongson-3/cop2-ex.c b/arch/mips/loongson64/loongson-3/cop2-ex.c index ea13764d0a03..621d6af5f6eb 100644 --- a/arch/mips/loongson64/loongson-3/cop2-ex.c +++ b/arch/mips/loongson64/loongson-3/cop2-ex.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/sched.h> #include <linux/notifier.h> +#include <linux/ptrace.h> #include <asm/fpu.h> #include <asm/cop2.h> diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c index 10d86d54880a..bddf1ef553a4 100644 --- a/arch/mips/netlogic/common/smp.c +++ b/arch/mips/netlogic/common/smp.c @@ -35,6 +35,7 @@ #include <linux/kernel.h> #include <linux/delay.h> #include <linux/init.h> +#include <linux/sched/task_stack.h> #include <linux/smp.h> #include <linux/irq.h> diff --git a/arch/mips/netlogic/xlp/cop2-ex.c b/arch/mips/netlogic/xlp/cop2-ex.c index 52bc5de42005..21e439b3db70 100644 --- a/arch/mips/netlogic/xlp/cop2-ex.c +++ b/arch/mips/netlogic/xlp/cop2-ex.c @@ -9,11 +9,14 @@ * Copyright (C) 2009 Wind River Systems, * written by Ralf Baechle <ralf@linux-mips.org> */ +#include <linux/capability.h> #include <linux/init.h> #include <linux/irqflags.h> #include <linux/notifier.h> #include <linux/prefetch.h> +#include <linux/ptrace.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <asm/cop2.h> #include <asm/current.h> diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 1f2a5bc4779e..75460e1e106b 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/debug.h> +#include <linux/sched/signal.h> #include <linux/seq_file.h> #include <asm/addrspace.h> diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index d12879eb2b1f..83efe03d5c60 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -12,7 +12,9 @@ #include <linux/signal.h> /* for SIGBUS */ #include <linux/sched.h> /* schow_regs(), force_sig() */ #include <linux/sched/debug.h> +#include <linux/sched/signal.h> +#include <asm/ptrace.h> #include <asm/sn/addrs.h> #include <asm/sn/arch.h> #include <asm/sn/sn0/hub.h> diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index f5ed45e8f442..4cd47d23d81a 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -8,10 +8,13 @@ */ #include <linux/init.h> #include <linux/sched.h> +#include <linux/sched/task_stack.h> #include <linux/topology.h> #include <linux/nodemask.h> + #include <asm/page.h> #include <asm/processor.h> +#include <asm/ptrace.h> #include <asm/sn/arch.h> #include <asm/sn/gda.h> #include <asm/sn/intr.h> diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index 57d8c7486fe6..c1f12a9cf305 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/debug.h> +#include <linux/sched/signal.h> #include <asm/traps.h> #include <linux/uaccess.h> #include <asm/addrspace.h> diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c index 8bd415c8729f..b3b442def423 100644 --- a/arch/mips/sgi-ip32/ip32-reset.c +++ b/arch/mips/sgi-ip32/ip32-reset.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/notifier.h> #include <linux/delay.h> #include <linux/rtc/ds1685.h> diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h index 3810a6f740fd..dfe730a5ede0 100644 --- a/arch/mn10300/include/asm/page.h +++ b/arch/mn10300/include/asm/page.h @@ -57,6 +57,7 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #endif /* !__ASSEMBLY__ */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 298393c3cb42..db4f7d179220 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -22,6 +22,7 @@ #include <asm/tlbflush.h> #include <asm/pgtable-bits.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #define FIRST_USER_ADDRESS 0UL diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h index 5fcb9ac72693..f0a5d8b844d6 100644 --- a/arch/openrisc/include/asm/cmpxchg.h +++ b/arch/openrisc/include/asm/cmpxchg.h @@ -77,7 +77,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, return val; } -#define xchg(ptr, with) \ - ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, with) \ + ({ \ + (__typeof__(*(ptr))) __xchg((unsigned long)(with), \ + (ptr), \ + sizeof(*(ptr))); \ + }) #endif /* __ASM_OPENRISC_CMPXCHG_H */ diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 3567aa7be555..ff97374ca069 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -25,6 +25,7 @@ #ifndef __ASM_OPENRISC_PGTABLE_H #define __ASM_OPENRISC_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 140faa16685a..1311e6b13991 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -211,7 +211,7 @@ do { \ case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \ case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \ - case 8: __get_user_asm2(x, ptr, retval); \ + case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ } while (0) diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c index 5c4695d13542..ee3e604959e1 100644 --- a/arch/openrisc/kernel/or32_ksyms.c +++ b/arch/openrisc/kernel/or32_ksyms.c @@ -30,6 +30,7 @@ #include <asm/hardirq.h> #include <asm/delay.h> #include <asm/pgalloc.h> +#include <asm/pgtable.h> #define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name) @@ -42,6 +43,9 @@ DECLARE_EXPORT(__muldi3); DECLARE_EXPORT(__ashrdi3); DECLARE_EXPORT(__ashldi3); DECLARE_EXPORT(__lshrdi3); +DECLARE_EXPORT(__ucmpdi2); +EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(memset); diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 828a29110459..f8da545854f9 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -90,6 +90,7 @@ void arch_cpu_idle(void) } void (*pm_power_off) (void) = machine_power_off; +EXPORT_SYMBOL(pm_power_off); /* * When a process does an "exec", machine state like FPU and debug diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 19c9c3c5f267..c7e15cc5c668 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -43,28 +43,9 @@ static inline void flush_kernel_dcache_page(struct page *page) #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); -/* vmap range flushes and invalidates. Architecturally, we don't need - * the invalidate, because the CPU should refuse to speculate once an - * area has been flushed, so invalidate is left empty */ -static inline void flush_kernel_vmap_range(void *vaddr, int size) -{ - unsigned long start = (unsigned long)vaddr; - - flush_kernel_dcache_range_asm(start, start + size); -} -static inline void invalidate_kernel_vmap_range(void *vaddr, int size) -{ - unsigned long start = (unsigned long)vaddr; - void *cursor = vaddr; - for ( ; cursor < vaddr + size; cursor += PAGE_SIZE) { - struct page *page = vmalloc_to_page(cursor); - - if (test_and_clear_bit(PG_dcache_dirty, &page->flags)) - flush_kernel_dcache_page(page); - } - flush_kernel_dcache_range_asm(start, start + size); -} +void flush_kernel_vmap_range(void *vaddr, int size); +void invalidate_kernel_vmap_range(void *vaddr, int size); #define flush_cache_vmap(start, end) flush_cache_all() #define flush_cache_vunmap(start, end) flush_cache_all() diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index fb4382c28259..edfbf9d6a6dd 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -32,7 +32,8 @@ * that put_user is the same as __put_user, etc. */ -#define access_ok(type, uaddr, size) (1) +#define access_ok(type, uaddr, size) \ + ( (uaddr) == (uaddr) ) #define put_user __put_user #define get_user __get_user diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index 6b0741e7a7ed..667c99421003 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -362,8 +362,9 @@ #define __NR_copy_file_range (__NR_Linux + 346) #define __NR_preadv2 (__NR_Linux + 347) #define __NR_pwritev2 (__NR_Linux + 348) +#define __NR_statx (__NR_Linux + 349) -#define __NR_Linux_syscalls (__NR_pwritev2 + 1) +#define __NR_Linux_syscalls (__NR_statx + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 0dc72d5de861..c32a09095216 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -616,3 +616,25 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } + +void flush_kernel_vmap_range(void *vaddr, int size) +{ + unsigned long start = (unsigned long)vaddr; + + if ((unsigned long)size > parisc_cache_flush_threshold) + flush_data_cache(); + else + flush_kernel_dcache_range_asm(start, start + size); +} +EXPORT_SYMBOL(flush_kernel_vmap_range); + +void invalidate_kernel_vmap_range(void *vaddr, int size) +{ + unsigned long start = (unsigned long)vaddr; + + if ((unsigned long)size > parisc_cache_flush_threshold) + flush_data_cache(); + else + flush_kernel_dcache_range_asm(start, start + size); +} +EXPORT_SYMBOL(invalidate_kernel_vmap_range); diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index a0ecdb4abcc8..c66c943d9322 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -620,6 +620,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, */ *loc = fsel(val, addend); break; + case R_PARISC_SECREL32: + /* 32-bit section relative address. */ + *loc = fsel(val, addend); + break; case R_PARISC_DPREL21L: /* left 21 bit of relative address */ val = lrsel(val - dp, addend); @@ -807,6 +811,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, */ *loc = fsel(val, addend); break; + case R_PARISC_SECREL32: + /* 32-bit section relative address. */ + *loc = fsel(val, addend); + break; case R_PARISC_FPTR64: /* 64-bit function address */ if(in_local(me, (void *)(val + addend))) { diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index e282a5131d77..6017a5af2e6e 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -39,7 +39,7 @@ * the PDC INTRIGUE calls. This is done to eliminate bugs introduced * in various PDC revisions. The code is much more maintainable * and reliable this way vs having to debug on every version of PDC - * on every box. + * on every box. */ #include <linux/capability.h> @@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr); static int perf_release(struct inode *inode, struct file *file); static int perf_open(struct inode *inode, struct file *file); static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); -static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos); +static ssize_t perf_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos); static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); static void perf_start_counters(void); static int perf_stop_counters(uint32_t *raddr); @@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void); /* * configure: * - * Configure the cpu with a given data image. First turn off the counters, + * Configure the cpu with a given data image. First turn off the counters, * then download the image, then turn the counters back on. */ static int perf_config(uint32_t *image_ptr) @@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr) error = perf_stop_counters(raddr); if (error != 0) { printk("perf_config: perf_stop_counters = %ld\n", error); - return -EINVAL; + return -EINVAL; } printk("Preparing to write image\n"); @@ -242,7 +242,7 @@ printk("Preparing to write image\n"); error = perf_write_image((uint64_t *)image_ptr); if (error != 0) { printk("perf_config: DOWNLOAD = %ld\n", error); - return -EINVAL; + return -EINVAL; } printk("Preparing to start counters\n"); @@ -254,7 +254,7 @@ printk("Preparing to start counters\n"); } /* - * Open the device and initialize all of its memory. The device is only + * Open the device and initialize all of its memory. The device is only * opened once, but can be "queried" by multiple processes that know its * file descriptor. */ @@ -298,19 +298,19 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t * called on the processor that the download should happen * on. */ -static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos) +static ssize_t perf_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { size_t image_size; uint32_t image_type; uint32_t interface_type; uint32_t test; - if (perf_processor_interface == ONYX_INTF) + if (perf_processor_interface == ONYX_INTF) image_size = PCXU_IMAGE_SIZE; - else if (perf_processor_interface == CUDA_INTF) + else if (perf_processor_interface == CUDA_INTF) image_size = PCXW_IMAGE_SIZE; - else + else return -EFAULT; if (!capable(CAP_SYS_ADMIN)) @@ -330,22 +330,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun /* First check the machine type is correct for the requested image */ - if (((perf_processor_interface == CUDA_INTF) && - (interface_type != CUDA_INTF)) || - ((perf_processor_interface == ONYX_INTF) && - (interface_type != ONYX_INTF))) + if (((perf_processor_interface == CUDA_INTF) && + (interface_type != CUDA_INTF)) || + ((perf_processor_interface == ONYX_INTF) && + (interface_type != ONYX_INTF))) return -EINVAL; /* Next check to make sure the requested image is valid */ - if (((interface_type == CUDA_INTF) && + if (((interface_type == CUDA_INTF) && (test >= MAX_CUDA_IMAGES)) || - ((interface_type == ONYX_INTF) && - (test >= MAX_ONYX_IMAGES))) + ((interface_type == ONYX_INTF) && + (test >= MAX_ONYX_IMAGES))) return -EINVAL; /* Copy the image into the processor */ - if (interface_type == CUDA_INTF) + if (interface_type == CUDA_INTF) return perf_config(cuda_images[test]); else return perf_config(onyx_images[test]); @@ -359,7 +359,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun static void perf_patch_images(void) { #if 0 /* FIXME!! */ -/* +/* * NOTE: this routine is VERY specific to the current TLB image. * If the image is changed, this routine might also need to be changed. */ @@ -367,9 +367,9 @@ static void perf_patch_images(void) extern void $i_dtlb_miss_2_0(); extern void PA2_0_iva(); - /* + /* * We can only use the lower 32-bits, the upper 32-bits should be 0 - * anyway given this is in the kernel + * anyway given this is in the kernel */ uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); @@ -377,21 +377,21 @@ static void perf_patch_images(void) if (perf_processor_interface == ONYX_INTF) { /* clear last 2 bytes */ - onyx_images[TLBMISS][15] &= 0xffffff00; + onyx_images[TLBMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBMISS][17] = itlb_addr; /* clear last 2 bytes */ - onyx_images[TLBHANDMISS][15] &= 0xffffff00; + onyx_images[TLBHANDMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBHANDMISS][17] = itlb_addr; /* clear last 2 bytes */ - onyx_images[BIG_CPI][15] &= 0xffffff00; + onyx_images[BIG_CPI][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; @@ -404,24 +404,24 @@ static void perf_patch_images(void) } else if (perf_processor_interface == CUDA_INTF) { /* Cuda interface */ - cuda_images[TLBMISS][16] = + cuda_images[TLBMISS][16] = (cuda_images[TLBMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[TLBMISS][17] = + cuda_images[TLBMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; - cuda_images[TLBHANDMISS][16] = + cuda_images[TLBHANDMISS][16] = (cuda_images[TLBHANDMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[TLBHANDMISS][17] = + cuda_images[TLBHANDMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; - cuda_images[BIG_CPI][16] = + cuda_images[BIG_CPI][16] = (cuda_images[BIG_CPI][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[BIG_CPI][17] = + cuda_images[BIG_CPI][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; } else { @@ -433,7 +433,7 @@ static void perf_patch_images(void) /* * ioctl routine - * All routines effect the processor that they are executed on. Thus you + * All routines effect the processor that they are executed on. Thus you * must be running on the processor that you wish to change. */ @@ -459,7 +459,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } /* copy out the Counters */ - if (copy_to_user((void __user *)arg, raddr, + if (copy_to_user((void __user *)arg, raddr, sizeof (raddr)) != 0) { error = -EFAULT; break; @@ -487,7 +487,7 @@ static const struct file_operations perf_fops = { .open = perf_open, .release = perf_release }; - + static struct miscdevice perf_dev = { MISC_DYNAMIC_MINOR, PA_PERF_DEV, @@ -595,7 +595,7 @@ static int perf_stop_counters(uint32_t *raddr) /* OR sticky2 (bit 1496) to counter2 bit 32 */ tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; raddr[2] = (uint32_t)tmp64; - + /* Counter3 is bits 1497 to 1528 */ tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; /* OR sticky3 (bit 1529) to counter3 bit 32 */ @@ -617,7 +617,7 @@ static int perf_stop_counters(uint32_t *raddr) userbuf[22] = 0; userbuf[23] = 0; - /* + /* * Write back the zeroed bytes + the image given * the read was destructive. */ @@ -625,13 +625,13 @@ static int perf_stop_counters(uint32_t *raddr) } else { /* - * Read RDR-15 which contains the counters and sticky bits + * Read RDR-15 which contains the counters and sticky bits */ if (!perf_rdr_read_ubuf(15, userbuf)) { return -13; } - /* + /* * Clear out the counters */ perf_rdr_clear(15); @@ -644,7 +644,7 @@ static int perf_stop_counters(uint32_t *raddr) raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); } - + return 0; } @@ -682,7 +682,7 @@ static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer) i = tentry->num_words; while (i--) { buffer[i] = 0; - } + } /* Check for bits an even number of 64 */ if ((xbits = width & 0x03f) != 0) { @@ -808,18 +808,22 @@ static int perf_write_image(uint64_t *memaddr) } runway = ioremap_nocache(cpu_device->hpa.start, 4096); + if (!runway) { + pr_err("perf_write_image: ioremap failed!\n"); + return -ENOMEM; + } /* Merge intrigue bits into Runway STATUS 0 */ tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; - __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), + __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), runway + RUNWAY_STATUS); - + /* Write RUNWAY DEBUG registers */ for (i = 0; i < 8; i++) { __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); } - return 0; + return 0; } /* @@ -843,7 +847,7 @@ printk("perf_rdr_write\n"); perf_rdr_shift_out_U(rdr_num, buffer[i]); } else { perf_rdr_shift_out_W(rdr_num, buffer[i]); - } + } } printk("perf_rdr_write done\n"); } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 06f7ca7fe70b..b76f503eee4a 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -142,6 +142,8 @@ void machine_power_off(void) printk(KERN_EMERG "System shut down completed.\n" "Please power this system off now."); + + for (;;); } void (*pm_power_off)(void) = machine_power_off; diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 3cfef1de8061..44aeaa9c039f 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -444,6 +444,7 @@ ENTRY_SAME(copy_file_range) ENTRY_COMP(preadv2) ENTRY_COMP(pwritev2) + ENTRY_SAME(statx) .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 494091762bd7..97a8bc8a095c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,93 +80,99 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK config PPC bool default y - select BUILDTIME_EXTABLE_SORT + # + # Please keep this list sorted alphabetically. + # + select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE + select ARCH_HAS_SG_CHAIN + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_ELF - select ARCH_HAS_ELF_RANDOMIZE - select OF - select OF_EARLY_FLATTREE - select OF_RESERVED_MEM - select HAVE_FTRACE_MCOUNT_RECORD + select BUILDTIME_EXTABLE_SORT + select CLONE_BACKWARDS + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select EDAC_ATOMIC_SCRUB + select EDAC_SUPPORT + select GENERIC_ATOMIC64 if PPC32 + select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL + select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL_OLD + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_CBPF_JIT if !PPC64 + select HAVE_CONTEXT_TRACKING if PPC64 + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_FUNCTION_TRACER + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL + select HAVE_EBPF_JIT if PPC64 + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS - select SYSCTL_EXCEPTION_TRACE - select VIRT_TO_BUS if !PPC64 + select HAVE_GENERIC_RCU_GUP + select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT - select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_KERNEL_GZIP select HAVE_KPROBES - select HAVE_OPTPROBES if PPC64 - select HAVE_ARCH_KGDB select HAVE_KRETPROBES - select HAVE_ARCH_TRACEHOOK + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select HAVE_DMA_API_DEBUG + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI if PERF_EVENTS select HAVE_OPROFILE - select HAVE_DEBUG_KMEMLEAK - select ARCH_HAS_SG_CHAIN - select GENERIC_ATOMIC64 if PPC32 + select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_RCU_TABLE_FREE if SMP select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select ARCH_WANT_IPC_PARSE_VERSION - select SPARSE_IRQ + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_VIRT_CPU_ACCOUNTING select IRQ_DOMAIN - select GENERIC_IRQ_SHOW - select GENERIC_IRQ_SHOW_LEVEL select IRQ_FORCED_THREADING - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT if !PPC64 - select HAVE_EBPF_JIT if PPC64 - select HAVE_ARCH_JUMP_LABEL - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_HAS_GCOV_PROFILE_ALL - select GENERIC_SMP_IDLE_THREAD - select GENERIC_CMOS_UPDATE - select GENERIC_TIME_VSYSCALL_OLD - select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA - select CLONE_BACKWARDS - select ARCH_USE_BUILTIN_BSWAP - select OLD_SIGSUSPEND - select OLD_SIGACTION if PPC32 - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_IRQ_EXIT_ON_IRQ_STACK - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select HAVE_ARCH_AUDITSYSCALL - select ARCH_SUPPORTS_ATOMIC_RMW - select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select NO_BOOTMEM - select HAVE_GENERIC_RCU_GUP - select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_NMI if PERF_EVENTS - select EDAC_SUPPORT - select EDAC_ATOMIC_SCRUB - select ARCH_HAS_DMA_SET_COHERENT_MASK - select ARCH_HAS_DEVMEM_IS_ALLOWED - select HAVE_ARCH_SECCOMP_FILTER - select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select GENERIC_CPU_AUTOPROBE - select HAVE_VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE - select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_KERNEL_GZIP - select HAVE_CONTEXT_TRACKING if PPC64 + select OF + select OF_EARLY_FLATTREE + select OF_RESERVED_MEM + select OLD_SIGACTION if PPC32 + select OLD_SIGSUSPEND + select SPARSE_IRQ + select SYSCTL_EXCEPTION_TRACE + select VIRT_TO_BUS if !PPC64 + # + # Please keep this list sorted alphabetically. + # config GENERIC_CSUM def_bool n diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 31286fa7873c..19b0d1a81959 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index 861e72109df2..f080abfc2f83 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -68,6 +68,7 @@ SECTIONS } #ifdef CONFIG_PPC64_BOOT_WRAPPER + . = ALIGN(256); .got : { __toc_start = .; diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index 9fa046d56eba..411994551afc 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -52,7 +52,7 @@ static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm) { u32 *key = crypto_tfm_ctx(tfm); - *key = 0; + *key = ~0; return 0; } diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 73eb794d6163..bc5fdfd22788 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -51,6 +51,10 @@ #define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit)) #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) +/* Put a PPC bit into a "normal" bit position */ +#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit) \ + ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit)) + #include <asm/barrier.h> /* Macro for generating the ***_bits() functions */ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 012223638815..26ed228d4dc6 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/book3s/32/hash.h> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1eeeb72c7015..8f4d41936e5a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1,9 +1,12 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ +#include <asm-generic/5level-fixup.h> + #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> #endif + /* * Common bits between hash and Radix page table */ @@ -347,23 +350,58 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, __r; \ }) +static inline int __pte_write(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); +} + +#ifdef CONFIG_NUMA_BALANCING +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + /* + * Saved write ptes are prot none ptes that doesn't have + * privileged bit sit. We mark prot none as one which has + * present and pviliged bit set and RWX cleared. To mark + * protnone which used to have _PAGE_WRITE set we clear + * the privileged bit. + */ + return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); +} +#else +#define pte_savedwrite pte_savedwrite +static inline bool pte_savedwrite(pte_t pte) +{ + return false; +} +#endif + +static inline int pte_write(pte_t pte) +{ + return __pte_write(pte) || pte_savedwrite(pte); +} + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + if (__pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0); } static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + /* + * We should not find protnone for hugetlb, but this complete the + * interface. + */ + if (__pte_write(*ptep)) + pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); + else if (unlikely(pte_savedwrite(*ptep))) + pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1); } #define __HAVE_ARCH_PTEP_GET_AND_CLEAR @@ -397,11 +435,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_update(mm, addr, ptep, ~0UL, 0, 0); } -static inline int pte_write(pte_t pte) -{ - return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); -} - static inline int pte_dirty(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY)); @@ -465,19 +498,12 @@ static inline pte_t pte_clear_savedwrite(pte_t pte) VM_BUG_ON(!pte_protnone(pte)); return __pte(pte_val(pte) | _PAGE_PRIVILEGED); } - -#define pte_savedwrite pte_savedwrite -static inline bool pte_savedwrite(pte_t pte) +#else +#define pte_clear_savedwrite pte_clear_savedwrite +static inline pte_t pte_clear_savedwrite(pte_t pte) { - /* - * Saved write ptes are prot none ptes that doesn't have - * privileged bit sit. We mark prot none as one which has - * present and pviliged bit set and RWX cleared. To mark - * protnone which used to have _PAGE_WRITE set we clear - * the privileged bit. - */ - VM_BUG_ON(!pte_protnone(pte)); - return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED)); + VM_WARN_ON(1); + return __pte(pte_val(pte) & ~_PAGE_WRITE); } #endif /* CONFIG_NUMA_BALANCING */ @@ -506,6 +532,8 @@ static inline unsigned long pte_pfn(pte_t pte) /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { + if (unlikely(pte_savedwrite(pte))) + return pte_clear_savedwrite(pte); return __pte(pte_val(pte) & ~_PAGE_WRITE); } @@ -926,6 +954,7 @@ static inline int pmd_protnone(pmd_t pmd) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -982,11 +1011,10 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - - if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_WRITE)) == 0) - return; - - pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + if (__pmd_write((*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); + else if (unlikely(pmd_savedwrite(*pmdp))) + pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED); } static inline int pmd_trans_huge(pmd_t pmd) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 4e63787dc3be..842124b199b5 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;" diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index fd321eb423cb..155731557c9b 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err) std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmpd cr0,r0,r0; \ - bne 1b; \ +236: cmpd cr0,r0,r0; \ + bne 236b; \ IDLE_INST; \ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 93b9b84568e8..09bde6e34f5d 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define ARCH_DLINFO_CACHE_GEOMETRY \ NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \ - NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size); \ - NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d)); \ NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \ NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index f97d8cb6bdf6..ed62efe01e49 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -66,6 +66,55 @@ #define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \ P8_DSISR_MC_ERAT_MULTIHIT_SEC) + +/* + * Machine Check bits on power9 + */ +#define P9_SRR1_MC_LOADSTORE(srr1) (((srr1) >> PPC_BITLSHIFT(42)) & 1) + +#define P9_SRR1_MC_IFETCH(srr1) ( \ + PPC_BITEXTRACT(srr1, 45, 0) | \ + PPC_BITEXTRACT(srr1, 44, 1) | \ + PPC_BITEXTRACT(srr1, 43, 2) | \ + PPC_BITEXTRACT(srr1, 36, 3) ) + +/* 0 is reserved */ +#define P9_SRR1_MC_IFETCH_UE 1 +#define P9_SRR1_MC_IFETCH_SLB_PARITY 2 +#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT 3 +#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT 4 +#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT 5 +#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD 6 +/* 7 is reserved */ +#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT 8 +#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT 9 +/* 10 ? */ +#define P9_SRR1_MC_IFETCH_RA 11 +#define P9_SRR1_MC_IFETCH_RA_TABLEWALK 12 +#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE 13 +#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT 14 +#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN 15 + +/* DSISR bits for machine check (On Power9) */ +#define P9_DSISR_MC_UE (PPC_BIT(48)) +#define P9_DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) +#define P9_DSISR_MC_LINK_LOAD_TIMEOUT (PPC_BIT(50)) +#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT (PPC_BIT(51)) +#define P9_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) +#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) +#define P9_DSISR_MC_USER_TLBIE (PPC_BIT(54)) +#define P9_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55)) +#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB (PPC_BIT(56)) +#define P9_DSISR_MC_RA_LOAD (PPC_BIT(57)) +#define P9_DSISR_MC_RA_TABLEWALK (PPC_BIT(58)) +#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN (PPC_BIT(59)) +#define P9_DSISR_MC_RA_FOREIGN (PPC_BIT(60)) + +/* SLB error bits */ +#define P9_DSISR_MC_SLB_ERRORS (P9_DSISR_MC_ERAT_MULTIHIT | \ + P9_DSISR_MC_SLB_PARITY_MFSLB | \ + P9_DSISR_MC_SLB_MULTIHIT_MFSLB) + enum MCE_Version { MCE_V1 = 1, }; @@ -93,6 +142,9 @@ enum MCE_ErrorType { MCE_ERROR_TYPE_SLB = 2, MCE_ERROR_TYPE_ERAT = 3, MCE_ERROR_TYPE_TLB = 4, + MCE_ERROR_TYPE_USER = 5, + MCE_ERROR_TYPE_RA = 6, + MCE_ERROR_TYPE_LINK = 7, }; enum MCE_UeErrorType { @@ -121,6 +173,32 @@ enum MCE_TlbErrorType { MCE_TLB_ERROR_MULTIHIT = 2, }; +enum MCE_UserErrorType { + MCE_USER_ERROR_INDETERMINATE = 0, + MCE_USER_ERROR_TLBIE = 1, +}; + +enum MCE_RaErrorType { + MCE_RA_ERROR_INDETERMINATE = 0, + MCE_RA_ERROR_IFETCH = 1, + MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2, + MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3, + MCE_RA_ERROR_LOAD = 4, + MCE_RA_ERROR_STORE = 5, + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6, + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7, + MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8, +}; + +enum MCE_LinkErrorType { + MCE_LINK_ERROR_INDETERMINATE = 0, + MCE_LINK_ERROR_IFETCH_TIMEOUT = 1, + MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2, + MCE_LINK_ERROR_LOAD_TIMEOUT = 3, + MCE_LINK_ERROR_STORE_TIMEOUT = 4, + MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5, +}; + struct machine_check_event { enum MCE_Version version:8; /* 0x00 */ uint8_t in_use; /* 0x01 */ @@ -166,6 +244,30 @@ struct machine_check_event { uint64_t effective_address; uint8_t reserved_2[16]; } tlb_error; + + struct { + enum MCE_UserErrorType user_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } user_error; + + struct { + enum MCE_RaErrorType ra_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } ra_error; + + struct { + enum MCE_LinkErrorType link_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } link_error; } u; }; @@ -176,8 +278,12 @@ struct mce_error_info { enum MCE_SlbErrorType slb_error_type:8; enum MCE_EratErrorType erat_error_type:8; enum MCE_TlbErrorType tlb_error_type:8; + enum MCE_UserErrorType user_error_type:8; + enum MCE_RaErrorType ra_error_type:8; + enum MCE_LinkErrorType link_error_type:8; } u; - uint8_t reserved[2]; + enum MCE_Severity severity:8; + enum MCE_Initiator initiator:8; }; #define MAX_MC_EVT 100 diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ba9921bf202e..5134ade2e850 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index d0db98793dd8..9f4de0a1035e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -1,5 +1,8 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H + +#include <asm-generic/5level-fixup.h> + /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 55b28ef3409a..1facb584dd29 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 0cd8a3852763..e5805ad78e12 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd) return ((hpd_val(hpd) & 0x4) != 0); #else /* We clear the top bit to indicate hugepd */ - return ((hpd_val(hpd) & PD_HUGE) == 0); + return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); #endif } diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d99bd442aacb..e7d6d86563ee 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -284,6 +284,13 @@ #define PPC_INST_BRANCH_COND 0x40800000 #define PPC_INST_LBZCIX 0x7c0006aa #define PPC_INST_STBCIX 0x7c0007aa +#define PPC_INST_LWZX 0x7c00002e +#define PPC_INST_LFSX 0x7c00042e +#define PPC_INST_STFSX 0x7c00052e +#define PPC_INST_LFDX 0x7c0004ae +#define PPC_INST_STFDX 0x7c0005ae +#define PPC_INST_LVX 0x7c0000ce +#define PPC_INST_STVX 0x7c0001ce /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 4a90634e8322..35c00d7a0cf8 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -160,12 +160,18 @@ struct of_drconf_cell { #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ #define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ -#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */ -#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */ -#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */ -#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */ -#define OV5_MMU_SLB 0x1800 /* always use SLB */ -#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */ +/* MMU Base Architecture */ +#define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */ +#define OV5_MMU_HASH 0x1800 /* Hash MMU Only */ +#define OV5_MMU_RADIX 0x1840 /* Radix MMU Only */ +#define OV5_MMU_EITHER 0x1880 /* Hash or Radix Supported */ +#define OV5_MMU_DYNAMIC 0x18C0 /* Hash or Radix Can Switch Later */ +#define OV5_NMMU 0x1820 /* Nest MMU Available */ +/* Hash Table Extensions */ +#define OV5_HASH_SEG_TBL 0x1980 /* In Memory Segment Tables Available */ +#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ +/* Radix Table Extensions */ +#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 4b369d83fe9c..1c9470881c4a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -387,3 +387,4 @@ SYSCALL(copy_file_range) COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) SYSCALL(kexec_file_load) +SYSCALL(statx) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index eb1acee91a20..9ba11dbcaca9 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 383 +#define NR_syscalls 384 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 2f26335a3c42..b85f14228857 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -393,5 +393,6 @@ #define __NR_preadv2 380 #define __NR_pwritev2 381 #define __NR_kexec_file_load 382 +#define __NR_statx 383 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index bb7a1890aeb7..e79b9daa873c 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -77,6 +77,7 @@ extern void __flush_tlb_power8(unsigned int action); extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); +extern long __machine_check_early_realmode_p9(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -540,6 +541,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, { /* Power9 */ @@ -559,6 +561,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, { /* Cell Broadband Engine */ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 5f61cc0349c0..995728736677 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -276,19 +276,21 @@ power_enter_stop: */ andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne 1f + bne .Lhandle_esl_ec_set IDLE_STATE_ENTER_SEQ(PPC_STOP) li r3,0 /* Since we didn't lose state, return 0 */ b pnv_wakeup_noloss + +.Lhandle_esl_ec_set: /* * Check if the requested state is a deep idle state. */ -1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 - bge 2f + bge .Lhandle_deep_stop IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) -2: +.Lhandle_deep_stop: /* * Entering deep idle state. * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index c6923ff45131..a1475e6aef3a 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -58,6 +58,15 @@ static void mce_set_error_info(struct machine_check_event *mce, case MCE_ERROR_TYPE_TLB: mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; break; + case MCE_ERROR_TYPE_USER: + mce->u.user_error.user_error_type = mce_err->u.user_error_type; + break; + case MCE_ERROR_TYPE_RA: + mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type; + break; + case MCE_ERROR_TYPE_LINK: + mce->u.link_error.link_error_type = mce_err->u.link_error_type; + break; case MCE_ERROR_TYPE_UNKNOWN: default: break; @@ -90,13 +99,14 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->gpr3 = regs->gpr[3]; mce->in_use = 1; - mce->initiator = MCE_INITIATOR_CPU; /* Mark it recovered if we have handled it and MSR(RI=1). */ if (handled && (regs->msr & MSR_RI)) mce->disposition = MCE_DISPOSITION_RECOVERED; else mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; - mce->severity = MCE_SEV_ERROR_SYNC; + + mce->initiator = mce_err->initiator; + mce->severity = mce_err->severity; /* * Populate the mce error_type and type-specific error_type. @@ -115,6 +125,15 @@ void save_mce_event(struct pt_regs *regs, long handled, } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { mce->u.erat_error.effective_address_provided = true; mce->u.erat_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_USER) { + mce->u.user_error.effective_address_provided = true; + mce->u.user_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_RA) { + mce->u.ra_error.effective_address_provided = true; + mce->u.ra_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_LINK) { + mce->u.link_error.effective_address_provided = true; + mce->u.link_error.effective_address = addr; } else if (mce->error_type == MCE_ERROR_TYPE_UE) { mce->u.ue_error.effective_address_provided = true; mce->u.ue_error.effective_address = addr; @@ -239,6 +258,29 @@ void machine_check_print_event_info(struct machine_check_event *evt) "Parity", "Multihit", }; + static const char *mc_user_types[] = { + "Indeterminate", + "tlbie(l) invalid", + }; + static const char *mc_ra_types[] = { + "Indeterminate", + "Instruction fetch (bad)", + "Page table walk ifetch (bad)", + "Page table walk ifetch (foreign)", + "Load (bad)", + "Store (bad)", + "Page table walk Load/Store (bad)", + "Page table walk Load/Store (foreign)", + "Load/Store (foreign)", + }; + static const char *mc_link_types[] = { + "Indeterminate", + "Instruction fetch (timeout)", + "Page table walk ifetch (timeout)", + "Load (timeout)", + "Store (timeout)", + "Page table walk Load/Store (timeout)", + }; /* Print things out */ if (evt->version != MCE_V1) { @@ -315,6 +357,36 @@ void machine_check_print_event_info(struct machine_check_event *evt) printk("%s Effective address: %016llx\n", level, evt->u.tlb_error.effective_address); break; + case MCE_ERROR_TYPE_USER: + subtype = evt->u.user_error.user_error_type < + ARRAY_SIZE(mc_user_types) ? + mc_user_types[evt->u.user_error.user_error_type] + : "Unknown"; + printk("%s Error type: User [%s]\n", level, subtype); + if (evt->u.user_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.user_error.effective_address); + break; + case MCE_ERROR_TYPE_RA: + subtype = evt->u.ra_error.ra_error_type < + ARRAY_SIZE(mc_ra_types) ? + mc_ra_types[evt->u.ra_error.ra_error_type] + : "Unknown"; + printk("%s Error type: Real address [%s]\n", level, subtype); + if (evt->u.ra_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.ra_error.effective_address); + break; + case MCE_ERROR_TYPE_LINK: + subtype = evt->u.link_error.link_error_type < + ARRAY_SIZE(mc_link_types) ? + mc_link_types[evt->u.link_error.link_error_type] + : "Unknown"; + printk("%s Error type: Link [%s]\n", level, subtype); + if (evt->u.link_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.link_error.effective_address); + break; default: case MCE_ERROR_TYPE_UNKNOWN: printk("%s Error type: Unknown\n", level); @@ -341,6 +413,18 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt) if (evt->u.tlb_error.effective_address_provided) return evt->u.tlb_error.effective_address; break; + case MCE_ERROR_TYPE_USER: + if (evt->u.user_error.effective_address_provided) + return evt->u.user_error.effective_address; + break; + case MCE_ERROR_TYPE_RA: + if (evt->u.ra_error.effective_address_provided) + return evt->u.ra_error.effective_address; + break; + case MCE_ERROR_TYPE_LINK: + if (evt->u.link_error.effective_address_provided) + return evt->u.link_error.effective_address; + break; default: case MCE_ERROR_TYPE_UNKNOWN: break; diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 7353991c4ece..763d6f58caa8 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -116,6 +116,51 @@ static void flush_and_reload_slb(void) } #endif +static void flush_erat(void) +{ + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); +} + +#define MCE_FLUSH_SLB 1 +#define MCE_FLUSH_TLB 2 +#define MCE_FLUSH_ERAT 3 + +static int mce_flush(int what) +{ +#ifdef CONFIG_PPC_STD_MMU_64 + if (what == MCE_FLUSH_SLB) { + flush_and_reload_slb(); + return 1; + } +#endif + if (what == MCE_FLUSH_ERAT) { + flush_erat(); + return 1; + } + if (what == MCE_FLUSH_TLB) { + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { + cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); + return 1; + } + } + + return 0; +} + +static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat) +{ + if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB)) + dsisr &= ~slb; + if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT)) + dsisr &= ~erat; + if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB)) + dsisr &= ~tlb; + /* Any other errors we don't understand? */ + if (dsisr) + return 0; + return 1; +} + static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) { long handled = 1; @@ -281,6 +326,9 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) long handled = 1; struct mce_error_info mce_error_info = { 0 }; + mce_error_info.severity = MCE_SEV_ERROR_SYNC; + mce_error_info.initiator = MCE_INITIATOR_CPU; + srr1 = regs->msr; nip = regs->nip; @@ -352,6 +400,9 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) long handled = 1; struct mce_error_info mce_error_info = { 0 }; + mce_error_info.severity = MCE_SEV_ERROR_SYNC; + mce_error_info.initiator = MCE_INITIATOR_CPU; + srr1 = regs->msr; nip = regs->nip; @@ -372,3 +423,189 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) save_mce_event(regs, handled, &mce_error_info, nip, addr); return handled; } + +static int mce_handle_derror_p9(struct pt_regs *regs) +{ + uint64_t dsisr = regs->dsisr; + + return mce_handle_flush_derrors(dsisr, + P9_DSISR_MC_SLB_PARITY_MFSLB | + P9_DSISR_MC_SLB_MULTIHIT_MFSLB, + + P9_DSISR_MC_TLB_MULTIHIT_MFTLB, + + P9_DSISR_MC_ERAT_MULTIHIT); +} + +static int mce_handle_ierror_p9(struct pt_regs *regs) +{ + uint64_t srr1 = regs->msr; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_SLB_PARITY: + case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: + return mce_flush(MCE_FLUSH_SLB); + case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: + return mce_flush(MCE_FLUSH_TLB); + case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: + return mce_flush(MCE_FLUSH_ERAT); + default: + return 0; + } +} + +static void mce_get_derror_p9(struct pt_regs *regs, + struct mce_error_info *mce_err, uint64_t *addr) +{ + uint64_t dsisr = regs->dsisr; + + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; + + if (dsisr & P9_DSISR_MC_USER_TLBIE) + *addr = regs->nip; + else + *addr = regs->dar; + + if (dsisr & P9_DSISR_MC_UE) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) { + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT; + } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) { + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT; + } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) { + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_USER_TLBIE) { + mce_err->error_type = MCE_ERROR_TYPE_USER; + mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE; + } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_RA_LOAD) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD; + } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN; + } +} + +static void mce_get_ierror_p9(struct pt_regs *regs, + struct mce_error_info *mce_err, uint64_t *addr) +{ + uint64_t srr1 = regs->msr; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: + case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: + mce_err->severity = MCE_SEV_FATAL; + break; + default: + mce_err->severity = MCE_SEV_ERROR_SYNC; + break; + } + + mce_err->initiator = MCE_INITIATOR_CPU; + + *addr = regs->nip; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_UE: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; + break; + case P9_SRR1_MC_IFETCH_SLB_PARITY: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + break; + case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case P9_SRR1_MC_IFETCH_LINK_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_RA: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH; + break; + case P9_SRR1_MC_IFETCH_RA_TABLEWALK: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_STORE; + break; + case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN; + break; + default: + break; + } +} + +long __machine_check_early_realmode_p9(struct pt_regs *regs) +{ + uint64_t nip, addr; + long handled; + struct mce_error_info mce_error_info = { 0 }; + + nip = regs->nip; + + if (P9_SRR1_MC_LOADSTORE(regs->msr)) { + handled = mce_handle_derror_p9(regs); + mce_get_derror_p9(regs, &mce_error_info, &addr); + } else { + handled = mce_handle_ierror_p9(regs); + mce_get_ierror_p9(regs, &mce_error_info, &addr); + } + + /* Handle UE error. */ + if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) + handled = mce_handle_ue_error(regs); + + save_mce_event(regs, handled, &mce_error_info, nip, addr); + return handled; +} diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a3944540fe0d..1c1b44ec7642 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +static bool __initdata prom_radix_disable; + +struct platform_support { + bool hash_mmu; + bool radix_mmu; + bool radix_gtse; +}; + /* Platforms codes are now obsolete in the kernel. Now only used within this * file and ultimately gone too. Feel free to change them if you need, they * are not shared with anything outside of this file anymore @@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void) prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); #endif } + + opt = strstr(prom_cmd_line, "disable_radix"); + if (opt) { + prom_debug("Radix disabled from cmdline\n"); + prom_radix_disable = true; + } } #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) @@ -695,6 +709,8 @@ struct option_vector5 { u8 byte22; u8 intarch; u8 mmu; + u8 hash_ext; + u8 radix_ext; } __packed; struct option_vector6 { @@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved3 = 0, .subprocessors = 1, .intarch = 0, - .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | - OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), + .mmu = 0, + .hash_ext = 0, + .radix_ext = 0, }, /* option vector 6: IBM PAPR hints */ @@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void) } +static void __init prom_parse_mmu_model(u8 val, + struct platform_support *support) +{ + switch (val) { + case OV5_FEAT(OV5_MMU_DYNAMIC): + case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */ + prom_debug("MMU - either supported\n"); + support->radix_mmu = !prom_radix_disable; + support->hash_mmu = true; + break; + case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */ + prom_debug("MMU - radix only\n"); + if (prom_radix_disable) { + /* + * If we __have__ to do radix, we're better off ignoring + * the command line rather than not booting. + */ + prom_printf("WARNING: Ignoring cmdline option disable_radix\n"); + } + support->radix_mmu = true; + break; + case OV5_FEAT(OV5_MMU_HASH): + prom_debug("MMU - hash only\n"); + support->hash_mmu = true; + break; + default: + prom_debug("Unknown mmu support option: 0x%x\n", val); + break; + } +} + +static void __init prom_parse_platform_support(u8 index, u8 val, + struct platform_support *support) +{ + switch (index) { + case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */ + prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); + break; + case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ + if (val & OV5_FEAT(OV5_RADIX_GTSE)) { + prom_debug("Radix - GTSE supported\n"); + support->radix_gtse = true; + } + break; + } +} + +static void __init prom_check_platform_support(void) +{ + struct platform_support supported = { + .hash_mmu = false, + .radix_mmu = false, + .radix_gtse = false + }; + int prop_len = prom_getproplen(prom.chosen, + "ibm,arch-vec-5-platform-support"); + if (prop_len > 1) { + int i; + u8 vec[prop_len]; + prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", + prop_len); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", + &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 + , vec[i] + , vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], + &supported); + } + } + + if (supported.radix_mmu && supported.radix_gtse) { + /* Radix preferred - but we require GTSE for now */ + prom_debug("Asking for radix with GTSE\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); + ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + } else if (supported.hash_mmu) { + /* Default to hash mmu (if we can) */ + prom_debug("Asking for hash\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH); + } else { + /* We're probably on a legacy hypervisor */ + prom_debug("Assuming legacy hash support\n"); + } +} static void __init prom_send_capabilities(void) { @@ -997,6 +1100,9 @@ static void __init prom_send_capabilities(void) prom_arg_t ret; u32 cores; + /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */ + prom_check_platform_support(); + root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* We need to tell the FW about the number of cores we support. @@ -2993,6 +3099,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * On pSeries, inform the firmware about our capabilities @@ -3009,11 +3120,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, copy_and_flush(0, kbase, 0x100, 0); /* - * Do early parsing of command line - */ - early_cmdline_parse(); - - /* * Initialize memory management within prom_init */ prom_init_mem(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index adf2084f214b..9cfaa8b69b5f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -408,7 +408,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, info->line_size = lsize; info->block_size = bsize; info->log_block_size = __ilog2(bsize); - info->blocks_per_page = PAGE_SIZE / bsize; + if (bsize) + info->blocks_per_page = PAGE_SIZE / bsize; + else + info->blocks_per_page = 0; if (sets == 0) info->assoc = 0xffff; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f3158fb16de3..8c68145ba1bd 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -601,7 +601,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); - if (pte_write(pte)) + if (__pte_write(pte)) write_ok = 1; } local_irq_restore(flags); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6fca970373ee..ce6f2121fffe 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -256,7 +256,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte = kvmppc_read_update_linux_pte(ptep, writing); if (pte_present(pte) && !pte_protnone(pte)) { - if (writing && !pte_write(pte)) + if (writing && !__pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); is_ci = pte_ci(pte); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 0e649d72fe8d..2b5e09020cfe 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -20,6 +20,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o obj-y += checksum_$(BITS).o checksum_wrappers.o diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 846dba2c6360..9c542ec70c5b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case LARX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STCX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD: - if (regs->msr & MSR_LE) - return 0; err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case LOAD_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); else @@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); goto ldst_done; #endif @@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: - if (regs->msr & MSR_LE) - return 0; if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); else @@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; #endif diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c new file mode 100644 index 000000000000..2534c1447554 --- /dev/null +++ b/arch/powerpc/lib/test_emulate_step.c @@ -0,0 +1,434 @@ +/* + * Simple sanity test for emulate_step load/store instructions. + * + * Copyright IBM Corp. 2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) "emulate_step_test: " fmt + +#include <linux/ptrace.h> +#include <asm/sstep.h> +#include <asm/ppc-opcode.h> + +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) + +/* + * Defined with TEST_ prefix so it does not conflict with other + * definitions. + */ +#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ + ___PPC_RA(base) | ((i) & 0xfffc)) +#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | \ + __PPC_EH(eh)) +#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) +#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) + + +static void __init init_pt_regs(struct pt_regs *regs) +{ + static unsigned long msr; + static bool msr_cached; + + memset(regs, 0, sizeof(struct pt_regs)); + + if (likely(msr_cached)) { + regs->msr = msr; + return; + } + + asm volatile("mfmsr %0" : "=r"(regs->msr)); + + regs->msr |= MSR_FP; + regs->msr |= MSR_VEC; + regs->msr |= MSR_VSX; + + msr = regs->msr; + msr_cached = true; +} + +static void __init show_result(char *ins, char *result) +{ + pr_info("%-14s : %s\n", ins, result); +} + +static void __init test_ld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* ld r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("ld", "PASS"); + else + show_result("ld", "FAIL"); +} + +static void __init test_lwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* lwz r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("lwz", "PASS"); + else + show_result("lwz", "FAIL"); +} + +static void __init test_lwzx(void) +{ + struct pt_regs regs; + unsigned int a[3] = {0x0, 0x0, 0x1234}; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) a; + regs.gpr[4] = 8; + regs.gpr[5] = 0x8765; + + /* lwzx r5, r3, r4 */ + stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + if (stepped == 1 && regs.gpr[5] == a[2]) + show_result("lwzx", "PASS"); + else + show_result("lwzx", "FAIL"); +} + +static void __init test_std(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + regs.gpr[5] = 0x5678; + + /* std r5, 0(r3) */ + stepped = emulate_step(®s, TEST_STD(5, 3, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("std", "PASS"); + else + show_result("std", "FAIL"); +} + +static void __init test_ldarx_stdcx(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */ + + init_pt_regs(®s); + asm volatile("mfcr %0" : "=r"(regs.ccr)); + + + /*** ldarx ***/ + + regs.gpr[3] = (unsigned long) &a; + regs.gpr[4] = 0; + regs.gpr[5] = 0x5678; + + /* ldarx r5, r3, r4, 0 */ + stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + + /* + * Don't touch 'a' here. Touching 'a' can do Load/store + * of 'a' which result in failure of subsequent stdcx. + * Instead, use hardcoded value for comparison. + */ + if (stepped <= 0 || regs.gpr[5] != 0x1234) { + show_result("ldarx / stdcx.", "FAIL (ldarx)"); + return; + } + + + /*** stdcx. ***/ + + regs.gpr[5] = 0x9ABC; + + /* stdcx. r5, r3, r4 */ + stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + + /* + * Two possible scenarios that indicates successful emulation + * of stdcx. : + * 1. Reservation is active and store is performed. In this + * case cr0.eq bit will be set to 1. + * 2. Reservation is not active and store is not performed. + * In this case cr0.eq bit will be set to 0. + */ + if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq)) + || (regs.gpr[5] != a && !(regs.ccr & cr0_eq)))) + show_result("ldarx / stdcx.", "PASS"); + else + show_result("ldarx / stdcx.", "FAIL (stdcx.)"); +} + +#ifdef CONFIG_PPC_FPU +static void __init test_lfsx_stfsx(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfsx ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfsx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + + if (stepped == 1) + show_result("lfsx", "PASS"); + else + show_result("lfsx", "FAIL"); + + + /*** stfsx ***/ + + c.a = 678.91; + + /* stfsx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfsx", "PASS"); + else + show_result("stfsx", "FAIL"); +} + +static void __init test_lfdx_stfdx(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfdx ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfdx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + + if (stepped == 1) + show_result("lfdx", "PASS"); + else + show_result("lfdx", "FAIL"); + + + /*** stfdx ***/ + + c.a = 987654.32; + + /* stfdx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfdx", "PASS"); + else + show_result("stfdx", "FAIL"); +} +#else +static void __init test_lfsx_stfsx(void) +{ + show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_lfdx_stfdx(void) +{ + show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); +} +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +static void __init test_lvx_stvx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lvx ***/ + + cached_b[0] = c.b[0] = 923745; + cached_b[1] = c.b[1] = 2139478; + cached_b[2] = c.b[2] = 9012; + cached_b[3] = c.b[3] = 982134; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lvx vrt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + + if (stepped == 1) + show_result("lvx", "PASS"); + else + show_result("lvx", "FAIL"); + + + /*** stvx ***/ + + c.b[0] = 4987513; + c.b[1] = 84313948; + c.b[2] = 71; + c.b[3] = 498532; + + /* stvx vrs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stvx", "PASS"); + else + show_result("stvx", "FAIL"); +} +#else +static void __init test_lvx_stvx(void) +{ + show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)"); + show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)"); +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static void __init test_lxvd2x_stxvd2x(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lxvd2x ***/ + + cached_b[0] = c.b[0] = 18233; + cached_b[1] = c.b[1] = 34863571; + cached_b[2] = c.b[2] = 834; + cached_b[3] = c.b[3] = 6138911; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + + if (stepped == 1) + show_result("lxvd2x", "PASS"); + else + show_result("lxvd2x", "FAIL"); + + + /*** stxvd2x ***/ + + c.b[0] = 21379463; + c.b[1] = 87; + c.b[2] = 374234; + c.b[3] = 4; + + /* stxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stxvd2x", "PASS"); + else + show_result("stxvd2x", "FAIL"); +} +#else +static void __init test_lxvd2x_stxvd2x(void) +{ + show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +static int __init test_emulate_step(void) +{ + test_ld(); + test_lwz(); + test_lwzx(); + test_std(); + test_ldarx_stdcx(); + test_lfsx_stfsx(); + test_lfdx_stfdx(); + test_lvx_stvx(); + test_lxvd2x_stxvd2x(); + + return 0; +} +late_initcall(test_emulate_step); diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6aa3b76aa0d6..9be992083d2a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -356,18 +356,42 @@ static void early_check_vec5(void) unsigned long root, chosen; int size; const u8 *vec5; + u8 mmu_supported; root = of_get_flat_dt_root(); chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); - if (chosen == -FDT_ERR_NOTFOUND) + if (chosen == -FDT_ERR_NOTFOUND) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; + } vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); - if (!vec5) + if (!vec5) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; return; - if (size <= OV5_INDX(OV5_MMU_RADIX_300) || - !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) - /* Hypervisor doesn't support radix */ + } + if (size <= OV5_INDX(OV5_MMU_SUPPORT)) { cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + + /* Check for supported configuration */ + mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] & + OV5_FEAT(OV5_MMU_SUPPORT); + if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) { + /* Hypervisor only supports radix - check enabled && GTSE */ + if (!early_radix_enabled()) { + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & + OV5_FEAT(OV5_RADIX_GTSE))) { + pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); + } + /* Do radix anyway - the hypervisor said we had to */ + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; + } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { + /* Hypervisor only supports hash - disable radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + } } void __init mmu_early_init_devtree(void) @@ -383,7 +407,7 @@ void __init mmu_early_init_devtree(void) * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set. */ - if (early_radix_enabled() && !(mfmsr() & MSR_HV)) + if (!(mfmsr() & MSR_HV)) early_check_vec5(); if (early_radix_enabled()) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 2a590a98e652..c28165d8970b 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void) */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } static void __init radix_init_partition_table(void) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 595dd718ea87..2ff13249f87a 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -188,6 +188,8 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) sdsync = POWER7P_MMCRA_SDAR_VALID; else if (ppmu->flags & PPMU_ALT_SIPR) sdsync = POWER6_MMCRA_SDSYNC; + else if (ppmu->flags & PPMU_NO_SIAR) + sdsync = MMCRA_SAMPLE_ENABLE; else sdsync = MMCRA_SDSYNC; diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index e79fb5fb817d..cd951fd231c4 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -65,12 +65,41 @@ static bool is_event_valid(u64 event) return !(event & ~valid_mask); } -static u64 mmcra_sdar_mode(u64 event) +static inline bool is_event_marked(u64 event) { - if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) - return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + if (event & EVENT_IS_MARKED) + return true; + + return false; +} - return MMCRA_SDAR_MODE_TLB; +static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) +{ + /* + * MMCRA[SDAR_MODE] specifices how the SDAR should be updated in + * continous sampling mode. + * + * Incase of Power8: + * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling + * mode and will be un-changed when setting MMCRA[63] (Marked events). + * + * Incase of Power9: + * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'), + * or if group already have any marked events. + * Non-Marked events (for DD1): + * MMCRA[SDAR_MODE] will be set to 0b01 + * For rest + * MMCRA[SDAR_MODE] will be set from event code. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) + *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; + else if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) + *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + *mmcra |= MMCRA_SDAR_MODE_TLB; + } else + *mmcra |= MMCRA_SDAR_MODE_TLB; } static u64 thresh_cmp_val(u64 value) @@ -180,7 +209,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) value |= CNST_L1_QUAL_VAL(cache); } - if (event & EVENT_IS_MARKED) { + if (is_event_marked(event)) { mask |= CNST_SAMPLE_MASK; value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); } @@ -276,7 +305,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev, } /* In continuous sampling mode, update SDAR on TLB miss */ - mmcra |= mmcra_sdar_mode(event[i]); + mmcra_sdar_mode(event[i], &mmcra); if (event[i] & EVENT_IS_L1) { cache = event[i] >> EVENT_CACHE_SEL_SHIFT; @@ -285,7 +314,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT; } - if (event[i] & EVENT_IS_MARKED) { + if (is_event_marked(event[i])) { mmcra |= MMCRA_SAMPLE_ENABLE; val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index cf9bd8990159..899210f14ee4 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -246,6 +246,7 @@ #define MMCRA_THR_CMP_SHIFT 32 #define MMCRA_SDAR_MODE_SHIFT 42 #define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT) +#define MMCRA_SDAR_MODE_NO_UPDATES ~(0x3ull << MMCRA_SDAR_MODE_SHIFT) #define MMCRA_IFM_SHIFT 30 /* MMCR1 Threshold Compare bit constant for power9 */ diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6693f75e93d1..da8a0f7a035c 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -39,8 +39,8 @@ opal_tracepoint_refcount: BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r12,opal_tracepoint_refcount@toc(r2); \ - cmpdi r12,0; \ + ld r11,opal_tracepoint_refcount@toc(r2); \ + cmpdi r11,0; \ bne- LABEL; \ 1: diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 86d9fde93c17..e0f856bfbfe8 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -395,7 +395,6 @@ static int opal_recover_mce(struct pt_regs *regs, struct machine_check_event *evt) { int recovered = 0; - uint64_t ea = get_mce_fault_addr(evt); if (!(regs->msr & MSR_RI)) { /* If MSR_RI isn't set, we cannot recover */ @@ -404,26 +403,18 @@ static int opal_recover_mce(struct pt_regs *regs, } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { /* Platform corrected itself */ recovered = 1; - } else if (ea && !is_kernel_addr(ea)) { + } else if (evt->severity == MCE_SEV_FATAL) { + /* Fatal machine check */ + pr_err("Machine check interrupt is fatal\n"); + recovered = 0; + } else if ((evt->severity == MCE_SEV_ERROR_SYNC) && + (user_mode(regs) && !is_global_init(current))) { /* - * Faulting address is not in kernel text. We should be fine. - * We need to find which process uses this address. * For now, kill the task if we have received exception when * in userspace. * * TODO: Queue up this address for hwpoisioning later. */ - if (user_mode(regs) && !is_global_init(current)) { - _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); - recovered = 1; - } else - recovered = 0; - } else if (user_mode(regs) && !is_global_init(current) && - evt->severity == MCE_SEV_ERROR_SYNC) { - /* - * If we have received a synchronous error when in userspace - * kill the task. - */ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); recovered = 1; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6901a06da2f9..e36738291c32 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1775,17 +1775,20 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev) } static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, - struct pci_bus *bus) + struct pci_bus *bus, + bool add_to_group) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); set_dma_offset(&dev->dev, pe->tce_bypass_base); - iommu_add_device(&dev->dev); + if (add_to_group) + iommu_add_device(&dev->dev); if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate); + pnv_ioda_setup_bus_dma(pe, dev->subordinate, + add_to_group); } } @@ -2191,7 +2194,7 @@ found: set_iommu_table_base(&pe->pdev->dev, tbl); iommu_add_device(&pe->pdev->dev); } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); return; fail: @@ -2426,6 +2429,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) pnv_pci_ioda2_set_bypass(pe, false); pnv_pci_ioda2_unset_window(&pe->table_group, 0); + if (pe->pbus) + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); pnv_ioda2_table_free(tbl); } @@ -2435,6 +2440,8 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) table_group); pnv_pci_ioda2_setup_default_config(pe); + if (pe->pbus) + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); } static struct iommu_table_group_ops pnv_pci_ioda2_ops = { @@ -2624,6 +2631,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); + if ((level_shift - 3) * levels + page_shift >= 60) + return -EINVAL; + /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, levels, tce_table_size, &offset, &total_allocated); @@ -2728,7 +2738,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (pe->flags & PNV_IODA_PE_DEV) iommu_add_device(&pe->pdev->dev); else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); } #ifdef CONFIG_PCI_MSI diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 251060cf1713..8b1fe895daa3 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -751,7 +751,9 @@ void __init hpte_init_pseries(void) mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; - mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; + + if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) + mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; } void radix_init_pseries(void) diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S index f9760ccf4032..3696ea6c4826 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -116,13 +116,13 @@ dt_offset: .data .balign 8 -.globl sha256_digest -sha256_digest: +.globl purgatory_sha256_digest +purgatory_sha256_digest: .skip 32 - .size sha256_digest, . - sha256_digest + .size purgatory_sha256_digest, . - purgatory_sha256_digest .balign 8 -.globl sha_regions -sha_regions: +.globl purgatory_sha_regions +purgatory_sha_regions: .skip 8 * 2 * 16 - .size sha_regions, . - sha_regions + .size purgatory_sha_regions, . - purgatory_sha_regions diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ada29eaed6e2..f523ac883150 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -274,7 +274,9 @@ failed: if (bank->disk->major > 0) unregister_blkdev(bank->disk->major, bank->disk->disk_name); - del_gendisk(bank->disk); + if (bank->disk->flags & GENHD_FL_UP) + del_gendisk(bank->disk); + put_disk(bank->disk); } device->dev.platform_data = NULL; if (bank->io_addr != 0) @@ -299,6 +301,7 @@ axon_ram_remove(struct platform_device *device) device_remove_file(&device->dev, &dev_attr_ecc); free_irq(bank->irq_id, device); del_gendisk(bank->disk); + put_disk(bank->disk); iounmap((void __iomem *) bank->io_addr); kfree(bank); diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index f9670eabfcfa..b53f80f0b4d8 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void) static void icp_opal_set_cpu_priority(unsigned char cppr) { + /* + * Here be dragons. The caller has asked to allow only IPI's and not + * external interrupts. But OPAL XIVE doesn't support that. So instead + * of allowing no interrupts allow all. That's still not right, but + * currently the only caller who does this is xics_migrate_irqs_away() + * and it works in that case. + */ + if (cppr >= DEFAULT_PRIORITY) + cppr = LOWEST_PRIORITY; + xics_set_base_cppr(cppr); opal_int_set_cppr(cppr); iosync(); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 69d858e51ac7..23efe4e42172 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -20,6 +20,7 @@ #include <linux/of.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/delay.h> #include <asm/prom.h> #include <asm/io.h> @@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void) /* Remove ourselves from the global interrupt queue */ xics_set_cpu_giq(xics_default_distrib_server, 0); - /* Allow IPIs again... */ - icp_ops->set_priority(DEFAULT_PRIORITY); - for_each_irq_desc(virq, desc) { struct irq_chip *chip; long server; @@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void) unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } + + /* Allow "sufficient" time to drop any inflight IRQ's */ + mdelay(5); + + /* + * Allow IPIs again. This is done at the very end, after migrating all + * interrupts, the expectation is that we'll only get woken up by an IPI + * interrupt beyond this point, but leave externals masked just to be + * safe. If we're using icp-opal this may actually allow all + * interrupts anyway, but that should be OK. + */ + icp_ops->set_priority(DEFAULT_PRIORITY); + } #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 143b1e00b818..4b176fe83da4 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f05d2d6e1087..0de46cc397f6 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 2358bf33c5ef..e167557b434c 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index d69ea495c4d7..716b17238599 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, ret = blkcipher_walk_done(desc, walk, nbytes - n); } if (k < n) { - if (__ctr_paes_set_key(ctx) != 0) + if (__ctr_paes_set_key(ctx) != 0) { + if (locked) + spin_unlock(&ctrblk_lock); return blkcipher_walk_done(desc, walk, -EIO); + } } } if (locked) diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 68bfd09f1b02..97189dbaf34b 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_KPROBES_SANITY_TEST=y diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index d1c407ddf703..9072bf63a846 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,31 +8,27 @@ #define _S390_CPUTIME_H #include <linux/types.h> -#include <asm/div64.h> +#include <asm/timex.h> #define CPUTIME_PER_USEC 4096ULL #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -typedef unsigned long long __nocast cputime_t; -typedef unsigned long long __nocast cputime64_t; - #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) -static inline unsigned long __div(unsigned long long n, unsigned long base) -{ - return n / base; -} - /* - * Convert cputime to microseconds and back. + * Convert cputime to microseconds. */ -static inline unsigned int cputime_to_usecs(const cputime_t cputime) +static inline u64 cputime_to_usecs(const u64 cputime) { - return (__force unsigned long long) cputime >> 12; + return cputime >> 12; } +/* + * Convert cputime to nanoseconds. + */ +#define cputime_to_nsecs(cputime) tod_to_ns(cputime) u64 arch_cpu_idle_time(int cpu); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7ed1972b1920..93e37b12e882 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -24,6 +24,7 @@ * the S390 page table tree. */ #ifndef __ASSEMBLY__ +#include <asm-generic/5level-fixup.h> #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/page-flags.h> diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 354344dcc198..118535123f34 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void) * ns = (todval * 125) >> 9; * * In order to avoid an overflow with the multiplication we can rewrite this. - * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits) + * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits) * we end up with * - * ns = ((2^32 * th + tl) * 125 ) >> 9; - * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9); + * ns = ((2^9 * th + tl) * 125 ) >> 9; + * -> ns = (th * 125) + ((tl * 125) >> 9); * */ static inline unsigned long long tod_to_ns(unsigned long long todval) { - unsigned long long ns; - - ns = ((todval >> 32) << 23) * 125; - ns += ((todval & 0xffffffff) * 125) >> 9; - return ns; + return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } #endif diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 4384bc797a54..152de9b796e1 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -313,7 +313,9 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -#define NR_syscalls 378 +/* Number 378 is reserved for guarded storage */ +#define __NR_statx 379 +#define NR_syscalls 380 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index ae2cda5eee5a..e89cc2e71db1 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -178,3 +178,4 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dff2152350a7..6a7d737d514c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -490,7 +490,7 @@ ENTRY(pgm_check_handler) jnz .Lpgm_svcper # -> single stepped svc 1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 3f + j 4f 2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 @@ -499,8 +499,8 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+2,0x02 # check for transaction abort jz 3f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -3: la %r11,STACK_FRAME_OVERHEAD(%r15) - stg %r10,__THREAD_last_break(%r14) +3: stg %r10,__THREAD_last_break(%r14) +4: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) @@ -509,14 +509,14 @@ ENTRY(pgm_check_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 4f + jz 5f tmhh %r8,0x0001 # kernel per event ? jz .Lpgm_kprobe oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -4: REENABLE_IRQS +5: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index b67dafb7b7cf..e545ffe5155a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -564,6 +564,8 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { + if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW) + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 20cd339e11ae..f29e41c5e2ec 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -124,7 +124,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, clear_tsk_thread_flag(p, TIF_SINGLE_STEP); /* Initialize per thread user and system timer values */ p->thread.user_timer = 0; + p->thread.guest_timer = 0; p->thread.system_timer = 0; + p->thread.hardirq_timer = 0; + p->thread.softirq_timer = 0; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9b59e6212d8f..2659b5cfeddb 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -386,3 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) +NI_SYSCALL +SYSCALL(sys_statx,compat_sys_statx) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c14fc9029912..072d84ba42a3 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime) } static void account_system_index_scaled(struct task_struct *p, - cputime_t cputime, cputime_t scaled, + u64 cputime, u64 scaled, enum cpu_usage_stat index) { p->stimescaled += cputime_to_nsecs(scaled); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b48dc5f1900b..463e5ef02304 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -608,12 +608,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; pgste_t pgste; pte_t *ptep; pte_t pte; bool dirty; - ptep = get_locked_pte(mm, addr, &ptl); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return false; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return false; + /* We can't run guests backed by huge pages, but userspace can + * still set them up and then try to migrate them without any + * migration support. + */ + if (pmd_large(*pmd)) + return true; + + ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (unlikely(!ptep)) return false; diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h index 0553e5cd5985..46ff8fd678a7 100644 --- a/arch/score/include/asm/pgtable.h +++ b/arch/score/include/asm/pgtable.h @@ -2,6 +2,7 @@ #define _ASM_SCORE_PGTABLE_H #include <linux/const.h> +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/fixmap.h> diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index e359ec675869..12daf45369b4 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,6 +24,7 @@ */ #include <linux/extable.h> +#include <linux/ptrace.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/sched/debug.h> diff --git a/arch/score/mm/extable.c b/arch/score/mm/extable.c index ec871355fc2d..6736a3ad6286 100644 --- a/arch/score/mm/extable.c +++ b/arch/score/mm/extable.c @@ -24,6 +24,8 @@ */ #include <linux/extable.h> +#include <linux/ptrace.h> +#include <asm/extable.h> int fixup_exception(struct pt_regs *regs) { diff --git a/arch/sh/boards/mach-cayman/setup.c b/arch/sh/boards/mach-cayman/setup.c index 340fd40b381d..9c292c27e0d7 100644 --- a/arch/sh/boards/mach-cayman/setup.c +++ b/arch/sh/boards/mach-cayman/setup.c @@ -128,7 +128,6 @@ static int __init smsc_superio_setup(void) SMSC_SUPERIO_WRITE_INDEXED(1, SMSC_PRIMARY_INT_INDEX); SMSC_SUPERIO_WRITE_INDEXED(12, SMSC_SECONDARY_INT_INDEX); -#ifdef CONFIG_IDE /* * Only IDE1 exists on the Cayman */ @@ -158,7 +157,6 @@ static int __init smsc_superio_setup(void) SMSC_SUPERIO_WRITE_INDEXED(0x01, 0xc5); /* GP45 = IDE1_IRQ */ SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc6); /* GP46 = nIOROP */ SMSC_SUPERIO_WRITE_INDEXED(0x00, 0xc7); /* GP47 = nIOWOP */ -#endif /* Exit the configuration state */ outb(SMSC_EXIT_CONFIG_KEY, SMSC_CONFIG_PORT_ADDR); diff --git a/arch/sh/include/asm/pgtable-2level.h b/arch/sh/include/asm/pgtable-2level.h index 19bd89db17e7..f75cf4387257 100644 --- a/arch/sh/include/asm/pgtable-2level.h +++ b/arch/sh/include/asm/pgtable-2level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_2LEVEL_H #define __ASM_SH_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 249a985d9648..9b1e776eca31 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_3LEVEL_H #define __ASM_SH_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> /* diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 56e49c8f770d..8a598528ec1f 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -12,6 +12,7 @@ * the SpitFire page tables. */ +#include <asm-generic/5level-fixup.h> #include <linux/compiler.h> #include <linux/const.h> #include <asm/types.h> diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index d26a42279036..5f8c615cb5e9 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; #define MAXMEM (_VMALLOC_START - PAGE_OFFSET) /* We have no pmd or pud since we are strictly a two-level page table */ +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> static inline int pud_huge_page(pud_t pud) { return 0; } diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index e96cec52f6d8..96fe58b45118 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -59,6 +59,7 @@ #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. */ +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> /* diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index cfbe59752469..179c0ea87a0c 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __UM_PGTABLE_2LEVEL_H #define __UM_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index bae8523a162f..c4d876dfb9ac 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -7,6 +7,7 @@ #ifndef __UM_PGTABLE_3LEVEL_H #define __UM_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index 818d0f5598e3..a4f2bef37e70 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -12,6 +12,7 @@ #ifndef __UNICORE_PGTABLE_H__ #define __UNICORE_PGTABLE_H__ +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/cpu-single.h> diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7ef4a099defc..6205d3b81e6d 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -176,6 +176,7 @@ CONFIG_E1000E=y CONFIG_SKY2=y CONFIG_FORCEDETH=y CONFIG_8139TOO=y +CONFIG_R8169=y CONFIG_FDDI=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index afb222b63cae..c84584bb9402 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, return &amd_f15_PMC20; } case AMD_EVENT_NB: - /* moved to perf_event_amd_uncore.c */ + /* moved to uncore.c */ return &emptyconstraint; default: return &emptyconstraint; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 349d4d17aa7f..2aa1ad194db2 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2101,8 +2101,8 @@ static int x86_pmu_event_init(struct perf_event *event) static void refresh_pce(void *ignored) { - if (current->mm) - load_mm_cr4(current->mm); + if (current->active_mm) + load_mm_cr4(current->active_mm); } static void x86_pmu_event_mapped(struct perf_event *event) @@ -2110,6 +2110,18 @@ static void x86_pmu_event_mapped(struct perf_event *event) if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; + /* + * This function relies on not being called concurrently in two + * tasks in the same mm. Otherwise one task could observe + * perf_rdpmc_allowed > 1 and return all the way back to + * userspace with CR4.PCE clear while another task is still + * doing on_each_cpu_mask() to propagate CR4.PCE. + * + * For now, this can't happen because all callers hold mmap_sem + * for write. If this changes, we'll need a different solution. + */ + lockdep_assert_held_exclusive(¤t->mm->mmap_sem); + if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index aff4b5b69d40..238ae3248ba5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_cstate.c: support cstate residency counters + * Support cstate residency counters * * Copyright (C) 2015, Intel Corp. * Author: Kan Liang (kan.liang@intel.com) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 22054ca49026..9d05c7e67f60 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters + * Support Intel RAPL energy consumption counters * Copyright (C) 2013 Google, Inc., Stephane Eranian * * Intel RAPL interface is specified in the IA-32 Manual Vol3b diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index ad986c1e29bc..df5989f27b1b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head; extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; -/* perf_event_intel_uncore_snb.c */ +/* uncore_snb.c */ int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); @@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); -/* perf_event_intel_uncore_snbep.c */ +/* uncore_snbep.c */ int snbep_uncore_pci_init(void); void snbep_uncore_cpu_init(void); int ivbep_uncore_pci_init(void); @@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void); int skx_uncore_pci_init(void); void skx_uncore_cpu_init(void); -/* perf_event_intel_uncore_nhmex.c */ +/* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index db64baf0e500..8bef70e7f3cc 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -158,13 +158,13 @@ void hyperv_init(void) clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); return; } +register_msr_cs: #endif /* * For 32 bit guests just use the MSR based mechanism for reading * the partition counter. */ -register_msr_cs: hyperv_cs = &hyperv_cs_msr; if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4e7772387c6e..b04bb6dfed7f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -289,7 +289,8 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 72277b1028a5..50d35e3185f5 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ - defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -#endif static inline void pud_clear(pud_t *pudp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cfb36b8c024..585ee0d42d18 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif -#ifndef __PAGETABLE_PMD_FOLDED +#ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8b4de22d6429..62484333673d 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd) } #if CONFIG_PGTABLE_LEVELS > 3 +#include <asm-generic/5level-fixup.h> + typedef struct { pudval_t pud; } pud_t; static inline pud_t native_make_pud(pmdval_t val) @@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud) return pud.pud; } #else +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopud.h> static inline pudval_t native_pud_val(pud_t pud) @@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) return pmd.pmd; } #else +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> static inline pmdval_t native_pmd_val(pmd_t pmd) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 34684adb6899..b3b09b98896d 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc(). pkey 0 is special, and never + * returned from pkey_alloc(). + */ + if (pkey <= 0) + return false; + if (pkey >= arch_max_pkey()) + return false; return mm_pkey_allocation_map(mm) & (1U << pkey); } @@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - /* - * pkey 0 is special, always allocated and can never - * be freed. - */ - if (!pkey) - return -EINVAL; if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h new file mode 100644 index 000000000000..d7da2729903d --- /dev/null +++ b/arch/x86/include/asm/purgatory.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_PURGATORY_H +#define _ASM_X86_PURGATORY_H + +#ifndef __ASSEMBLY__ +#include <linux/purgatory.h> + +extern void purgatory(void); +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern unsigned long purgatory_backup_dest; +extern unsigned long purgatory_backup_src; +extern unsigned long purgatory_backup_sz; +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa85944af83..fc5abff9b7fd 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -188,7 +188,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) static inline void __flush_tlb_all(void) { - if (static_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) __flush_tlb_global(); else __flush_tlb(); diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 5138dacf8bb8..07244ea16765 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -58,7 +58,7 @@ struct setup_header { __u32 header; __u16 version; __u32 realmode_swtch; - __u16 start_sys; + __u16 start_sys_seg; __u16 kernel_version; __u8 type_of_loader; __u8 loadflags; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ae32838cac5f..b2879cc23db4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -179,10 +179,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) return -EINVAL; } + if (!enabled) { + ++disabled_cpus; + return -EINVAL; + } + if (boot_cpu_physical_apicid != -1U) ver = boot_cpu_apic_version; - cpu = __generic_processor_info(id, ver, enabled); + cpu = generic_processor_info(id, ver); if (cpu >= 0) early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; @@ -710,7 +715,7 @@ static void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include <acpi/processor.h> -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4261b3282ad9..8ccb7ef512e0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { } static inline void __x2apic_enable(void) { } #endif /* !CONFIG_X86_X2APIC */ -static int __init try_to_enable_IR(void) -{ -#ifdef CONFIG_X86_IO_APIC - if (!x2apic_enabled() && skip_ioapic_setup) { - pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); - return -1; - } -#endif - return irq_remapping_enable(); -} - void __init enable_IR_x2apic(void) { unsigned long flags; int ret, ir_stat; - if (skip_ioapic_setup) + if (skip_ioapic_setup) { + pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); return; + } ir_stat = irq_remapping_prepare(); if (ir_stat < 0 && !x2apic_supported()) @@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void) /* If irq_remapping_prepare() succeeded, try to enable it */ if (ir_stat >= 0) - ir_stat = try_to_enable_IR(); + ir_stat = irq_remapping_enable(); /* ir_stat contains the remap mode or an error code */ try_to_enable_x2apic(ir_stat); @@ -2062,17 +2053,17 @@ static int allocate_logical_cpuid(int apicid) /* Allocate a new cpuid. */ if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "Only %d processors supported." + WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. " "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids - 1, nr_logical_cpuids, apicid); - return -1; + nr_cpu_ids, nr_logical_cpuids, apicid); + return -EINVAL; } cpuid_to_apicid[nr_logical_cpuids] = apicid; return nr_logical_cpuids++; } -int __generic_processor_info(int apicid, int version, bool enabled) +int generic_processor_info(int apicid, int version) { int cpu, max = nr_cpu_ids; bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, @@ -2130,11 +2121,9 @@ int __generic_processor_info(int apicid, int version, bool enabled) if (num_processors >= nr_cpu_ids) { int thiscpu = max + disabled_cpus; - if (enabled) { - pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " - "reached. Processor %d/0x%x ignored.\n", - max, thiscpu, apicid); - } + pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " + "reached. Processor %d/0x%x ignored.\n", + max, thiscpu, apicid); disabled_cpus++; return -EINVAL; @@ -2186,23 +2175,13 @@ int __generic_processor_info(int apicid, int version, bool enabled) apic->x86_32_early_logical_apicid(cpu); #endif set_cpu_possible(cpu, true); - - if (enabled) { - num_processors++; - physid_set(apicid, phys_cpu_present_map); - set_cpu_present(cpu, true); - } else { - disabled_cpus++; - } + physid_set(apicid, phys_cpu_present_map); + set_cpu_present(cpu, true); + num_processors++; return cpu; } -int generic_processor_info(int apicid, int version) -{ - return __generic_processor_info(apicid, version, true); -} - int hard_smp_processor_id(void) { return read_apic_id(); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 35a5d5dca2fa..c36140d788fe 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -556,10 +556,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index adc0ebd8bed0..43955ee6715b 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -105,8 +105,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif - - clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b11b38c3b0bd..58094a1f9e9d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -88,7 +88,6 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif - clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -1077,8 +1076,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); - else - clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 0a3bc19de017..a70fd61095f8 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -185,7 +185,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } - clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fe0a615a051b..063197771b8d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -162,10 +162,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 0bbe0f3a039f..9ac2a5cdd9c2 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -28,7 +28,6 @@ #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/slab.h> -#include <linux/cpu.h> #include <linux/task_work.h> #include <uapi/linux/magic.h> @@ -728,7 +727,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) if (atomic_dec_and_test(&rdtgrp->waitcount) && (rdtgrp->flags & RDT_DELETED)) { kernfs_unbreak_active_protection(kn); - kernfs_put(kn); + kernfs_put(rdtgrp->kn); kfree(rdtgrp); } else { kernfs_unbreak_active_protection(kn); diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 8457b4978668..d77d07ab310b 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -16,8 +16,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } - - clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 891f4dad7b2c..22403a28caf5 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -30,7 +30,6 @@ #include <asm/hypervisor.h> #include <asm/timer.h> #include <asm/apic.h> -#include <asm/timer.h> #undef pr_fmt #define pr_fmt(fmt) "vmware: " fmt diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8639bb2ae058..8f3d9cf26ff9 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -535,7 +535,7 @@ static void run_sync(void) { int enable_irqs = irqs_disabled(); - /* We may be called with interrupts disbled (on bootup). */ + /* We may be called with interrupts disabled (on bootup). */ if (enable_irqs) local_irq_enable(); on_each_cpu(do_sync_core, NULL, 1); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 54a2372f5dbb..b5785c197e53 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -4,6 +4,7 @@ * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE */ +#define DISABLE_BRANCH_PROFILING #include <linux/init.h> #include <linux/linkage.h> #include <linux/types.h> diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dc6ba5bda9fc..89ff7af2de50 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index bdb83e431d89..38b64587b31b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void) struct dentry *dbp, *version, *data; int error = -ENOMEM; - dbp = debugfs_create_dir("boot_params", NULL); + dbp = debugfs_create_dir("boot_params", arch_debugfs_dir); if (!dbp) return -ENOMEM; diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index c6ee63f927ab..d688826e5736 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -67,7 +67,7 @@ #endif /* Ensure if the instruction can be boostable */ -extern int can_boost(kprobe_opcode_t *instruction); +extern int can_boost(kprobe_opcode_t *instruction, void *addr); /* Recover instruction if given address is probed */ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6384eb754a58..993fa4fe4f68 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -167,12 +167,12 @@ NOKPROBE_SYMBOL(skip_prefixes); * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes, void *addr) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; - if (search_exception_tables((unsigned long)opcodes)) + if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. */ retry: @@ -417,7 +417,7 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. */ - if (can_boost(p->ainsn.insn)) + if (can_boost(p->ainsn.insn, p->addr)) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3d1bee9d6a72..3e7c6e5a08ff 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) while (len < RELATIVEJUMP_SIZE) { ret = __copy_instruction(dest + len, src + len); - if (!ret || !can_boost(dest + len)) + if (!ret || !can_boost(dest + len, src + len)) return -EINVAL; len += ret; } diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 307b1f4543de..857cdbd02867 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -194,19 +194,22 @@ static int arch_update_purgatory(struct kimage *image) /* Setup copying of backup region */ if (image->type == KEXEC_TYPE_CRASH) { - ret = kexec_purgatory_get_set_symbol(image, "backup_dest", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_dest", &image->arch.backup_load_addr, sizeof(image->arch.backup_load_addr), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_src", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_src", &image->arch.backup_src_start, sizeof(image->arch.backup_src_start), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_sz", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_sz", &image->arch.backup_src_sz, sizeof(image->arch.backup_src_sz), 0); if (ret) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index f088ea4c66e7..a723ae9440ab 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -166,11 +166,9 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) spin_lock_irqsave(&desc->lock, flags); /* - * most handlers of type NMI_UNKNOWN never return because - * they just assume the NMI is theirs. Just a sanity check - * to manage expectations + * Indicate if there are multiple registrations on the + * internal NMI handler call chains (SERR and IO_CHECK). */ - WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head)); WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head)); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e244c19a2451..067f9813fd2c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -223,6 +223,22 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TA */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"), + }, + }, + { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TAW", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 46bcda4cb1c2..c73a7f9e881a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -327,9 +327,16 @@ unsigned long long sched_clock(void) { return paravirt_sched_clock(); } + +static inline bool using_native_sched_clock(void) +{ + return pv_time_ops.sched_clock == native_sched_clock; +} #else unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); + +static inline bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) @@ -1112,8 +1119,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) { if (tsc_unstable) return; + tsc_unstable = 1; - clear_sched_clock_stable(); + if (using_native_sched_clock()) + clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to clocksource watchdog\n"); } @@ -1135,18 +1144,20 @@ static struct clocksource clocksource_tsc = { void mark_tsc_unstable(char *reason) { - if (!tsc_unstable) { - tsc_unstable = 1; + if (tsc_unstable) + return; + + tsc_unstable = 1; + if (using_native_sched_clock()) clear_sched_clock_stable(); - disable_sched_clock_irqtime(); - pr_info("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_mark_unstable(&clocksource_tsc); - else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) { + clocksource_mark_unstable(&clocksource_tsc); + } else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; + clocksource_tsc.rating = 0; } } @@ -1322,6 +1333,8 @@ static int __init init_tsc_clocksource(void) * the refined calibration and directly register it as a clocksource. */ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { + if (boot_cpu_has(X86_FEATURE_ART)) + art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); return 0; } diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 478d15dbaee4..08339262b666 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -82,19 +82,43 @@ static size_t regs_size(struct pt_regs *regs) return sizeof(*regs); } +#ifdef CONFIG_X86_32 +#define GCC_REALIGN_WORDS 3 +#else +#define GCC_REALIGN_WORDS 1 +#endif + static bool is_last_task_frame(struct unwind_state *state) { - unsigned long bp = (unsigned long)state->bp; - unsigned long regs = (unsigned long)task_pt_regs(state->task); + unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2; + unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS; /* * We have to check for the last task frame at two different locations * because gcc can occasionally decide to realign the stack pointer and - * change the offset of the stack frame by a word in the prologue of a - * function called by head/entry code. + * change the offset of the stack frame in the prologue of a function + * called by head/entry code. Examples: + * + * <start_secondary>: + * push %edi + * lea 0x8(%esp),%edi + * and $0xfffffff8,%esp + * pushl -0x4(%edi) + * push %ebp + * mov %esp,%ebp + * + * <x86_64_start_kernel>: + * lea 0x8(%rsp),%r10 + * and $0xfffffffffffffff0,%rsp + * pushq -0x8(%r10) + * push %rbp + * mov %rsp,%rbp + * + * Note that after aligning the stack, it pushes a duplicate copy of + * the return address before pushing the frame pointer. */ - return bp == regs - FRAME_HEADER_SIZE || - bp == regs - FRAME_HEADER_SIZE - sizeof(long); + return (state->bp == last_bp || + (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1))); } /* diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 283aa8601833..98e82ee1e699 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7258,9 +7258,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) static int handle_vmclear(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 zero = 0; gpa_t vmptr; - struct vmcs12 *vmcs12; - struct page *page; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -7271,22 +7270,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - /* - * For accurate processor emulation, VMCLEAR beyond available - * physical memory should do nothing at all. However, it is - * possible that a nested vmx bug, not a guest hypervisor bug, - * resulted in this case, so let's shut down before doing any - * more damage: - */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - return 1; - } - vmcs12 = kmap(page); - vmcs12->launch_state = 0; - kunmap(page); - nested_release_page(page); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero)); nested_free_vmcs02(vmx, vmptr); @@ -9694,10 +9680,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); - if (!page) { - WARN_ON(1); + if (!page) return false; - } msr_bitmap_l1 = (unsigned long *)kmap(page); memset(msr_bitmap_l0, 0xff, PAGE_SIZE); @@ -11121,8 +11105,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ static void vmx_leave_nested(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); + } free_nested(to_vmx(vcpu)); } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 99c7805a9693..1f3b6ef105cd 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -106,32 +106,35 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { struct dev_pagemap *pgmap = NULL; - int nr_start = *nr; - pte_t *ptep; + int nr_start = *nr, ret = 0; + pte_t *ptep, *ptem; - ptep = pte_offset_map(&pmd, addr); + /* + * Keep the original mapped PTE value (ptem) around since we + * might increment ptep off the end of the page when finishing + * our loop iteration. + */ + ptem = ptep = pte_offset_map(&pmd, addr); do { pte_t pte = gup_get_pte(ptep); struct page *page; /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_protnone(pte)) { - pte_unmap(ptep); - return 0; - } + if (pte_protnone(pte)) + break; + + if (!pte_allows_gup(pte_val(pte), write)) + break; if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, pages); - pte_unmap(ptep); - return 0; + break; } - } else if (!pte_allows_gup(pte_val(pte), write) || - pte_special(pte)) { - pte_unmap(ptep); - return 0; - } + } else if (pte_special(pte)) + break; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); get_page(page); @@ -141,9 +144,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); + if (addr == end) + ret = 1; + pte_unmap(ptem); - return 1; + return ret; } static inline void get_head_page_multiple(struct page *page, int nr) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8d63d7a104c3..4c90cfdc128b 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt #include <linux/bootmem.h> #include <linux/kasan.h> diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 5126dfd52b18..cd44ae727df7 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -590,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, * we might run off the end of the bounds table if we are on * a 64-bit kernel and try to get 8 bytes. */ -int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, +static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, long __user *bd_entry_ptr) { u32 bd_entry_32; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 0cb52ae0a8f0..190e718694b1 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pcibios_release_device(struct pci_dev *dev) +{ + if (atomic_dec_return(&dev->enable_cnt) >= 0) + pcibios_disable_device(dev); + +} +#endif + int pci_ext_cfg_avail(void) { if (raw_pci_ext_ops) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e1fb269c87af..292ab0364a89 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; for_each_pci_msi_entry(msidesc, dev) { - __pci_read_msi_msg(msidesc, &msg); - pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | - ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (msg.data != XEN_PIRQ_MSI_DATA || - xen_irq_from_pirq(pirq) < 0) { - pirq = xen_allocate_pirq_msi(dev, msidesc); - if (pirq < 0) { - irq = -ENODEV; - goto error; - } - xen_msi_compose_msg(dev, pirq, &msg); - __pci_write_msi_msg(msidesc, &msg); - dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); - } else { - dev_dbg(&dev->dev, - "xen: msi already bound to pirq=%d\n", pirq); + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; } + xen_msi_compose_msg(dev, pirq, &msg); + __pci_write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index a7dbec4dce27..3dbde04febdc 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -26,5 +26,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o +obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c new file mode 100644 index 000000000000..a6c3705a28ad --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c @@ -0,0 +1,82 @@ +/* + * Intel Merrifield power button support + * + * (C) Copyright 2017 Intel Corporation + * + * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/platform_device.h> +#include <linux/sfi.h> + +#include <asm/intel-mid.h> +#include <asm/intel_scu_ipc.h> + +static struct resource mrfld_power_btn_resources[] = { + { + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device mrfld_power_btn_dev = { + .name = "msic_power_btn", + .id = PLATFORM_DEVID_NONE, + .num_resources = ARRAY_SIZE(mrfld_power_btn_resources), + .resource = mrfld_power_btn_resources, +}; + +static int mrfld_power_btn_scu_status_change(struct notifier_block *nb, + unsigned long code, void *data) +{ + if (code == SCU_DOWN) { + platform_device_unregister(&mrfld_power_btn_dev); + return 0; + } + + return platform_device_register(&mrfld_power_btn_dev); +} + +static struct notifier_block mrfld_power_btn_scu_notifier = { + .notifier_call = mrfld_power_btn_scu_status_change, +}; + +static int __init register_mrfld_power_btn(void) +{ + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return -ENODEV; + + /* + * We need to be sure that the SCU IPC is ready before + * PMIC power button device can be registered: + */ + intel_scu_notifier_add(&mrfld_power_btn_scu_notifier); + + return 0; +} +arch_initcall(register_mrfld_power_btn); + +static void __init *mrfld_power_btn_platform_data(void *info) +{ + struct resource *res = mrfld_power_btn_resources; + struct sfi_device_table_entry *pentry = info; + + res->start = res->end = pentry->irq; + return NULL; +} + +static const struct devs_id mrfld_power_btn_dev_id __initconst = { + .name = "bcove_power_btn", + .type = SFI_DEV_TYPE_IPC, + .delay = 1, + .msic = 1, + .get_platform_data = &mrfld_power_btn_platform_data, +}; + +sfi_device(mrfld_power_btn_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 86edd1e941eb..9e304e2ea4f5 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -19,7 +19,7 @@ #include <asm/intel_scu_ipc.h> #include <asm/io_apic.h> -#define TANGIER_EXT_TIMER0_MSI 15 +#define TANGIER_EXT_TIMER0_MSI 12 static struct platform_device wdt_dev = { .name = "intel_mid_wdt", diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c index e793fe509971..e42978d4deaf 100644 --- a/arch/x86/platform/intel-mid/mfld.c +++ b/arch/x86/platform/intel-mid/mfld.c @@ -17,16 +17,6 @@ #include "intel_mid_weak_decls.h" -static void penwell_arch_setup(void); -/* penwell arch ops */ -static struct intel_mid_ops penwell_ops = { - .arch_setup = penwell_arch_setup, -}; - -static void mfld_power_off(void) -{ -} - static unsigned long __init mfld_calibrate_tsc(void) { unsigned long fast_calibrate; @@ -63,9 +53,12 @@ static unsigned long __init mfld_calibrate_tsc(void) static void __init penwell_arch_setup(void) { x86_platform.calibrate_tsc = mfld_calibrate_tsc; - pm_power_off = mfld_power_off; } +static struct intel_mid_ops penwell_ops = { + .arch_setup = penwell_arch_setup, +}; + void *get_penwell_ops(void) { return &penwell_ops; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 766d4d3529a1..f25982cdff90 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode) ops.write_payload_first(pnode, first); ops.write_payload_last(pnode, last); - ops.write_g_sw_ack(pnode, 0xffffUL); /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 25e068ba3382..470edad96bb9 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -10,21 +10,19 @@ * Version 2. See the file COPYING for more details. */ +#include <linux/bug.h> +#include <asm/purgatory.h> + #include "sha256.h" #include "../boot/string.h" -struct sha_region { - unsigned long start; - unsigned long len; -}; - -unsigned long backup_dest = 0; -unsigned long backup_src = 0; -unsigned long backup_sz = 0; +unsigned long purgatory_backup_dest __section(.kexec-purgatory); +unsigned long purgatory_backup_src __section(.kexec-purgatory); +unsigned long purgatory_backup_sz __section(.kexec-purgatory); -u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory); -struct sha_region sha_regions[16] = {}; +struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory); /* * On x86, second kernel requries first 640K of memory to boot. Copy @@ -33,26 +31,28 @@ struct sha_region sha_regions[16] = {}; */ static int copy_backup_region(void) { - if (backup_dest) - memcpy((void *)backup_dest, (void *)backup_src, backup_sz); - + if (purgatory_backup_dest) { + memcpy((void *)purgatory_backup_dest, + (void *)purgatory_backup_src, purgatory_backup_sz); + } return 0; } -int verify_sha256_digest(void) +static int verify_sha256_digest(void) { - struct sha_region *ptr, *end; + struct kexec_sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; struct sha256_state sctx; sha256_init(&sctx); - end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])]; - for (ptr = sha_regions; ptr < end; ptr++) + end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions); + + for (ptr = purgatory_sha_regions; ptr < end; ptr++) sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len); sha256_final(&sctx, digest); - if (memcmp(digest, sha256_digest, sizeof(digest))) + if (memcmp(digest, purgatory_sha256_digest, sizeof(digest))) return 1; return 0; diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index fe3c91ba1bd0..dfae9b9e60b5 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,6 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ +#include <asm/purgatory.h> .text .globl purgatory_start diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h index bd15a4127735..2867d9825a57 100644 --- a/arch/x86/purgatory/sha256.h +++ b/arch/x86/purgatory/sha256.h @@ -10,7 +10,6 @@ #ifndef SHA256_H #define SHA256_H - #include <linux/types.h> #include <crypto/sha.h> diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 8aa0e0d9cbb2..30dd5b2e4ad5 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -11,6 +11,7 @@ #ifndef _XTENSA_PGTABLE_H #define _XTENSA_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/page.h> #include <asm/kmem_layout.h> |