summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/Kconfig18
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h2
-rw-r--r--arch/arm64/include/asm/atomic_lse.h2
-rw-r--r--arch/arm64/include/asm/cputype.h4
-rw-r--r--arch/arm64/include/asm/esr.h9
-rw-r--r--arch/arm64/include/asm/hugetlb.h9
-rw-r--r--arch/arm64/include/asm/kvm_arm.h15
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h42
-rw-r--r--arch/arm64/include/asm/pgtable.h16
-rw-r--r--arch/arm64/include/asm/uprobes.h2
-rw-r--r--arch/arm64/kernel/cpu_errata.c7
-rw-r--r--arch/arm64/kernel/efi-rt-wrapper.S1
-rw-r--r--arch/arm64/kernel/elfcore.c61
-rw-r--r--arch/arm64/kernel/fpsimd.c2
-rw-r--r--arch/arm64/kernel/ptrace.c2
-rw-r--r--arch/arm64/kernel/signal.c9
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h2
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h2
-rw-r--r--arch/arm64/kvm/mmu.c21
-rw-r--r--arch/arm64/kvm/sys_regs.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c2
-rw-r--r--arch/arm64/mm/hugetlbpage.c21
-rw-r--r--arch/arm64/mm/mmu.c21
-rw-r--r--arch/arm64/tools/cpucaps1
-rw-r--r--arch/ia64/kernel/elfcore.c4
-rw-r--r--arch/loongarch/include/asm/ftrace.h2
-rw-r--r--arch/loongarch/include/asm/inst.h9
-rw-r--r--arch/loongarch/include/asm/unwind.h41
-rw-r--r--arch/loongarch/kernel/Makefile2
-rw-r--r--arch/loongarch/kernel/alternative.c6
-rw-r--r--arch/loongarch/kernel/cpu-probe.c2
-rw-r--r--arch/loongarch/kernel/genex.S3
-rw-r--r--arch/loongarch/kernel/inst.c45
-rw-r--r--arch/loongarch/kernel/process.c12
-rw-r--r--arch/loongarch/kernel/traps.c3
-rw-r--r--arch/loongarch/kernel/unwind.c32
-rw-r--r--arch/loongarch/kernel/unwind_guess.c49
-rw-r--r--arch/loongarch/kernel/unwind_prologue.c252
-rw-r--r--arch/loongarch/mm/tlb.c2
-rwxr-xr-xarch/powerpc/boot/wrapper4
-rw-r--r--arch/powerpc/include/asm/imc-pmu.h2
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c2
-rw-r--r--arch/powerpc/perf/imc-pmu.c136
-rw-r--r--arch/s390/kernel/setup.c5
-rw-r--r--arch/x86/boot/bioscall.S4
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c49
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c12
-rw-r--r--arch/x86/kvm/cpuid.c32
-rw-r--r--arch/x86/kvm/svm/nested.c12
-rw-r--r--arch/x86/kvm/xen.c90
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/mm/pat/memtype.c3
-rw-r--r--arch/x86/pci/mmconfig-shared.c44
-rw-r--r--arch/x86/um/elfcore.c4
55 files changed, 677 insertions, 464 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 03934808b2ed..c5ccca26a408 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -184,8 +184,6 @@ config ARM64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
- select HAVE_DYNAMIC_FTRACE_WITH_ARGS \
- if $(cc-option,-fpatchable-function-entry=2)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
if DYNAMIC_FTRACE_WITH_ARGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -972,6 +970,22 @@ config ARM64_ERRATUM_2457168
If unsure, say Y.
+config ARM64_ERRATUM_2645198
+ bool "Cortex-A715: 2645198: Workaround possible [ESR|FAR]_ELx corruption"
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A715 erratum 2645198.
+
+ If a Cortex-A715 cpu sees a page mapping permissions change from executable
+ to non-executable, it may corrupt the ESR_ELx and FAR_ELx registers on the
+ next instruction abort caused by permission fault.
+
+ Only user-space does executable to non-executable permission transition via
+ mprotect() system call. Workaround the problem by doing a break-before-make
+ TLB invalidation, for all changes to executable user space mappings.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 0890e4f568fb..cbb3d961123b 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -315,7 +315,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \
" cbnz %w0, 1b\n" \
" " #mb "\n" \
"2:" \
- : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \
+ : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \
: "r" (old1), "r" (old2), "r" (new1), "r" (new2) \
: cl); \
\
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 52075e93de6c..a94d6dacc029 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -311,7 +311,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \
" eor %[old2], %[old2], %[oldval2]\n" \
" orr %[old1], %[old1], %[old2]" \
: [old1] "+&r" (x0), [old2] "+&r" (x1), \
- [v] "+Q" (*(unsigned long *)ptr) \
+ [v] "+Q" (*(__uint128_t *)ptr) \
: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \
[oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \
: cl); \
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 4e8b66c74ea2..683ca3af4084 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -124,6 +124,8 @@
#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
+#define APPLE_CPU_PART_M2_BLIZZARD 0x032
+#define APPLE_CPU_PART_M2_AVALANCHE 0x033
#define AMPERE_CPU_PART_AMPERE1 0xAC3
@@ -177,6 +179,8 @@
#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
+#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
+#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 15b34fbfca66..206de10524e3 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -114,6 +114,15 @@
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
+#define ESR_ELx_FSC_SEA_TTW0 (0x14)
+#define ESR_ELx_FSC_SEA_TTW1 (0x15)
+#define ESR_ELx_FSC_SEA_TTW2 (0x16)
+#define ESR_ELx_FSC_SEA_TTW3 (0x17)
+#define ESR_ELx_FSC_SECC (0x18)
+#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
+#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
+#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
+#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index d20f5da2d76f..6a4a1ab8eb23 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -49,6 +49,15 @@ extern pte_t huge_ptep_get(pte_t *ptep);
void __init arm64_hugetlb_cma_reserve(void);
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
+
#include <asm-generic/hugetlb.h>
#endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 0df3fc3a0173..26b0c97df986 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -319,21 +319,6 @@
BIT(18) | \
GENMASK(16, 15))
-/* For compatibility with fault code shared with 32-bit */
-#define FSC_FAULT ESR_ELx_FSC_FAULT
-#define FSC_ACCESS ESR_ELx_FSC_ACCESS
-#define FSC_PERM ESR_ELx_FSC_PERM
-#define FSC_SEA ESR_ELx_FSC_EXTABT
-#define FSC_SEA_TTW0 (0x14)
-#define FSC_SEA_TTW1 (0x15)
-#define FSC_SEA_TTW2 (0x16)
-#define FSC_SEA_TTW3 (0x17)
-#define FSC_SECC (0x18)
-#define FSC_SECC_TTW0 (0x1c)
-#define FSC_SECC_TTW1 (0x1d)
-#define FSC_SECC_TTW2 (0x1e)
-#define FSC_SECC_TTW3 (0x1f)
-
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
/*
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 9bdba47f7e14..193583df2d9c 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -349,16 +349,16 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *v
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
{
switch (kvm_vcpu_trap_get_fault(vcpu)) {
- case FSC_SEA:
- case FSC_SEA_TTW0:
- case FSC_SEA_TTW1:
- case FSC_SEA_TTW2:
- case FSC_SEA_TTW3:
- case FSC_SECC:
- case FSC_SECC_TTW0:
- case FSC_SECC_TTW1:
- case FSC_SECC_TTW2:
- case FSC_SECC_TTW3:
+ case ESR_ELx_FSC_EXTABT:
+ case ESR_ELx_FSC_SEA_TTW0:
+ case ESR_ELx_FSC_SEA_TTW1:
+ case ESR_ELx_FSC_SEA_TTW2:
+ case ESR_ELx_FSC_SEA_TTW3:
+ case ESR_ELx_FSC_SECC:
+ case ESR_ELx_FSC_SECC_TTW0:
+ case ESR_ELx_FSC_SECC_TTW1:
+ case ESR_ELx_FSC_SECC_TTW2:
+ case ESR_ELx_FSC_SECC_TTW3:
return true;
default:
return false;
@@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_abt_iss1tw(vcpu))
- return true;
+ if (kvm_vcpu_abt_iss1tw(vcpu)) {
+ /*
+ * Only a permission fault on a S1PTW should be
+ * considered as a write. Otherwise, page tables baked
+ * in a read-only memslot will result in an exception
+ * being delivered in the guest.
+ *
+ * The drawback is that we end-up faulting twice if the
+ * guest is using any of HW AF/DB: a translation fault
+ * to map the page containing the PT (read only at
+ * first), then a permission fault to allow the flags
+ * to be set.
+ */
+ switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
+ case ESR_ELx_FSC_PERM:
+ return true;
+ default:
+ return false;
+ }
+ }
if (kvm_vcpu_trap_is_iabt(vcpu))
return false;
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b4bbeed80fb6..65e78999c75d 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -681,7 +681,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
#define pud_valid(pud) pte_valid(pud_pte(pud))
#define pud_user(pud) pte_user(pud_pte(pud))
-
+#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
{
@@ -730,6 +730,7 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#else
#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; })
+#define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */
/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
#define pmd_set_fixmap(addr) NULL
@@ -862,12 +863,12 @@ static inline bool pte_user_accessible_page(pte_t pte)
static inline bool pmd_user_accessible_page(pmd_t pmd)
{
- return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
+ return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}
static inline bool pud_user_accessible_page(pud_t pud)
{
- return pud_leaf(pud) && pud_user(pud);
+ return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud));
}
#endif
@@ -1093,6 +1094,15 @@ static inline bool pud_sect_supported(void)
}
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+#define ptep_modify_prot_start ptep_modify_prot_start
+extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define ptep_modify_prot_commit ptep_modify_prot_commit
+extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h
index ba4bff5ca674..2b09495499c6 100644
--- a/arch/arm64/include/asm/uprobes.h
+++ b/arch/arm64/include/asm/uprobes.h
@@ -16,7 +16,7 @@
#define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE
#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES
-typedef u32 uprobe_opcode_t;
+typedef __le32 uprobe_opcode_t;
struct arch_uprobe_task {
};
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 89ac00084f38..307faa2b4395 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -661,6 +661,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2645198
+ {
+ .desc = "ARM erratum 2645198",
+ .capability = ARM64_WORKAROUND_2645198,
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A715)
+ },
+#endif
#ifdef CONFIG_ARM64_ERRATUM_2077057
{
.desc = "ARM erratum 2077057",
diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index a00886410537..d872d18101d8 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -4,6 +4,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
SYM_FUNC_START(__efi_rt_asm_wrapper)
stp x29, x30, [sp, #-112]!
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 353009d7f307..2e94d20c4ac7 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -8,28 +8,27 @@
#include <asm/cpufeature.h>
#include <asm/mte.h>
-#define for_each_mte_vma(vmi, vma) \
+#define for_each_mte_vma(cprm, i, m) \
if (system_supports_mte()) \
- for_each_vma(vmi, vma) \
- if (vma->vm_flags & VM_MTE)
+ for (i = 0, m = cprm->vma_meta; \
+ i < cprm->vma_count; \
+ i++, m = cprm->vma_meta + i) \
+ if (m->flags & VM_MTE)
-static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
+static unsigned long mte_vma_tag_dump_size(struct core_vma_metadata *m)
{
- if (vma->vm_flags & VM_DONTDUMP)
- return 0;
-
- return vma_pages(vma) * MTE_PAGE_TAG_STORAGE;
+ return (m->dump_size >> PAGE_SHIFT) * MTE_PAGE_TAG_STORAGE;
}
/* Derived from dump_user_range(); start/end must be page-aligned */
static int mte_dump_tag_range(struct coredump_params *cprm,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long len)
{
int ret = 1;
unsigned long addr;
void *tags = NULL;
- for (addr = start; addr < end; addr += PAGE_SIZE) {
+ for (addr = start; addr < start + len; addr += PAGE_SIZE) {
struct page *page = get_dump_page(addr);
/*
@@ -65,7 +64,6 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
mte_save_page_tags(page_address(page), tags);
put_page(page);
if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) {
- mte_free_tag_storage(tags);
ret = 0;
break;
}
@@ -77,13 +75,13 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
return ret;
}
-Elf_Half elf_core_extra_phdrs(void)
+Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm)
{
- struct vm_area_struct *vma;
+ int i;
+ struct core_vma_metadata *m;
int vma_count = 0;
- VMA_ITERATOR(vmi, current->mm, 0);
- for_each_mte_vma(vmi, vma)
+ for_each_mte_vma(cprm, i, m)
vma_count++;
return vma_count;
@@ -91,18 +89,18 @@ Elf_Half elf_core_extra_phdrs(void)
int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
{
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, current->mm, 0);
+ int i;
+ struct core_vma_metadata *m;
- for_each_mte_vma(vmi, vma) {
+ for_each_mte_vma(cprm, i, m) {
struct elf_phdr phdr;
phdr.p_type = PT_AARCH64_MEMTAG_MTE;
phdr.p_offset = offset;
- phdr.p_vaddr = vma->vm_start;
+ phdr.p_vaddr = m->start;
phdr.p_paddr = 0;
- phdr.p_filesz = mte_vma_tag_dump_size(vma);
- phdr.p_memsz = vma->vm_end - vma->vm_start;
+ phdr.p_filesz = mte_vma_tag_dump_size(m);
+ phdr.p_memsz = m->end - m->start;
offset += phdr.p_filesz;
phdr.p_flags = 0;
phdr.p_align = 0;
@@ -114,28 +112,25 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
return 1;
}
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
{
- struct vm_area_struct *vma;
+ int i;
+ struct core_vma_metadata *m;
size_t data_size = 0;
- VMA_ITERATOR(vmi, current->mm, 0);
- for_each_mte_vma(vmi, vma)
- data_size += mte_vma_tag_dump_size(vma);
+ for_each_mte_vma(cprm, i, m)
+ data_size += mte_vma_tag_dump_size(m);
return data_size;
}
int elf_core_write_extra_data(struct coredump_params *cprm)
{
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, current->mm, 0);
-
- for_each_mte_vma(vmi, vma) {
- if (vma->vm_flags & VM_DONTDUMP)
- continue;
+ int i;
+ struct core_vma_metadata *m;
- if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end))
+ for_each_mte_vma(cprm, i, m) {
+ if (!mte_dump_tag_range(cprm, m->start, m->dump_size))
return 0;
}
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index dcc81e7200d4..b6ef1af0122e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -385,7 +385,7 @@ static void task_fpsimd_load(void)
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());
- if (system_supports_sve()) {
+ if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
/* Stop tracking SVE for this task until next use. */
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 2686ab157601..0c321ad23cd3 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1357,7 +1357,7 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
#endif
-#ifdef CONFIG_ARM64_SVE
+#ifdef CONFIG_ARM64_SME
REGSET_SSVE,
REGSET_ZA,
#endif
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index e0d09bf5b01b..be279fd48248 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -281,7 +281,12 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
vl = task_get_sme_vl(current);
} else {
- if (!system_supports_sve())
+ /*
+ * A SME only system use SVE for streaming mode so can
+ * have a SVE formatted context with a zero VL and no
+ * payload data.
+ */
+ if (!system_supports_sve() && !system_supports_sme())
return -EINVAL;
vl = task_get_sve_vl(current);
@@ -732,7 +737,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
- if (system_supports_sve()) {
+ if (system_supports_sve() || system_supports_sme()) {
unsigned int vq = 0;
if (add_all || test_thread_flag(TIF_SVE) ||
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
index 1b8a2dcd712f..9ddcfe2c3e57 100644
--- a/arch/arm64/kvm/hyp/include/hyp/fault.h
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
*/
if (!(esr & ESR_ELx_S1PTW) &&
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 3330d1b76bdd..07d37ff88a3f 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -367,7 +367,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid;
- valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+ valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
kvm_vcpu_dabt_isvalid(vcpu) &&
!kvm_vcpu_abt_issea(vcpu) &&
!kvm_vcpu_abt_iss1tw(vcpu);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 31d7fa4c7c14..a3ee3b605c9b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1212,7 +1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
VM_BUG_ON(write_fault && exec_fault);
- if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
+ if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
}
@@ -1277,7 +1277,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* only exception to this is when dirty logging is enabled at runtime
* and a write fault needs to collapse a block entry into a table.
*/
- if (fault_status != FSC_PERM || (logging_active && write_fault)) {
+ if (fault_status != ESR_ELx_FSC_PERM ||
+ (logging_active && write_fault)) {
ret = kvm_mmu_topup_memory_cache(memcache,
kvm_mmu_cache_min_pages(kvm));
if (ret)
@@ -1342,7 +1343,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* backed by a THP and thus use block mapping if possible.
*/
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
- if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
+ if (fault_status == ESR_ELx_FSC_PERM &&
+ fault_granule > PAGE_SIZE)
vma_pagesize = fault_granule;
else
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
@@ -1350,7 +1352,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
&fault_ipa);
}
- if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+ if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */
if (kvm_vma_mte_allowed(vma)) {
sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1376,7 +1378,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* permissions only if vma_pagesize equals fault_granule. Otherwise,
* kvm_pgtable_stage2_map() should be called to change block size.
*/
- if (fault_status == FSC_PERM && vma_pagesize == fault_granule)
+ if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
else
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
@@ -1441,7 +1443,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
- if (fault_status == FSC_FAULT) {
+ if (fault_status == ESR_ELx_FSC_FAULT) {
/* Beyond sanitised PARange (which is the IPA limit) */
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
kvm_inject_size_fault(vcpu);
@@ -1476,8 +1478,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
- fault_status != FSC_ACCESS) {
+ if (fault_status != ESR_ELx_FSC_FAULT &&
+ fault_status != ESR_ELx_FSC_PERM &&
+ fault_status != ESR_ELx_FSC_ACCESS) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1539,7 +1542,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
/* Userspace should not be able to register out-of-bounds IPAs */
VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
- if (fault_status == FSC_ACCESS) {
+ if (fault_status == ESR_ELx_FSC_ACCESS) {
handle_access_fault(vcpu, fault_ipa);
ret = 1;
goto out_unlock;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d5ee52d6bf73..c6cbfe6b854b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -646,7 +646,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
return;
/* Only preserve PMCR_EL0.N, and reset the rest to 0 */
- pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK;
+ pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
if (!kvm_supports_32bit_el0())
pmcr |= ARMV8_PMU_PMCR_LC;
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 826ff6f2a4e7..2074521d4a8c 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -616,6 +616,8 @@ static const struct midr_range broken_seis[] = {
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+ MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
{},
};
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 35e9a468d13e..95364e8bdc19 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -559,3 +559,24 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
{
return __hugetlb_valid_size(size);
}
+
+pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
+ /*
+ * Break-before-make (BBM) is required for all user space mappings
+ * when the permission changes from executable to non-executable
+ * in cases where cpu is affected with errata #2645198.
+ */
+ if (pte_user_exec(READ_ONCE(*ptep)))
+ return huge_ptep_clear_flush(vma, addr, ptep);
+ }
+ return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+}
+
+void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte)
+{
+ set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 14c87e8d69d8..d77c9f56b7b4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1630,3 +1630,24 @@ static int __init prevent_bootmem_remove_init(void)
}
early_initcall(prevent_bootmem_remove_init);
#endif
+
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
+ /*
+ * Break-before-make (BBM) is required for all user space mappings
+ * when the permission changes from executable to non-executable
+ * in cases where cpu is affected with errata #2645198.
+ */
+ if (pte_user_exec(READ_ONCE(*ptep)))
+ return ptep_clear_flush(vma, addr, ptep);
+ }
+ return ptep_get_and_clear(vma->vm_mm, addr, ptep);
+}
+
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte)
+{
+ set_pte_at(vma->vm_mm, addr, ptep, pte);
+}
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index a86ee376920a..dfeb2c51e257 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -71,6 +71,7 @@ WORKAROUND_2038923
WORKAROUND_2064142
WORKAROUND_2077057
WORKAROUND_2457168
+WORKAROUND_2645198
WORKAROUND_2658417
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
index 94680521fbf9..8895df121540 100644
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -7,7 +7,7 @@
#include <asm/elf.h>
-Elf64_Half elf_core_extra_phdrs(void)
+Elf64_Half elf_core_extra_phdrs(struct coredump_params *cprm)
{
return GATE_EHDR->e_phnum;
}
@@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm)
return 1;
}
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
{
const struct elf_phdr *const gate_phdrs =
(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index 90f9d3399b2a..3418d32d4fc7 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -10,8 +10,6 @@
#define FTRACE_REGS_PLT_IDX 1
#define NR_FTRACE_PLTS 2
-#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1]))
-
#ifdef CONFIG_FUNCTION_TRACER
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index c00e1512d4fa..7eedd83fd0d7 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -377,14 +377,6 @@ static inline bool unsigned_imm_check(unsigned long val, unsigned int bit)
return val < (1UL << bit);
}
-static inline unsigned long sign_extend(unsigned long val, unsigned int idx)
-{
- if (!is_imm_negative(val, idx + 1))
- return ((1UL << idx) - 1) & val;
- else
- return ~((1UL << idx) - 1) | val;
-}
-
#define DEF_EMIT_REG0I26_FORMAT(NAME, OP) \
static inline void emit_##NAME(union loongarch_instruction *insn, \
int offset) \
@@ -401,6 +393,7 @@ static inline void emit_##NAME(union loongarch_instruction *insn, \
}
DEF_EMIT_REG0I26_FORMAT(b, b_op)
+DEF_EMIT_REG0I26_FORMAT(bl, bl_op)
#define DEF_EMIT_REG1I20_FORMAT(NAME, OP) \
static inline void emit_##NAME(union loongarch_instruction *insn, \
diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
index f2b52b9ea93d..b9dce87afd2e 100644
--- a/arch/loongarch/include/asm/unwind.h
+++ b/arch/loongarch/include/asm/unwind.h
@@ -8,7 +8,9 @@
#define _ASM_UNWIND_H
#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
#include <asm/stacktrace.h>
enum unwinder_type {
@@ -20,11 +22,13 @@ struct unwind_state {
char type; /* UNWINDER_XXX */
struct stack_info stack_info;
struct task_struct *task;
- bool first, error, is_ftrace;
+ bool first, error, reset;
int graph_idx;
unsigned long sp, pc, ra;
};
+bool default_next_frame(struct unwind_state *state);
+
void unwind_start(struct unwind_state *state,
struct task_struct *task, struct pt_regs *regs);
bool unwind_next_frame(struct unwind_state *state);
@@ -40,4 +44,39 @@ static inline bool unwind_error(struct unwind_state *state)
return state->error;
}
+#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1]))
+
+static inline unsigned long unwind_graph_addr(struct unwind_state *state,
+ unsigned long pc, unsigned long cfa)
+{
+ return ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ pc, (unsigned long *)(cfa - GRAPH_FAKE_OFFSET));
+}
+
+static __always_inline void __unwind_start(struct unwind_state *state,
+ struct task_struct *task, struct pt_regs *regs)
+{
+ memset(state, 0, sizeof(*state));
+ if (regs) {
+ state->sp = regs->regs[3];
+ state->pc = regs->csr_era;
+ state->ra = regs->regs[1];
+ } else if (task && task != current) {
+ state->sp = thread_saved_fp(task);
+ state->pc = thread_saved_ra(task);
+ state->ra = 0;
+ } else {
+ state->sp = (unsigned long)__builtin_frame_address(0);
+ state->pc = (unsigned long)__builtin_return_address(0);
+ state->ra = 0;
+ }
+ state->task = task;
+ get_stack_info(state->sp, state->task, &state->stack_info);
+ state->pc = unwind_graph_addr(state, state->pc, state->sp);
+}
+
+static __always_inline unsigned long __unwind_get_return_address(struct unwind_state *state)
+{
+ return unwind_done(state) ? 0 : state->pc;
+}
#endif /* _ASM_UNWIND_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index fcaa024a685e..c8cfbd562921 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -8,7 +8,7 @@ extra-y := vmlinux.lds
obj-y += head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
- alternative.o unaligned.o
+ alternative.o unaligned.o unwind.o
obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_EFI) += efi.o
diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c
index c5aebeac960b..4ad13847e962 100644
--- a/arch/loongarch/kernel/alternative.c
+++ b/arch/loongarch/kernel/alternative.c
@@ -74,7 +74,7 @@ static void __init_or_module recompute_jump(union loongarch_instruction *buf,
switch (src->reg0i26_format.opcode) {
case b_op:
case bl_op:
- jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 27);
+ jump_addr = cur_pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
if (in_alt_jump(jump_addr, start, end))
return;
offset = jump_addr - pc;
@@ -93,7 +93,7 @@ static void __init_or_module recompute_jump(union loongarch_instruction *buf,
fallthrough;
case beqz_op:
case bnez_op:
- jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 22);
+ jump_addr = cur_pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
if (in_alt_jump(jump_addr, start, end))
return;
offset = jump_addr - pc;
@@ -112,7 +112,7 @@ static void __init_or_module recompute_jump(union loongarch_instruction *buf,
case bge_op:
case bltu_op:
case bgeu_op:
- jump_addr = cur_pc + sign_extend(si << 2, 17);
+ jump_addr = cur_pc + sign_extend64(si << 2, 17);
if (in_alt_jump(jump_addr, start, end))
return;
offset = jump_addr - pc;
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 255a09876ef2..3a3fce2d7846 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -94,7 +94,7 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR |
LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH;
- elf_hwcap |= HWCAP_LOONGARCH_CRC32;
+ elf_hwcap = HWCAP_LOONGARCH_CPUCFG | HWCAP_LOONGARCH_CRC32;
config = read_cpucfg(LOONGARCH_CPUCFG1);
if (config & CPUCFG1_UAL) {
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 75e5be807a0d..7e5c293ed89f 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -67,14 +67,17 @@ SYM_FUNC_END(except_vec_cex)
.macro BUILD_HANDLER exception handler prep
.align 5
SYM_FUNC_START(handle_\exception)
+ 666:
BACKUP_T0T1
SAVE_ALL
build_prep_\prep
move a0, sp
la.abs t0, do_\handler
jirl ra, t0, 0
+ 668:
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_\exception)
+ SYM_DATA(unwind_hint_\exception, .word 668b - 666b)
.endm
BUILD_HANDLER ade ade badv
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
index 512579d79b22..badc59087042 100644
--- a/arch/loongarch/kernel/inst.c
+++ b/arch/loongarch/kernel/inst.c
@@ -58,7 +58,6 @@ u32 larch_insn_gen_nop(void)
u32 larch_insn_gen_b(unsigned long pc, unsigned long dest)
{
long offset = dest - pc;
- unsigned int immediate_l, immediate_h;
union loongarch_instruction insn;
if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) {
@@ -66,15 +65,7 @@ u32 larch_insn_gen_b(unsigned long pc, unsigned long dest)
return INSN_BREAK;
}
- offset >>= 2;
-
- immediate_l = offset & 0xffff;
- offset >>= 16;
- immediate_h = offset & 0x3ff;
-
- insn.reg0i26_format.opcode = b_op;
- insn.reg0i26_format.immediate_l = immediate_l;
- insn.reg0i26_format.immediate_h = immediate_h;
+ emit_b(&insn, offset >> 2);
return insn.word;
}
@@ -82,7 +73,6 @@ u32 larch_insn_gen_b(unsigned long pc, unsigned long dest)
u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest)
{
long offset = dest - pc;
- unsigned int immediate_l, immediate_h;
union loongarch_instruction insn;
if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) {
@@ -90,15 +80,7 @@ u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest)
return INSN_BREAK;
}
- offset >>= 2;
-
- immediate_l = offset & 0xffff;
- offset >>= 16;
- immediate_h = offset & 0x3ff;
-
- insn.reg0i26_format.opcode = bl_op;
- insn.reg0i26_format.immediate_l = immediate_l;
- insn.reg0i26_format.immediate_h = immediate_h;
+ emit_bl(&insn, offset >> 2);
return insn.word;
}
@@ -107,10 +89,7 @@ u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongar
{
union loongarch_instruction insn;
- insn.reg3_format.opcode = or_op;
- insn.reg3_format.rd = rd;
- insn.reg3_format.rj = rj;
- insn.reg3_format.rk = rk;
+ emit_or(&insn, rd, rj, rk);
return insn.word;
}
@@ -124,9 +103,7 @@ u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm)
{
union loongarch_instruction insn;
- insn.reg1i20_format.opcode = lu12iw_op;
- insn.reg1i20_format.rd = rd;
- insn.reg1i20_format.immediate = imm;
+ emit_lu12iw(&insn, rd, imm);
return insn.word;
}
@@ -135,9 +112,7 @@ u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm)
{
union loongarch_instruction insn;
- insn.reg1i20_format.opcode = lu32id_op;
- insn.reg1i20_format.rd = rd;
- insn.reg1i20_format.immediate = imm;
+ emit_lu32id(&insn, rd, imm);
return insn.word;
}
@@ -146,10 +121,7 @@ u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm)
{
union loongarch_instruction insn;
- insn.reg2i12_format.opcode = lu52id_op;
- insn.reg2i12_format.rd = rd;
- insn.reg2i12_format.rj = rj;
- insn.reg2i12_format.immediate = imm;
+ emit_lu52id(&insn, rd, rj, imm);
return insn.word;
}
@@ -158,10 +130,7 @@ u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned l
{
union loongarch_instruction insn;
- insn.reg2i16_format.opcode = jirl_op;
- insn.reg2i16_format.rd = rd;
- insn.reg2i16_format.rj = rj;
- insn.reg2i16_format.immediate = (dest - pc) >> 2;
+ emit_jirl(&insn, rj, rd, (dest - pc) >> 2);
return insn.word;
}
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index c583b1ef1f44..edfd220a3737 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -191,20 +191,14 @@ out:
unsigned long __get_wchan(struct task_struct *task)
{
- unsigned long pc;
+ unsigned long pc = 0;
struct unwind_state state;
if (!try_get_task_stack(task))
return 0;
- unwind_start(&state, task, NULL);
- state.sp = thread_saved_fp(task);
- get_stack_info(state.sp, state.task, &state.stack_info);
- state.pc = thread_saved_ra(task);
-#ifdef CONFIG_UNWINDER_PROLOGUE
- state.type = UNWINDER_PROLOGUE;
-#endif
- for (; !unwind_done(&state); unwind_next_frame(&state)) {
+ for (unwind_start(&state, task, NULL);
+ !unwind_done(&state); unwind_next_frame(&state)) {
pc = unwind_get_return_address(&state);
if (!pc)
break;
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 7ea62faeeadb..c38a146a973b 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -72,9 +72,6 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs,
if (!task)
task = current;
- if (user_mode(regs))
- state.type = UNWINDER_GUESS;
-
printk("%sCall Trace:", loglvl);
for (unwind_start(&state, task, pregs);
!unwind_done(&state); unwind_next_frame(&state)) {
diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c
new file mode 100644
index 000000000000..a463d6961344
--- /dev/null
+++ b/arch/loongarch/kernel/unwind.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2023 Loongson Technology Corporation Limited
+ */
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+
+#include <asm/unwind.h>
+
+bool default_next_frame(struct unwind_state *state)
+{
+ struct stack_info *info = &state->stack_info;
+ unsigned long addr;
+
+ if (unwind_done(state))
+ return false;
+
+ do {
+ for (state->sp += sizeof(unsigned long);
+ state->sp < info->end; state->sp += sizeof(unsigned long)) {
+ addr = *(unsigned long *)(state->sp);
+ state->pc = unwind_graph_addr(state, addr, state->sp + 8);
+ if (__kernel_text_address(state->pc))
+ return true;
+ }
+
+ state->sp = info->next_sp;
+
+ } while (!get_stack_info(state->sp, state->task, info));
+
+ return false;
+}
diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c
index e2d2e4f3001f..98379b7d4147 100644
--- a/arch/loongarch/kernel/unwind_guess.c
+++ b/arch/loongarch/kernel/unwind_guess.c
@@ -2,37 +2,18 @@
/*
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
-#include <linux/kernel.h>
-#include <linux/ftrace.h>
-
#include <asm/unwind.h>
unsigned long unwind_get_return_address(struct unwind_state *state)
{
- if (unwind_done(state))
- return 0;
- else if (state->first)
- return state->pc;
-
- return *(unsigned long *)(state->sp);
+ return __unwind_get_return_address(state);
}
EXPORT_SYMBOL_GPL(unwind_get_return_address);
void unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs)
{
- memset(state, 0, sizeof(*state));
-
- if (regs) {
- state->sp = regs->regs[3];
- state->pc = regs->csr_era;
- }
-
- state->task = task;
- state->first = true;
-
- get_stack_info(state->sp, state->task, &state->stack_info);
-
+ __unwind_start(state, task, regs);
if (!unwind_done(state) && !__kernel_text_address(state->pc))
unwind_next_frame(state);
}
@@ -40,30 +21,6 @@ EXPORT_SYMBOL_GPL(unwind_start);
bool unwind_next_frame(struct unwind_state *state)
{
- struct stack_info *info = &state->stack_info;
- unsigned long addr;
-
- if (unwind_done(state))
- return false;
-
- if (state->first)
- state->first = false;
-
- do {
- for (state->sp += sizeof(unsigned long);
- state->sp < info->end;
- state->sp += sizeof(unsigned long)) {
- addr = *(unsigned long *)(state->sp);
- state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- addr, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
- if (__kernel_text_address(addr))
- return true;
- }
-
- state->sp = info->next_sp;
-
- } while (!get_stack_info(state->sp, state->task, info));
-
- return false;
+ return default_next_frame(state);
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 0f8d1451ebb8..9095fde8e55d 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -2,61 +2,116 @@
/*
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
+#include <linux/cpumask.h>
#include <linux/ftrace.h>
#include <linux/kallsyms.h>
#include <asm/inst.h>
+#include <asm/loongson.h>
#include <asm/ptrace.h>
+#include <asm/setup.h>
#include <asm/unwind.h>
-static inline void unwind_state_fixup(struct unwind_state *state)
-{
-#ifdef CONFIG_DYNAMIC_FTRACE
- static unsigned long ftrace = (unsigned long)ftrace_call + 4;
-
- if (state->pc == ftrace)
- state->is_ftrace = true;
+extern const int unwind_hint_ade;
+extern const int unwind_hint_ale;
+extern const int unwind_hint_bp;
+extern const int unwind_hint_fpe;
+extern const int unwind_hint_fpu;
+extern const int unwind_hint_lsx;
+extern const int unwind_hint_lasx;
+extern const int unwind_hint_lbt;
+extern const int unwind_hint_ri;
+extern const int unwind_hint_watch;
+extern unsigned long eentry;
+#ifdef CONFIG_NUMA
+extern unsigned long pcpu_handlers[NR_CPUS];
#endif
-}
-unsigned long unwind_get_return_address(struct unwind_state *state)
+static inline bool scan_handlers(unsigned long entry_offset)
{
+ int idx, offset;
- if (unwind_done(state))
- return 0;
- else if (state->type)
- return state->pc;
- else if (state->first)
- return state->pc;
-
- return *(unsigned long *)(state->sp);
+ if (entry_offset >= EXCCODE_INT_START * VECSIZE)
+ return false;
+ idx = entry_offset / VECSIZE;
+ offset = entry_offset % VECSIZE;
+ switch (idx) {
+ case EXCCODE_ADE:
+ return offset == unwind_hint_ade;
+ case EXCCODE_ALE:
+ return offset == unwind_hint_ale;
+ case EXCCODE_BP:
+ return offset == unwind_hint_bp;
+ case EXCCODE_FPE:
+ return offset == unwind_hint_fpe;
+ case EXCCODE_FPDIS:
+ return offset == unwind_hint_fpu;
+ case EXCCODE_LSXDIS:
+ return offset == unwind_hint_lsx;
+ case EXCCODE_LASXDIS:
+ return offset == unwind_hint_lasx;
+ case EXCCODE_BTDIS:
+ return offset == unwind_hint_lbt;
+ case EXCCODE_INE:
+ return offset == unwind_hint_ri;
+ case EXCCODE_WATCH:
+ return offset == unwind_hint_watch;
+ default:
+ return false;
+ }
}
-EXPORT_SYMBOL_GPL(unwind_get_return_address);
-static bool unwind_by_guess(struct unwind_state *state)
+static inline bool fix_exception(unsigned long pc)
{
- struct stack_info *info = &state->stack_info;
- unsigned long addr;
-
- for (state->sp += sizeof(unsigned long);
- state->sp < info->end;
- state->sp += sizeof(unsigned long)) {
- addr = *(unsigned long *)(state->sp);
- state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- addr, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
- if (__kernel_text_address(addr))
+#ifdef CONFIG_NUMA
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (!pcpu_handlers[cpu])
+ continue;
+ if (scan_handlers(pc - pcpu_handlers[cpu]))
return true;
}
+#endif
+ return scan_handlers(pc - eentry);
+}
+/*
+ * As we meet ftrace_regs_entry, reset first flag like first doing
+ * tracing. Prologue analysis will stop soon because PC is at entry.
+ */
+static inline bool fix_ftrace(unsigned long pc)
+{
+#ifdef CONFIG_DYNAMIC_FTRACE
+ return pc == (unsigned long)ftrace_call + LOONGARCH_INSN_SIZE;
+#else
return false;
+#endif
}
+static inline bool unwind_state_fixup(struct unwind_state *state)
+{
+ if (!fix_exception(state->pc) && !fix_ftrace(state->pc))
+ return false;
+
+ state->reset = true;
+ return true;
+}
+
+/*
+ * LoongArch function prologue is like follows,
+ * [instructions not use stack var]
+ * addi.d sp, sp, -imm
+ * st.d xx, sp, offset <- save callee saved regs and
+ * st.d yy, sp, offset save ra if function is nest.
+ * [others instructions]
+ */
static bool unwind_by_prologue(struct unwind_state *state)
{
long frame_ra = -1;
unsigned long frame_size = 0;
- unsigned long size, offset, pc = state->pc;
+ unsigned long size, offset, pc;
struct pt_regs *regs;
struct stack_info *info = &state->stack_info;
union loongarch_instruction *ip, *ip_end;
@@ -64,20 +119,21 @@ static bool unwind_by_prologue(struct unwind_state *state)
if (state->sp >= info->end || state->sp < info->begin)
return false;
- if (state->is_ftrace) {
- /*
- * As we meet ftrace_regs_entry, reset first flag like first doing
- * tracing. Prologue analysis will stop soon because PC is at entry.
- */
+ if (state->reset) {
regs = (struct pt_regs *)state->sp;
state->first = true;
- state->is_ftrace = false;
+ state->reset = false;
state->pc = regs->csr_era;
state->ra = regs->regs[1];
state->sp = regs->regs[3];
return true;
}
+ /*
+ * When first is not set, the PC is a return address in the previous frame.
+ * We need to adjust its value in case overflow to the next symbol.
+ */
+ pc = state->pc - (state->first ? 0 : LOONGARCH_INSN_SIZE);
if (!kallsyms_lookup_size_offset(pc, &size, &offset))
return false;
@@ -93,6 +149,10 @@ static bool unwind_by_prologue(struct unwind_state *state)
ip++;
}
+ /*
+ * Can't find stack alloc action, PC may be in a leaf function. Only the
+ * first being true is reasonable, otherwise indicate analysis is broken.
+ */
if (!frame_size) {
if (state->first)
goto first;
@@ -110,6 +170,7 @@ static bool unwind_by_prologue(struct unwind_state *state)
ip++;
}
+ /* Can't find save $ra action, PC may be in a leaf function, too. */
if (frame_ra < 0) {
if (state->first) {
state->sp = state->sp + frame_size;
@@ -118,88 +179,47 @@ static bool unwind_by_prologue(struct unwind_state *state)
return false;
}
- if (state->first)
- state->first = false;
-
state->pc = *(unsigned long *)(state->sp + frame_ra);
state->sp = state->sp + frame_size;
goto out;
first:
- state->first = false;
- if (state->pc == state->ra)
- return false;
-
state->pc = state->ra;
out:
- unwind_state_fixup(state);
- return !!__kernel_text_address(state->pc);
-}
-
-void unwind_start(struct unwind_state *state, struct task_struct *task,
- struct pt_regs *regs)
-{
- memset(state, 0, sizeof(*state));
-
- if (regs && __kernel_text_address(regs->csr_era)) {
- state->pc = regs->csr_era;
- state->sp = regs->regs[3];
- state->ra = regs->regs[1];
- state->type = UNWINDER_PROLOGUE;
- }
-
- state->task = task;
- state->first = true;
-
- get_stack_info(state->sp, state->task, &state->stack_info);
-
- if (!unwind_done(state) && !__kernel_text_address(state->pc))
- unwind_next_frame(state);
+ state->first = false;
+ return unwind_state_fixup(state) || __kernel_text_address(state->pc);
}
-EXPORT_SYMBOL_GPL(unwind_start);
-bool unwind_next_frame(struct unwind_state *state)
+static bool next_frame(struct unwind_state *state)
{
- struct stack_info *info = &state->stack_info;
- struct pt_regs *regs;
unsigned long pc;
+ struct pt_regs *regs;
+ struct stack_info *info = &state->stack_info;
if (unwind_done(state))
return false;
do {
- switch (state->type) {
- case UNWINDER_GUESS:
- state->first = false;
- if (unwind_by_guess(state))
- return true;
- break;
+ if (unwind_by_prologue(state)) {
+ state->pc = unwind_graph_addr(state, state->pc, state->sp);
+ return true;
+ }
+
+ if (info->type == STACK_TYPE_IRQ && info->end == state->sp) {
+ regs = (struct pt_regs *)info->next_sp;
+ pc = regs->csr_era;
+
+ if (user_mode(regs) || !__kernel_text_address(pc))
+ return false;
+
+ state->first = true;
+ state->pc = pc;
+ state->ra = regs->regs[1];
+ state->sp = regs->regs[3];
+ get_stack_info(state->sp, state->task, info);
- case UNWINDER_PROLOGUE:
- if (unwind_by_prologue(state)) {
- state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- state->pc, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
- return true;
- }
-
- if (info->type == STACK_TYPE_IRQ &&
- info->end == state->sp) {
- regs = (struct pt_regs *)info->next_sp;
- pc = regs->csr_era;
-
- if (user_mode(regs) || !__kernel_text_address(pc))
- return false;
-
- state->first = true;
- state->ra = regs->regs[1];
- state->sp = regs->regs[3];
- state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- pc, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
- get_stack_info(state->sp, state->task, info);
-
- return true;
- }
+ return true;
}
state->sp = info->next_sp;
@@ -208,4 +228,36 @@ bool unwind_next_frame(struct unwind_state *state)
return false;
}
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+ return __unwind_get_return_address(state);
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+void unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs)
+{
+ __unwind_start(state, task, regs);
+ state->type = UNWINDER_PROLOGUE;
+ state->first = true;
+
+ /*
+ * The current PC is not kernel text address, we cannot find its
+ * relative symbol. Thus, prologue analysis will be broken. Luckily,
+ * we can use the default_next_frame().
+ */
+ if (!__kernel_text_address(state->pc)) {
+ state->type = UNWINDER_GUESS;
+ if (!unwind_done(state))
+ unwind_next_frame(state);
+ }
+}
+EXPORT_SYMBOL_GPL(unwind_start);
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+ return state->type == UNWINDER_PROLOGUE ?
+ next_frame(state) : default_next_frame(state);
+}
EXPORT_SYMBOL_GPL(unwind_next_frame);
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index da3681f131c8..8bad6b0cff59 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -251,7 +251,7 @@ static void output_pgtable_bits_defines(void)
}
#ifdef CONFIG_NUMA
-static unsigned long pcpu_handlers[NR_CPUS];
+unsigned long pcpu_handlers[NR_CPUS];
#endif
extern long exception_handlers[VECSIZE * 128 / sizeof(long)];
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index af04cea82b94..352d7de24018 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -210,6 +210,10 @@ ld_version()
gsub(".*version ", "");
gsub("-.*", "");
split($1,a, ".");
+ if( length(a[3]) == "8" )
+ # a[3] is probably a date of format yyyymmdd used for release snapshots. We
+ # can assume it to be zero as it does not signify a new version as such.
+ a[3] = 0;
print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
exit
}'
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 4f897993b710..699a88584ae1 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -137,7 +137,7 @@ struct imc_pmu {
* are inited.
*/
struct imc_pmu_ref {
- struct mutex lock;
+ spinlock_t lock;
unsigned int id;
int refc;
};
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 80a148c57de8..44a35ed4f686 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1012,7 +1012,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
void hpt_clear_stress(void);
static struct timer_list stress_hpt_timer;
-void stress_hpt_timer_fn(struct timer_list *timer)
+static void stress_hpt_timer_fn(struct timer_list *timer)
{
int next_cpu;
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index d517aba94d1b..100e97daf76b 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -14,6 +14,7 @@
#include <asm/cputhreads.h>
#include <asm/smp.h>
#include <linux/string.h>
+#include <linux/spinlock.h>
/* Nest IMC data structures and variables */
@@ -21,7 +22,7 @@
* Used to avoid races in counting the nest-pmu units during hotplug
* register and unregister
*/
-static DEFINE_MUTEX(nest_init_lock);
+static DEFINE_SPINLOCK(nest_init_lock);
static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc);
static struct imc_pmu **per_nest_pmu_arr;
static cpumask_t nest_imc_cpumask;
@@ -50,7 +51,7 @@ static int trace_imc_mem_size;
* core and trace-imc
*/
static struct imc_pmu_ref imc_global_refc = {
- .lock = __MUTEX_INITIALIZER(imc_global_refc.lock),
+ .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock),
.id = 0,
.refc = 0,
};
@@ -400,7 +401,7 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
get_hard_smp_processor_id(cpu));
/*
* If this is the last cpu in this chip then, skip the reference
- * count mutex lock and make the reference count on this chip zero.
+ * count lock and make the reference count on this chip zero.
*/
ref = get_nest_pmu_ref(cpu);
if (!ref)
@@ -462,15 +463,15 @@ static void nest_imc_counters_release(struct perf_event *event)
/*
* See if we need to disable the nest PMU.
* If no events are currently in use, then we have to take a
- * mutex to ensure that we don't race with another task doing
+ * lock to ensure that we don't race with another task doing
* enable or disable the nest counters.
*/
ref = get_nest_pmu_ref(event->cpu);
if (!ref)
return;
- /* Take the mutex lock for this node and then decrement the reference count */
- mutex_lock(&ref->lock);
+ /* Take the lock for this node and then decrement the reference count */
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
/*
* The scenario where this is true is, when perf session is
@@ -482,7 +483,7 @@ static void nest_imc_counters_release(struct perf_event *event)
* an OPAL call to disable the engine in that node.
*
*/
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return;
}
ref->refc--;
@@ -490,7 +491,7 @@ static void nest_imc_counters_release(struct perf_event *event)
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id);
return;
}
@@ -498,7 +499,7 @@ static void nest_imc_counters_release(struct perf_event *event)
WARN(1, "nest-imc: Invalid event reference count\n");
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
}
static int nest_imc_event_init(struct perf_event *event)
@@ -557,26 +558,25 @@ static int nest_imc_event_init(struct perf_event *event)
/*
* Get the imc_pmu_ref struct for this node.
- * Take the mutex lock and then increment the count of nest pmu events
- * inited.
+ * Take the lock and then increment the count of nest pmu events inited.
*/
ref = get_nest_pmu_ref(event->cpu);
if (!ref)
return -EINVAL;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("nest-imc: Unable to start the counters for node %d\n",
node_id);
return rc;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
event->destroy = nest_imc_counters_release;
return 0;
@@ -612,9 +612,8 @@ static int core_imc_mem_init(int cpu, int size)
return -ENOMEM;
mem_info->vbase = page_address(page);
- /* Init the mutex */
core_imc_refc[core_id].id = core_id;
- mutex_init(&core_imc_refc[core_id].lock);
+ spin_lock_init(&core_imc_refc[core_id].lock);
rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
__pa((void *)mem_info->vbase),
@@ -703,9 +702,8 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
} else {
/*
- * If this is the last cpu in this core then, skip taking refernce
- * count mutex lock for this core and directly zero "refc" for
- * this core.
+ * If this is the last cpu in this core then skip taking reference
+ * count lock for this core and directly zero "refc" for this core.
*/
opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(cpu));
@@ -720,11 +718,11 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
* last cpu in this core and core-imc event running
* in this cpu.
*/
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
if (imc_global_refc.id == IMC_DOMAIN_CORE)
imc_global_refc.refc--;
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
}
return 0;
}
@@ -739,7 +737,7 @@ static int core_imc_pmu_cpumask_init(void)
static void reset_global_refc(struct perf_event *event)
{
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
imc_global_refc.refc--;
/*
@@ -751,7 +749,7 @@ static void reset_global_refc(struct perf_event *event)
imc_global_refc.refc = 0;
imc_global_refc.id = 0;
}
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
}
static void core_imc_counters_release(struct perf_event *event)
@@ -764,17 +762,17 @@ static void core_imc_counters_release(struct perf_event *event)
/*
* See if we need to disable the IMC PMU.
* If no events are currently in use, then we have to take a
- * mutex to ensure that we don't race with another task doing
+ * lock to ensure that we don't race with another task doing
* enable or disable the core counters.
*/
core_id = event->cpu / threads_per_core;
- /* Take the mutex lock and decrement the refernce count for this core */
+ /* Take the lock and decrement the refernce count for this core */
ref = &core_imc_refc[core_id];
if (!ref)
return;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
/*
* The scenario where this is true is, when perf session is
@@ -786,7 +784,7 @@ static void core_imc_counters_release(struct perf_event *event)
* an OPAL call to disable the engine in that core.
*
*/
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return;
}
ref->refc--;
@@ -794,7 +792,7 @@ static void core_imc_counters_release(struct perf_event *event)
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
return;
}
@@ -802,7 +800,7 @@ static void core_imc_counters_release(struct perf_event *event)
WARN(1, "core-imc: Invalid event reference count\n");
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
reset_global_refc(event);
}
@@ -840,7 +838,6 @@ static int core_imc_event_init(struct perf_event *event)
if ((!pcmi->vbase))
return -ENODEV;
- /* Get the core_imc mutex for this core */
ref = &core_imc_refc[core_id];
if (!ref)
return -EINVAL;
@@ -848,22 +845,22 @@ static int core_imc_event_init(struct perf_event *event)
/*
* Core pmu units are enabled only when it is used.
* See if this is triggered for the first time.
- * If yes, take the mutex lock and enable the core counters.
+ * If yes, take the lock and enable the core counters.
* If not, just increment the count in core_imc_refc struct.
*/
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("core-imc: Unable to start the counters for core %d\n",
core_id);
return rc;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
/*
* Since the system can run either in accumulation or trace-mode
@@ -874,7 +871,7 @@ static int core_imc_event_init(struct perf_event *event)
* to know whether any other trace/thread imc
* events are running.
*/
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) {
/*
* No other trace/thread imc events are running in
@@ -883,10 +880,10 @@ static int core_imc_event_init(struct perf_event *event)
imc_global_refc.id = IMC_DOMAIN_CORE;
imc_global_refc.refc++;
} else {
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
return -EBUSY;
}
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
event->destroy = core_imc_counters_release;
@@ -958,10 +955,10 @@ static int ppc_thread_imc_cpu_offline(unsigned int cpu)
mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
/* Reduce the refc if thread-imc event running on this cpu */
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
if (imc_global_refc.id == IMC_DOMAIN_THREAD)
imc_global_refc.refc--;
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
return 0;
}
@@ -1001,7 +998,7 @@ static int thread_imc_event_init(struct perf_event *event)
if (!target)
return -EINVAL;
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
/*
* Check if any other trace/core imc events are running in the
* system, if not set the global id to thread-imc.
@@ -1010,10 +1007,10 @@ static int thread_imc_event_init(struct perf_event *event)
imc_global_refc.id = IMC_DOMAIN_THREAD;
imc_global_refc.refc++;
} else {
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
return -EBUSY;
}
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
event->pmu->task_ctx_nr = perf_sw_context;
event->destroy = reset_global_refc;
@@ -1135,25 +1132,25 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
/*
* imc pmus are enabled only when it is used.
* See if this is triggered for the first time.
- * If yes, take the mutex lock and enable the counters.
+ * If yes, take the lock and enable the counters.
* If not, just increment the count in ref count struct.
*/
ref = &core_imc_refc[core_id];
if (!ref)
return -EINVAL;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("thread-imc: Unable to start the counter\
for core %d\n", core_id);
return -EINVAL;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return 0;
}
@@ -1170,12 +1167,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
return;
}
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
ref->refc--;
if (ref->refc == 0) {
if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("thread-imc: Unable to stop the counters\
for core %d\n", core_id);
return;
@@ -1183,7 +1180,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
} else if (ref->refc < 0) {
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
/* Set bit 0 of LDBAR to zero, to stop posting updates to memory */
mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
@@ -1224,9 +1221,8 @@ static int trace_imc_mem_alloc(int cpu_id, int size)
}
}
- /* Init the mutex, if not already */
trace_imc_refc[core_id].id = core_id;
- mutex_init(&trace_imc_refc[core_id].lock);
+ spin_lock_init(&trace_imc_refc[core_id].lock);
mtspr(SPRN_LDBAR, 0);
return 0;
@@ -1246,10 +1242,10 @@ static int ppc_trace_imc_cpu_offline(unsigned int cpu)
* Reduce the refc if any trace-imc event running
* on this cpu.
*/
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
if (imc_global_refc.id == IMC_DOMAIN_TRACE)
imc_global_refc.refc--;
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
return 0;
}
@@ -1371,17 +1367,17 @@ static int trace_imc_event_add(struct perf_event *event, int flags)
}
mtspr(SPRN_LDBAR, ldbar_value);
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
return -EINVAL;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return 0;
}
@@ -1414,19 +1410,19 @@ static void trace_imc_event_del(struct perf_event *event, int flags)
return;
}
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
ref->refc--;
if (ref->refc == 0) {
if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
return;
}
} else if (ref->refc < 0) {
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
trace_imc_event_stop(event, flags);
}
@@ -1448,7 +1444,7 @@ static int trace_imc_event_init(struct perf_event *event)
* no other thread is running any core/thread imc
* events
*/
- mutex_lock(&imc_global_refc.lock);
+ spin_lock(&imc_global_refc.lock);
if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) {
/*
* No core/thread imc events are running in the
@@ -1457,10 +1453,10 @@ static int trace_imc_event_init(struct perf_event *event)
imc_global_refc.id = IMC_DOMAIN_TRACE;
imc_global_refc.refc++;
} else {
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
return -EBUSY;
}
- mutex_unlock(&imc_global_refc.lock);
+ spin_unlock(&imc_global_refc.lock);
event->hw.idx = -1;
@@ -1533,10 +1529,10 @@ static int init_nest_pmu_ref(void)
i = 0;
for_each_node(nid) {
/*
- * Mutex lock to avoid races while tracking the number of
+ * Take the lock to avoid races while tracking the number of
* sessions using the chip's nest pmu units.
*/
- mutex_init(&nest_imc_refc[i].lock);
+ spin_lock_init(&nest_imc_refc[i].lock);
/*
* Loop to init the "id" with the node_id. Variable "i" initialized to
@@ -1633,7 +1629,7 @@ static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
{
if (pmu_ptr->domain == IMC_DOMAIN_NEST) {
- mutex_lock(&nest_init_lock);
+ spin_lock(&nest_init_lock);
if (nest_pmus == 1) {
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
kfree(nest_imc_refc);
@@ -1643,7 +1639,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
if (nest_pmus > 0)
nest_pmus--;
- mutex_unlock(&nest_init_lock);
+ spin_unlock(&nest_init_lock);
}
/* Free core_imc memory */
@@ -1800,11 +1796,11 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
* rest. To handle the cpuhotplug callback unregister, we track
* the number of nest pmus in "nest_pmus".
*/
- mutex_lock(&nest_init_lock);
+ spin_lock(&nest_init_lock);
if (nest_pmus == 0) {
ret = init_nest_pmu_ref();
if (ret) {
- mutex_unlock(&nest_init_lock);
+ spin_unlock(&nest_init_lock);
kfree(per_nest_pmu_arr);
per_nest_pmu_arr = NULL;
goto err_free_mem;
@@ -1812,7 +1808,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
/* Register for cpu hotplug notification. */
ret = nest_pmu_cpumask_init();
if (ret) {
- mutex_unlock(&nest_init_lock);
+ spin_unlock(&nest_init_lock);
kfree(nest_imc_refc);
kfree(per_nest_pmu_arr);
per_nest_pmu_arr = NULL;
@@ -1820,7 +1816,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
}
}
nest_pmus++;
- mutex_unlock(&nest_init_lock);
+ spin_unlock(&nest_init_lock);
break;
case IMC_DOMAIN_CORE:
ret = core_imc_pmu_cpumask_init();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 2b6091349daa..696c9e007a36 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -508,6 +508,7 @@ static void __init setup_lowcore_dat_on(void)
{
struct lowcore *abs_lc;
unsigned long flags;
+ int i;
__ctl_clear_bit(0, 28);
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
@@ -523,8 +524,8 @@ static void __init setup_lowcore_dat_on(void)
abs_lc = get_abs_lowcore(&flags);
abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
abs_lc->program_new_psw = S390_lowcore.program_new_psw;
- memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
- sizeof(abs_lc->cregs_save_area));
+ for (i = 0; i < 16; i++)
+ abs_lc->cregs_save_area[i] = S390_lowcore.cregs_save_area[i];
put_abs_lowcore(abs_lc, flags);
}
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index 5521ea12f44e..aa9b96457584 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -32,7 +32,7 @@ intcall:
movw %dx, %si
movw %sp, %di
movw $11, %cx
- rep; movsd
+ rep; movsl
/* Pop full state from the stack */
popal
@@ -67,7 +67,7 @@ intcall:
jz 4f
movw %sp, %si
movw $11, %cx
- rep; movsd
+ rep; movsl
4: addw $44, %sp
/* Restore state and return */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f35f1ff4427b..6aaae18f1854 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1111,6 +1111,7 @@ struct msr_bitmap_range {
/* Xen emulation context */
struct kvm_xen {
+ struct mutex xen_lock;
u32 xen_version;
bool long_mode;
bool runstate_update_flag;
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index efe0c30d3a12..77538abeb72a 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid)
return entry;
}
+static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
+{
+ u64 msr_val;
+
+ /*
+ * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
+ * with a valid event code for supported resource type and the bits
+ * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
+ * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
+ * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
+ * are error bits.
+ */
+ wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+ rdmsrl(MSR_IA32_QM_CTR, msr_val);
+
+ if (msr_val & RMID_VAL_ERROR)
+ return -EIO;
+ if (msr_val & RMID_VAL_UNAVAIL)
+ return -EINVAL;
+
+ *val = msr_val;
+ return 0;
+}
+
static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
u32 rmid,
enum resctrl_event_id eventid)
@@ -172,8 +196,12 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
struct arch_mbm_state *am;
am = get_arch_mbm_state(hw_dom, rmid, eventid);
- if (am)
+ if (am) {
memset(am, 0, sizeof(*am));
+
+ /* Record any initial, non-zero count value. */
+ __rmid_read(rmid, eventid, &am->prev_msr);
+ }
}
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
@@ -191,25 +219,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct arch_mbm_state *am;
u64 msr_val, chunks;
+ int ret;
if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
return -EINVAL;
- /*
- * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
- * with a valid event code for supported resource type and the bits
- * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
- * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
- * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
- * are error bits.
- */
- wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
- rdmsrl(MSR_IA32_QM_CTR, msr_val);
-
- if (msr_val & RMID_VAL_ERROR)
- return -EIO;
- if (msr_val & RMID_VAL_UNAVAIL)
- return -EINVAL;
+ ret = __rmid_read(rmid, eventid, &msr_val);
+ if (ret)
+ return ret;
am = get_arch_mbm_state(hw_dom, rmid, eventid);
if (am) {
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index e5a48f05e787..5993da21d822 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -580,8 +580,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
/*
* Ensure the task's closid and rmid are written before determining if
* the task is current that will decide if it will be interrupted.
+ * This pairs with the full barrier between the rq->curr update and
+ * resctrl_sched_in() during context switch.
*/
- barrier();
+ smp_mb();
/*
* By now, the task's closid and rmid are set. If the task is current
@@ -2402,6 +2404,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
WRITE_ONCE(t->rmid, to->mon.rmid);
/*
+ * Order the closid/rmid stores above before the loads
+ * in task_curr(). This pairs with the full barrier
+ * between the rq->curr update and resctrl_sched_in()
+ * during context switch.
+ */
+ smp_mb();
+
+ /*
* If the task is on a CPU, set the CPU in the mask.
* The detection is inaccurate as tasks might move or
* schedule before the smp function call takes place.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b14653b61470..596061c1610e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -770,16 +770,22 @@ struct kvm_cpuid_array {
int nent;
};
+static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array)
+{
+ if (array->nent >= array->maxnent)
+ return NULL;
+
+ return &array->entries[array->nent++];
+}
+
static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
u32 function, u32 index)
{
- struct kvm_cpuid_entry2 *entry;
+ struct kvm_cpuid_entry2 *entry = get_next_cpuid(array);
- if (array->nent >= array->maxnent)
+ if (!entry)
return NULL;
- entry = &array->entries[array->nent++];
-
memset(entry, 0, sizeof(*entry));
entry->function = function;
entry->index = index;
@@ -956,22 +962,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->edx = edx.full;
break;
}
- /*
- * Per Intel's SDM, the 0x1f is a superset of 0xb,
- * thus they can be handled by common code.
- */
case 0x1f:
case 0xb:
/*
- * Populate entries until the level type (ECX[15:8]) of the
- * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is
- * the starting entry, filled by the primary do_host_cpuid().
+ * No topology; a valid topology is indicated by the presence
+ * of subleaf 1.
*/
- for (i = 1; entry->ecx & 0xff00; ++i) {
- entry = do_host_cpuid(array, function, i);
- if (!entry)
- goto out;
- }
+ entry->eax = entry->ebx = entry->ecx = 0;
break;
case 0xd: {
u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
@@ -1202,6 +1199,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x8000001e:
+ /* Do not return host topology information. */
+ entry->eax = entry->ebx = entry->ecx = 0;
+ entry->edx = 0; /* reserved */
break;
case 0x8000001F:
if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index bc9cd7086fa9..add65dd59756 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -138,15 +138,13 @@ void recalc_intercepts(struct vcpu_svm *svm)
c->intercepts[i] = h->intercepts[i];
if (g->int_ctl & V_INTR_MASKING_MASK) {
- /* We only want the cr8 intercept bits of L1 */
- vmcb_clr_intercept(c, INTERCEPT_CR8_READ);
- vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
-
/*
- * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
- * affect any interrupt we may want to inject; therefore,
- * interrupt window vmexits are irrelevant to L0.
+ * Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8
+ * does not affect any interrupt we may want to inject;
+ * therefore, writes to CR8 are irrelevant to L0, as are
+ * interrupt window vmexits.
*/
+ vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
vmcb_clr_intercept(c, INTERCEPT_VINTR);
}
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 2e29bdc2949c..8fd41f5deae3 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -271,7 +271,15 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
* Attempt to obtain the GPC lock on *both* (if there are two)
* gfn_to_pfn caches that cover the region.
*/
- read_lock_irqsave(&gpc1->lock, flags);
+ if (atomic) {
+ local_irq_save(flags);
+ if (!read_trylock(&gpc1->lock)) {
+ local_irq_restore(flags);
+ return;
+ }
+ } else {
+ read_lock_irqsave(&gpc1->lock, flags);
+ }
while (!kvm_gpc_check(gpc1, user_len1)) {
read_unlock_irqrestore(&gpc1->lock, flags);
@@ -304,9 +312,18 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
* The guest's runstate_info is split across two pages and we
* need to hold and validate both GPCs simultaneously. We can
* declare a lock ordering GPC1 > GPC2 because nothing else
- * takes them more than one at a time.
+ * takes them more than one at a time. Set a subclass on the
+ * gpc1 lock to make lockdep shut up about it.
*/
- read_lock(&gpc2->lock);
+ lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
+ if (atomic) {
+ if (!read_trylock(&gpc2->lock)) {
+ read_unlock_irqrestore(&gpc1->lock, flags);
+ return;
+ }
+ } else {
+ read_lock(&gpc2->lock);
+ }
if (!kvm_gpc_check(gpc2, user_len2)) {
read_unlock(&gpc2->lock);
@@ -590,26 +607,26 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
r = -EINVAL;
} else {
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
kvm->arch.xen.long_mode = !!data->u.long_mode;
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
r = 0;
}
break;
case KVM_XEN_ATTR_TYPE_SHARED_INFO:
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
break;
case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
if (data->u.vector && data->u.vector < 0x10)
r = -EINVAL;
else {
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
kvm->arch.xen.upcall_vector = data->u.vector;
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
r = 0;
}
break;
@@ -619,9 +636,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
break;
case KVM_XEN_ATTR_TYPE_XEN_VERSION:
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
kvm->arch.xen.xen_version = data->u.xen_version;
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
r = 0;
break;
@@ -630,9 +647,9 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
r = -EOPNOTSUPP;
break;
}
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag;
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
r = 0;
break;
@@ -647,7 +664,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
int r = -ENOENT;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
switch (data->type) {
case KVM_XEN_ATTR_TYPE_LONG_MODE:
@@ -686,7 +703,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
break;
}
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
return r;
}
@@ -694,7 +711,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
int idx, r = -ENOENT;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.xen.xen_lock);
idx = srcu_read_lock(&vcpu->kvm->srcu);
switch (data->type) {
@@ -922,7 +939,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
}
srcu_read_unlock(&vcpu->kvm->srcu, idx);
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.xen.xen_lock);
return r;
}
@@ -930,7 +947,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
int r = -ENOENT;
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.xen.xen_lock);
switch (data->type) {
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
@@ -1013,7 +1030,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
break;
}
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.xen.xen_lock);
return r;
}
@@ -1106,7 +1123,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
xhc->blob_size_32 || xhc->blob_size_64))
return -EINVAL;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
static_branch_inc(&kvm_xen_enabled.key);
@@ -1115,7 +1132,7 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
return 0;
}
@@ -1658,15 +1675,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
mm_borrowed = true;
}
- /*
- * For the irqfd workqueue, using the main kvm->lock mutex is
- * fine since this function is invoked from kvm_set_irq() with
- * no other lock held, no srcu. In future if it will be called
- * directly from a vCPU thread (e.g. on hypercall for an IPI)
- * then it may need to switch to using a leaf-node mutex for
- * serializing the shared_info mapping.
- */
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
/*
* It is theoretically possible for the page to be unmapped
@@ -1695,7 +1704,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
srcu_read_unlock(&kvm->srcu, idx);
} while(!rc);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
if (mm_borrowed)
kthread_unuse_mm(kvm->mm);
@@ -1811,7 +1820,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
int ret;
/* Protect writes to evtchnfd as well as the idr lookup. */
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port);
ret = -ENOENT;
@@ -1842,7 +1851,7 @@ static int kvm_xen_eventfd_update(struct kvm *kvm,
}
ret = 0;
out_unlock:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
return ret;
}
@@ -1905,10 +1914,10 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm,
evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority;
}
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1,
GFP_KERNEL);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
if (ret >= 0)
return 0;
@@ -1926,9 +1935,9 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port)
{
struct evtchnfd *evtchnfd;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
if (!evtchnfd)
return -ENOENT;
@@ -1946,7 +1955,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm)
int i;
int n = 0;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.xen.xen_lock);
/*
* Because synchronize_srcu() cannot be called inside the
@@ -1958,7 +1967,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm)
all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL);
if (!all_evtchnfds) {
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
return -ENOMEM;
}
@@ -1967,7 +1976,7 @@ static int kvm_xen_eventfd_reset(struct kvm *kvm)
all_evtchnfds[n++] = evtchnfd;
idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port);
}
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.xen.xen_lock);
synchronize_srcu(&kvm->srcu);
@@ -2069,6 +2078,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
void kvm_xen_init_vm(struct kvm *kvm)
{
+ mutex_init(&kvm->arch.xen.xen_lock);
idr_init(&kvm->arch.xen.evtchn_ports);
kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN);
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d3987359d441..cb258f58fdc8 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -26,6 +26,7 @@
#include <asm/pti.h>
#include <asm/text-patching.h>
#include <asm/memtype.h>
+#include <asm/paravirt.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -804,6 +805,9 @@ void __init poking_init(void)
poking_mm = mm_alloc();
BUG_ON(!poking_mm);
+ /* Xen PV guests need the PGD to be pinned. */
+ paravirt_arch_dup_mmap(NULL, poking_mm);
+
/*
* Randomize the poking address, but make sure that the following page
* will be mapped at the same PMD. We need 2 pages, so find space for 3,
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 46de9cf5c91d..fb4b1b5e0dea 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -387,7 +387,8 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end,
u8 mtrr_type, uniform;
mtrr_type = mtrr_type_lookup(start, end, &uniform);
- if (mtrr_type != MTRR_TYPE_WRBACK)
+ if (mtrr_type != MTRR_TYPE_WRBACK &&
+ mtrr_type != MTRR_TYPE_INVALID)
return _PAGE_CACHE_MODE_UC_MINUS;
return _PAGE_CACHE_MODE_WB;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 758cbfe55daa..4b3efaa82ab7 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -12,6 +12,7 @@
*/
#include <linux/acpi.h>
+#include <linux/efi.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/bitmap.h>
@@ -442,17 +443,42 @@ static bool is_acpi_reserved(u64 start, u64 end, enum e820_type not_used)
return mcfg_res.flags;
}
+static bool is_efi_mmio(u64 start, u64 end, enum e820_type not_used)
+{
+#ifdef CONFIG_EFI
+ efi_memory_desc_t *md;
+ u64 size, mmio_start, mmio_end;
+
+ for_each_efi_memory_desc(md) {
+ if (md->type == EFI_MEMORY_MAPPED_IO) {
+ size = md->num_pages << EFI_PAGE_SHIFT;
+ mmio_start = md->phys_addr;
+ mmio_end = mmio_start + size;
+
+ /*
+ * N.B. Caller supplies (start, start + size),
+ * so to match, mmio_end is the first address
+ * *past* the EFI_MEMORY_MAPPED_IO area.
+ */
+ if (mmio_start <= start && end <= mmio_end)
+ return true;
+ }
+ }
+#endif
+
+ return false;
+}
+
typedef bool (*check_reserved_t)(u64 start, u64 end, enum e820_type type);
static bool __ref is_mmconf_reserved(check_reserved_t is_reserved,
struct pci_mmcfg_region *cfg,
- struct device *dev, int with_e820)
+ struct device *dev, const char *method)
{
u64 addr = cfg->res.start;
u64 size = resource_size(&cfg->res);
u64 old_size = size;
int num_buses;
- char *method = with_e820 ? "E820" : "ACPI motherboard resources";
while (!is_reserved(addr, addr + size, E820_TYPE_RESERVED)) {
size >>= 1;
@@ -464,10 +490,10 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved,
return false;
if (dev)
- dev_info(dev, "MMCONFIG at %pR reserved in %s\n",
+ dev_info(dev, "MMCONFIG at %pR reserved as %s\n",
&cfg->res, method);
else
- pr_info(PREFIX "MMCONFIG at %pR reserved in %s\n",
+ pr_info(PREFIX "MMCONFIG at %pR reserved as %s\n",
&cfg->res, method);
if (old_size != size) {
@@ -500,7 +526,8 @@ static bool __ref
pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int early)
{
if (!early && !acpi_disabled) {
- if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0))
+ if (is_mmconf_reserved(is_acpi_reserved, cfg, dev,
+ "ACPI motherboard resource"))
return true;
if (dev)
@@ -513,6 +540,10 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e
"MMCONFIG at %pR not reserved in "
"ACPI motherboard resources\n",
&cfg->res);
+
+ if (is_mmconf_reserved(is_efi_mmio, cfg, dev,
+ "EfiMemoryMappedIO"))
+ return true;
}
/*
@@ -527,7 +558,8 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e
/* Don't try to do this check unless configuration
type 1 is available. how about type 2 ?*/
if (raw_pci_ops)
- return is_mmconf_reserved(e820__mapped_all, cfg, dev, 1);
+ return is_mmconf_reserved(e820__mapped_all, cfg, dev,
+ "E820 entry");
return false;
}
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
index 48a3eb09d951..650cdbbdaf45 100644
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -7,7 +7,7 @@
#include <asm/elf.h>
-Elf32_Half elf_core_extra_phdrs(void)
+Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm)
{
return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
}
@@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm)
return 1;
}
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
{
if ( vsyscall_ehdr ) {
const struct elfhdr *const ehdrp =