diff options
Diffstat (limited to 'arch')
789 files changed, 17744 insertions, 14464 deletions
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index e8c956098424..572b228c44c7 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -17,11 +17,11 @@ #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic64_read(v) ACCESS_ONCE((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic64_read(v) READ_ONCE((v)->counter) -#define atomic_set(v,i) ((v)->counter = (i)) -#define atomic64_set(v,i) ((v)->counter = (i)) +#define atomic_set(v,i) WRITE_ONCE((v)->counter, (i)) +#define atomic64_set(v,i) WRITE_ONCE((v)->counter, (i)) /* * To get proper branch prediction for the main line, we must branch diff --git a/arch/alpha/include/asm/word-at-a-time.h b/arch/alpha/include/asm/word-at-a-time.h index 6b340d0f1521..902e6ab00a06 100644 --- a/arch/alpha/include/asm/word-at-a-time.h +++ b/arch/alpha/include/asm/word-at-a-time.h @@ -52,4 +52,6 @@ static inline unsigned long find_zero(unsigned long bits) #endif } +#define zero_bytemask(mask) ((2ul << (find_zero(mask) * 8)) - 1) + #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index 0086b472bc2b..f2f949671798 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -37,6 +37,9 @@ #define MCL_CURRENT 8192 /* lock all currently mapped pages */ #define MCL_FUTURE 16384 /* lock all additions to address space */ +#define MCL_ONFAULT 32768 /* lock all pages that are faulted in */ + +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ #define MADV_NORMAL 0 /* no further special treatment */ #define MADV_RANDOM 1 /* expect random page references */ diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 78c0621d5819..2c2ac3f3ff80 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -76,6 +76,10 @@ config STACKTRACE_SUPPORT config HAVE_LATENCYTOP_SUPPORT def_bool y +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + depends on ARC_MMU_V4 + source "init/Kconfig" source "kernel/Kconfig.freezer" @@ -190,6 +194,16 @@ config NR_CPUS range 2 4096 default "4" +config ARC_SMP_HALT_ON_RESET + bool "Enable Halt-on-reset boot mode" + default y if ARC_UBOOT_SUPPORT + help + In SMP configuration cores can be configured as Halt-on-reset + or they could all start at same time. For Halt-on-reset, non + masters are parked until Master kicks them so they can start of + at designated entry point. For other case, all jump to common + entry point and spin wait for Master's signal. + endif #SMP menuconfig ARC_CACHE @@ -278,6 +292,8 @@ choice default ARC_MMU_V2 if ARC_CPU_750D default ARC_MMU_V4 if ARC_CPU_HS +if ISA_ARCOMPACT + config ARC_MMU_V1 bool "MMU v1" help @@ -297,6 +313,8 @@ config ARC_MMU_V3 Variable Page size (1k-16k), var JTLB size 128 x (2 or 4) Shared Address Spaces (SASID) +endif + config ARC_MMU_V4 bool "MMU v4" depends on ISA_ARCV2 @@ -428,6 +446,28 @@ config LINUX_LINK_BASE Linux needs to be scooted a bit. If you don't know what the above means, leave this setting alone. +config HIGHMEM + bool "High Memory Support" + help + With ARC 2G:2G address split, only upper 2G is directly addressable by + kernel. Enable this to potentially allow access to rest of 2G and PAE + in future + +config ARC_HAS_PAE40 + bool "Support for the 40-bit Physical Address Extension" + default n + depends on ISA_ARCV2 + select HIGHMEM + help + Enable access to physical memory beyond 4G, only supported on + ARC cores with 40 bit Physical Addressing support + +config ARCH_PHYS_ADDR_T_64BIT + def_bool ARC_HAS_PAE40 + +config ARCH_DMA_ADDR_T_64BIT + bool + config ARC_CURR_IN_REG bool "Dedicate Register r25 for current_task pointer" default y diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index a5e2726a067e..420dcfde289f 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -95,6 +95,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 846481f37eef..f90fadf7f94e 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -98,6 +98,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 2f0b33257db2..06a9f294a2e6 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -121,6 +121,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts index 911f069e0540..b0eb0e7fe21d 100644 --- a/arch/arc/boot/dts/nsim_hs.dts +++ b/arch/arc/boot/dts/nsim_hs.dts @@ -11,8 +11,16 @@ / { compatible = "snps,nsim_hs"; + #address-cells = <2>; + #size-cells = <2>; interrupt-parent = <&core_intc>; + memory { + device_type = "memory"; + reg = <0x0 0x80000000 0x0 0x40000000 /* 1 GB low mem */ + 0x1 0x00000000 0x0 0x40000000>; /* 1 GB highmem */ + }; + chosen { bootargs = "earlycon=arc_uart,mmio32,0xc0fc1000,115200n8 console=ttyARC0,115200n8"; }; @@ -26,8 +34,8 @@ #address-cells = <1>; #size-cells = <1>; - /* child and parent address space 1:1 mapped */ - ranges; + /* only perip space at end of low mem accessible */ + ranges = <0x80000000 0x0 0x80000000 0x80000000>; core_intc: core-interrupt-controller { compatible = "snps,archs-intc"; diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi index a870bdd5e404..296d371a335c 100644 --- a/arch/arc/boot/dts/skeleton.dtsi +++ b/arch/arc/boot/dts/skeleton.dtsi @@ -32,6 +32,6 @@ memory { device_type = "memory"; - reg = <0x00000000 0x10000000>; /* 256M */ + reg = <0x80000000 0x10000000>; /* 256M */ }; }; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi index 9393fd902f0d..84226bd48baf 100644 --- a/arch/arc/boot/dts/vdk_axc003.dtsi +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -56,6 +56,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi index 9bee8ed09eb0..31f0fb5fc91d 100644 --- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -71,6 +71,6 @@ #size-cells = <1>; ranges = <0x00000000 0x80000000 0x40000000>; device_type = "memory"; - reg = <0x00000000 0x20000000>; /* 512MiB */ + reg = <0x80000000 0x20000000>; /* 512MiB */ }; }; diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 562dac6a7f78..c92c0ef1e9d2 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -89,7 +89,6 @@ CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y -CONFIG_MMC_DW_IDMAC=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT3_FS=y CONFIG_EXT4_FS=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 83a6d8d5cc58..cfac24e0e7b6 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -95,7 +95,6 @@ CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y -CONFIG_MMC_DW_IDMAC=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT3_FS=y CONFIG_EXT4_FS=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index f1e1c84e0dda..9922a118a15a 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -96,7 +96,6 @@ CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_DW=y -CONFIG_MMC_DW_IDMAC=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT3_FS=y CONFIG_EXT4_FS=y diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 7611b10a2d23..0b10ef2a4372 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -48,4 +48,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index d8023bc8d1ad..7fac7d85ed6a 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -120,7 +120,7 @@ /* gcc builtin sr needs reg param to be long immediate */ #define write_aux_reg(reg_immed, val) \ - __builtin_arc_sr((unsigned int)val, reg_immed) + __builtin_arc_sr((unsigned int)(val), reg_immed) #else @@ -327,8 +327,8 @@ struct bcr_generic { */ struct cpuinfo_arc_mmu { - unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, u_dtlb:6, u_itlb:6; - unsigned int num_tlb:16, sets:12, ways:4; + unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1; + unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8; }; struct cpuinfo_arc_cache { diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index c3ecda023e3a..7730d302cadb 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -17,11 +17,11 @@ #include <asm/barrier.h> #include <asm/smp.h> -#define atomic_read(v) ((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) #ifdef CONFIG_ARC_HAS_LLSC -#define atomic_set(v, i) (((v)->counter) = (i)) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) #ifdef CONFIG_ARC_STAR_9000923308 @@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ #ifndef CONFIG_SMP /* violating atomic_xxx API locking protocol in UP for optimization sake */ -#define atomic_set(v, i) (((v)->counter) = (i)) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) #else @@ -125,7 +125,7 @@ static inline void atomic_set(atomic_t *v, int i) unsigned long flags; atomic_ops_lock(flags); - v->counter = i; + WRITE_ONCE(v->counter, i); atomic_ops_unlock(flags); } diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index e23ea6e7633a..abf06e81c929 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -65,6 +65,7 @@ extern int ioc_exists; #if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4) #define ARC_REG_IC_PTAG 0x1E #endif +#define ARC_REG_IC_PTAG_HI 0x1F /* Bit val in IC_CTRL */ #define IC_CTRL_CACHE_DISABLE 0x1 @@ -77,6 +78,7 @@ extern int ioc_exists; #define ARC_REG_DC_FLSH 0x4B #define ARC_REG_DC_FLDL 0x4C #define ARC_REG_DC_PTAG 0x5C +#define ARC_REG_DC_PTAG_HI 0x5F /* Bit val in DC_CTRL */ #define DC_CTRL_INV_MODE_FLUSH 0x40 diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h index 0992d3dbcc65..fbe3587c4f36 100644 --- a/arch/arc/include/asm/cacheflush.h +++ b/arch/arc/include/asm/cacheflush.h @@ -31,10 +31,10 @@ void flush_cache_all(void); -void flush_icache_range(unsigned long start, unsigned long end); -void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len); -void __inv_icache_page(unsigned long paddr, unsigned long vaddr); -void __flush_dcache_page(unsigned long paddr, unsigned long vaddr); +void flush_icache_range(unsigned long kstart, unsigned long kend); +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len); +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr); +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr); #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 diff --git a/arch/arc/include/asm/entry-compact.h b/arch/arc/include/asm/entry-compact.h index 415443c2a8c4..1aff3be91075 100644 --- a/arch/arc/include/asm/entry-compact.h +++ b/arch/arc/include/asm/entry-compact.h @@ -110,13 +110,12 @@ .macro FAKE_RET_FROM_EXCPN - ld r9, [sp, PT_status32] - bic r9, r9, (STATUS_U_MASK|STATUS_DE_MASK) - bset r9, r9, STATUS_L_BIT - sr r9, [erstatus] - mov r9, 55f - sr r9, [eret] - + lr r9, [status32] + bclr r9, r9, STATUS_AE_BIT + or r9, r9, (STATUS_E1_MASK|STATUS_E2_MASK) + sr r9, [erstatus] + mov r9, 55f + sr r9, [eret] rtie 55: .endm diff --git a/arch/arc/include/asm/highmem.h b/arch/arc/include/asm/highmem.h new file mode 100644 index 000000000000..b1585c96324a --- /dev/null +++ b/arch/arc/include/asm/highmem.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#ifdef CONFIG_HIGHMEM + +#include <uapi/asm/page.h> +#include <asm/kmap_types.h> + +/* start after vmalloc area */ +#define FIXMAP_BASE (PAGE_OFFSET - FIXMAP_SIZE - PKMAP_SIZE) +#define FIXMAP_SIZE PGDIR_SIZE /* only 1 PGD worth */ +#define KM_TYPE_NR ((FIXMAP_SIZE >> PAGE_SHIFT)/NR_CPUS) +#define FIXMAP_ADDR(nr) (FIXMAP_BASE + ((nr) << PAGE_SHIFT)) + +/* start after fixmap area */ +#define PKMAP_BASE (FIXMAP_BASE + FIXMAP_SIZE) +#define PKMAP_SIZE PGDIR_SIZE +#define LAST_PKMAP (PKMAP_SIZE >> PAGE_SHIFT) +#define LAST_PKMAP_MASK (LAST_PKMAP - 1) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) +#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT) + +#define kmap_prot PAGE_KERNEL + + +#include <asm/cacheflush.h> + +extern void *kmap(struct page *page); +extern void *kmap_high(struct page *page); +extern void *kmap_atomic(struct page *page); +extern void __kunmap_atomic(void *kvaddr); +extern void kunmap_high(struct page *page); + +extern void kmap_init(void); + +static inline void flush_cache_kmaps(void) +{ + flush_cache_all(); +} + +static inline void kunmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} + + +#endif + +#endif diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h new file mode 100644 index 000000000000..c5094de86403 --- /dev/null +++ b/arch/arc/include/asm/hugepage.h @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2013-15 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + +#ifndef _ASM_ARC_HUGEPAGE_H +#define _ASM_ARC_HUGEPAGE_H + +#include <linux/types.h> +#include <asm-generic/pgtable-nopmd.h> + +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} + +static inline pmd_t pte_pmd(pte_t pte) +{ + return __pmd(pte_val(pte)); +} + +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) +#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd))) +#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) + +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) +#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) +#define pmd_special(pmd) pte_special(pmd_pte(pmd)) + +#define mk_pmd(page, prot) pte_pmd(mk_pte(page, prot)) + +#define pmd_trans_huge(pmd) (pmd_val(pmd) & _PAGE_HW_SZ) +#define pmd_trans_splitting(pmd) (pmd_trans_huge(pmd) && pmd_special(pmd)) + +#define pfn_pmd(pfn, prot) (__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + /* + * open-coded pte_modify() with additional retaining of HW_SZ bit + * so that pmd_trans_huge() remains true for this PMD + */ + return __pmd((pmd_val(pmd) & (_PAGE_CHG_MASK | _PAGE_HW_SZ)) | pgprot_val(newprot)); +} + +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} + +extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd); + +#define has_transparent_hugepage() 1 + +/* Generic variants assume pgtable_t is struct page *, hence need for these */ +#define __HAVE_ARCH_PGTABLE_DEPOSIT +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); + +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE +extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); + +#endif diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h index bc5103637326..4fd7d62a6e30 100644 --- a/arch/arc/include/asm/irq.h +++ b/arch/arc/include/asm/irq.h @@ -16,6 +16,7 @@ #ifdef CONFIG_ISA_ARCOMPACT #define TIMER0_IRQ 3 #define TIMER1_IRQ 4 +#define IPI_IRQ (NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */ #else #define TIMER0_IRQ 16 #define TIMER1_IRQ 17 diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index aa805575c320..d8c608174617 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -23,11 +23,13 @@ #define STATUS_E2_BIT 2 /* Int 2 enable */ #define STATUS_A1_BIT 3 /* Int 1 active */ #define STATUS_A2_BIT 4 /* Int 2 active */ +#define STATUS_AE_BIT 5 /* Exception active */ #define STATUS_E1_MASK (1<<STATUS_E1_BIT) #define STATUS_E2_MASK (1<<STATUS_E2_BIT) #define STATUS_A1_MASK (1<<STATUS_A1_BIT) #define STATUS_A2_MASK (1<<STATUS_A2_BIT) +#define STATUS_AE_MASK (1<<STATUS_AE_BIT) #define STATUS_IE_MASK (STATUS_E1_MASK | STATUS_E2_MASK) /* Other Interrupt Handling related Aux regs */ @@ -91,7 +93,19 @@ static inline void arch_local_irq_restore(unsigned long flags) /* * Unconditionally Enable IRQs */ -extern void arch_local_irq_enable(void); +static inline void arch_local_irq_enable(void) +{ + unsigned long temp; + + __asm__ __volatile__( + " lr %0, [status32] \n" + " or %0, %0, %1 \n" + " flag %0 \n" + : "=&r"(temp) + : "n"((STATUS_E1_MASK | STATUS_E2_MASK)) + : "cc", "memory"); +} + /* * Unconditionally Disable IRQs diff --git a/arch/arc/include/asm/kmap_types.h b/arch/arc/include/asm/kmap_types.h new file mode 100644 index 000000000000..f0d7f6acea4e --- /dev/null +++ b/arch/arc/include/asm/kmap_types.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +/* + * We primarily need to define KM_TYPE_NR here but that in turn + * is a function of PGDIR_SIZE etc. + * To avoid circular deps issue, put everything in asm/highmem.h + */ +#endif diff --git a/arch/arc/include/asm/mach_desc.h b/arch/arc/include/asm/mach_desc.h index e8993a2be6c2..6ff657a904b6 100644 --- a/arch/arc/include/asm/mach_desc.h +++ b/arch/arc/include/asm/mach_desc.h @@ -23,11 +23,8 @@ * @dt_compat: Array of device tree 'compatible' strings * (XXX: although only 1st entry is looked at) * @init_early: Very early callback [called from setup_arch()] - * @init_irq: setup external IRQ controllers [called from init_IRQ()] - * @init_smp: for each CPU (e.g. setup IPI) + * @init_cpu_smp: for each CPU as it is coming up (SMP as well as UP) * [(M):init_IRQ(), (o):start_kernel_secondary()] - * @init_time: platform specific clocksource/clockevent registration - * [called from time_init()] * @init_machine: arch initcall level callback (e.g. populate static * platform devices or parse Devicetree) * @init_late: Late initcall level callback @@ -36,13 +33,10 @@ struct machine_desc { const char *name; const char **dt_compat; - void (*init_early)(void); - void (*init_irq)(void); #ifdef CONFIG_SMP - void (*init_smp)(unsigned int); + void (*init_cpu_smp)(unsigned int); #endif - void (*init_time)(void); void (*init_machine)(void); void (*init_late)(void); diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h index 52c11f0bb0e5..46f4e5351b2a 100644 --- a/arch/arc/include/asm/mcip.h +++ b/arch/arc/include/asm/mcip.h @@ -86,9 +86,6 @@ static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param, __mcip_cmd(cmd, param); } -extern void mcip_init_early_smp(void); -extern void mcip_init_smp(unsigned int cpu); - #endif #endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 0f9c3eb5327e..b144d7ca7d20 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -24,6 +24,7 @@ #if (CONFIG_ARC_MMU_VER < 4) #define ARC_REG_TLBPD0 0x405 #define ARC_REG_TLBPD1 0x406 +#define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */ #define ARC_REG_TLBINDEX 0x407 #define ARC_REG_TLBCOMMAND 0x408 #define ARC_REG_PID 0x409 @@ -31,6 +32,7 @@ #else #define ARC_REG_TLBPD0 0x460 #define ARC_REG_TLBPD1 0x461 +#define ARC_REG_TLBPD1HI 0x463 #define ARC_REG_TLBINDEX 0x464 #define ARC_REG_TLBCOMMAND 0x465 #define ARC_REG_PID 0x468 @@ -83,6 +85,11 @@ void arc_mmu_init(void); extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); void read_decode_mmu_bcr(void); +static inline int is_pae40_enabled(void) +{ + return IS_ENABLED(CONFIG_ARC_HAS_PAE40); +} + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 9c8aa41e45c2..429957f1c236 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -43,7 +43,6 @@ typedef struct { typedef struct { unsigned long pgprot; } pgprot_t; -typedef unsigned long pgtable_t; #define pte_val(x) ((x).pte) #define pgd_val(x) ((x).pgd) @@ -57,20 +56,26 @@ typedef unsigned long pgtable_t; #else /* !STRICT_MM_TYPECHECKS */ +#ifdef CONFIG_ARC_HAS_PAE40 +typedef unsigned long long pte_t; +#else typedef unsigned long pte_t; +#endif typedef unsigned long pgd_t; typedef unsigned long pgprot_t; -typedef unsigned long pgtable_t; #define pte_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) +#define __pgd(x) (x) #define __pgprot(x) (x) #define pte_pgprot(x) (x) #endif +typedef pte_t * pgtable_t; + #define ARCH_PFN_OFFSET (CONFIG_LINUX_LINK_BASE >> PAGE_SHIFT) #define pfn_valid(pfn) (((pfn) - ARCH_PFN_OFFSET) < max_mapnr) diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 81208bfd9dcb..86ed671286df 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -49,7 +49,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep) static inline int __get_order_pgd(void) { - return get_order(PTRS_PER_PGD * 4); + return get_order(PTRS_PER_PGD * sizeof(pgd_t)); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) @@ -87,7 +87,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline int __get_order_pte(void) { - return get_order(PTRS_PER_PTE * 4); + return get_order(PTRS_PER_PTE * sizeof(pte_t)); } static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, @@ -107,10 +107,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) pgtable_t pte_pg; struct page *page; - pte_pg = __get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); + pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); if (!pte_pg) return 0; - memzero((void *)pte_pg, PTRS_PER_PTE * 4); + memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); page = virt_to_page(pte_pg); if (!pgtable_page_ctor(page)) { __free_page(page); @@ -128,12 +128,12 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, pgtable_t ptep) { pgtable_page_dtor(virt_to_page(ptep)); - free_pages(ptep, __get_order_pte()); + free_pages((unsigned long)ptep, __get_order_pte()); } #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #define check_pgt_cache() do { } while (0) -#define pmd_pgtable(pmd) pmd_page_vaddr(pmd) +#define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) #endif /* _ASM_ARC_PGALLOC_H */ diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 1281718802f7..57af2f05ae84 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -38,6 +38,7 @@ #include <asm/page.h> #include <asm/mmu.h> #include <asm-generic/pgtable-nopmd.h> +#include <linux/const.h> /************************************************************************** * Page Table Flags @@ -60,7 +61,8 @@ #define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */ #define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */ #define _PAGE_READ (1<<5) /* Page has user read perm (H) */ -#define _PAGE_MODIFIED (1<<6) /* Page modified (dirty) (S) */ +#define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */ +#define _PAGE_SPECIAL (1<<7) #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ #define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */ @@ -71,7 +73,8 @@ #define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */ #define _PAGE_READ (1<<3) /* Page has user read perm (H) */ #define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ -#define _PAGE_MODIFIED (1<<5) /* Page modified (dirty) (S) */ +#define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */ +#define _PAGE_SPECIAL (1<<6) #if (CONFIG_ARC_MMU_VER >= 4) #define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */ @@ -81,32 +84,33 @@ #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ #if (CONFIG_ARC_MMU_VER >= 4) -#define _PAGE_SZ (1<<10) /* Page Size indicator (H) */ +#define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */ #endif #define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr usable for shared TLB entries (H) */ + +#define _PAGE_UNUSED_BIT (1<<12) #endif /* vmalloc permissions */ #define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \ _PAGE_GLOBAL | _PAGE_PRESENT) -#ifdef CONFIG_ARC_CACHE_PAGES -#define _PAGE_DEF_CACHEABLE _PAGE_CACHEABLE -#else -#define _PAGE_DEF_CACHEABLE (0) +#ifndef CONFIG_ARC_CACHE_PAGES +#undef _PAGE_CACHEABLE +#define _PAGE_CACHEABLE 0 #endif -/* Helper for every "user" page - * -kernel can R/W/X - * -by default cached, unless config otherwise - * -present in memory - */ -#define ___DEF (_PAGE_PRESENT | _PAGE_DEF_CACHEABLE) +#ifndef _PAGE_HW_SZ +#define _PAGE_HW_SZ 0 +#endif + +/* Defaults for every user page */ +#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) /* More Abbrevaited helpers */ #define PAGE_U_NONE __pgprot(___DEF) @@ -122,15 +126,20 @@ * user vaddr space - visible in all addr spaces, but kernel mode only * Thus Global, all-kernel-access, no-user-access, cached */ -#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_DEF_CACHEABLE) +#define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) /* ioremap */ #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS) /* Masks for actual TLB "PD"s */ -#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT) +#define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) + +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) +#else #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) +#endif /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) @@ -191,26 +200,22 @@ /* Optimal Sizing of Pg Tbl - based on MMU page size */ #if defined(CONFIG_ARC_PAGE_SIZE_8K) -#define BITS_FOR_PTE 8 +#define BITS_FOR_PTE 8 /* 11:8:13 */ #elif defined(CONFIG_ARC_PAGE_SIZE_16K) -#define BITS_FOR_PTE 8 +#define BITS_FOR_PTE 8 /* 10:8:14 */ #elif defined(CONFIG_ARC_PAGE_SIZE_4K) -#define BITS_FOR_PTE 9 +#define BITS_FOR_PTE 9 /* 11:9:12 */ #endif #define BITS_FOR_PGD (32 - BITS_FOR_PTE - BITS_IN_PAGE) -#define PGDIR_SHIFT (BITS_FOR_PTE + BITS_IN_PAGE) +#define PGDIR_SHIFT (32 - BITS_FOR_PGD) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) /* vaddr span, not PDG sz */ #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#ifdef __ASSEMBLY__ -#define PTRS_PER_PTE (1 << BITS_FOR_PTE) -#define PTRS_PER_PGD (1 << BITS_FOR_PGD) -#else -#define PTRS_PER_PTE (1UL << BITS_FOR_PTE) -#define PTRS_PER_PGD (1UL << BITS_FOR_PGD) -#endif +#define PTRS_PER_PTE _BITUL(BITS_FOR_PTE) +#define PTRS_PER_PGD _BITUL(BITS_FOR_PGD) + /* * Number of entries a user land program use. * TASK_SIZE is the maximum vaddr that can be used by a userland program. @@ -270,15 +275,10 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) (unsigned long)(((pte_val(x) - CONFIG_LINUX_LINK_BASE) >> \ PAGE_SHIFT))) -#define mk_pte(page, pgprot) \ -({ \ - pte_t pte; \ - pte_val(pte) = __pa(page_address(page)) + pgprot_val(pgprot); \ - pte; \ -}) - +#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) -#define pfn_pte(pfn, prot) (__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define pfn_pte(pfn, prot) (__pte(((pte_t)(pfn) << PAGE_SHIFT) | \ + pgprot_val(prot))) #define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) /* @@ -295,23 +295,26 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) /* Zoo of pte_xxx function */ #define pte_read(pte) (pte_val(pte) & _PAGE_READ) #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) -#define pte_dirty(pte) (pte_val(pte) & _PAGE_MODIFIED) +#define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) #define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) -#define pte_special(pte) (0) +#define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) #define PTE_BIT_FUNC(fn, op) \ static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } +PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); -PTE_BIT_FUNC(mkclean, &= ~(_PAGE_MODIFIED)); -PTE_BIT_FUNC(mkdirty, |= (_PAGE_MODIFIED)); +PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); +PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE)); PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE)); +PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); +PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); -static inline pte_t pte_mkspecial(pte_t pte) { return pte; } +#define __HAVE_ARCH_PTE_SPECIAL static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { @@ -357,7 +360,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define pgd_offset_fast(mm, addr) pgd_offset(mm, addr) #endif -extern void paging_init(void); extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); @@ -383,6 +385,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, * remap a physical page `pfn' of size `size' with page protection `prot' * into virtual address `from' */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include <asm/hugepage.h> +#endif + #include <asm-generic/pgtable.h> /* to cope with aliasing VIPT cache */ diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index ee682d8e0213..44545354e9e8 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -114,7 +114,12 @@ extern unsigned int get_wchan(struct task_struct *p); * ----------------------------------------------------------------------------- */ #define VMALLOC_START 0x70000000 -#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START) + +/* + * 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter + * See asm/highmem.h for details + */ +#define VMALLOC_SIZE (PAGE_OFFSET - VMALLOC_START - PGDIR_SIZE * 4) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) #define USER_KERNEL_GUTTER 0x10000000 diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 6e3ef5ba4f74..307846691be6 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -33,4 +33,11 @@ extern int root_mountflags, end_mem; void setup_processor(void); void __init setup_arch_memory(void); +/* Helpers used in arc_*_mumbojumbo routines */ +#define IS_AVAIL1(v, s) ((v) ? s : "") +#define IS_DISABLED_RUN(v) ((v) ? "" : "(disabled) ") +#define IS_USED_RUN(v) ((v) ? "" : "(not used) ") +#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) +#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) + #endif /* __ASMARC_SETUP_H */ diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 3845b9e94f69..133c867d15af 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -45,12 +45,19 @@ extern int smp_ipi_irq_setup(int cpu, int irq); * struct plat_smp_ops - SMP callbacks provided by platform to ARC SMP * * @info: SoC SMP specific info for /proc/cpuinfo etc + * @init_early_smp: A SMP specific h/w block can init itself + * Could be common across platforms so not covered by + * mach_desc->init_early() + * @init_irq_cpu: Called for each core so SMP h/w block driver can do + * any needed setup per cpu (e.g. IPI request) * @cpu_kick: For Master to kickstart a cpu (optionally at a PC) * @ipi_send: To send IPI to a @cpu * @ips_clear: To clear IPI received at @irq */ struct plat_smp_ops { const char *info; + void (*init_early_smp)(void); + void (*init_irq_cpu)(int cpu); void (*cpu_kick)(int cpu, unsigned long pc); void (*ipi_send)(int cpu); void (*ipi_clear)(int irq); diff --git a/arch/arc/include/asm/tlbflush.h b/arch/arc/include/asm/tlbflush.h index 71c7b2e4b874..1fe9c8c80280 100644 --- a/arch/arc/include/asm/tlbflush.h +++ b/arch/arc/include/asm/tlbflush.h @@ -17,6 +17,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); #ifndef CONFIG_SMP #define flush_tlb_range(vma, s, e) local_flush_tlb_range(vma, s, e) @@ -24,6 +26,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, #define flush_tlb_kernel_range(s, e) local_flush_tlb_kernel_range(s, e) #define flush_tlb_all() local_flush_tlb_all() #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_pmd_tlb_range(vma, s, e) local_flush_pmd_tlb_range(vma, s, e) #else extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -31,5 +34,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page); extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void flush_tlb_all(void); extern void flush_tlb_mm(struct mm_struct *mm); +extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); + #endif /* CONFIG_SMP */ #endif diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index 9d129a2a1351..059aff38f10a 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -9,6 +9,8 @@ #ifndef _UAPI__ASM_ARC_PAGE_H #define _UAPI__ASM_ARC_PAGE_H +#include <linux/const.h> + /* PAGE_SHIFT determines the page size */ #if defined(CONFIG_ARC_PAGE_SIZE_16K) #define PAGE_SHIFT 14 @@ -25,13 +27,8 @@ #define PAGE_SHIFT 13 #endif -#ifdef __ASSEMBLY__ -#define PAGE_SIZE (1 << PAGE_SHIFT) -#define PAGE_OFFSET (0x80000000) -#else -#define PAGE_SIZE (1UL << PAGE_SHIFT) /* Default 8K */ -#define PAGE_OFFSET (0x80000000UL) /* Kernel starts at 2G onwards */ -#endif +#define PAGE_SIZE _BITUL(PAGE_SHIFT) /* Default 8K */ +#define PAGE_OFFSET _AC(0x80000000, UL) /* Kernel starts at 2G onwrds */ #define PAGE_MASK (~(PAGE_SIZE-1)) diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index 8fa76567e402..445e63a10754 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -24,7 +24,7 @@ .align 4 # Initial 16 slots are Exception Vectors -VECTOR stext ; Restart Vector (jump to entry point) +VECTOR res_service ; Reset Vector VECTOR mem_service ; Mem exception VECTOR instr_service ; Instrn Error VECTOR EV_MachineCheck ; Fatal Machine check diff --git a/arch/arc/kernel/entry-compact.S b/arch/arc/kernel/entry-compact.S index 15d457b4403a..59f52035b4ea 100644 --- a/arch/arc/kernel/entry-compact.S +++ b/arch/arc/kernel/entry-compact.S @@ -86,7 +86,7 @@ */ ; ********* Critical System Events ********************** -VECTOR res_service ; 0x0, Restart Vector (0x0) +VECTOR res_service ; 0x0, Reset Vector (0x0) VECTOR mem_service ; 0x8, Mem exception (0x1) VECTOR instr_service ; 0x10, Instrn Error (0x2) @@ -155,13 +155,9 @@ int2_saved_reg: ; --------------------------------------------- .section .text, "ax",@progbits -res_service: ; processor restart - flag 0x1 ; not implemented - nop - nop -reserved: ; processor restart - rtie ; jump to processor initializations +reserved: + flag 1 ; Unexpected event, halt ;##################### Interrupt Handling ############################## @@ -175,12 +171,25 @@ ENTRY(handle_interrupt_level2) ;------------------------------------------------------ ; if L2 IRQ interrupted a L1 ISR, disable preemption + ; + ; This is to avoid a potential L1-L2-L1 scenario + ; -L1 IRQ taken + ; -L2 interrupts L1 (before L1 ISR could run) + ; -preemption off IRQ, user task in syscall picked to run + ; -RTIE to userspace + ; Returns from L2 context fine + ; But both L1 and L2 re-enabled, so another L1 can be taken + ; while prev L1 is still unserviced + ; ;------------------------------------------------------ + ; L2 interrupting L1 implies both L2 and L1 active + ; However both A2 and A1 are NOT set in STATUS32, thus + ; need to check STATUS32_L2 to determine if L1 was active + ld r9, [sp, PT_status32] ; get statu32_l2 (saved in pt_regs) bbit0 r9, STATUS_A1_BIT, 1f ; L1 not active when L2 IRQ, so normal - ; A1 is set in status32_l2 ; bump thread_info->preempt_count (Disable preemption) GET_CURR_THR_INFO_FROM_SP r10 ld r9, [r10, THREAD_INFO_PREEMPT_COUNT] @@ -320,11 +329,10 @@ END(call_do_page_fault) ; Note that we use realtime STATUS32 (not pt_regs->status32) to ; decide that. - ; if Returning from Exception - btst r10, STATUS_AE_BIT - bnz .Lexcep_ret + and.f 0, r10, (STATUS_A1_MASK|STATUS_A2_MASK) + bz .Lexcep_or_pure_K_ret - ; Not Exception so maybe Interrupts (Level 1 or 2) + ; Returning from Interrupts (Level 1 or 2) #ifdef CONFIG_ARC_COMPACT_IRQ_LEVELS @@ -365,8 +373,7 @@ END(call_do_page_fault) st r9, [r10, THREAD_INFO_PREEMPT_COUNT] 149: - ;return from level 2 - INTERRUPT_EPILOGUE 2 + INTERRUPT_EPILOGUE 2 ; return from level 2 interrupt debug_marker_l2: rtie @@ -374,15 +381,11 @@ not_level2_interrupt: #endif - bbit0 r10, STATUS_A1_BIT, .Lpure_k_mode_ret - - ;return from level 1 - INTERRUPT_EPILOGUE 1 + INTERRUPT_EPILOGUE 1 ; return from level 1 interrupt debug_marker_l1: rtie -.Lexcep_ret: -.Lpure_k_mode_ret: +.Lexcep_or_pure_K_ret: ;this case is for syscalls or Exceptions or pure kernel mode diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 812f95e6ae69..689dd867fdff 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -50,28 +50,37 @@ .endm .section .init.text, "ax",@progbits - .type stext, @function - .globl stext -stext: - ;------------------------------------------------------------------- - ; Don't clobber r0-r2 yet. It might have bootloader provided info - ;------------------------------------------------------------------- + +;---------------------------------------------------------------- +; Default Reset Handler (jumped into from Reset vector) +; - Don't clobber r0,r1,r2 as they might have u-boot provided args +; - Platforms can override this weak version if needed +;---------------------------------------------------------------- +WEAK(res_service) + j stext +END(res_service) + +;---------------------------------------------------------------- +; Kernel Entry point +;---------------------------------------------------------------- +ENTRY(stext) CPU_EARLY_SETUP #ifdef CONFIG_SMP - ; Ensure Boot (Master) proceeds. Others wait in platform dependent way - ; IDENTITY Reg [ 3 2 1 0 ] - ; (cpu-id) ^^^ => Zero for UP ARC700 - ; => #Core-ID if SMP (Master 0) - ; Note that non-boot CPUs might not land here if halt-on-reset and - ; instead breath life from @first_lines_of_secondary, but we still - ; need to make sure only boot cpu takes this path. GET_CPU_ID r5 cmp r5, 0 - mov.ne r0, r5 - jne arc_platform_smp_wait_to_boot + mov.nz r0, r5 +#ifdef CONFIG_ARC_SMP_HALT_ON_RESET + ; Non-Master can proceed as system would be booted sufficiently + jnz first_lines_of_secondary +#else + ; Non-Masters wait for Master to boot enough and bring them up + jnz arc_platform_smp_wait_to_boot #endif + ; Master falls thru +#endif + ; Clear BSS before updating any globals ; XXX: use ZOL here mov r5, __bss_start @@ -102,18 +111,14 @@ stext: GET_TSK_STACK_BASE r9, sp ; r9 = tsk, sp = stack base(output) j start_kernel ; "C" entry point +END(stext) #ifdef CONFIG_SMP ;---------------------------------------------------------------- ; First lines of code run by secondary before jumping to 'C' ;---------------------------------------------------------------- .section .text, "ax",@progbits - .type first_lines_of_secondary, @function - .globl first_lines_of_secondary - -first_lines_of_secondary: - - CPU_EARLY_SETUP +ENTRY(first_lines_of_secondary) ; setup per-cpu idle task as "current" on this CPU ld r0, [@secondary_idle_tsk] @@ -126,5 +131,5 @@ first_lines_of_secondary: GET_TSK_STACK_BASE r0, sp j start_kernel_secondary - +END(first_lines_of_secondary) #endif diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index 039fac30b5c1..06bcedf19b62 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -79,17 +79,16 @@ static struct irq_chip onchip_intc = { static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { - /* - * XXX: the IPI IRQ needs to be handled like TIMER too. However ARC core - * code doesn't own it (like TIMER0). ISS IDU / ezchip define it - * in platform header which can't be included here as it goes - * against multi-platform image philisophy - */ - if (irq == TIMER0_IRQ) + switch (irq) { + case TIMER0_IRQ: +#ifdef CONFIG_SMP + case IPI_IRQ: +#endif irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq); - else + break; + default: irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); - + } return 0; } @@ -148,78 +147,15 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ); void arch_local_irq_enable(void) { - unsigned long flags = arch_local_save_flags(); - /* Allow both L1 and L2 at the onset */ - flags |= (STATUS_E1_MASK | STATUS_E2_MASK); - - /* Called from hard ISR (between irq_enter and irq_exit) */ - if (in_irq()) { - - /* If in L2 ISR, don't re-enable any further IRQs as this can - * cause IRQ priorities to get upside down. e.g. it could allow - * L1 be taken while in L2 hard ISR which is wrong not only in - * theory, it can also cause the dreaded L1-L2-L1 scenario - */ - if (flags & STATUS_A2_MASK) - flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); - - /* Even if in L1 ISR, allowe Higher prio L2 IRQs */ - else if (flags & STATUS_A1_MASK) - flags &= ~(STATUS_E1_MASK); - } - - /* called from soft IRQ, ideally we want to re-enable all levels */ - - else if (in_softirq()) { - - /* However if this is case of L1 interrupted by L2, - * re-enabling both may cause whaco L1-L2-L1 scenario - * because ARC700 allows level 1 to interrupt an active L2 ISR - * Thus we disable both - * However some code, executing in soft ISR wants some IRQs - * to be enabled so we re-enable L2 only - * - * How do we determine L1 intr by L2 - * -A2 is set (means in L2 ISR) - * -E1 is set in this ISR's pt_regs->status32 which is - * saved copy of status32_l2 when l2 ISR happened - */ - struct pt_regs *pt = get_irq_regs(); - - if ((flags & STATUS_A2_MASK) && pt && - (pt->status32 & STATUS_A1_MASK)) { - /*flags &= ~(STATUS_E1_MASK | STATUS_E2_MASK); */ - flags &= ~(STATUS_E1_MASK); - } - } + if (flags & STATUS_A2_MASK) + flags |= STATUS_E2_MASK; + else if (flags & STATUS_A1_MASK) + flags |= STATUS_E1_MASK; arch_local_irq_restore(flags); } -#else /* ! CONFIG_ARC_COMPACT_IRQ_LEVELS */ - -/* - * Simpler version for only 1 level of interrupt - * Here we only Worry about Level 1 Bits - */ -void arch_local_irq_enable(void) -{ - unsigned long flags; - - /* - * ARC IDE Drivers tries to re-enable interrupts from hard-isr - * context which is simply wrong - */ - if (in_irq()) { - WARN_ONCE(1, "IRQ enabled from hard-isr"); - return; - } - - flags = arch_local_save_flags(); - flags |= (STATUS_E1_MASK | STATUS_E2_MASK); - arch_local_irq_restore(flags); -} -#endif EXPORT_SYMBOL(arch_local_irq_enable); +#endif diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c index 2989a7bcf8a8..2ee226546c6a 100644 --- a/arch/arc/kernel/irq.c +++ b/arch/arc/kernel/irq.c @@ -10,6 +10,7 @@ #include <linux/interrupt.h> #include <linux/irqchip.h> #include <asm/mach_desc.h> +#include <asm/smp.h> /* * Late Interrupt system init called from start_kernel for Boot CPU only @@ -19,17 +20,20 @@ */ void __init init_IRQ(void) { - /* Any external intc can be setup here */ - if (machine_desc->init_irq) - machine_desc->init_irq(); - - /* process the entire interrupt tree in one go */ + /* + * process the entire interrupt tree in one go + * Any external intc will be setup provided DT chains them + * properly + */ irqchip_init(); #ifdef CONFIG_SMP - /* Master CPU can initialize it's side of IPI */ - if (machine_desc->init_smp) - machine_desc->init_smp(smp_processor_id()); + /* a SMP H/w block could do IPI IRQ request here */ + if (plat_smp_ops.init_irq_cpu) + plat_smp_ops.init_irq_cpu(smp_processor_id()); + + if (machine_desc->init_cpu_smp) + machine_desc->init_cpu_smp(smp_processor_id()); #endif } diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 4ffd1855f1bd..74a9b074ac3e 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -12,20 +12,14 @@ #include <linux/irq.h> #include <linux/spinlock.h> #include <asm/mcip.h> +#include <asm/setup.h> static char smp_cpuinfo_buf[128]; static int idu_detected; static DEFINE_RAW_SPINLOCK(mcip_lock); -/* - * Any SMP specific init any CPU does when it comes up. - * Here we setup the CPU to enable Inter-Processor-Interrupts - * Called for each CPU - * -Master : init_IRQ() - * -Other(s) : start_kernel_secondary() - */ -void mcip_init_smp(unsigned int cpu) +static void mcip_setup_per_cpu(int cpu) { smp_ipi_irq_setup(cpu, IPI_IRQ); } @@ -96,34 +90,8 @@ static void mcip_ipi_clear(int irq) #endif } -volatile int wake_flag; - -static void mcip_wakeup_cpu(int cpu, unsigned long pc) -{ - BUG_ON(cpu == 0); - wake_flag = cpu; -} - -void arc_platform_smp_wait_to_boot(int cpu) +static void mcip_probe_n_setup(void) { - while (wake_flag != cpu) - ; - - wake_flag = 0; - __asm__ __volatile__("j @first_lines_of_secondary \n"); -} - -struct plat_smp_ops plat_smp_ops = { - .info = smp_cpuinfo_buf, - .cpu_kick = mcip_wakeup_cpu, - .ipi_send = mcip_ipi_send, - .ipi_clear = mcip_ipi_clear, -}; - -void mcip_init_early_smp(void) -{ -#define IS_AVAIL1(var, str) ((var) ? str : "") - struct mcip_bcr { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int pad3:8, @@ -161,6 +129,14 @@ void mcip_init_early_smp(void) panic("kernel trying to use non-existent GRTC\n"); } +struct plat_smp_ops plat_smp_ops = { + .info = smp_cpuinfo_buf, + .init_early_smp = mcip_probe_n_setup, + .init_irq_cpu = mcip_setup_per_cpu, + .ipi_send = mcip_ipi_send, + .ipi_clear = mcip_ipi_clear, +}; + /*************************************************************************** * ARCv2 Interrupt Distribution Unit (IDU) * diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index cabde9dc0696..c33e77c0ad3e 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -160,10 +160,6 @@ static const struct cpuinfo_data arc_cpu_tbl[] = { { {0x00, NULL } } }; -#define IS_AVAIL1(v, s) ((v) ? s : "") -#define IS_USED_RUN(v) ((v) ? "" : "(not used) ") -#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) -#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { @@ -415,8 +411,9 @@ void __init setup_arch(char **cmdline_p) if (machine_desc->init_early) machine_desc->init_early(); - setup_processor(); smp_init_cpus(); + + setup_processor(); setup_arch_memory(); /* copy flat DT out of .init and then unflatten it */ diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index be13d12420ba..580587805fa3 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -42,8 +42,13 @@ void __init smp_prepare_boot_cpu(void) } /* - * Initialise the CPU possible map early - this describes the CPUs - * which may be present or become present in the system. + * Called from setup_arch() before calling setup_processor() + * + * - Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + * - Call early smp init hook. This can initialize a specific multi-core + * IP which is say common to several platforms (hence not part of + * platform specific int_early() hook) */ void __init smp_init_cpus(void) { @@ -51,6 +56,9 @@ void __init smp_init_cpus(void) for (i = 0; i < NR_CPUS; i++) set_cpu_possible(i, true); + + if (plat_smp_ops.init_early_smp) + plat_smp_ops.init_early_smp(); } /* called from init ( ) => process 1 */ @@ -72,35 +80,29 @@ void __init smp_cpus_done(unsigned int max_cpus) } /* - * After power-up, a non Master CPU needs to wait for Master to kick start it - * - * The default implementation halts - * - * This relies on platform specific support allowing Master to directly set - * this CPU's PC (to be @first_lines_of_secondary() and kick start it. - * - * In lack of such h/w assist, platforms can override this function - * - make this function busy-spin on a token, eventually set by Master - * (from arc_platform_smp_wakeup_cpu()) - * - Once token is available, jump to @first_lines_of_secondary - * (using inline asm). - * - * Alert: can NOT use stack here as it has not been determined/setup for CPU. - * If it turns out to be elaborate, it's better to code it in assembly - * + * Default smp boot helper for Run-on-reset case where all cores start off + * together. Non-masters need to wait for Master to start running. + * This is implemented using a flag in memory, which Non-masters spin-wait on. + * Master sets it to cpu-id of core to "ungate" it. */ -void __weak arc_platform_smp_wait_to_boot(int cpu) +static volatile int wake_flag; + +static void arc_default_smp_cpu_kick(int cpu, unsigned long pc) { - /* - * As a hack for debugging - since debugger will single-step over the - * FLAG insn - wrap the halt itself it in a self loop - */ - __asm__ __volatile__( - "1: \n" - " flag 1 \n" - " b 1b \n"); + BUG_ON(cpu == 0); + wake_flag = cpu; +} + +void arc_platform_smp_wait_to_boot(int cpu) +{ + while (wake_flag != cpu) + ; + + wake_flag = 0; + __asm__ __volatile__("j @first_lines_of_secondary \n"); } + const char *arc_platform_smp_cpuinfo(void) { return plat_smp_ops.info ? : ""; @@ -129,8 +131,12 @@ void start_kernel_secondary(void) pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); - if (machine_desc->init_smp) - machine_desc->init_smp(cpu); + /* Some SMP H/w setup - for each cpu */ + if (plat_smp_ops.init_irq_cpu) + plat_smp_ops.init_irq_cpu(cpu); + + if (machine_desc->init_cpu_smp) + machine_desc->init_cpu_smp(cpu); arc_local_timer_setup(); @@ -161,6 +167,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (plat_smp_ops.cpu_kick) plat_smp_ops.cpu_kick(cpu, (unsigned long)first_lines_of_secondary); + else + arc_default_smp_cpu_kick(cpu, (unsigned long)NULL); /* wait for 1 sec after kicking the secondary */ wait_till = jiffies + HZ; diff --git a/arch/arc/kernel/time.c b/arch/arc/kernel/time.c index 4294761a2b3e..dfad287f1db1 100644 --- a/arch/arc/kernel/time.c +++ b/arch/arc/kernel/time.c @@ -285,7 +285,4 @@ void __init time_init(void) /* sets up the periodic event timer */ arc_local_timer_setup(); - - if (machine_desc->init_time) - machine_desc->init_time(); } diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index dd35bde39f69..894e696bddaa 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -12,7 +12,7 @@ #include <asm/thread_info.h> OUTPUT_ARCH(arc) -ENTRY(_stext) +ENTRY(res_service) #ifdef CONFIG_CPU_BIG_ENDIAN jiffies = jiffies_64 + 4; diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile index 7beb941556c3..3703a4969349 100644 --- a/arch/arc/mm/Makefile +++ b/arch/arc/mm/Makefile @@ -8,3 +8,4 @@ obj-y := extable.o ioremap.o dma.o fault.o init.o obj-y += tlb.o tlbex.o cache.o mmap.o +obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 0d1a6e96839f..ff7ff6cbb811 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -25,7 +25,7 @@ static int l2_line_sz; int ioc_exists; volatile int slc_enable = 1, ioc_enable = 1; -void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr, +void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int cacheop); void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz); @@ -37,7 +37,6 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) int n = 0; struct cpuinfo_arc_cache *p; -#define IS_USED_RUN(v) ((v) ? "" : "(disabled) ") #define PR_CACHE(p, cfg, str) \ if (!(p)->ver) \ n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ @@ -47,7 +46,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) (p)->sz_k, (p)->assoc, (p)->line_len, \ (p)->vipt ? "VIPT" : "PIPT", \ (p)->alias ? " aliasing" : "", \ - IS_ENABLED(cfg) ? "" : " (not used)"); + IS_USED_CFG(cfg)); PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); @@ -63,7 +62,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) if (ioc_exists) n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n", - IS_USED_RUN(ioc_enable)); + IS_DISABLED_RUN(ioc_enable)); return buf; } @@ -217,7 +216,7 @@ slc_chk: */ static inline -void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr, +void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) { unsigned int aux_cmd; @@ -254,8 +253,12 @@ void __cache_line_loop_v2(unsigned long paddr, unsigned long vaddr, } } +/* + * For ARC700 MMUv3 I-cache and D-cache flushes + * Also reused for HS38 aliasing I-cache configuration + */ static inline -void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, +void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) { unsigned int aux_cmd, aux_tag; @@ -290,6 +293,16 @@ void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, if (full_page) write_aux_reg(aux_tag, paddr); + /* + * This is technically for MMU v4, using the MMU v3 programming model + * Special work for HS38 aliasing I-cache configuratino with PAE40 + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + * Note that PTAG_HI is hoisted outside the line loop + */ + if (is_pae40_enabled() && op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + while (num_lines-- > 0) { if (!full_page) { write_aux_reg(aux_tag, paddr); @@ -302,14 +315,20 @@ void __cache_line_loop_v3(unsigned long paddr, unsigned long vaddr, } /* - * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache - * maintenance ops (in IVIL reg), as long as icache doesn't alias. + * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT + * Here's how cache ops are implemented + * + * - D-cache: only paddr needed (in DC_IVDL/DC_FLDL) + * - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL) + * - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG + * respectively, similar to MMU v3 programming model, hence + * __cache_line_loop_v3() is used) * - * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is - * specified in PTAG (similar to MMU v3) + * If PAE40 is enabled, independent of aliasing considerations, the higher bits + * needs to be written into PTAG_HI */ static inline -void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr, +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int cacheop) { unsigned int aux_cmd; @@ -336,6 +355,22 @@ void __cache_line_loop_v4(unsigned long paddr, unsigned long vaddr, num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + /* + * For HS38 PAE40 configuration + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + */ + if (is_pae40_enabled()) { + if (cacheop == OP_INV_IC) + /* + * Non aliasing I-cache in HS38, + * aliasing I-cache handled in __cache_line_loop_v3() + */ + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + while (num_lines-- > 0) { write_aux_reg(aux_cmd, paddr); paddr += L1_CACHE_BYTES; @@ -413,7 +448,7 @@ static inline void __dc_entire_op(const int op) /* * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback) */ -static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, +static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr, unsigned long sz, const int op) { unsigned long flags; @@ -446,7 +481,7 @@ static inline void __ic_entire_inv(void) } static inline void -__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, +__ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr, unsigned long sz) { unsigned long flags; @@ -463,7 +498,7 @@ __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, #else struct ic_inv_args { - unsigned long paddr, vaddr; + phys_addr_t paddr, vaddr; int sz; }; @@ -474,7 +509,7 @@ static void __ic_line_inv_vaddr_helper(void *info) __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); } -static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, +static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr, unsigned long sz) { struct ic_inv_args ic_inv = { @@ -495,7 +530,7 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, #endif /* CONFIG_ARC_HAS_ICACHE */ -noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) +noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 /* @@ -585,7 +620,7 @@ void flush_dcache_page(struct page *page) } else if (page_mapped(page)) { /* kernel reading from page with U-mapping */ - unsigned long paddr = (unsigned long)page_address(page); + phys_addr_t paddr = (unsigned long)page_address(page); unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; if (addr_not_cache_congruent(paddr, vaddr)) @@ -733,14 +768,14 @@ EXPORT_SYMBOL(flush_icache_range); * builtin kernel page will not have any virtual mappings. * kprobe on loadable module will be kernel vaddr. */ -void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len) { __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); __ic_line_inv_vaddr(paddr, vaddr, len); } /* wrapper to compile time eliminate alignment checks in flush loop */ -void __inv_icache_page(unsigned long paddr, unsigned long vaddr) +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr) { __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); } @@ -749,7 +784,7 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr) * wrapper to clearout kernel or userspace mappings of a page * For kernel mappings @vaddr == @paddr */ -void __flush_dcache_page(unsigned long paddr, unsigned long vaddr) +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) { __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); } @@ -807,8 +842,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma) { - unsigned long kfrom = (unsigned long)page_address(from); - unsigned long kto = (unsigned long)page_address(to); + void *kfrom = kmap_atomic(from); + void *kto = kmap_atomic(to); int clean_src_k_mappings = 0; /* @@ -818,13 +853,16 @@ void copy_user_highpage(struct page *to, struct page *from, * * Note that while @u_vaddr refers to DST page's userspace vaddr, it is * equally valid for SRC page as well + * + * For !VIPT cache, all of this gets compiled out as + * addr_not_cache_congruent() is 0 */ if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { - __flush_dcache_page(kfrom, u_vaddr); + __flush_dcache_page((unsigned long)kfrom, u_vaddr); clean_src_k_mappings = 1; } - copy_page((void *)kto, (void *)kfrom); + copy_page(kto, kfrom); /* * Mark DST page K-mapping as dirty for a later finalization by @@ -841,11 +879,14 @@ void copy_user_highpage(struct page *to, struct page *from, * sync the kernel mapping back to physical page */ if (clean_src_k_mappings) { - __flush_dcache_page(kfrom, kfrom); + __flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom); set_bit(PG_dc_clean, &from->flags); } else { clear_bit(PG_dc_clean, &from->flags); } + + kunmap_atomic(kto); + kunmap_atomic(kfrom); } void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index d948e4e9d89c..af63f4a13e60 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -18,7 +18,14 @@ #include <asm/pgalloc.h> #include <asm/mmu.h> -static int handle_vmalloc_fault(unsigned long address) +/* + * kernel virtual address is required to implement vmalloc/pkmap/fixmap + * Refer to asm/processor.h for System Memory Map + * + * It simply copies the PMD entry (pointer to 2nd level page table or hugepage) + * from swapper pgdir to task pgdir. The 2nd level table/page is thus shared + */ +noinline static int handle_kernel_vaddr_fault(unsigned long address) { /* * Synchronize this task's top level page-table @@ -72,8 +79,8 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) * only copy the information from the master page table, * nothing more. */ - if (address >= VMALLOC_START && address <= VMALLOC_END) { - ret = handle_vmalloc_fault(address); + if (address >= VMALLOC_START) { + ret = handle_kernel_vaddr_fault(address); if (unlikely(ret)) goto bad_area_nosemaphore; else diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c new file mode 100644 index 000000000000..065ee6bfa82a --- /dev/null +++ b/arch/arc/mm/highmem.c @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/bootmem.h> +#include <linux/export.h> +#include <linux/highmem.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +/* + * HIGHMEM API: + * + * kmap() API provides sleep semantics hence refered to as "permanent maps" + * It allows mapping LAST_PKMAP pages, using @last_pkmap_nr as the cursor + * for book-keeping + * + * kmap_atomic() can't sleep (calls pagefault_disable()), thus it provides + * shortlived ala "temporary mappings" which historically were implemented as + * fixmaps (compile time addr etc). Their book-keeping is done per cpu. + * + * Both these facts combined (preemption disabled and per-cpu allocation) + * means the total number of concurrent fixmaps will be limited to max + * such allocations in a single control path. Thus KM_TYPE_NR (another + * historic relic) is a small'ish number which caps max percpu fixmaps + * + * ARC HIGHMEM Details + * + * - the kernel vaddr space from 0x7z to 0x8z (currently used by vmalloc/module) + * is now shared between vmalloc and kmap (non overlapping though) + * + * - Both fixmap/pkmap use a dedicated page table each, hooked up to swapper PGD + * This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means + * 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split) + * + * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE + * slots across NR_CPUS would be more than sufficient (generic code defines + * KM_TYPE_NR as 20). + * + * - pkmap being preemptible, in theory could do with more than 256 concurrent + * mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse + * the PGD and only works with a single page table @pkmap_page_table, hence + * sets the limit + */ + +extern pte_t * pkmap_page_table; +static pte_t * fixmap_page_table; + +void *kmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return page_address(page); + + return kmap_high(page); +} + +void *kmap_atomic(struct page *page) +{ + int idx, cpu_idx; + unsigned long vaddr; + + preempt_disable(); + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + + cpu_idx = kmap_atomic_idx_push(); + idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); + vaddr = FIXMAP_ADDR(idx); + + set_pte_at(&init_mm, vaddr, fixmap_page_table + idx, + mk_pte(page, kmap_prot)); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void __kunmap_atomic(void *kv) +{ + unsigned long kvaddr = (unsigned long)kv; + + if (kvaddr >= FIXMAP_BASE && kvaddr < (FIXMAP_BASE + FIXMAP_SIZE)) { + + /* + * Because preemption is disabled, this vaddr can be associated + * with the current allocated index. + * But in case of multiple live kmap_atomic(), it still relies on + * callers to unmap in right order. + */ + int cpu_idx = kmap_atomic_idx(); + int idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); + + WARN_ON(kvaddr != FIXMAP_ADDR(idx)); + + pte_clear(&init_mm, kvaddr, fixmap_page_table + idx); + local_flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE); + + kmap_atomic_idx_pop(); + } + + pagefault_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(__kunmap_atomic); + +noinline pte_t *alloc_kmap_pgtable(unsigned long kvaddr) +{ + pgd_t *pgd_k; + pud_t *pud_k; + pmd_t *pmd_k; + pte_t *pte_k; + + pgd_k = pgd_offset_k(kvaddr); + pud_k = pud_offset(pgd_k, kvaddr); + pmd_k = pmd_offset(pud_k, kvaddr); + + pte_k = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd_k, pte_k); + return pte_k; +} + +void kmap_init(void) +{ + /* Due to recursive include hell, we can't do this in processor.h */ + BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE)); + + BUILD_BUG_ON(KM_TYPE_NR > PTRS_PER_PTE); + pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE); + + BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE); + fixmap_page_table = alloc_kmap_pgtable(FIXMAP_BASE); +} diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index d44eedd8c322..a9305b5a2cd4 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -15,6 +15,7 @@ #endif #include <linux/swap.h> #include <linux/module.h> +#include <linux/highmem.h> #include <asm/page.h> #include <asm/pgalloc.h> #include <asm/sections.h> @@ -24,16 +25,22 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE); EXPORT_SYMBOL(empty_zero_page); -/* Default tot mem from .config */ -static unsigned long arc_mem_sz = 0x20000000; /* some default */ +static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE; +static unsigned long low_mem_sz; + +#ifdef CONFIG_HIGHMEM +static unsigned long min_high_pfn; +static u64 high_mem_start; +static u64 high_mem_sz; +#endif /* User can over-ride above with "mem=nnn[KkMm]" in cmdline */ static int __init setup_mem_sz(char *str) { - arc_mem_sz = memparse(str, NULL) & PAGE_MASK; + low_mem_sz = memparse(str, NULL) & PAGE_MASK; /* early console might not be setup yet - it will show up later */ - pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(arc_mem_sz)); + pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(low_mem_sz)); return 0; } @@ -41,8 +48,22 @@ early_param("mem", setup_mem_sz); void __init early_init_dt_add_memory_arch(u64 base, u64 size) { - arc_mem_sz = size & PAGE_MASK; - pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz)); + int in_use = 0; + + if (!low_mem_sz) { + BUG_ON(base != low_mem_start); + low_mem_sz = size; + in_use = 1; + } else { +#ifdef CONFIG_HIGHMEM + high_mem_start = base; + high_mem_sz = size; + in_use = 1; +#endif + } + + pr_info("Memory @ %llx [%lldM] %s\n", + base, TO_MB(size), !in_use ? "Not used":""); } #ifdef CONFIG_BLK_DEV_INITRD @@ -72,46 +93,62 @@ early_param("initrd", early_initrd); void __init setup_arch_memory(void) { unsigned long zones_size[MAX_NR_ZONES]; - unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz; + unsigned long zones_holes[MAX_NR_ZONES]; init_mm.start_code = (unsigned long)_text; init_mm.end_code = (unsigned long)_etext; init_mm.end_data = (unsigned long)_edata; init_mm.brk = (unsigned long)_end; - /* - * We do it here, so that memory is correctly instantiated - * even if "mem=xxx" cmline over-ride is given and/or - * DT has memory node. Each causes an update to @arc_mem_sz - * and we finally add memory one here - */ - memblock_add(CONFIG_LINUX_LINK_BASE, arc_mem_sz); - - /*------------- externs in mm need setting up ---------------*/ - /* first page of system - kernel .vector starts here */ min_low_pfn = ARCH_PFN_OFFSET; - /* Last usable page of low mem (no HIGHMEM yet for ARC port) */ - max_low_pfn = max_pfn = PFN_DOWN(end_mem); + /* Last usable page of low mem */ + max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz); - max_mapnr = max_low_pfn - min_low_pfn; +#ifdef CONFIG_HIGHMEM + min_high_pfn = PFN_DOWN(high_mem_start); + max_pfn = PFN_DOWN(high_mem_start + high_mem_sz); +#endif + + max_mapnr = max_pfn - min_low_pfn; - /*------------- reserve kernel image -----------------------*/ - memblock_reserve(CONFIG_LINUX_LINK_BASE, - __pa(_end) - CONFIG_LINUX_LINK_BASE); + /*------------- bootmem allocator setup -----------------------*/ + + /* + * seed the bootmem allocator after any DT memory node parsing or + * "mem=xxx" cmdline overrides have potentially updated @arc_mem_sz + * + * Only low mem is added, otherwise we have crashes when allocating + * mem_map[] itself. NO_BOOTMEM allocates mem_map[] at the end of + * avail memory, ending in highmem with a > 32-bit address. However + * it then tries to memset it with a truncaed 32-bit handle, causing + * the crash + */ + + memblock_add(low_mem_start, low_mem_sz); + memblock_reserve(low_mem_start, __pa(_end) - low_mem_start); #ifdef CONFIG_BLK_DEV_INITRD - /*------------- reserve initrd image -----------------------*/ if (initrd_start) memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); #endif memblock_dump_all(); - /*-------------- node setup --------------------------------*/ + /*----------------- node/zones setup --------------------------*/ memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_NORMAL] = max_mapnr; + memset(zones_holes, 0, sizeof(zones_holes)); + + zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; + zones_holes[ZONE_NORMAL] = 0; + +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn; + + /* This handles the peripheral address space hole */ + zones_holes[ZONE_HIGHMEM] = min_high_pfn - max_low_pfn; +#endif /* * We can't use the helper free_area_init(zones[]) because it uses @@ -122,9 +159,12 @@ void __init setup_arch_memory(void) free_area_init_node(0, /* node-id */ zones_size, /* num pages per zone */ min_low_pfn, /* first pfn of node */ - NULL); /* NO holes */ + zones_holes); /* holes */ - high_memory = (void *)end_mem; +#ifdef CONFIG_HIGHMEM + high_memory = (void *)(min_high_pfn << PAGE_SHIFT); + kmap_init(); +#endif } /* @@ -135,6 +175,14 @@ void __init setup_arch_memory(void) */ void __init mem_init(void) { +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + + reset_all_zones_managed_pages(); + for (tmp = min_high_pfn; tmp < max_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); +#endif + free_all_bootmem(); mem_init_print_info(NULL); } diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 2c7ce8bb7475..0ee739846847 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -109,6 +109,10 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; static inline void __tlb_entry_erase(void) { write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } @@ -182,7 +186,7 @@ static void utlb_invalidate(void) } -static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +static void tlb_entry_insert(unsigned int pd0, pte_t pd1) { unsigned int idx; @@ -225,10 +229,14 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid) write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); } -static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +static void tlb_entry_insert(unsigned int pd0, pte_t pd1) { write_aux_reg(ARC_REG_TLBPD0, pd0); write_aux_reg(ARC_REG_TLBPD1, pd1); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); } @@ -240,22 +248,39 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) noinline void local_flush_tlb_all(void) { + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; unsigned long flags; unsigned int entry; - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + int num_tlb = mmu->sets * mmu->ways; local_irq_save(flags); /* Load PD0 and PD1 with template for a Blank Entry */ write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); - for (entry = 0; entry < mmu->num_tlb; entry++) { + for (entry = 0; entry < num_tlb; entry++) { /* write this entry to the TLB */ write_aux_reg(ARC_REG_TLBINDEX, entry); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + const int stlb_idx = 0x800; + + /* Blank sTLB entry */ + write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ); + + for (entry = stlb_idx; entry < stlb_idx + 16; entry++) { + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + } + } + utlb_invalidate(); local_irq_restore(flags); @@ -409,6 +434,15 @@ static inline void ipi_flush_tlb_range(void *arg) local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void ipi_flush_pmd_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} +#endif + static inline void ipi_flush_tlb_kernel_range(void *arg) { struct tlb_args *ta = (struct tlb_args *)arg; @@ -449,6 +483,20 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1); +} +#endif + void flush_tlb_kernel_range(unsigned long start, unsigned long end) { struct tlb_args ta = { @@ -463,11 +511,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) /* * Routine to create a TLB entry */ -void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) { unsigned long flags; unsigned int asid_or_sasid, rwx; - unsigned long pd0, pd1; + unsigned long pd0; + pte_t pd1; /* * create_tlb() assumes that current->mm == vma->mm, since @@ -499,9 +548,9 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) local_irq_save(flags); - tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address); + tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr); - address &= PAGE_MASK; + vaddr &= PAGE_MASK; /* update this PTE credentials */ pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); @@ -511,7 +560,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) /* ASID for this task */ asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; - pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); + pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); /* * ARC MMU provides fully orthogonal access bits for K/U mode, @@ -547,7 +596,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, pte_t *ptep) { unsigned long vaddr = vaddr_unaligned & PAGE_MASK; - unsigned long paddr = pte_val(*ptep) & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK; struct page *page = pfn_to_page(pte_pfn(*ptep)); create_tlb(vma, vaddr, ptep); @@ -580,6 +629,95 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, } } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* + * MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP + * support. + * + * Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a + * new bit "SZ" in TLB page desciptor to distinguish between them. + * Super Page size is configurable in hardware (4K to 16M), but fixed once + * RTL builds. + * + * The exact THP size a Linx configuration will support is a function of: + * - MMU page size (typical 8K, RTL fixed) + * - software page walker address split between PGD:PTE:PFN (typical + * 11:8:13, but can be changed with 1 line) + * So for above default, THP size supported is 8K * (2^8) = 2M + * + * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime + * reduces to 1 level (as PTE is folded into PGD and canonically referred + * to as PMD). + * Thus THP PMD accessors are implemented in terms of PTE (just like sparc) + */ + +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd) +{ + pte_t pte = __pte(pmd_val(*pmd)); + update_mmu_cache(vma, addr, &pte); +} + +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + struct list_head *lh = (struct list_head *) pgtable; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; +} + +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) +{ + struct list_head *lh; + pgtable_t pgtable; + + assert_spin_locked(&mm->page_table_lock); + + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + + pte_val(pgtable[0]) = 0; + pte_val(pgtable[1]) = 0; + + return pgtable; +} + +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + unsigned int cpu; + unsigned long flags; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) { + unsigned int asid = hw_pid(vma->vm_mm, cpu); + + /* No need to loop here: this will always be for 1 Huge Page */ + tlb_entry_erase(start | _PAGE_HW_SZ | asid); + } + + local_irq_restore(flags); +} + +#endif + /* Read the Cache Build Confuration Registers, Decode them and save into * the cpuinfo structure for later use. * No Validation is done here, simply read/convert the BCRs @@ -598,10 +736,10 @@ void read_decode_mmu_bcr(void) struct bcr_mmu_3 { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4, + unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, u_itlb:4, u_dtlb:4; #else - unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4, + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4, ways:4, ver:8; #endif } *mmu3; @@ -622,7 +760,7 @@ void read_decode_mmu_bcr(void) if (mmu->ver <= 2) { mmu2 = (struct bcr_mmu_1_2 *)&tmp; - mmu->pg_sz_k = TO_KB(PAGE_SIZE); + mmu->pg_sz_k = TO_KB(0x2000); mmu->sets = 1 << mmu2->sets; mmu->ways = 1 << mmu2->ways; mmu->u_dtlb = mmu2->u_dtlb; @@ -634,6 +772,7 @@ void read_decode_mmu_bcr(void) mmu->ways = 1 << mmu3->ways; mmu->u_dtlb = mmu3->u_dtlb; mmu->u_itlb = mmu3->u_itlb; + mmu->sasid = mmu3->sasid; } else { mmu4 = (struct bcr_mmu_4 *)&tmp; mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); @@ -642,9 +781,9 @@ void read_decode_mmu_bcr(void) mmu->ways = mmu4->n_ways * 2; mmu->u_dtlb = mmu4->u_dtlb * 4; mmu->u_itlb = mmu4->u_itlb * 4; + mmu->sasid = mmu4->sasid; + mmu->pae = mmu4->pae; } - - mmu->num_tlb = mmu->sets * mmu->ways; } char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) @@ -655,14 +794,15 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) if (p_mmu->s_pg_sz_m) scnprintf(super_pg, 64, "%dM Super Page%s, ", - p_mmu->s_pg_sz_m, " (not used)"); + p_mmu->s_pg_sz_m, + IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", + "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n", p_mmu->ver, p_mmu->pg_sz_k, super_pg, - p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, + p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, - IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : ""); + IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40)); return buf; } @@ -690,6 +830,14 @@ void arc_mmu_init(void) if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE)) + panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n", + (unsigned long)TO_MB(HPAGE_PMD_SIZE)); + + if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae) + panic("Hardware doesn't support PAE40\n"); + /* Enable the MMU */ write_aux_reg(ARC_REG_PID, MMU_ENABLE); @@ -725,15 +873,15 @@ void arc_mmu_init(void) * the duplicate one. * -Knob to be verbose abt it.(TODO: hook them up to debugfs) */ -volatile int dup_pd_verbose = 1;/* Be slient abt it or complain (default) */ +volatile int dup_pd_silent; /* Be slient abt it or complain (default) */ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) { - int set, way, n; - unsigned long flags, is_valid; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - unsigned int pd0[mmu->ways], pd1[mmu->ways]; + unsigned int pd0[mmu->ways]; + unsigned long flags; + int set; local_irq_save(flags); @@ -743,14 +891,16 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, /* loop thru all sets of TLB */ for (set = 0; set < mmu->sets; set++) { + int is_valid, way; + /* read out all the ways of current set */ for (way = 0, is_valid = 0; way < mmu->ways; way++) { write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead); pd0[way] = read_aux_reg(ARC_REG_TLBPD0); - pd1[way] = read_aux_reg(ARC_REG_TLBPD1); is_valid |= pd0[way] & _PAGE_PRESENT; + pd0[way] &= PAGE_MASK; } /* If all the WAYS in SET are empty, skip to next SET */ @@ -759,30 +909,28 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, /* Scan the set for duplicate ways: needs a nested loop */ for (way = 0; way < mmu->ways - 1; way++) { + + int n; + if (!pd0[way]) continue; for (n = way + 1; n < mmu->ways; n++) { - if ((pd0[way] & PAGE_MASK) == - (pd0[n] & PAGE_MASK)) { - - if (dup_pd_verbose) { - pr_info("Duplicate PD's @" - "[%d:%d]/[%d:%d]\n", - set, way, set, n); - pr_info("TLBPD0[%u]: %08x\n", - way, pd0[way]); - } - - /* - * clear entry @way and not @n. This is - * critical to our optimised loop - */ - pd0[way] = pd1[way] = 0; - write_aux_reg(ARC_REG_TLBINDEX, + if (pd0[way] != pd0[n]) + continue; + + if (!dup_pd_silent) + pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n", + pd0[way], set, way, n); + + /* + * clear entry @way and not @n. + * This is critical to our optimised loop + */ + pd0[way] = 0; + write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); - __tlb_entry_erase(); - } + __tlb_entry_erase(); } } } diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index f6f4c3cb505d..63860adc4814 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -205,20 +205,38 @@ ex_saved_reg1: #endif lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD - ld.as r1, [r1, r0] ; PGD entry corresp to faulting addr - and.f r1, r1, PAGE_MASK ; Ignoring protection and other flags - ; contains Ptr to Page Table - bz.d do_slow_path_pf ; if no Page Table, do page fault + ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr + tst r3, r3 + bz do_slow_path_pf ; if no Page Table, do page fault + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) + add2.nz r1, r1, r0 + bnz.d 2f ; YES: PGD == PMD has THP PTE: stop pgd walk + mov.nz r0, r3 + +#endif + and r1, r3, PAGE_MASK ; Get the PTE entry: The idea is ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index - ; (3) z = pgtbl[y] - ; To avoid the multiply by in end, we do the -2, <<2 below + ; (3) z = (pgtbl + y * 4) + +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */ +#else +#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */ +#endif + + ; multiply in step (3) above avoided by shifting lesser in step (1) + lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG ) + and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG ) + ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40) + ; r1: PTE ptr + +2: - lsr r0, r2, (PAGE_SHIFT - 2) - and r0, r0, ( (PTRS_PER_PTE - 1) << 2) - ld.aw r0, [r1, r0] ; get PTE and PTE ptr for fault addr #ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT and.f 0, r0, _PAGE_PRESENT bz 1f @@ -233,18 +251,23 @@ ex_saved_reg1: ;----------------------------------------------------------------- ; Convert Linux PTE entry into TLB entry ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu +; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI]) ; IN: r0 = PTE, r1 = ptr to PTE .macro CONV_PTE_TO_TLB - and r3, r0, PTE_BITS_RWX ; r w x - lsl r2, r3, 3 ; r w x 0 0 0 (GLOBAL, kernel only) + and r3, r0, PTE_BITS_RWX ; r w x + lsl r2, r3, 3 ; Kr Kw Kx 0 0 0 (GLOBAL, kernel only) and.f 0, r0, _PAGE_GLOBAL - or.z r2, r2, r3 ; r w x r w x (!GLOBAL, user page) + or.z r2, r2, r3 ; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page) and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE or r3, r3, r2 - sr r3, [ARC_REG_TLBPD1] ; these go in PD1 + sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C +#ifdef CONFIG_ARC_HAS_PAE40 + ld r3, [r1, 4] ; paddr[39..32] + sr r3, [ARC_REG_TLBPD1HI] +#endif and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb @@ -365,7 +388,7 @@ ENTRY(EV_TLBMissD) lr r3, [ecr] or r0, r0, _PAGE_ACCESSED ; Accessed bit always btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? - or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well + or.nz r0, r0, _PAGE_DIRTY ; if Write, set Dirty bit as well st_s r0, [r1] ; Write back PTE CONV_PTE_TO_TLB diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index 0a77b19e1df8..1b0f0f458a2b 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -455,11 +455,6 @@ static void __init axs103_early_init(void) axs10x_print_board_ver(AXC003_CREG + 4088, "AXC003 CPU Card"); axs10x_early_init(); - -#ifdef CONFIG_ARC_MCIP - /* No Hardware init, but filling the smp ops callbacks */ - mcip_init_early_smp(); -#endif } #endif @@ -487,9 +482,6 @@ static const char *axs103_compat[] __initconst = { MACHINE_START(AXS103, "axs103") .dt_compat = axs103_compat, .init_early = axs103_early_init, -#ifdef CONFIG_ARC_MCIP - .init_smp = mcip_init_smp, -#endif MACHINE_END /* diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c index d9e35b4a2f08..dde692812bc1 100644 --- a/arch/arc/plat-sim/platform.c +++ b/arch/arc/plat-sim/platform.c @@ -30,8 +30,4 @@ static const char *simulation_compat[] __initconst = { MACHINE_START(SIMULATION, "simulation") .dt_compat = simulation_compat, -#ifdef CONFIG_ARC_MCIP - .init_early = mcip_init_early_smp, - .init_smp = mcip_init_smp, -#endif MACHINE_END diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 72ad724c67ae..f1ed1109f488 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -645,6 +645,7 @@ config ARCH_SHMOBILE_LEGACY config ARCH_RPC bool "RiscPC" + depends on MMU select ARCH_ACORN select ARCH_MAY_HAVE_PC_FDC select ARCH_SPARSEMEM_ENABLE @@ -819,6 +820,7 @@ config ARCH_VIRT bool "Dummy Virtual Machine" if ARCH_MULTI_V7 select ARM_AMBA select ARM_GIC + select ARM_GIC_V3 select ARM_PSCI select HAVE_ARM_ARCH_TIMER @@ -1410,7 +1412,6 @@ config HAVE_ARM_ARCH_TIMER config HAVE_ARM_TWD bool - depends on SMP select CLKSRC_OF if OF help This options enables support for the ARM timer and watchdog unit @@ -1470,6 +1471,8 @@ choice config VMSPLIT_3G bool "3G/1G user/kernel split" + config VMSPLIT_3G_OPT + bool "3G/1G user/kernel split (for full 1G low memory)" config VMSPLIT_2G bool "2G/2G user/kernel split" config VMSPLIT_1G @@ -1481,6 +1484,7 @@ config PAGE_OFFSET default PHYS_OFFSET if !MMU default 0x40000000 if VMSPLIT_1G default 0x80000000 if VMSPLIT_2G + default 0xB0000000 if VMSPLIT_3G_OPT default 0xC0000000 config NR_CPUS @@ -1695,8 +1699,9 @@ config HIGHMEM If unsure, say n. config HIGHPTE - bool "Allocate 2nd-level pagetables from highmem" + bool "Allocate 2nd-level pagetables from highmem" if EXPERT depends on HIGHMEM + default y help The VM uses one page of physical memory for each page table. For systems with a lot of processes, this can use a lot of diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 233159d2eaab..bb8fa023d574 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -578,7 +578,7 @@ dtb-$(CONFIG_MACH_SUN4I) += \ sun4i-a10-hackberry.dtb \ sun4i-a10-hyundai-a7hd.dtb \ sun4i-a10-inet97fv2.dtb \ - sun4i-a10-itead-iteaduino-plus.dts \ + sun4i-a10-itead-iteaduino-plus.dtb \ sun4i-a10-jesurun-q5.dtb \ sun4i-a10-marsboard.dtb \ sun4i-a10-mini-xplus.dtb \ diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 0447c04a40cc..d83ff9c9701e 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -591,6 +591,7 @@ cpts_clock_mult = <0x80000000>; cpts_clock_shift = <29>; ranges; + syscon = <&scm_conf>; davinci_mdio: mdio@4a101000 { compatible = "ti,am4372-mdio","ti,davinci_mdio"; diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index 568adf5efde0..d55e3ea89fda 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -402,11 +402,12 @@ /* SMPS9 unused */ ldo1_reg: ldo1 { - /* VDD_SD */ + /* VDD_SD / VDDSHV8 */ regulator-name = "ldo1"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-boot-on; + regulator-always-on; }; ldo2_reg: ldo2 { diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts b/arch/arm/boot/dts/armada-385-db-ap.dts index 89f5a95954ed..4047621b137e 100644 --- a/arch/arm/boot/dts/armada-385-db-ap.dts +++ b/arch/arm/boot/dts/armada-385-db-ap.dts @@ -46,7 +46,7 @@ / { model = "Marvell Armada 385 Access Point Development Board"; - compatible = "marvell,a385-db-ap", "marvell,armada385", "marvell,armada38x"; + compatible = "marvell,a385-db-ap", "marvell,armada385", "marvell,armada380"; chosen { stdout-path = "serial1:115200n8"; diff --git a/arch/arm/boot/dts/berlin2q.dtsi b/arch/arm/boot/dts/berlin2q.dtsi index 63a48490e2f9..d4dbd28d348c 100644 --- a/arch/arm/boot/dts/berlin2q.dtsi +++ b/arch/arm/boot/dts/berlin2q.dtsi @@ -152,7 +152,7 @@ }; usb_phy2: phy@a2f400 { - compatible = "marvell,berlin2-usb-phy"; + compatible = "marvell,berlin2cd-usb-phy"; reg = <0xa2f400 0x128>; #phy-cells = <0>; resets = <&chip_rst 0x104 14>; @@ -170,7 +170,7 @@ }; usb_phy0: phy@b74000 { - compatible = "marvell,berlin2-usb-phy"; + compatible = "marvell,berlin2cd-usb-phy"; reg = <0xb74000 0x128>; #phy-cells = <0>; resets = <&chip_rst 0x104 12>; @@ -178,7 +178,7 @@ }; usb_phy1: phy@b78000 { - compatible = "marvell,berlin2-usb-phy"; + compatible = "marvell,berlin2cd-usb-phy"; reg = <0xb78000 0x128>; #phy-cells = <0>; resets = <&chip_rst 0x104 13>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index e289c706d27d..8fedddc35999 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1448,6 +1448,7 @@ <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>; ranges; + syscon = <&scm_conf>; status = "disabled"; davinci_mdio: mdio@48485000 { diff --git a/arch/arm/boot/dts/emev2-kzm9d.dts b/arch/arm/boot/dts/emev2-kzm9d.dts index 955c24ee4a8c..8c24975e8f9d 100644 --- a/arch/arm/boot/dts/emev2-kzm9d.dts +++ b/arch/arm/boot/dts/emev2-kzm9d.dts @@ -35,28 +35,28 @@ button@1 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; label = "DSW2-1"; linux,code = <KEY_1>; gpios = <&gpio0 14 GPIO_ACTIVE_HIGH>; }; button@2 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; label = "DSW2-2"; linux,code = <KEY_2>; gpios = <&gpio0 15 GPIO_ACTIVE_HIGH>; }; button@3 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; label = "DSW2-3"; linux,code = <KEY_3>; gpios = <&gpio0 16 GPIO_ACTIVE_HIGH>; }; button@4 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; label = "DSW2-4"; linux,code = <KEY_4>; gpios = <&gpio0 17 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi index ca0e3c15977f..294cfe40388d 100644 --- a/arch/arm/boot/dts/exynos4412.dtsi +++ b/arch/arm/boot/dts/exynos4412.dtsi @@ -98,6 +98,7 @@ opp-hz = /bits/ 64 <800000000>; opp-microvolt = <1000000>; clock-latency-ns = <200000>; + opp-suspend; }; opp07 { opp-hz = /bits/ 64 <900000000>; diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts index 15aea760c1da..c625e71217aa 100644 --- a/arch/arm/boot/dts/exynos5250-smdk5250.dts +++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts @@ -197,6 +197,7 @@ regulator-name = "P1.8V_LDO_OUT10"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; }; ldo11_reg: LDO11 { diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index 8f4d76c5e11c..1b95da79293c 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -915,6 +915,11 @@ }; }; +&pmu_system_controller { + assigned-clocks = <&pmu_system_controller 0>; + assigned-clock-parents = <&clock CLK_FIN_PLL>; +}; + &rtc { status = "okay"; clocks = <&clock CLK_RTC>, <&max77802 MAX77802_CLK_32K_AP>; diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi index df9aee92ecf4..1b3d6c769a3c 100644 --- a/arch/arm/boot/dts/exynos5420.dtsi +++ b/arch/arm/boot/dts/exynos5420.dtsi @@ -1117,7 +1117,7 @@ interrupt-parent = <&combiner>; interrupts = <3 0>; clock-names = "sysmmu", "master"; - clocks = <&clock CLK_SMMU_FIMD1M0>, <&clock CLK_FIMD1>; + clocks = <&clock CLK_SMMU_FIMD1M1>, <&clock CLK_FIMD1>; power-domains = <&disp_pd>; #iommu-cells = <0>; }; diff --git a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi index 79ffdfe712aa..3b43e57845ae 100644 --- a/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi +++ b/arch/arm/boot/dts/exynos5422-odroidxu3-common.dtsi @@ -472,7 +472,6 @@ */ pinctrl-0 = <&pwm0_out &pwm1_out &pwm2_out &pwm3_out>; pinctrl-names = "default"; - samsung,pwm-outputs = <0>; status = "okay"; }; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index 7d5b386b5ae6..8f40c7e549bd 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -878,6 +878,11 @@ }; }; +&pmu_system_controller { + assigned-clocks = <&pmu_system_controller 0>; + assigned-clock-parents = <&clock CLK_FIN_PLL>; +}; + &rtc { status = "okay"; clocks = <&clock CLK_RTC>, <&max77802 MAX77802_CLK_32K_AP>; diff --git a/arch/arm/boot/dts/imx53-qsrb.dts b/arch/arm/boot/dts/imx53-qsrb.dts index 66e47de5e826..96d7eede412e 100644 --- a/arch/arm/boot/dts/imx53-qsrb.dts +++ b/arch/arm/boot/dts/imx53-qsrb.dts @@ -36,7 +36,7 @@ pinctrl-0 = <&pinctrl_pmic>; reg = <0x08>; interrupt-parent = <&gpio5>; - interrupts = <23 0x8>; + interrupts = <23 IRQ_TYPE_LEVEL_HIGH>; regulators { sw1_reg: sw1a { regulator-name = "SW1"; diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index c3e3ca9362fb..cd170376eaca 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -15,6 +15,7 @@ #include <dt-bindings/clock/imx5-clock.h> #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/input/input.h> +#include <dt-bindings/interrupt-controller/irq.h> / { aliases { diff --git a/arch/arm/boot/dts/imx6qdl-rex.dtsi b/arch/arm/boot/dts/imx6qdl-rex.dtsi index 3373fd958e95..a50356243888 100644 --- a/arch/arm/boot/dts/imx6qdl-rex.dtsi +++ b/arch/arm/boot/dts/imx6qdl-rex.dtsi @@ -35,7 +35,6 @@ compatible = "regulator-fixed"; reg = <1>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbh1>; regulator-name = "usbh1_vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; @@ -47,7 +46,6 @@ compatible = "regulator-fixed"; reg = <2>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbotg>; regulator-name = "usb_otg_vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index b738ce0f9d9b..6e444bb873f9 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -588,10 +588,10 @@ status = "disabled"; }; - uart2: serial@30870000 { + uart2: serial@30890000 { compatible = "fsl,imx7d-uart", "fsl,imx6q-uart"; - reg = <0x30870000 0x10000>; + reg = <0x30890000 0x10000>; interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX7D_UART2_ROOT_CLK>, <&clks IMX7D_UART2_ROOT_CLK>; diff --git a/arch/arm/boot/dts/kirkwood-net5big.dts b/arch/arm/boot/dts/kirkwood-net5big.dts index 36155b749d9f..d2d44df9c8c0 100644 --- a/arch/arm/boot/dts/kirkwood-net5big.dts +++ b/arch/arm/boot/dts/kirkwood-net5big.dts @@ -86,6 +86,66 @@ clock-frequency = <32768>; }; }; + + netxbig-leds { + blue-sata2 { + label = "netxbig:blue:sata2"; + mode-addr = <5>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 7 + NETXBIG_LED_SATA 1 + NETXBIG_LED_TIMER1 3>; + bright-addr = <2>; + max-brightness = <7>; + }; + red-sata2 { + label = "netxbig:red:sata2"; + mode-addr = <5>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 2 + NETXBIG_LED_TIMER1 4>; + bright-addr = <2>; + max-brightness = <7>; + }; + blue-sata3 { + label = "netxbig:blue:sata3"; + mode-addr = <6>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 7 + NETXBIG_LED_SATA 1 + NETXBIG_LED_TIMER1 3>; + bright-addr = <2>; + max-brightness = <7>; + }; + red-sata3 { + label = "netxbig:red:sata3"; + mode-addr = <6>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 2 + NETXBIG_LED_TIMER1 4>; + bright-addr = <2>; + max-brightness = <7>; + }; + blue-sata4 { + label = "netxbig:blue:sata4"; + mode-addr = <7>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 7 + NETXBIG_LED_SATA 1 + NETXBIG_LED_TIMER1 3>; + bright-addr = <2>; + max-brightness = <7>; + }; + red-sata4 { + label = "netxbig:red:sata4"; + mode-addr = <7>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 2 + NETXBIG_LED_TIMER1 4>; + bright-addr = <2>; + max-brightness = <7>; + }; + }; }; &mdio { diff --git a/arch/arm/boot/dts/kirkwood-netxbig.dtsi b/arch/arm/boot/dts/kirkwood-netxbig.dtsi index 1508b12147df..62515a8b99b9 100644 --- a/arch/arm/boot/dts/kirkwood-netxbig.dtsi +++ b/arch/arm/boot/dts/kirkwood-netxbig.dtsi @@ -13,6 +13,7 @@ * warranty of any kind, whether express or implied. */ +#include <dt-bindings/leds/leds-netxbig.h> #include "kirkwood.dtsi" #include "kirkwood-6281.dtsi" @@ -105,6 +106,85 @@ gpio = <&gpio0 16 GPIO_ACTIVE_HIGH>; }; }; + + netxbig_gpio_ext: netxbig-gpio-ext { + compatible = "lacie,netxbig-gpio-ext"; + + addr-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH + &gpio1 16 GPIO_ACTIVE_HIGH + &gpio1 17 GPIO_ACTIVE_HIGH>; + data-gpios = <&gpio1 12 GPIO_ACTIVE_HIGH + &gpio1 13 GPIO_ACTIVE_HIGH + &gpio1 14 GPIO_ACTIVE_HIGH>; + enable-gpio = <&gpio0 29 GPIO_ACTIVE_HIGH>; + }; + + netxbig-leds { + compatible = "lacie,netxbig-leds"; + + gpio-ext = <&netxbig_gpio_ext>; + + timers = <NETXBIG_LED_TIMER1 500 500 + NETXBIG_LED_TIMER2 500 1000>; + + blue-power { + label = "netxbig:blue:power"; + mode-addr = <0>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 1 + NETXBIG_LED_TIMER1 3 + NETXBIG_LED_TIMER2 7>; + bright-addr = <1>; + max-brightness = <7>; + }; + red-power { + label = "netxbig:red:power"; + mode-addr = <0>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 2 + NETXBIG_LED_TIMER1 4>; + bright-addr = <1>; + max-brightness = <7>; + }; + blue-sata0 { + label = "netxbig:blue:sata0"; + mode-addr = <3>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 7 + NETXBIG_LED_SATA 1 + NETXBIG_LED_TIMER1 3>; + bright-addr = <2>; + max-brightness = <7>; + }; + red-sata0 { + label = "netxbig:red:sata0"; + mode-addr = <3>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 2 + NETXBIG_LED_TIMER1 4>; + bright-addr = <2>; + max-brightness = <7>; + }; + blue-sata1 { + label = "netxbig:blue:sata1"; + mode-addr = <4>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 7 + NETXBIG_LED_SATA 1 + NETXBIG_LED_TIMER1 3>; + bright-addr = <2>; + max-brightness = <7>; + }; + red-sata1 { + label = "netxbig:red:sata1"; + mode-addr = <4>; + mode-val = <NETXBIG_LED_OFF 0 + NETXBIG_LED_ON 2 + NETXBIG_LED_TIMER1 4>; + bright-addr = <2>; + max-brightness = <7>; + }; + }; }; &mdio { diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts index 91146c318798..5b0430041ec6 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts @@ -12,7 +12,7 @@ / { model = "LogicPD Zoom DM3730 Torpedo Development Kit"; - compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap36xx"; + compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3630", "ti,omap3"; gpio_keys { compatible = "gpio-keys"; diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index 548441384d2a..8c77c87660cd 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -67,7 +67,7 @@ timer@c1109940 { compatible = "amlogic,meson6-timer"; - reg = <0xc1109940 0x14>; + reg = <0xc1109940 0x18>; interrupts = <0 10 1>; }; @@ -80,36 +80,37 @@ wdt: watchdog@c1109900 { compatible = "amlogic,meson6-wdt"; reg = <0xc1109900 0x8>; + interrupts = <0 0 1>; }; uart_AO: serial@c81004c0 { compatible = "amlogic,meson-uart"; - reg = <0xc81004c0 0x14>; + reg = <0xc81004c0 0x18>; interrupts = <0 90 1>; clocks = <&clk81>; status = "disabled"; }; - uart_A: serial@c81084c0 { + uart_A: serial@c11084c0 { compatible = "amlogic,meson-uart"; - reg = <0xc81084c0 0x14>; - interrupts = <0 90 1>; + reg = <0xc11084c0 0x18>; + interrupts = <0 26 1>; clocks = <&clk81>; status = "disabled"; }; - uart_B: serial@c81084dc { + uart_B: serial@c11084dc { compatible = "amlogic,meson-uart"; - reg = <0xc81084dc 0x14>; - interrupts = <0 90 1>; + reg = <0xc11084dc 0x18>; + interrupts = <0 75 1>; clocks = <&clk81>; status = "disabled"; }; - uart_C: serial@c8108700 { + uart_C: serial@c1108700 { compatible = "amlogic,meson-uart"; - reg = <0xc8108700 0x14>; - interrupts = <0 90 1>; + reg = <0xc1108700 0x18>; + interrupts = <0 93 1>; clocks = <&clk81>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/omap3-evm-37xx.dts b/arch/arm/boot/dts/omap3-evm-37xx.dts index 16e8ce350dda..bb339d1648e0 100644 --- a/arch/arm/boot/dts/omap3-evm-37xx.dts +++ b/arch/arm/boot/dts/omap3-evm-37xx.dts @@ -13,7 +13,7 @@ / { model = "TI OMAP37XX EVM (TMDSEVM3730)"; - compatible = "ti,omap3-evm-37xx", "ti,omap36xx"; + compatible = "ti,omap3-evm-37xx", "ti,omap3630", "ti,omap3"; memory { device_type = "memory"; diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index a0b2a79cbfbd..4624d0f2a754 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -1627,6 +1627,7 @@ "mix.0", "mix.1", "dvc.0", "dvc.1", "clk_a", "clk_b", "clk_c", "clk_i"; + power-domains = <&cpg_clocks>; status = "disabled"; diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index 831525dd39a6..1666c8a6b143 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -1677,6 +1677,7 @@ "mix.0", "mix.1", "dvc.0", "dvc.1", "clk_a", "clk_b", "clk_c", "clk_i"; + power-domains = <&cpg_clocks>; status = "disabled"; diff --git a/arch/arm/boot/dts/rk3288-veyron-sdmmc.dtsi b/arch/arm/boot/dts/rk3288-veyron-sdmmc.dtsi index b5334ecff13c..fec076eb7aef 100644 --- a/arch/arm/boot/dts/rk3288-veyron-sdmmc.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron-sdmmc.dtsi @@ -90,7 +90,7 @@ regulators { vccio_sd: LDO_REG4 { regulator-name = "vccio_sd"; - regulator-min-microvolt = <3300000>; + regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-state-mem { regulator-off-in-suspend; @@ -116,7 +116,12 @@ cap-sd-highspeed; card-detect-delay = <200>; cd-gpios = <&gpio7 5 GPIO_ACTIVE_LOW>; + rockchip,default-sample-phase = <90>; num-slots = <1>; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + sd-uhs-sdr104; vmmc-supply = <&vcc33_sd>; vqmmc-supply = <&vccio_sd>; }; diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi index 275c78ccc0f3..860cea0a7613 100644 --- a/arch/arm/boot/dts/rk3288-veyron.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron.dtsi @@ -149,7 +149,9 @@ broken-cd; bus-width = <8>; cap-mmc-highspeed; + rockchip,default-sample-phase = <158>; disable-wp; + mmc-hs200-1_8v; mmc-pwrseq = <&emmc_pwrseq>; non-removable; num-slots = <1>; @@ -355,6 +357,10 @@ num-slots = <1>; pinctrl-names = "default"; pinctrl-0 = <&sdio0_clk &sdio0_cmd &sdio0_bus4>; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + sd-uhs-sdr104; vmmc-supply = <&vcc33_sys>; vqmmc-supply = <&vcc18_wl>; }; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 906e938fb6bf..4e7c6b7392af 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -222,8 +222,9 @@ sdmmc: dwmmc@ff0c0000 { compatible = "rockchip,rk3288-dw-mshc"; clock-freq-min-max = <400000 150000000>; - clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>; - clock-names = "biu", "ciu"; + clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, + <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>; reg = <0xff0c0000 0x4000>; @@ -233,8 +234,9 @@ sdio0: dwmmc@ff0d0000 { compatible = "rockchip,rk3288-dw-mshc"; clock-freq-min-max = <400000 150000000>; - clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>; - clock-names = "biu", "ciu"; + clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>, + <&cru SCLK_SDIO0_DRV>, <&cru SCLK_SDIO0_SAMPLE>; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; reg = <0xff0d0000 0x4000>; @@ -244,8 +246,9 @@ sdio1: dwmmc@ff0e0000 { compatible = "rockchip,rk3288-dw-mshc"; clock-freq-min-max = <400000 150000000>; - clocks = <&cru HCLK_SDIO1>, <&cru SCLK_SDIO1>; - clock-names = "biu", "ciu"; + clocks = <&cru HCLK_SDIO1>, <&cru SCLK_SDIO1>, + <&cru SCLK_SDIO1_DRV>, <&cru SCLK_SDIO1_SAMPLE>; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>; reg = <0xff0e0000 0x4000>; @@ -255,8 +258,9 @@ emmc: dwmmc@ff0f0000 { compatible = "rockchip,rk3288-dw-mshc"; clock-freq-min-max = <400000 150000000>; - clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>; - clock-names = "biu", "ciu"; + clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, + <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>; reg = <0xff0f0000 0x4000>; diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 034cd48ae28b..cc05cde0f9a4 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -921,6 +921,20 @@ clocks = <&twi1_clk>; status = "disabled"; }; + + pioA: pinctrl@fc038000 { + compatible = "atmel,sama5d2-pinctrl"; + reg = <0xfc038000 0x600>; + interrupts = <18 IRQ_TYPE_LEVEL_HIGH 7>, + <68 IRQ_TYPE_LEVEL_HIGH 7>, + <69 IRQ_TYPE_LEVEL_HIGH 7>, + <70 IRQ_TYPE_LEVEL_HIGH 7>; + interrupt-controller; + #interrupt-cells = <2>; + gpio-controller; + #gpio-cells = <2>; + clocks = <&pioA_clk>; + }; }; }; }; diff --git a/arch/arm/boot/dts/ste-hrefv60plus.dtsi b/arch/arm/boot/dts/ste-hrefv60plus.dtsi index 810cda743b6d..9c2387b34d0c 100644 --- a/arch/arm/boot/dts/ste-hrefv60plus.dtsi +++ b/arch/arm/boot/dts/ste-hrefv60plus.dtsi @@ -56,7 +56,7 @@ /* VMMCI level-shifter enable */ default_hrefv60_cfg2 { pins = "GPIO169_D22"; - ste,config = <&gpio_out_lo>; + ste,config = <&gpio_out_hi>; }; /* VMMCI level-shifter voltage select */ default_hrefv60_cfg3 { diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index 32a5ccb14e7e..e80e42163883 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -47,35 +47,35 @@ button@1 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <2>; label = "userpb"; gpios = <&gpio1 0 0x4>; }; button@2 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <3>; label = "extkb1"; gpios = <&gpio4 23 0x4>; }; button@3 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <4>; label = "extkb2"; gpios = <&gpio4 24 0x4>; }; button@4 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <5>; label = "extkb3"; gpios = <&gpio5 1 0x4>; }; button@5 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <6>; label = "extkb4"; gpios = <&gpio5 2 0x4>; diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index ae0527754000..0c24fcb03577 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -610,5 +610,19 @@ clocks = <&clk_sysin>; st,pwm-num-chan = <4>; }; + + rng10: rng@08a89000 { + compatible = "st,rng"; + reg = <0x08a89000 0x1000>; + clocks = <&clk_sysin>; + status = "okay"; + }; + + rng11: rng@08a8a000 { + compatible = "st,rng"; + reg = <0x08a8a000 0x1000>; + clocks = <&clk_sysin>; + status = "okay"; + }; }; }; diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi index d78a4815da8f..5e1e234e8c0a 100644 --- a/arch/arm/boot/dts/stm32f429.dtsi +++ b/arch/arm/boot/dts/stm32f429.dtsi @@ -174,6 +174,13 @@ reg = <0x40023800 0x400>; clocks = <&clk_hse>; }; + + rng: rng@50060800 { + compatible = "st,stm32-rng"; + reg = <0x50060800 0x400>; + interrupts = <80>; + clocks = <&rcc 0 38>; + }; }; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 2bebaa286f9a..391230c3dc93 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -107,7 +107,7 @@ 720000 1200000 528000 1100000 312000 1000000 - 144000 900000 + 144000 1000000 >; #cooling-cells = <2>; cooling-min-level = <0>; diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi index 9d4f86e9c50a..d845bd1448b5 100644 --- a/arch/arm/boot/dts/tegra114.dtsi +++ b/arch/arm/boot/dts/tegra114.dtsi @@ -234,7 +234,9 @@ gpio-controller; #interrupt-cells = <2>; interrupt-controller; + /* gpio-ranges = <&pinmux 0 0 246>; + */ }; apbmisc@70000800 { diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 1e204a6de12c..819e2ae2cabe 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -258,7 +258,9 @@ gpio-controller; #interrupt-cells = <2>; interrupt-controller; + /* gpio-ranges = <&pinmux 0 0 251>; + */ }; apbdma: dma@0,60020000 { diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index e058709e6d98..969b828505ae 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -244,7 +244,9 @@ gpio-controller; #interrupt-cells = <2>; interrupt-controller; + /* gpio-ranges = <&pinmux 0 0 224>; + */ }; apbmisc@70000800 { diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index fe04fb5e155f..c6938ad1b543 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -349,7 +349,9 @@ gpio-controller; #interrupt-cells = <2>; interrupt-controller; + /* gpio-ranges = <&pinmux 0 0 248>; + */ }; apbmisc@70000800 { diff --git a/arch/arm/boot/dts/twl4030.dtsi b/arch/arm/boot/dts/twl4030.dtsi index 36ae9160b558..482b7aa37808 100644 --- a/arch/arm/boot/dts/twl4030.dtsi +++ b/arch/arm/boot/dts/twl4030.dtsi @@ -22,6 +22,8 @@ charger: bci { compatible = "ti,twl4030-bci"; interrupts = <9>, <2>; + io-channels = <&twl4030_madc 11>; + io-channel-name = "vac"; bci3v1-supply = <&vusb3v1>; }; diff --git a/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts b/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts index 33963acd7e8f..f80f772d99fb 100644 --- a/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts +++ b/arch/arm/boot/dts/uniphier-ph1-ld6b-ref.dts @@ -85,7 +85,7 @@ }; ðsc { - interrupts = <0 50 4>; + interrupts = <0 52 4>; }; &serial0 { diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi index dc0457e40775..1a5220e05109 100644 --- a/arch/arm/boot/dts/zynq-7000.dtsi +++ b/arch/arm/boot/dts/zynq-7000.dtsi @@ -294,6 +294,11 @@ devcfg: devcfg@f8007000 { compatible = "xlnx,zynq-devcfg-1.0"; reg = <0xf8007000 0x100>; + interrupt-parent = <&intc>; + interrupts = <0 8 4>; + clocks = <&clkc 12>; + clock-names = "ref_clk"; + syscon = <&slcr>; }; global_timer: timer@f8f00200 { diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 1ff2bfa2e183..13ba48c4b03b 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -166,7 +166,6 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_S3C=y CONFIG_MMC_SDHCI_S3C_DMA=y CONFIG_MMC_DW=y -CONFIG_MMC_DW_IDMAC=y CONFIG_MMC_DW_EXYNOS=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_MAX77686=y diff --git a/arch/arm/configs/hisi_defconfig b/arch/arm/configs/hisi_defconfig index 5997dbc69822..b2e340b272ee 100644 --- a/arch/arm/configs/hisi_defconfig +++ b/arch/arm/configs/hisi_defconfig @@ -69,7 +69,6 @@ CONFIG_NOP_USB_XCEIV=y CONFIG_MMC=y CONFIG_RTC_CLASS=y CONFIG_MMC_DW=y -CONFIG_MMC_DW_IDMAC=y CONFIG_MMC_DW_PLTFM=y CONFIG_RTC_DRV_PL031=y CONFIG_DMADEVICES=y diff --git a/arch/arm/configs/lpc18xx_defconfig b/arch/arm/configs/lpc18xx_defconfig index 1c47f86c3970..b7e8cdab51f9 100644 --- a/arch/arm/configs/lpc18xx_defconfig +++ b/arch/arm/configs/lpc18xx_defconfig @@ -119,7 +119,6 @@ CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_ROOT_HUB_TT=y CONFIG_MMC=y CONFIG_MMC_DW=y -CONFIG_MMC_DW_IDMAC=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_PCA9532=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 3f15a5cae167..c5e1943e5427 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -246,7 +246,7 @@ CONFIG_GPIO_TWL4030=y CONFIG_GPIO_PALMAS=y CONFIG_W1=m CONFIG_HDQ_MASTER_OMAP=m -CONFIG_BATTERY_BQ27x00=m +CONFIG_BATTERY_BQ27XXX=m CONFIG_CHARGER_ISP1704=m CONFIG_CHARGER_TWL4030=m CONFIG_CHARGER_BQ2415X=m diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h new file mode 100644 index 000000000000..6607d976e07d --- /dev/null +++ b/arch/arm/include/asm/arch_gicv3.h @@ -0,0 +1,188 @@ +/* + * arch/arm/include/asm/arch_gicv3.h + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_ARCH_GICV3_H +#define __ASM_ARCH_GICV3_H + +#ifndef __ASSEMBLY__ + +#include <linux/io.h> + +#define __ACCESS_CP15(CRn, Op1, CRm, Op2) p15, Op1, %0, CRn, CRm, Op2 +#define __ACCESS_CP15_64(Op1, CRm) p15, Op1, %Q0, %R0, CRm + +#define ICC_EOIR1 __ACCESS_CP15(c12, 0, c12, 1) +#define ICC_DIR __ACCESS_CP15(c12, 0, c11, 1) +#define ICC_IAR1 __ACCESS_CP15(c12, 0, c12, 0) +#define ICC_SGI1R __ACCESS_CP15_64(0, c12) +#define ICC_PMR __ACCESS_CP15(c4, 0, c6, 0) +#define ICC_CTLR __ACCESS_CP15(c12, 0, c12, 4) +#define ICC_SRE __ACCESS_CP15(c12, 0, c12, 5) +#define ICC_IGRPEN1 __ACCESS_CP15(c12, 0, c12, 7) + +#define ICC_HSRE __ACCESS_CP15(c12, 4, c9, 5) + +#define ICH_VSEIR __ACCESS_CP15(c12, 4, c9, 4) +#define ICH_HCR __ACCESS_CP15(c12, 4, c11, 0) +#define ICH_VTR __ACCESS_CP15(c12, 4, c11, 1) +#define ICH_MISR __ACCESS_CP15(c12, 4, c11, 2) +#define ICH_EISR __ACCESS_CP15(c12, 4, c11, 3) +#define ICH_ELSR __ACCESS_CP15(c12, 4, c11, 5) +#define ICH_VMCR __ACCESS_CP15(c12, 4, c11, 7) + +#define __LR0(x) __ACCESS_CP15(c12, 4, c12, x) +#define __LR8(x) __ACCESS_CP15(c12, 4, c13, x) + +#define ICH_LR0 __LR0(0) +#define ICH_LR1 __LR0(1) +#define ICH_LR2 __LR0(2) +#define ICH_LR3 __LR0(3) +#define ICH_LR4 __LR0(4) +#define ICH_LR5 __LR0(5) +#define ICH_LR6 __LR0(6) +#define ICH_LR7 __LR0(7) +#define ICH_LR8 __LR8(0) +#define ICH_LR9 __LR8(1) +#define ICH_LR10 __LR8(2) +#define ICH_LR11 __LR8(3) +#define ICH_LR12 __LR8(4) +#define ICH_LR13 __LR8(5) +#define ICH_LR14 __LR8(6) +#define ICH_LR15 __LR8(7) + +/* LR top half */ +#define __LRC0(x) __ACCESS_CP15(c12, 4, c14, x) +#define __LRC8(x) __ACCESS_CP15(c12, 4, c15, x) + +#define ICH_LRC0 __LRC0(0) +#define ICH_LRC1 __LRC0(1) +#define ICH_LRC2 __LRC0(2) +#define ICH_LRC3 __LRC0(3) +#define ICH_LRC4 __LRC0(4) +#define ICH_LRC5 __LRC0(5) +#define ICH_LRC6 __LRC0(6) +#define ICH_LRC7 __LRC0(7) +#define ICH_LRC8 __LRC8(0) +#define ICH_LRC9 __LRC8(1) +#define ICH_LRC10 __LRC8(2) +#define ICH_LRC11 __LRC8(3) +#define ICH_LRC12 __LRC8(4) +#define ICH_LRC13 __LRC8(5) +#define ICH_LRC14 __LRC8(6) +#define ICH_LRC15 __LRC8(7) + +#define __AP0Rx(x) __ACCESS_CP15(c12, 4, c8, x) +#define ICH_AP0R0 __AP0Rx(0) +#define ICH_AP0R1 __AP0Rx(1) +#define ICH_AP0R2 __AP0Rx(2) +#define ICH_AP0R3 __AP0Rx(3) + +#define __AP1Rx(x) __ACCESS_CP15(c12, 4, c9, x) +#define ICH_AP1R0 __AP1Rx(0) +#define ICH_AP1R1 __AP1Rx(1) +#define ICH_AP1R2 __AP1Rx(2) +#define ICH_AP1R3 __AP1Rx(3) + +/* Low-level accessors */ + +static inline void gic_write_eoir(u32 irq) +{ + asm volatile("mcr " __stringify(ICC_EOIR1) : : "r" (irq)); + isb(); +} + +static inline void gic_write_dir(u32 val) +{ + asm volatile("mcr " __stringify(ICC_DIR) : : "r" (val)); + isb(); +} + +static inline u32 gic_read_iar(void) +{ + u32 irqstat; + + asm volatile("mrc " __stringify(ICC_IAR1) : "=r" (irqstat)); + return irqstat; +} + +static inline void gic_write_pmr(u32 val) +{ + asm volatile("mcr " __stringify(ICC_PMR) : : "r" (val)); +} + +static inline void gic_write_ctlr(u32 val) +{ + asm volatile("mcr " __stringify(ICC_CTLR) : : "r" (val)); + isb(); +} + +static inline void gic_write_grpen1(u32 val) +{ + asm volatile("mcr " __stringify(ICC_IGRPEN1) : : "r" (val)); + isb(); +} + +static inline void gic_write_sgi1r(u64 val) +{ + asm volatile("mcrr " __stringify(ICC_SGI1R) : : "r" (val)); +} + +static inline u32 gic_read_sre(void) +{ + u32 val; + + asm volatile("mrc " __stringify(ICC_SRE) : "=r" (val)); + return val; +} + +static inline void gic_write_sre(u32 val) +{ + asm volatile("mcr " __stringify(ICC_SRE) : : "r" (val)); + isb(); +} + +/* + * Even in 32bit systems that use LPAE, there is no guarantee that the I/O + * interface provides true 64bit atomic accesses, so using strd/ldrd doesn't + * make much sense. + * Moreover, 64bit I/O emulation is extremely difficult to implement on + * AArch32, since the syndrome register doesn't provide any information for + * them. + * Consequently, the following IO helpers use 32bit accesses. + * + * There are only two registers that need 64bit accesses in this driver: + * - GICD_IROUTERn, contain the affinity values associated to each interrupt. + * The upper-word (aff3) will always be 0, so there is no need for a lock. + * - GICR_TYPER is an ID register and doesn't need atomicity. + */ +static inline void gic_write_irouter(u64 val, volatile void __iomem *addr) +{ + writel_relaxed((u32)val, addr); + writel_relaxed((u32)(val >> 32), addr + 4); +} + +static inline u64 gic_read_typer(const volatile void __iomem *addr) +{ + u64 val; + + val = readl_relaxed(addr); + val |= (u64)readl_relaxed(addr + 4) << 32; + return val; +} + +#endif /* !__ASSEMBLY__ */ +#endif /* !__ASM_ARCH_GICV3_H */ diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index fe3ef397f5a4..9e10c4567eb4 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -27,8 +27,8 @@ * strex/ldrex monitor on some implementations. The reason we can use it for * atomic_set() is the clrex or dummy strex done on every exception return. */ -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic_set(v,i) (((v)->counter) = (i)) +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic_set(v,i) WRITE_ONCE(((v)->counter), (i)) #if __LINUX_ARM_ARCH__ >= 6 @@ -210,8 +210,8 @@ ATOMIC_OP(xor, ^=, eor) #define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) #define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) -#define atomic_inc_return(v) (atomic_add_return(1, v)) -#define atomic_dec_return(v) (atomic_sub_return(1, v)) +#define atomic_inc_return_relaxed(v) (atomic_add_return_relaxed(1, v)) +#define atomic_dec_return_relaxed(v) (atomic_sub_return_relaxed(1, v)) #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) @@ -442,11 +442,11 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) #define atomic64_inc(v) atomic64_add(1LL, (v)) -#define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) +#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1LL, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_dec(v) atomic64_sub(1LL, (v)) -#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) +#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1LL, (v)) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index 916a2744d5c6..97882f9bad12 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h @@ -39,6 +39,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size switch (size) { #if __LINUX_ARM_ARCH__ >= 6 +#ifndef CONFIG_CPU_V6 /* MIN ARCH >= V6K */ case 1: asm volatile("@ __xchg1\n" "1: ldrexb %0, [%3]\n" @@ -49,6 +50,17 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size : "r" (x), "r" (ptr) : "memory", "cc"); break; + case 2: + asm volatile("@ __xchg2\n" + "1: ldrexh %0, [%3]\n" + " strexh %1, %2, [%3]\n" + " teq %1, #0\n" + " bne 1b" + : "=&r" (ret), "=&r" (tmp) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; +#endif case 4: asm volatile("@ __xchg4\n" "1: ldrex %0, [%3]\n" diff --git a/arch/arm/include/asm/irqflags.h b/arch/arm/include/asm/irqflags.h index 43908146a5cf..e6b70d9d084e 100644 --- a/arch/arm/include/asm/irqflags.h +++ b/arch/arm/include/asm/irqflags.h @@ -54,6 +54,14 @@ static inline void arch_local_irq_disable(void) #define local_fiq_enable() __asm__("cpsie f @ __stf" : : : "memory", "cc") #define local_fiq_disable() __asm__("cpsid f @ __clf" : : : "memory", "cc") + +#ifndef CONFIG_CPU_V7M +#define local_abt_enable() __asm__("cpsie a @ __sta" : : : "memory", "cc") +#define local_abt_disable() __asm__("cpsid a @ __cla" : : : "memory", "cc") +#else +#define local_abt_enable() do { } while (0) +#define local_abt_disable() do { } while (0) +#endif #else /* @@ -136,6 +144,8 @@ static inline void arch_local_irq_disable(void) : "memory", "cc"); \ }) +#define local_abt_enable() do { } while (0) +#define local_abt_disable() do { } while (0) #endif /* diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index d995821f1698..dc641ddf0784 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -218,4 +218,24 @@ #define HSR_DABT_CM (1U << 8) #define HSR_DABT_EA (1U << 9) +#define kvm_arm_exception_type \ + {0, "RESET" }, \ + {1, "UNDEFINED" }, \ + {2, "SOFTWARE" }, \ + {3, "PREF_ABORT" }, \ + {4, "DATA_ABORT" }, \ + {5, "IRQ" }, \ + {6, "FIQ" }, \ + {7, "HVC" } + +#define HSRECN(x) { HSR_EC_##x, #x } + +#define kvm_arm_exception_class \ + HSRECN(UNKNOWN), HSRECN(WFI), HSRECN(CP15_32), HSRECN(CP15_64), \ + HSRECN(CP14_MR), HSRECN(CP14_LS), HSRECN(CP_0_13), HSRECN(CP10_ID), \ + HSRECN(JAZELLE), HSRECN(BXJ), HSRECN(CP14_64), HSRECN(SVC_HYP), \ + HSRECN(HVC), HSRECN(SMC), HSRECN(IABT), HSRECN(IABT_HYP), \ + HSRECN(DABT), HSRECN(DABT_HYP) + + #endif /* __ARM_KVM_ARM_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index c4072d9f32c7..6692982c9b57 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -126,7 +126,10 @@ struct kvm_vcpu_arch { * here. */ - /* Don't run the guest on this vcpu */ + /* vcpu power-off state */ + bool power_off; + + /* Don't run the guest (internal implementation need) */ bool pause; /* IO related fields */ diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index cb3a40717edd..5c1ad11aa392 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -47,7 +47,7 @@ struct machine_desc { unsigned l2c_aux_val; /* L2 cache aux value */ unsigned l2c_aux_mask; /* L2 cache aux mask */ void (*l2c_write_sec)(unsigned long, unsigned); - struct smp_operations *smp; /* SMP operations */ + const struct smp_operations *smp; /* SMP operations */ bool (*smp_init)(void); void (*fixup)(struct tag *, char **); void (*dt_fixup)(void); diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 98d58bb04ac5..c79b57bf71c4 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -76,10 +76,12 @@ */ #define XIP_VIRT_ADDR(physaddr) (MODULES_VADDR + ((physaddr) & 0x000fffff)) +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) /* * Allow 16MB-aligned ioremap pages */ #define IOREMAP_MAX_ORDER 24 +#endif #else /* CONFIG_MMU */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index f40354198bad..348caabb7625 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -43,7 +43,7 @@ */ #define VMALLOC_OFFSET (8*1024*1024) #define VMALLOC_START (((unsigned long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) -#define VMALLOC_END 0xff000000UL +#define VMALLOC_END 0xff800000UL #define LIBRARY_TEXT_START 0x0c000000 diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index ef356659b4f4..3d6dc8b460e4 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -112,7 +112,7 @@ struct smp_operations { struct of_cpu_method { const char *method; - struct smp_operations *ops; + const struct smp_operations *ops; }; #define CPU_METHOD_OF_DECLARE(name, _method, _ops) \ @@ -122,6 +122,6 @@ struct of_cpu_method { /* * set platform specific SMP operations */ -extern void smp_set_ops(struct smp_operations *); +extern void smp_set_ops(const struct smp_operations *); #endif /* ifndef __ASM_ARM_SMP_H */ diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 7cba573c2cc9..7b84657fba35 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -21,13 +21,6 @@ */ #define __NR_syscalls (392) -/* - * *NOTE*: This is a ghost syscall private to the kernel. Only the - * __kuser_cmpxchg code in entry-armv.S should be aware of its - * existence. Don't ever use this from user code. - */ -#define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0) - #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE diff --git a/arch/arm/include/asm/xen/hypervisor.h b/arch/arm/include/asm/xen/hypervisor.h index 04ff8e7b37df..95251512e2c4 100644 --- a/arch/arm/include/asm/xen/hypervisor.h +++ b/arch/arm/include/asm/xen/hypervisor.h @@ -26,4 +26,14 @@ void __init xen_early_init(void); static inline void xen_early_init(void) { return; } #endif +#ifdef CONFIG_HOTPLUG_CPU +static inline void xen_arch_register_cpu(int num) +{ +} + +static inline void xen_arch_unregister_cpu(int num) +{ +} +#endif + #endif /* _ASM_ARM_XEN_HYPERVISOR_H */ diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h index efd562412850..0375c8caa061 100644 --- a/arch/arm/include/asm/xen/page-coherent.h +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -35,11 +35,15 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - bool local = PFN_DOWN(dev_addr) == page_to_pfn(page); - /* Dom0 is mapped 1:1, so if pfn == mfn the page is local otherwise - * is a foreign page grant-mapped in dom0. If the page is local we - * can safely call the native dma_ops function, otherwise we call - * the xen specific function. */ + bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page); + /* + * Dom0 is mapped 1:1, while the Linux page can be spanned accross + * multiple Xen page, it's not possible to have a mix of local and + * foreign Xen page. So if the first xen_pfn == mfn the page is local + * otherwise it's a foreign page grant-mapped in dom0. If the page is + * local we can safely call the native dma_ops function, otherwise we + * call the xen specific function. + */ if (local) __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs); else @@ -51,10 +55,14 @@ static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle, struct dma_attrs *attrs) { unsigned long pfn = PFN_DOWN(handle); - /* Dom0 is mapped 1:1, so calling pfn_valid on a foreign mfn will - * always return false. If the page is local we can safely call the - * native dma_ops function, otherwise we call the xen specific - * function. */ + /* + * Dom0 is mapped 1:1, while the Linux page can be spanned accross + * multiple Xen page, it's not possible to have a mix of local and + * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a + * foreign mfn will always return false. If the page is local we can + * safely call the native dma_ops function, otherwise we call the xen + * specific function. + */ if (pfn_valid(pfn)) { if (__generic_dma_ops(hwdev)->unmap_page) __generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs); diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 127956353b00..415dbc6e43fd 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -13,9 +13,6 @@ #define phys_to_machine_mapping_valid(pfn) (1) -#define pte_mfn pte_pfn -#define mfn_pte pfn_pte - /* Xen machine address */ typedef struct xmaddr { phys_addr_t maddr; @@ -31,6 +28,17 @@ typedef struct xpaddr { #define INVALID_P2M_ENTRY (~0UL) +/* + * The pseudo-physical frame (pfn) used in all the helpers is always based + * on Xen page granularity (i.e 4KB). + * + * A Linux page may be split across multiple non-contiguous Xen page so we + * have to keep track with frame based on 4KB page granularity. + * + * PV drivers should never make a direct usage of those helpers (particularly + * pfn_to_gfn and gfn_to_pfn). + */ + unsigned long __pfn_to_mfn(unsigned long pfn); extern struct rb_root phys_to_mach; @@ -67,8 +75,8 @@ static inline unsigned long bfn_to_pfn(unsigned long bfn) #define bfn_to_local_pfn(bfn) bfn_to_pfn(bfn) /* VIRT <-> GUEST conversion */ -#define virt_to_gfn(v) (pfn_to_gfn(virt_to_pfn(v))) -#define gfn_to_virt(m) (__va(gfn_to_pfn(m) << PAGE_SHIFT)) +#define virt_to_gfn(v) (pfn_to_gfn(virt_to_phys(v) >> XEN_PAGE_SHIFT)) +#define gfn_to_virt(m) (__va(gfn_to_pfn(m) << XEN_PAGE_SHIFT)) /* Only used in PV code. But ARM guests are always HVM. */ static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr) @@ -107,8 +115,8 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) #define xen_unmap(cookie) iounmap((cookie)) bool xen_arch_need_swiotlb(struct device *dev, - unsigned long pfn, - unsigned long bfn); + phys_addr_t phys, + dma_addr_t dev_addr); unsigned long xen_get_swiotlb_free_pages(unsigned int order); #endif /* _ASM_ARM_XEN_PAGE_H */ diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 11c54de9f8cf..65addcbf5b30 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -101,6 +101,7 @@ void __init arm_dt_init_cpu_maps(void) if (of_property_read_u32(cpu, "reg", &hwid)) { pr_debug(" * %s missing reg property\n", cpu->full_name); + of_node_put(cpu); return; } @@ -108,8 +109,10 @@ void __init arm_dt_init_cpu_maps(void) * 8 MSBs must be set to 0 in the DT since the reg property * defines the MPIDR[23:0]. */ - if (hwid & ~MPIDR_HWID_BITMASK) + if (hwid & ~MPIDR_HWID_BITMASK) { + of_node_put(cpu); return; + } /* * Duplicate MPIDRs are a recipe for disaster. @@ -119,9 +122,11 @@ void __init arm_dt_init_cpu_maps(void) * to avoid matching valid MPIDR[23:0] values. */ for (j = 0; j < cpuidx; j++) - if (WARN(tmp_map[j] == hwid, "Duplicate /cpu reg " - "properties in the DT\n")) + if (WARN(tmp_map[j] == hwid, + "Duplicate /cpu reg properties in the DT\n")) { + of_node_put(cpu); return; + } /* * Build a stashed array of MPIDR values. Numbering scheme @@ -143,6 +148,7 @@ void __init arm_dt_init_cpu_maps(void) "max cores %u, capping them\n", cpuidx, nr_cpu_ids)) { cpuidx = nr_cpu_ids; + of_node_put(cpu); break; } diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 3e1c26eb32b4..3ce377f7251f 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -427,8 +427,7 @@ ENDPROC(__fiq_abt) .endm .macro kuser_cmpxchg_check -#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS) && \ - !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) +#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS) #ifndef CONFIG_MMU #warning "NPTL on non MMU needs fixing" #else @@ -859,20 +858,7 @@ __kuser_helper_start: __kuser_cmpxchg64: @ 0xffff0f60 -#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) - - /* - * Poor you. No fast solution possible... - * The kernel itself must perform the operation. - * A special ghost syscall is used for that (see traps.c). - */ - stmfd sp!, {r7, lr} - ldr r7, 1f @ it's 20 bits - swi __ARM_NR_cmpxchg64 - ldmfd sp!, {r7, pc} -1: .word __ARM_NR_cmpxchg64 - -#elif defined(CONFIG_CPU_32v6K) +#if defined(CONFIG_CPU_32v6K) stmfd sp!, {r4, r5, r6, r7} ldrd r4, r5, [r0] @ load old val @@ -948,20 +934,7 @@ __kuser_memory_barrier: @ 0xffff0fa0 __kuser_cmpxchg: @ 0xffff0fc0 -#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) - - /* - * Poor you. No fast solution possible... - * The kernel itself must perform the operation. - * A special ghost syscall is used for that (see traps.c). - */ - stmfd sp!, {r7, lr} - ldr r7, 1f @ it's 20 bits - swi __ARM_NR_cmpxchg - ldmfd sp!, {r7, pc} -1: .word __ARM_NR_cmpxchg - -#elif __LINUX_ARM_ARCH__ < 6 +#if __LINUX_ARM_ARCH__ < 6 #ifdef CONFIG_MMU diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index dc7d0a95bd36..6284779d64ee 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -35,7 +35,6 @@ #include <asm/cputype.h> #include <asm/current.h> #include <asm/hw_breakpoint.h> -#include <asm/kdebug.h> #include <asm/traps.h> /* Breakpoint currently in use for each BRP. */ diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index fd9eefce0a7b..9232caee7060 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -74,7 +74,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) { - struct pt_regs *thread_regs; + struct thread_info *ti; int regno; /* Just making sure... */ @@ -86,24 +86,17 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) gdb_regs[regno] = 0; /* Otherwise, we have only some registers from switch_to() */ - thread_regs = task_pt_regs(task); - gdb_regs[_R0] = thread_regs->ARM_r0; - gdb_regs[_R1] = thread_regs->ARM_r1; - gdb_regs[_R2] = thread_regs->ARM_r2; - gdb_regs[_R3] = thread_regs->ARM_r3; - gdb_regs[_R4] = thread_regs->ARM_r4; - gdb_regs[_R5] = thread_regs->ARM_r5; - gdb_regs[_R6] = thread_regs->ARM_r6; - gdb_regs[_R7] = thread_regs->ARM_r7; - gdb_regs[_R8] = thread_regs->ARM_r8; - gdb_regs[_R9] = thread_regs->ARM_r9; - gdb_regs[_R10] = thread_regs->ARM_r10; - gdb_regs[_FP] = thread_regs->ARM_fp; - gdb_regs[_IP] = thread_regs->ARM_ip; - gdb_regs[_SPT] = thread_regs->ARM_sp; - gdb_regs[_LR] = thread_regs->ARM_lr; - gdb_regs[_PC] = thread_regs->ARM_pc; - gdb_regs[_CPSR] = thread_regs->ARM_cpsr; + ti = task_thread_info(task); + gdb_regs[_R4] = ti->cpu_context.r4; + gdb_regs[_R5] = ti->cpu_context.r5; + gdb_regs[_R6] = ti->cpu_context.r6; + gdb_regs[_R7] = ti->cpu_context.r7; + gdb_regs[_R8] = ti->cpu_context.r8; + gdb_regs[_R9] = ti->cpu_context.r9; + gdb_regs[_R10] = ti->cpu_context.sl; + gdb_regs[_FP] = ti->cpu_context.fp; + gdb_regs[_SPT] = ti->cpu_context.sp; + gdb_regs[_PC] = ti->cpu_context.pc; } void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 48185a773852..b26361355dae 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -80,7 +80,7 @@ static DECLARE_COMPLETION(cpu_running); static struct smp_operations smp_ops; -void __init smp_set_ops(struct smp_operations *ops) +void __init smp_set_ops(const struct smp_operations *ops) { if (ops) smp_ops = *ops; @@ -400,6 +400,7 @@ asmlinkage void secondary_start_kernel(void) local_irq_enable(); local_fiq_enable(); + local_abt_enable(); /* * OK, it's off to the idle thread for us @@ -748,6 +749,15 @@ core_initcall(register_cpufreq_notifier); static void raise_nmi(cpumask_t *mask) { + /* + * Generate the backtrace directly if we are running in a calling + * context that is not preemptible by the backtrace IPI. Note + * that nmi_cpu_backtrace() automatically removes the current cpu + * from mask. + */ + if (cpumask_test_cpu(smp_processor_id(), mask) && irqs_disabled()) + nmi_cpu_backtrace(NULL); + smp_cross_call(mask, IPI_CPU_BACKTRACE); } diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index e9035cda1485..1bfa7a7f5533 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -23,7 +23,6 @@ #include <linux/of_irq.h> #include <linux/of_address.h> -#include <asm/smp_plat.h> #include <asm/smp_twd.h> /* set up by the platform code */ @@ -34,6 +33,8 @@ static unsigned long twd_timer_rate; static DEFINE_PER_CPU(bool, percpu_setup_called); static struct clock_event_device __percpu *twd_evt; +static unsigned int twd_features = + CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; static int twd_ppi; static int twd_shutdown(struct clock_event_device *clk) @@ -294,8 +295,7 @@ static void twd_timer_setup(void) writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); clk->name = "local_timer"; - clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | - CLOCK_EVT_FEAT_C3STOP; + clk->features = twd_features; clk->rating = 350; clk->set_state_shutdown = twd_shutdown; clk->set_state_periodic = twd_set_periodic; @@ -350,6 +350,8 @@ static int __init twd_local_timer_common_register(struct device_node *np) goto out_irq; twd_get_clock(np); + if (!of_property_read_bool(np, "always-on")) + twd_features |= CLOCK_EVT_FEAT_C3STOP; /* * Immediately configure the timer on the boot CPU, unless we need @@ -392,9 +394,6 @@ static void __init twd_local_timer_of_register(struct device_node *np) { int err; - if (!is_smp() || !setup_max_cpus) - return; - twd_ppi = irq_of_parse_and_map(np, 0); if (!twd_ppi) { err = -EINVAL; diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index a66e37e211a9..97b22fa7cb3a 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -120,6 +120,6 @@ void __init time_init(void) #ifdef CONFIG_COMMON_CLK of_clk_init(NULL); #endif - clocksource_of_init(); + clocksource_probe(); } } diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 969f9d9e665f..bc698383e822 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -625,58 +625,6 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) set_tls(regs->ARM_r0); return 0; -#ifdef CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG - /* - * Atomically store r1 in *r2 if *r2 is equal to r0 for user space. - * Return zero in r0 if *MEM was changed or non-zero if no exchange - * happened. Also set the user C flag accordingly. - * If access permissions have to be fixed up then non-zero is - * returned and the operation has to be re-attempted. - * - * *NOTE*: This is a ghost syscall private to the kernel. Only the - * __kuser_cmpxchg code in entry-armv.S should be aware of its - * existence. Don't ever use this from user code. - */ - case NR(cmpxchg): - for (;;) { - extern void do_DataAbort(unsigned long addr, unsigned int fsr, - struct pt_regs *regs); - unsigned long val; - unsigned long addr = regs->ARM_r2; - struct mm_struct *mm = current->mm; - pgd_t *pgd; pmd_t *pmd; pte_t *pte; - spinlock_t *ptl; - - regs->ARM_cpsr &= ~PSR_C_BIT; - down_read(&mm->mmap_sem); - pgd = pgd_offset(mm, addr); - if (!pgd_present(*pgd)) - goto bad_access; - pmd = pmd_offset(pgd, addr); - if (!pmd_present(*pmd)) - goto bad_access; - pte = pte_offset_map_lock(mm, pmd, addr, &ptl); - if (!pte_present(*pte) || !pte_write(*pte) || !pte_dirty(*pte)) { - pte_unmap_unlock(pte, ptl); - goto bad_access; - } - val = *(unsigned long *)addr; - val -= regs->ARM_r0; - if (val == 0) { - *(unsigned long *)addr = regs->ARM_r1; - regs->ARM_cpsr |= PSR_C_BIT; - } - pte_unmap_unlock(pte, ptl); - up_read(&mm->mmap_sem); - return val; - - bad_access: - up_read(&mm->mmap_sem); - /* simulate a write access fault */ - do_DataAbort(addr, 15 + (1 << 11), regs); - } -#endif - default: /* Calls 9f00xx..9f07ff are defined to return -ENOSYS if not implemented, rather than raising SIGILL. This diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 210eccadb69a..95a000515e43 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM depends on MMU && OF select PREEMPT_NOTIFIERS select ANON_INODES + select ARM_GIC select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO @@ -45,4 +46,6 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. +source drivers/vhost/Kconfig + endif # VIRTUALIZATION diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index dc017adfddc8..eab83b2435b8 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvm_timer_should_fire(vcpu); } +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) +{ + kvm_timer_schedule(vcpu); +} + +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) +{ + kvm_timer_unschedule(vcpu); +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { /* Force users to call KVM_ARM_VCPU_INIT */ @@ -308,7 +318,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (vcpu->arch.pause) + if (vcpu->arch.power_off) mp_state->mp_state = KVM_MP_STATE_STOPPED; else mp_state->mp_state = KVM_MP_STATE_RUNNABLE; @@ -321,10 +331,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.pause = false; + vcpu->arch.power_off = false; break; case KVM_MP_STATE_STOPPED: - vcpu->arch.pause = true; + vcpu->arch.power_off = true; break; default: return -EINVAL; @@ -342,7 +352,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, */ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v); + return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v)) + && !v->arch.power_off && !v->arch.pause); } /* Just ensure a guest exit from a particular CPU */ @@ -468,11 +479,38 @@ bool kvm_arch_intc_initialized(struct kvm *kvm) return vgic_initialized(kvm); } -static void vcpu_pause(struct kvm_vcpu *vcpu) +static void kvm_arm_halt_guest(struct kvm *kvm) __maybe_unused; +static void kvm_arm_resume_guest(struct kvm *kvm) __maybe_unused; + +static void kvm_arm_halt_guest(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) + vcpu->arch.pause = true; + force_vm_exit(cpu_all_mask); +} + +static void kvm_arm_resume_guest(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); + + vcpu->arch.pause = false; + wake_up_interruptible(wq); + } +} + +static void vcpu_sleep(struct kvm_vcpu *vcpu) { wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); - wait_event_interruptible(*wq, !vcpu->arch.pause); + wait_event_interruptible(*wq, ((!vcpu->arch.power_off) && + (!vcpu->arch.pause))); } static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) @@ -522,8 +560,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) update_vttbr(vcpu->kvm); - if (vcpu->arch.pause) - vcpu_pause(vcpu); + if (vcpu->arch.power_off || vcpu->arch.pause) + vcpu_sleep(vcpu); /* * Disarming the background timer must be done in a @@ -549,11 +587,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) run->exit_reason = KVM_EXIT_INTR; } - if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { + if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || + vcpu->arch.power_off || vcpu->arch.pause) { local_irq_enable(); + kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); preempt_enable(); - kvm_timer_sync_hwstate(vcpu); continue; } @@ -596,14 +635,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * guest time. */ kvm_guest_exit(); - trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + + /* + * We must sync the timer state before the vgic state so that + * the vgic can properly sample the updated state of the + * interrupt line. + */ + kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); preempt_enable(); - kvm_timer_sync_hwstate(vcpu); - ret = handle_exit(vcpu, run, ret); } @@ -765,12 +809,12 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, vcpu_reset_hcr(vcpu); /* - * Handle the "start in power-off" case by marking the VCPU as paused. + * Handle the "start in power-off" case. */ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) - vcpu->arch.pause = true; + vcpu->arch.power_off = true; else - vcpu->arch.pause = false; + vcpu->arch.power_off = false; return 0; } @@ -1080,7 +1124,7 @@ static int init_hyp_mode(void) */ err = kvm_timer_hyp_init(); if (err) - goto out_free_mappings; + goto out_free_context; #ifndef CONFIG_HOTPLUG_CPU free_boot_hyp_pgd(); diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index ad6f6424f1d1..0b556968a6da 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -63,7 +63,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { - vcpu->arch.pause = true; + vcpu->arch.power_off = true; } static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) @@ -87,7 +87,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) */ if (!vcpu) return PSCI_RET_INVALID_PARAMS; - if (!vcpu->arch.pause) { + if (!vcpu->arch.power_off) { if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1) return PSCI_RET_ALREADY_ON; else @@ -115,7 +115,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * the general puspose registers are undefined upon CPU_ON. */ *vcpu_reg(vcpu, 0) = context_id; - vcpu->arch.pause = false; + vcpu->arch.power_off = false; smp_mb(); /* Make sure the above is visible */ wq = kvm_arch_vcpu_wq(vcpu); @@ -153,7 +153,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) mpidr = kvm_vcpu_get_mpidr_aff(tmp); if ((mpidr & target_affinity_mask) == target_affinity) { matching_cpus++; - if (!tmp->arch.pause) + if (!tmp->arch.power_off) return PSCI_0_2_AFFINITY_LEVEL_ON; } } @@ -179,7 +179,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) * re-initialized. */ kvm_for_each_vcpu(i, tmp, vcpu->kvm) { - tmp->arch.pause = true; + tmp->arch.power_off = true; kvm_vcpu_kick(tmp); } diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 0ec35392d208..c25a88598eb0 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -25,21 +25,25 @@ TRACE_EVENT(kvm_entry, ); TRACE_EVENT(kvm_exit, - TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc), - TP_ARGS(exit_reason, vcpu_pc), + TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc), + TP_ARGS(idx, exit_reason, vcpu_pc), TP_STRUCT__entry( + __field( int, idx ) __field( unsigned int, exit_reason ) __field( unsigned long, vcpu_pc ) ), TP_fast_assign( + __entry->idx = idx; __entry->exit_reason = exit_reason; __entry->vcpu_pc = vcpu_pc; ), - TP_printk("HSR_EC: 0x%04x, PC: 0x%08lx", + TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx", + __print_symbolic(__entry->idx, kvm_arm_exception_type), __entry->exit_reason, + __print_symbolic(__entry->exit_reason, kvm_arm_exception_class), __entry->vcpu_pc) ); diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 970d6c043774..e936352ccb00 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> #include <asm/assembler.h> +#include <asm/unwind.h> .text @@ -20,6 +21,8 @@ */ ENTRY(__clear_user_std) WEAK(arm_clear_user) +UNWIND(.fnstart) +UNWIND(.save {r1, lr}) stmfd sp!, {r1, lr} mov r2, #0 cmp r1, #4 @@ -44,6 +47,7 @@ WEAK(arm_clear_user) USER( strnebt r2, [r0]) mov r0, #0 ldmfd sp!, {r1, pc} +UNWIND(.fnend) ENDPROC(arm_clear_user) ENDPROC(__clear_user_std) diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig index 89a755b90db2..92673006e55c 100644 --- a/arch/arm/mach-at91/Kconfig +++ b/arch/arm/mach-at91/Kconfig @@ -102,6 +102,9 @@ config HAVE_AT91_SMD config HAVE_AT91_H32MX bool +config HAVE_AT91_GENERATED_CLK + bool + config SOC_SAM_V4_V5 bool diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 1319c3c14327..84bd26535ae9 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -14,7 +14,7 @@ config ARCH_BCM_IPROC select HAVE_ARM_SCU if SMP select HAVE_ARM_TWD if SMP select ARM_GLOBAL_TIMER - + select COMMON_CLK_IPROC select CLKSRC_MMIO select ARCH_REQUIRE_GPIOLIB select ARM_AMBA diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index 9bdf54795f05..56978199c479 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -20,6 +20,7 @@ #include <asm/cputype.h> #include <asm/cp15.h> #include <asm/mcpm.h> +#include <asm/smp_plat.h> #include "regs-pmu.h" #include "common.h" @@ -70,7 +71,31 @@ static int exynos_cpu_powerup(unsigned int cpu, unsigned int cluster) cluster >= EXYNOS5420_NR_CLUSTERS) return -EINVAL; - exynos_cpu_power_up(cpunr); + if (!exynos_cpu_power_state(cpunr)) { + exynos_cpu_power_up(cpunr); + + /* + * This assumes the cluster number of the big cores(Cortex A15) + * is 0 and the Little cores(Cortex A7) is 1. + * When the system was booted from the Little core, + * they should be reset during power up cpu. + */ + if (cluster && + cluster == MPIDR_AFFINITY_LEVEL(cpu_logical_map(0), 1)) { + /* + * Before we reset the Little cores, we should wait + * the SPARE2 register is set to 1 because the init + * codes of the iROM will set the register after + * initialization. + */ + while (!pmu_raw_readl(S5P_PMU_SPARE2)) + udelay(10); + + pmu_raw_writel(EXYNOS5420_KFC_CORE_RESET(cpu), + EXYNOS_SWRESET); + } + } + return 0; } diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c index 4a87e86dec45..7c21760f590f 100644 --- a/arch/arm/mach-exynos/pm_domains.c +++ b/arch/arm/mach-exynos/pm_domains.c @@ -200,15 +200,15 @@ no_clk: args.args_count = 0; child_domain = of_genpd_get_from_provider(&args); if (IS_ERR(child_domain)) - goto next_pd; + continue; if (of_parse_phandle_with_args(np, "power-domains", "#power-domain-cells", 0, &args) != 0) - goto next_pd; + continue; parent_domain = of_genpd_get_from_provider(&args); if (IS_ERR(parent_domain)) - goto next_pd; + continue; if (pm_genpd_add_subdomain(parent_domain, child_domain)) pr_warn("%s failed to add subdomain: %s\n", @@ -216,8 +216,6 @@ no_clk: else pr_info("%s has as child subdomain: %s.\n", parent_domain->name, child_domain->name); -next_pd: - of_node_put(np); } return 0; diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h index b7614333d296..fba9068ed260 100644 --- a/arch/arm/mach-exynos/regs-pmu.h +++ b/arch/arm/mach-exynos/regs-pmu.h @@ -513,6 +513,12 @@ static inline unsigned int exynos_pmu_cpunr(unsigned int mpidr) #define SPREAD_ENABLE 0xF #define SPREAD_USE_STANDWFI 0xF +#define EXYNOS5420_KFC_CORE_RESET0 BIT(8) +#define EXYNOS5420_KFC_ETM_RESET0 BIT(20) + +#define EXYNOS5420_KFC_CORE_RESET(_nr) \ + ((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr)) + #define EXYNOS5420_BB_CON1 0x0784 #define EXYNOS5420_BB_SEL_EN BIT(31) #define EXYNOS5420_BB_PMOS_EN BIT(7) diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index e00eb39453a4..5a7e47ceec91 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -177,54 +177,57 @@ static struct irq_chip exynos_pmu_chip = { #endif }; -static int exynos_pmu_domain_xlate(struct irq_domain *domain, - struct device_node *controller, - const u32 *intspec, - unsigned int intsize, - unsigned long *out_hwirq, - unsigned int *out_type) +static int exynos_pmu_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *hwirq, + unsigned int *type) { - if (domain->of_node != controller) - return -EINVAL; /* Shouldn't happen, really... */ - if (intsize != 3) - return -EINVAL; /* Not GIC compliant */ - if (intspec[0] != 0) - return -EINVAL; /* No PPI should point to this domain */ + if (is_of_node(fwspec->fwnode)) { + if (fwspec->param_count != 3) + return -EINVAL; - *out_hwirq = intspec[1]; - *out_type = intspec[2]; - return 0; + /* No PPI should point to this domain */ + if (fwspec->param[0] != 0) + return -EINVAL; + + *hwirq = fwspec->param[1]; + *type = fwspec->param[2]; + return 0; + } + + return -EINVAL; } static int exynos_pmu_domain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *data) { - struct of_phandle_args *args = data; - struct of_phandle_args parent_args; + struct irq_fwspec *fwspec = data; + struct irq_fwspec parent_fwspec; irq_hw_number_t hwirq; int i; - if (args->args_count != 3) + if (fwspec->param_count != 3) return -EINVAL; /* Not GIC compliant */ - if (args->args[0] != 0) + if (fwspec->param[0] != 0) return -EINVAL; /* No PPI should point to this domain */ - hwirq = args->args[1]; + hwirq = fwspec->param[1]; for (i = 0; i < nr_irqs; i++) irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, &exynos_pmu_chip, NULL); - parent_args = *args; - parent_args.np = domain->parent->of_node; - return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args); + parent_fwspec = *fwspec; + parent_fwspec.fwnode = domain->parent->fwnode; + return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, + &parent_fwspec); } static const struct irq_domain_ops exynos_pmu_domain_ops = { - .xlate = exynos_pmu_domain_xlate, - .alloc = exynos_pmu_domain_alloc, - .free = irq_domain_free_irqs_common, + .translate = exynos_pmu_domain_translate, + .alloc = exynos_pmu_domain_alloc, + .free = irq_domain_free_irqs_common, }; static int __init exynos_pmu_irq_init(struct device_node *node, diff --git a/arch/arm/mach-gemini/board-nas4220b.c b/arch/arm/mach-gemini/board-nas4220b.c index ca8a25bb3521..18b12796acf9 100644 --- a/arch/arm/mach-gemini/board-nas4220b.c +++ b/arch/arm/mach-gemini/board-nas4220b.c @@ -18,7 +18,6 @@ #include <linux/leds.h> #include <linux/input.h> #include <linux/gpio_keys.h> -#include <linux/mdio-gpio.h> #include <linux/io.h> #include <asm/setup.h> diff --git a/arch/arm/mach-gemini/board-wbd111.c b/arch/arm/mach-gemini/board-wbd111.c index 418188cd1712..14c56f3f0ec2 100644 --- a/arch/arm/mach-gemini/board-wbd111.c +++ b/arch/arm/mach-gemini/board-wbd111.c @@ -15,7 +15,6 @@ #include <linux/input.h> #include <linux/skbuff.h> #include <linux/gpio_keys.h> -#include <linux/mdio-gpio.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include <asm/mach-types.h> diff --git a/arch/arm/mach-gemini/board-wbd222.c b/arch/arm/mach-gemini/board-wbd222.c index 266b265090cd..6070282ce243 100644 --- a/arch/arm/mach-gemini/board-wbd222.c +++ b/arch/arm/mach-gemini/board-wbd222.c @@ -15,7 +15,6 @@ #include <linux/input.h> #include <linux/skbuff.h> #include <linux/gpio_keys.h> -#include <linux/mdio-gpio.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include <asm/mach-types.h> diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 8c4467fad837..10bf7159b27d 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -181,40 +181,42 @@ static struct irq_chip imx_gpc_chip = { #endif }; -static int imx_gpc_domain_xlate(struct irq_domain *domain, - struct device_node *controller, - const u32 *intspec, - unsigned int intsize, - unsigned long *out_hwirq, - unsigned int *out_type) +static int imx_gpc_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *hwirq, + unsigned int *type) { - if (domain->of_node != controller) - return -EINVAL; /* Shouldn't happen, really... */ - if (intsize != 3) - return -EINVAL; /* Not GIC compliant */ - if (intspec[0] != 0) - return -EINVAL; /* No PPI should point to this domain */ + if (is_of_node(fwspec->fwnode)) { + if (fwspec->param_count != 3) + return -EINVAL; - *out_hwirq = intspec[1]; - *out_type = intspec[2]; - return 0; + /* No PPI should point to this domain */ + if (fwspec->param[0] != 0) + return -EINVAL; + + *hwirq = fwspec->param[1]; + *type = fwspec->param[2]; + return 0; + } + + return -EINVAL; } static int imx_gpc_domain_alloc(struct irq_domain *domain, unsigned int irq, unsigned int nr_irqs, void *data) { - struct of_phandle_args *args = data; - struct of_phandle_args parent_args; + struct irq_fwspec *fwspec = data; + struct irq_fwspec parent_fwspec; irq_hw_number_t hwirq; int i; - if (args->args_count != 3) + if (fwspec->param_count != 3) return -EINVAL; /* Not GIC compliant */ - if (args->args[0] != 0) + if (fwspec->param[0] != 0) return -EINVAL; /* No PPI should point to this domain */ - hwirq = args->args[1]; + hwirq = fwspec->param[1]; if (hwirq >= GPC_MAX_IRQS) return -EINVAL; /* Can't deal with this */ @@ -222,15 +224,16 @@ static int imx_gpc_domain_alloc(struct irq_domain *domain, irq_domain_set_hwirq_and_chip(domain, irq + i, hwirq + i, &imx_gpc_chip, NULL); - parent_args = *args; - parent_args.np = domain->parent->of_node; - return irq_domain_alloc_irqs_parent(domain, irq, nr_irqs, &parent_args); + parent_fwspec = *fwspec; + parent_fwspec.fwnode = domain->parent->fwnode; + return irq_domain_alloc_irqs_parent(domain, irq, nr_irqs, + &parent_fwspec); } static const struct irq_domain_ops imx_gpc_domain_ops = { - .xlate = imx_gpc_domain_xlate, - .alloc = imx_gpc_domain_alloc, - .free = irq_domain_free_irqs_common, + .translate = imx_gpc_domain_translate, + .alloc = imx_gpc_domain_alloc, + .free = irq_domain_free_irqs_common, }; static int __init imx_gpc_init(struct device_node *node, diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 9602cc12d2f1..3878494bd118 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -350,7 +350,7 @@ static void __init imx6q_opp_init(void) return; } - if (of_init_opp_table(cpu_dev)) { + if (dev_pm_opp_of_add_table(cpu_dev)) { pr_warn("failed to init OPP table\n"); goto put_node; } diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index c86a5a0aefac..e20fc4178b15 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -117,11 +117,4 @@ config MACH_KIRKWOOD Say 'Y' here if you want your kernel to support boards based on the Marvell Kirkwood device tree. -config MACH_NETXBIG - bool "LaCie 2Big and 5Big Network v2" - depends on MACH_KIRKWOOD - help - Say 'Y' here if you want your kernel to support the - LaCie 2Big and 5Big Network v2 - endif diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile index b4f01497ce0b..ecf9e0c3b107 100644 --- a/arch/arm/mach-mvebu/Makefile +++ b/arch/arm/mach-mvebu/Makefile @@ -13,4 +13,3 @@ endif obj-$(CONFIG_MACH_DOVE) += dove.o obj-$(CONFIG_MACH_KIRKWOOD) += kirkwood.o kirkwood-pm.o -obj-$(CONFIG_MACH_NETXBIG) += netxbig.o diff --git a/arch/arm/mach-mvebu/board.h b/arch/arm/mach-mvebu/board.h deleted file mode 100644 index 98e32cc2ef3d..000000000000 --- a/arch/arm/mach-mvebu/board.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Board functions for Marvell System On Chip - * - * Copyright (C) 2014 - * - * Andrew Lunn <andrew@lunn.ch> - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#ifndef __ARCH_MVEBU_BOARD_H -#define __ARCH_MVEBU_BOARD_H - -#ifdef CONFIG_MACH_NETXBIG -void netxbig_init(void); -#else -static inline void netxbig_init(void) {}; -#endif -#endif diff --git a/arch/arm/mach-mvebu/kirkwood.c b/arch/arm/mach-mvebu/kirkwood.c index 925f75f54268..f9d8e1ea7183 100644 --- a/arch/arm/mach-mvebu/kirkwood.c +++ b/arch/arm/mach-mvebu/kirkwood.c @@ -25,7 +25,6 @@ #include "kirkwood.h" #include "kirkwood-pm.h" #include "common.h" -#include "board.h" static struct resource kirkwood_cpufreq_resources[] = { [0] = { @@ -180,9 +179,6 @@ static void __init kirkwood_dt_init(void) kirkwood_pm_init(); kirkwood_dt_eth_fixup(); - if (of_machine_is_compatible("lacie,netxbig")) - netxbig_init(); - of_platform_populate(NULL, of_default_bus_match_table, auxdata, NULL); } diff --git a/arch/arm/mach-mvebu/netxbig.c b/arch/arm/mach-mvebu/netxbig.c deleted file mode 100644 index 94b11b6585a4..000000000000 --- a/arch/arm/mach-mvebu/netxbig.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * arch/arm/mach-mvbu/board-netxbig.c - * - * LaCie 2Big and 5Big Network v2 board setup - * - * Copyright (C) 2010 Simon Guinot <sguinot@lacie.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/platform_device.h> -#include <linux/platform_data/leds-kirkwood-netxbig.h> -#include "common.h" - -/***************************************************************************** - * GPIO extension LEDs - ****************************************************************************/ - -/* - * The LEDs are controlled by a CPLD and can be configured through a GPIO - * extension bus: - * - * - address register : bit [0-2] -> GPIO [47-49] - * - data register : bit [0-2] -> GPIO [44-46] - * - enable register : GPIO 29 - */ - -static int netxbig_v2_gpio_ext_addr[] = { 47, 48, 49 }; -static int netxbig_v2_gpio_ext_data[] = { 44, 45, 46 }; - -static struct netxbig_gpio_ext netxbig_v2_gpio_ext = { - .addr = netxbig_v2_gpio_ext_addr, - .num_addr = ARRAY_SIZE(netxbig_v2_gpio_ext_addr), - .data = netxbig_v2_gpio_ext_data, - .num_data = ARRAY_SIZE(netxbig_v2_gpio_ext_data), - .enable = 29, -}; - -/* - * Address register selection: - * - * addr | register - * ---------------------------- - * 0 | front LED - * 1 | front LED brightness - * 2 | SATA LED brightness - * 3 | SATA0 LED - * 4 | SATA1 LED - * 5 | SATA2 LED - * 6 | SATA3 LED - * 7 | SATA4 LED - * - * Data register configuration: - * - * data | LED brightness - * ------------------------------------------------- - * 0 | min (off) - * - | - - * 7 | max - * - * data | front LED mode - * ------------------------------------------------- - * 0 | fix off - * 1 | fix blue on - * 2 | fix red on - * 3 | blink blue on=1 sec and blue off=1 sec - * 4 | blink red on=1 sec and red off=1 sec - * 5 | blink blue on=2.5 sec and red on=0.5 sec - * 6 | blink blue on=1 sec and red on=1 sec - * 7 | blink blue on=0.5 sec and blue off=2.5 sec - * - * data | SATA LED mode - * ------------------------------------------------- - * 0 | fix off - * 1 | SATA activity blink - * 2 | fix red on - * 3 | blink blue on=1 sec and blue off=1 sec - * 4 | blink red on=1 sec and red off=1 sec - * 5 | blink blue on=2.5 sec and red on=0.5 sec - * 6 | blink blue on=1 sec and red on=1 sec - * 7 | fix blue on - */ - -static int netxbig_v2_red_mled[NETXBIG_LED_MODE_NUM] = { - [NETXBIG_LED_OFF] = 0, - [NETXBIG_LED_ON] = 2, - [NETXBIG_LED_SATA] = NETXBIG_LED_INVALID_MODE, - [NETXBIG_LED_TIMER1] = 4, - [NETXBIG_LED_TIMER2] = NETXBIG_LED_INVALID_MODE, -}; - -static int netxbig_v2_blue_pwr_mled[NETXBIG_LED_MODE_NUM] = { - [NETXBIG_LED_OFF] = 0, - [NETXBIG_LED_ON] = 1, - [NETXBIG_LED_SATA] = NETXBIG_LED_INVALID_MODE, - [NETXBIG_LED_TIMER1] = 3, - [NETXBIG_LED_TIMER2] = 7, -}; - -static int netxbig_v2_blue_sata_mled[NETXBIG_LED_MODE_NUM] = { - [NETXBIG_LED_OFF] = 0, - [NETXBIG_LED_ON] = 7, - [NETXBIG_LED_SATA] = 1, - [NETXBIG_LED_TIMER1] = 3, - [NETXBIG_LED_TIMER2] = NETXBIG_LED_INVALID_MODE, -}; - -static struct netxbig_led_timer netxbig_v2_led_timer[] = { - [0] = { - .delay_on = 500, - .delay_off = 500, - .mode = NETXBIG_LED_TIMER1, - }, - [1] = { - .delay_on = 500, - .delay_off = 1000, - .mode = NETXBIG_LED_TIMER2, - }, -}; - -#define NETXBIG_LED(_name, maddr, mval, baddr) \ - { .name = _name, \ - .mode_addr = maddr, \ - .mode_val = mval, \ - .bright_addr = baddr } - -static struct netxbig_led net2big_v2_leds_ctrl[] = { - NETXBIG_LED("net2big-v2:blue:power", 0, netxbig_v2_blue_pwr_mled, 1), - NETXBIG_LED("net2big-v2:red:power", 0, netxbig_v2_red_mled, 1), - NETXBIG_LED("net2big-v2:blue:sata0", 3, netxbig_v2_blue_sata_mled, 2), - NETXBIG_LED("net2big-v2:red:sata0", 3, netxbig_v2_red_mled, 2), - NETXBIG_LED("net2big-v2:blue:sata1", 4, netxbig_v2_blue_sata_mled, 2), - NETXBIG_LED("net2big-v2:red:sata1", 4, netxbig_v2_red_mled, 2), -}; - -static struct netxbig_led_platform_data net2big_v2_leds_data = { - .gpio_ext = &netxbig_v2_gpio_ext, - .timer = netxbig_v2_led_timer, - .num_timer = ARRAY_SIZE(netxbig_v2_led_timer), - .leds = net2big_v2_leds_ctrl, - .num_leds = ARRAY_SIZE(net2big_v2_leds_ctrl), -}; - -static struct netxbig_led net5big_v2_leds_ctrl[] = { - NETXBIG_LED("net5big-v2:blue:power", 0, netxbig_v2_blue_pwr_mled, 1), - NETXBIG_LED("net5big-v2:red:power", 0, netxbig_v2_red_mled, 1), - NETXBIG_LED("net5big-v2:blue:sata0", 3, netxbig_v2_blue_sata_mled, 2), - NETXBIG_LED("net5big-v2:red:sata0", 3, netxbig_v2_red_mled, 2), - NETXBIG_LED("net5big-v2:blue:sata1", 4, netxbig_v2_blue_sata_mled, 2), - NETXBIG_LED("net5big-v2:red:sata1", 4, netxbig_v2_red_mled, 2), - NETXBIG_LED("net5big-v2:blue:sata2", 5, netxbig_v2_blue_sata_mled, 2), - NETXBIG_LED("net5big-v2:red:sata2", 5, netxbig_v2_red_mled, 2), - NETXBIG_LED("net5big-v2:blue:sata3", 6, netxbig_v2_blue_sata_mled, 2), - NETXBIG_LED("net5big-v2:red:sata3", 6, netxbig_v2_red_mled, 2), - NETXBIG_LED("net5big-v2:blue:sata4", 7, netxbig_v2_blue_sata_mled, 2), - NETXBIG_LED("net5big-v2:red:sata4", 7, netxbig_v2_red_mled, 2), -}; - -static struct netxbig_led_platform_data net5big_v2_leds_data = { - .gpio_ext = &netxbig_v2_gpio_ext, - .timer = netxbig_v2_led_timer, - .num_timer = ARRAY_SIZE(netxbig_v2_led_timer), - .leds = net5big_v2_leds_ctrl, - .num_leds = ARRAY_SIZE(net5big_v2_leds_ctrl), -}; - -static struct platform_device netxbig_v2_leds = { - .name = "leds-netxbig", - .id = -1, - .dev = { - .platform_data = &net2big_v2_leds_data, - }, -}; - -void __init netxbig_init(void) -{ - - if (of_machine_is_compatible("lacie,net5big_v2")) - netxbig_v2_leds.dev.platform_data = &net5big_v2_leds_data; - platform_device_register(&netxbig_v2_leds); -} diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index b3a0dff67e3f..33d1460a5639 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -49,6 +49,7 @@ config SOC_OMAP5 select OMAP_INTERCONNECT select OMAP_INTERCONNECT_BARRIER select PM_OPP if PM + select ZONE_DMA if ARM_LPAE config SOC_AM33XX bool "TI AM33XX" @@ -78,6 +79,7 @@ config SOC_DRA7XX select OMAP_INTERCONNECT select OMAP_INTERCONNECT_BARRIER select PM_OPP if PM + select ZONE_DMA if ARM_LPAE config ARCH_OMAP2PLUS bool diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 6133eaac685d..fb219a30c10c 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -106,6 +106,7 @@ DT_MACHINE_START(OMAP3_DT, "Generic OMAP3 (Flattened Device Tree)") MACHINE_END static const char *const omap36xx_boards_compat[] __initconst = { + "ti,omap3630", "ti,omap36xx", NULL, }; @@ -243,6 +244,9 @@ static const char *const omap5_boards_compat[] __initconst = { }; DT_MACHINE_START(OMAP5_DT, "Generic OMAP5 (Flattened Device Tree)") +#if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE) + .dma_zone_size = SZ_2G, +#endif .reserve = omap_reserve, .smp = smp_ops(omap4_smp_ops), .map_io = omap5_map_io, @@ -288,6 +292,9 @@ static const char *const dra74x_boards_compat[] __initconst = { }; DT_MACHINE_START(DRA74X_DT, "Generic DRA74X (Flattened Device Tree)") +#if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE) + .dma_zone_size = SZ_2G, +#endif .reserve = omap_reserve, .smp = smp_ops(omap4_smp_ops), .map_io = dra7xx_map_io, @@ -308,6 +315,9 @@ static const char *const dra72x_boards_compat[] __initconst = { }; DT_MACHINE_START(DRA72X_DT, "Generic DRA72X (Flattened Device Tree)") +#if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE) + .dma_zone_size = SZ_2G, +#endif .reserve = omap_reserve, .map_io = dra7xx_map_io, .init_early = dra7xx_init_early, diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index e1d2e991d17a..db7e0bab3587 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -399,40 +399,42 @@ static struct irq_chip wakeupgen_chip = { #endif }; -static int wakeupgen_domain_xlate(struct irq_domain *domain, - struct device_node *controller, - const u32 *intspec, - unsigned int intsize, - unsigned long *out_hwirq, - unsigned int *out_type) +static int wakeupgen_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *hwirq, + unsigned int *type) { - if (domain->of_node != controller) - return -EINVAL; /* Shouldn't happen, really... */ - if (intsize != 3) - return -EINVAL; /* Not GIC compliant */ - if (intspec[0] != 0) - return -EINVAL; /* No PPI should point to this domain */ + if (is_of_node(fwspec->fwnode)) { + if (fwspec->param_count != 3) + return -EINVAL; - *out_hwirq = intspec[1]; - *out_type = intspec[2]; - return 0; + /* No PPI should point to this domain */ + if (fwspec->param[0] != 0) + return -EINVAL; + + *hwirq = fwspec->param[1]; + *type = fwspec->param[2]; + return 0; + } + + return -EINVAL; } static int wakeupgen_domain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *data) { - struct of_phandle_args *args = data; - struct of_phandle_args parent_args; + struct irq_fwspec *fwspec = data; + struct irq_fwspec parent_fwspec; irq_hw_number_t hwirq; int i; - if (args->args_count != 3) + if (fwspec->param_count != 3) return -EINVAL; /* Not GIC compliant */ - if (args->args[0] != 0) + if (fwspec->param[0] != 0) return -EINVAL; /* No PPI should point to this domain */ - hwirq = args->args[1]; + hwirq = fwspec->param[1]; if (hwirq >= MAX_IRQS) return -EINVAL; /* Can't deal with this */ @@ -440,15 +442,16 @@ static int wakeupgen_domain_alloc(struct irq_domain *domain, irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i, &wakeupgen_chip, NULL); - parent_args = *args; - parent_args.np = domain->parent->of_node; - return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &parent_args); + parent_fwspec = *fwspec; + parent_fwspec.fwnode = domain->parent->fwnode; + return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, + &parent_fwspec); } static const struct irq_domain_ops wakeupgen_domain_ops = { - .xlate = wakeupgen_domain_xlate, - .alloc = wakeupgen_domain_alloc, - .free = irq_domain_free_irqs_common, + .translate = wakeupgen_domain_translate, + .alloc = wakeupgen_domain_alloc, + .free = irq_domain_free_irqs_common, }; /* diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index ea56397599c2..1dfe34654c43 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -559,7 +559,14 @@ static void pdata_quirks_check(struct pdata_init *quirks) void __init pdata_quirks_init(const struct of_device_id *omap_dt_match_table) { - omap_sdrc_init(NULL, NULL); + /* + * We still need this for omap2420 and omap3 PM to work, others are + * using drivers/misc/sram.c already. + */ + if (of_machine_is_compatible("ti,omap2420") || + of_machine_is_compatible("ti,omap3")) + omap_sdrc_init(NULL, NULL); + pdata_quirks_check(auxdata_quirks); of_platform_populate(NULL, omap_dt_match_table, omap_auxdata_lookup, NULL); diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index a55655127ef2..bef41837bf7f 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -647,7 +647,7 @@ static OMAP_SYS_32K_TIMER_INIT(4, 1, "timer_32k_ck", "ti,timer-alwon", void __init omap4_local_timer_init(void) { omap4_sync32k_timer_init(); - clocksource_of_init(); + clocksource_probe(); } #else void __init omap4_local_timer_init(void) @@ -663,7 +663,7 @@ void __init omap5_realtime_timer_init(void) omap4_sync32k_timer_init(); realtime_counter_init(); - clocksource_of_init(); + clocksource_probe(); } #endif /* CONFIG_SOC_OMAP5 || CONFIG_SOC_DRA7XX */ diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c index 06005d3f2ba3..20ce2d386f17 100644 --- a/arch/arm/mach-pxa/pxa3xx.c +++ b/arch/arm/mach-pxa/pxa3xx.c @@ -42,10 +42,6 @@ #define PECR_IS(n) ((1 << ((n) * 2)) << 29) extern void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int)); -#ifdef CONFIG_PM - -#define ISRAM_START 0x5c000000 -#define ISRAM_SIZE SZ_256K /* * NAND NFC: DFI bus arbitration subset @@ -54,6 +50,11 @@ extern void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int)); #define NDCR_ND_ARB_EN (1 << 12) #define NDCR_ND_ARB_CNTL (1 << 19) +#ifdef CONFIG_PM + +#define ISRAM_START 0x5c000000 +#define ISRAM_SIZE SZ_256K + static void __iomem *sram; static unsigned long wakeup_src; diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c index b6cf3b449428..251c7b9c5f9b 100644 --- a/arch/arm/mach-rockchip/rockchip.c +++ b/arch/arm/mach-rockchip/rockchip.c @@ -67,7 +67,7 @@ static void __init rockchip_timer_init(void) } of_clk_init(NULL); - clocksource_of_init(); + clocksource_probe(); } static void __init rockchip_dt_init(void) diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c index 65c426bc45f7..14bd9ae3f476 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410.c @@ -809,7 +809,7 @@ static const struct gpio_led_platform_data gpio_leds_pdata = { .num_leds = ARRAY_SIZE(gpio_leds), }; -static struct s3c_hsotg_plat crag6410_hsotg_pdata; +static struct dwc2_hsotg_plat crag6410_hsotg_pdata; static void __init crag6410_machine_init(void) { @@ -835,7 +835,7 @@ static void __init crag6410_machine_init(void) s3c_i2c0_set_platdata(&i2c0_pdata); s3c_i2c1_set_platdata(&i2c1_pdata); s3c_fb_set_platdata(&crag6410_lcd_pdata); - s3c_hsotg_set_platdata(&crag6410_hsotg_pdata); + dwc2_hsotg_set_platdata(&crag6410_hsotg_pdata); i2c_register_board_info(0, i2c_devs0, ARRAY_SIZE(i2c_devs0)); i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1)); diff --git a/arch/arm/mach-s3c64xx/mach-smartq.c b/arch/arm/mach-s3c64xx/mach-smartq.c index b3d13537a7f0..719843dca510 100644 --- a/arch/arm/mach-s3c64xx/mach-smartq.c +++ b/arch/arm/mach-s3c64xx/mach-smartq.c @@ -189,7 +189,7 @@ static struct s3c_hwmon_pdata smartq_hwmon_pdata __initdata = { }, }; -static struct s3c_hsotg_plat smartq_hsotg_pdata; +static struct dwc2_hsotg_plat smartq_hsotg_pdata; static int __init smartq_lcd_setup_gpio(void) { @@ -382,7 +382,7 @@ void __init smartq_map_io(void) void __init smartq_machine_init(void) { s3c_i2c0_set_platdata(NULL); - s3c_hsotg_set_platdata(&smartq_hsotg_pdata); + dwc2_hsotg_set_platdata(&smartq_hsotg_pdata); s3c_hwmon_set_platdata(&smartq_hwmon_pdata); s3c_sdhci1_set_platdata(&smartq_internal_hsmmc_pdata); s3c_sdhci2_set_platdata(&smartq_internal_hsmmc_pdata); diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c index d590b88bd8a8..286c9bd676e1 100644 --- a/arch/arm/mach-s3c64xx/mach-smdk6410.c +++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c @@ -628,7 +628,7 @@ static struct platform_pwm_backlight_data smdk6410_bl_data = { .enable_gpio = -1, }; -static struct s3c_hsotg_plat smdk6410_hsotg_pdata; +static struct dwc2_hsotg_plat smdk6410_hsotg_pdata; static void __init smdk6410_map_io(void) { @@ -659,7 +659,7 @@ static void __init smdk6410_machine_init(void) s3c_i2c0_set_platdata(NULL); s3c_i2c1_set_platdata(NULL); s3c_fb_set_platdata(&smdk6410_lcd_pdata); - s3c_hsotg_set_platdata(&smdk6410_hsotg_pdata); + dwc2_hsotg_set_platdata(&smdk6410_hsotg_pdata); samsung_keypad_set_platdata(&smdk6410_keypad_data); diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c index 6bfa6407a27c..1e572a903f8e 100644 --- a/arch/arm/mach-shmobile/setup-r8a7779.c +++ b/arch/arm/mach-shmobile/setup-r8a7779.c @@ -97,7 +97,7 @@ static u32 __init r8a7779_read_mode_pins(void) static void __init r8a7779_init_time(void) { r8a7779_clocks_init(r8a7779_read_mode_pins()); - clocksource_of_init(); + clocksource_probe(); } static const char *const r8a7779_compat_dt[] __initconst = { diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c index aa3339258d9c..9eccde3c7b13 100644 --- a/arch/arm/mach-shmobile/setup-rcar-gen2.c +++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c @@ -128,7 +128,7 @@ void __init rcar_gen2_timer_init(void) #endif /* CONFIG_ARM_ARCH_TIMER */ rcar_gen2_clocks_init(mode); - clocksource_of_init(); + clocksource_probe(); } struct memory_reserve_config { diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c index b7afce6795f4..ca2f6a82a414 100644 --- a/arch/arm/mach-spear/spear13xx.c +++ b/arch/arm/mach-spear/spear13xx.c @@ -124,5 +124,5 @@ void __init spear13xx_timer_init(void) clk_put(pclk); spear_setup_of_timer(); - clocksource_of_init(); + clocksource_probe(); } diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 65bab2876343..223c9e99380d 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -46,7 +46,7 @@ static void __init sun6i_timer_init(void) of_clk_init(NULL); if (IS_ENABLED(CONFIG_RESET_CONTROLLER)) sun6i_reset_init(); - clocksource_of_init(); + clocksource_probe(); } DT_MACHINE_START(SUN6I_DT, "Allwinner sun6i (A31) Family") diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c index 35670b15f281..546338bbacf8 100644 --- a/arch/arm/mach-u300/core.c +++ b/arch/arm/mach-u300/core.c @@ -408,7 +408,7 @@ static const char * u300_board_compat[] = { DT_MACHINE_START(U300_DT, "U300 S335/B335 (Device Tree)") .map_io = u300_map_io, .init_irq = u300_init_irq_dt, - .init_time = clocksource_of_init, + .init_time = clocksource_probe, .init_machine = u300_init_machine_dt, .restart = u300_restart, .dt_compat = u300_board_compat, diff --git a/arch/arm/mach-u300/dummyspichip.c b/arch/arm/mach-u300/dummyspichip.c index 131996805690..68fe986ca42e 100644 --- a/arch/arm/mach-u300/dummyspichip.c +++ b/arch/arm/mach-u300/dummyspichip.c @@ -264,7 +264,6 @@ static const struct of_device_id pl022_dummy_dt_match[] = { static struct spi_driver pl022_dummy_driver = { .driver = { .name = "spi-dummy", - .owner = THIS_MODULE, .of_match_table = pl022_dummy_dt_match, }, .probe = pl022_dummy_probe, diff --git a/arch/arm/mach-ux500/timer.c b/arch/arm/mach-ux500/timer.c index ff28d8ad1ed7..8d2d233f8e6c 100644 --- a/arch/arm/mach-ux500/timer.c +++ b/arch/arm/mach-ux500/timer.c @@ -44,5 +44,5 @@ void __init ux500_timer_init(void) dt_fail: clksrc_dbx500_prcmu_init(prcmu_timer_base); - clocksource_of_init(); + clocksource_probe(); } diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index 5a6e4e20ca0a..6f39d03cc27e 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -154,7 +154,7 @@ static void __init zynq_timer_init(void) zynq_clock_init(); of_clk_init(NULL); - clocksource_of_init(); + clocksource_probe(); } static struct map_desc zynq_cortex_a9_scu_map __initdata = { diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index df7537f12469..c21941349b3e 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -419,28 +419,24 @@ config CPU_THUMBONLY config CPU_32v3 bool select CPU_USE_DOMAINS if MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU config CPU_32v4 bool select CPU_USE_DOMAINS if MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU config CPU_32v4T bool select CPU_USE_DOMAINS if MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU config CPU_32v5 bool select CPU_USE_DOMAINS if MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU @@ -805,14 +801,6 @@ config TLS_REG_EMUL a few prototypes like that in existence) and therefore access to that required register must be emulated. -config NEEDS_SYSCALL_FOR_CMPXCHG - bool - select NEED_KUSER_HELPERS - help - SMP on a pre-ARMv6 processor? Well OK then. - Forget about fast user space cmpxchg support. - It is just not possible. - config NEED_KUSER_HELPERS bool diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 00b7f7de28a1..7d5f4c736a16 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -803,7 +803,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } } } else { - fault = probe_kernel_address(instrptr, instr); + fault = probe_kernel_address((void *)instrptr, instr); instr = __mem_to_opcode_arm(instr); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1a7815e5421b..ad4eb2d26e16 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1407,12 +1407,19 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, unsigned long uaddr = vma->vm_start; unsigned long usize = vma->vm_end - vma->vm_start; struct page **pages = __iommu_get_pages(cpu_addr, attrs); + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); if (!pages) return -ENXIO; + if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off) + return -ENXIO; + + pages += off; + do { int ret = vm_insert_page(vma, uaddr, *pages++); if (ret) { diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 0d629b8f973f..daafcf121ce0 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -593,6 +593,28 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) arm_notify_die("", regs, &info, ifsr, 0); } +/* + * Abort handler to be used only during first unmasking of asynchronous aborts + * on the boot CPU. This makes sure that the machine will not die if the + * firmware/bootloader left an imprecise abort pending for us to trip over. + */ +static int __init early_abort_handler(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during " + "first unmask, this is most likely caused by a " + "firmware/bootloader bug.\n", fsr); + + return 0; +} + +void __init early_abt_enable(void) +{ + fsr_info[22].fn = early_abort_handler; + local_abt_enable(); + fsr_info[22].fn = do_bad; +} + #ifndef CONFIG_ARM_LPAE static int __init exceptions_init(void) { diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h index cf08bdfbe0d6..05ec5e0df32d 100644 --- a/arch/arm/mm/fault.h +++ b/arch/arm/mm/fault.h @@ -24,5 +24,6 @@ static inline int fsr_fs(unsigned int fsr) void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs); unsigned long search_exception_table(unsigned long addr); +void early_abt_enable(void); #endif /* __ARCH_ARM_FAULT_H */ diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 7cd15143a507..4867f5daf82c 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -38,6 +38,7 @@ #include <asm/mach/pci.h> #include <asm/fixmap.h> +#include "fault.h" #include "mm.h" #include "tcm.h" @@ -1363,6 +1364,9 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) */ local_flush_tlb_all(); flush_cache_all(); + + /* Enable asynchronous aborts */ + early_abt_enable(); } static void __init kmap_init(void) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 876060bcceeb..2f4b14cfddb4 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -125,7 +125,7 @@ static u64 jit_get_skb_w(struct sk_buff *skb, int offset) } /* - * Wrapper that handles both OABI and EABI and assures Thumb2 interworking + * Wrappers which handle both OABI and EABI and assures Thumb2 interworking * (where the assembly routines like __aeabi_uidiv could cause problems). */ static u32 jit_udiv(u32 dividend, u32 divisor) @@ -133,6 +133,11 @@ static u32 jit_udiv(u32 dividend, u32 divisor) return dividend / divisor; } +static u32 jit_mod(u32 dividend, u32 divisor) +{ + return dividend % divisor; +} + static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) { inst |= (cond << 28); @@ -471,11 +476,17 @@ static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) #endif } -static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx) +static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, + int bpf_op) { #if __LINUX_ARM_ARCH__ == 7 if (elf_hwcap & HWCAP_IDIVA) { - emit(ARM_UDIV(rd, rm, rn), ctx); + if (bpf_op == BPF_DIV) + emit(ARM_UDIV(rd, rm, rn), ctx); + else { + emit(ARM_UDIV(ARM_R3, rm, rn), ctx); + emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx); + } return; } #endif @@ -496,7 +507,8 @@ static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx) emit(ARM_MOV_R(ARM_R0, rm), ctx); ctx->seen |= SEEN_CALL; - emit_mov_i(ARM_R3, (u32)jit_udiv, ctx); + emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? (u32)jit_udiv : (u32)jit_mod, + ctx); emit_blx_r(ARM_R3, ctx); if (rd != ARM_R0) @@ -614,6 +626,7 @@ load_common: case BPF_LD | BPF_B | BPF_IND: load_order = 0; load_ind: + update_on_xread(ctx); OP_IMM3(ARM_ADD, r_off, r_X, k, ctx); goto load_common; case BPF_LDX | BPF_IMM: @@ -697,13 +710,27 @@ load_ind: if (k == 1) break; emit_mov_i(r_scratch, k, ctx); - emit_udiv(r_A, r_A, r_scratch, ctx); + emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_DIV); break; case BPF_ALU | BPF_DIV | BPF_X: update_on_xread(ctx); emit(ARM_CMP_I(r_X, 0), ctx); emit_err_ret(ARM_COND_EQ, ctx); - emit_udiv(r_A, r_A, r_X, ctx); + emit_udivmod(r_A, r_A, r_X, ctx, BPF_DIV); + break; + case BPF_ALU | BPF_MOD | BPF_K: + if (k == 1) { + emit_mov_i(r_A, 0, ctx); + break; + } + emit_mov_i(r_scratch, k, ctx); + emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_MOD); + break; + case BPF_ALU | BPF_MOD | BPF_X: + update_on_xread(ctx); + emit(ARM_CMP_I(r_X, 0), ctx); + emit_err_ret(ARM_COND_EQ, ctx); + emit_udivmod(r_A, r_A, r_X, ctx, BPF_MOD); break; case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ @@ -1047,7 +1074,7 @@ void bpf_jit_compile(struct bpf_prog *fp) set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *)ctx.target; - fp->jited = true; + fp->jited = 1; out: kfree(ctx.offsets); return; diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index 4b17d5ab652a..c46fca2972f7 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -115,6 +115,8 @@ #define ARM_INST_UMULL 0x00800090 +#define ARM_INST_MLS 0x00600090 + /* * Use a suitable undefined instruction to use for ARM/Thumb2 faulting. * We need to be careful not to conflict with those used by other modules @@ -210,4 +212,7 @@ #define ARM_UMULL(rd_lo, rd_hi, rn, rm) (ARM_INST_UMULL | (rd_hi) << 16 \ | (rd_lo) << 12 | (rm) << 8 | rn) +#define ARM_MLS(rd, rn, rm, ra) (ARM_INST_MLS | (rd) << 16 | (rn) | (rm) << 8 \ + | (ra) << 12) + #endif /* PFILTER_OPCODES_ARM_H */ diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 2235081a04ee..8861c367d061 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -495,7 +495,7 @@ void __init orion_ge00_switch_init(struct dsa_platform_data *d, int irq) d->netdev = &orion_ge00.dev; for (i = 0; i < d->nr_chips; i++) - d->chip[i].host_dev = &orion_ge00_shared.dev; + d->chip[i].host_dev = &orion_ge_mvmdio.dev; orion_switch_device.dev.platform_data = d; platform_device_register(&orion_switch_device); diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c index 83c7d154bde0..82074625de5c 100644 --- a/arch/arm/plat-samsung/devs.c +++ b/arch/arm/plat-samsung/devs.c @@ -1042,11 +1042,11 @@ struct platform_device s3c_device_usb_hsotg = { }, }; -void __init s3c_hsotg_set_platdata(struct s3c_hsotg_plat *pd) +void __init dwc2_hsotg_set_platdata(struct dwc2_hsotg_plat *pd) { - struct s3c_hsotg_plat *npd; + struct dwc2_hsotg_plat *npd; - npd = s3c_set_platdata(pd, sizeof(struct s3c_hsotg_plat), + npd = s3c_set_platdata(pd, sizeof(struct dwc2_hsotg_plat), &s3c_device_usb_hsotg); if (!npd->phy_init) diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c index aedec81d1198..f6455273b2f8 100644 --- a/arch/arm/vdso/vdsomunge.c +++ b/arch/arm/vdso/vdsomunge.c @@ -45,7 +45,6 @@ * it does. */ -#include <byteswap.h> #include <elf.h> #include <errno.h> #include <fcntl.h> @@ -59,6 +58,16 @@ #include <sys/types.h> #include <unistd.h> +#define swab16(x) \ + ((((x) & 0x00ff) << 8) | \ + (((x) & 0xff00) >> 8)) + +#define swab32(x) \ + ((((x) & 0x000000ff) << 24) | \ + (((x) & 0x0000ff00) << 8) | \ + (((x) & 0x00ff0000) >> 8) | \ + (((x) & 0xff000000) >> 24)) + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define HOST_ORDER ELFDATA2LSB #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ @@ -104,17 +113,17 @@ static void cleanup(void) static Elf32_Word read_elf_word(Elf32_Word word, bool swap) { - return swap ? bswap_32(word) : word; + return swap ? swab32(word) : word; } static Elf32_Half read_elf_half(Elf32_Half half, bool swap) { - return swap ? bswap_16(half) : half; + return swap ? swab16(half) : half; } static void write_elf_word(Elf32_Word val, Elf32_Word *dst, bool swap) { - *dst = swap ? bswap_32(val) : val; + *dst = swap ? swab32(val) : val; } int main(int argc, char **argv) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index eeeab074e154..fc7ea529f462 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -86,16 +86,25 @@ static void xen_percpu_init(void) int err; int cpu = get_cpu(); + /* + * VCPUOP_register_vcpu_info cannot be called twice for the same + * vcpu, so if vcpu_info is already registered, just get out. This + * can happen with cpu-hotplug. + */ + if (per_cpu(xen_vcpu, cpu) != NULL) + goto after_register_vcpu_info; + pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); - info.mfn = __pa(vcpup) >> PAGE_SHIFT; - info.offset = offset_in_page(vcpup); + info.mfn = virt_to_gfn(vcpup); + info.offset = xen_offset_in_page(vcpup); err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); BUG_ON(err); per_cpu(xen_vcpu, cpu) = vcpup; +after_register_vcpu_info: enable_percpu_irq(xen_events_irq, 0); put_cpu(); } @@ -124,6 +133,9 @@ static int xen_cpu_notification(struct notifier_block *self, case CPU_STARTING: xen_percpu_init(); break; + case CPU_DYING: + disable_percpu_irq(xen_events_irq); + break; default: break; } @@ -213,7 +225,7 @@ static int __init xen_guest_init(void) xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; + xatp.gpfn = virt_to_gfn(shared_info_page); if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); @@ -284,7 +296,7 @@ void xen_arch_resume(void) { } void xen_arch_suspend(void) { } -/* In the hypervisor.S file. */ +/* In the hypercall.S file. */ EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op); EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op); EXPORT_SYMBOL_GPL(HYPERVISOR_xen_version); diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 6dd911d1f0ac..7c34f7126b04 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -48,22 +48,22 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset, size_t size, enum dma_data_direction dir, enum dma_cache_op op) { struct gnttab_cache_flush cflush; - unsigned long pfn; + unsigned long xen_pfn; size_t left = size; - pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE; - offset %= PAGE_SIZE; + xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE; + offset %= XEN_PAGE_SIZE; do { size_t len = left; /* buffers in highmem or foreign pages cannot cross page * boundaries */ - if (len + offset > PAGE_SIZE) - len = PAGE_SIZE - offset; + if (len + offset > XEN_PAGE_SIZE) + len = XEN_PAGE_SIZE - offset; cflush.op = 0; - cflush.a.dev_bus_addr = pfn << PAGE_SHIFT; + cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT; cflush.offset = offset; cflush.length = len; @@ -79,7 +79,7 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset, HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1); offset = 0; - pfn++; + xen_pfn++; left -= len; } while (left); } @@ -138,10 +138,29 @@ void __xen_dma_sync_single_for_device(struct device *hwdev, } bool xen_arch_need_swiotlb(struct device *dev, - unsigned long pfn, - unsigned long bfn) + phys_addr_t phys, + dma_addr_t dev_addr) { - return (!hypercall_cflush && (pfn != bfn) && !is_device_dma_coherent(dev)); + unsigned int xen_pfn = XEN_PFN_DOWN(phys); + unsigned int bfn = XEN_PFN_DOWN(dev_addr); + + /* + * The swiotlb buffer should be used if + * - Xen doesn't have the cache flush hypercall + * - The Linux page refers to foreign memory + * - The device doesn't support coherent DMA request + * + * The Linux page may be spanned acrros multiple Xen page, although + * it's not possible to have a mix of local and foreign Xen page. + * Furthermore, range_straddles_page_boundary is already checking + * if buffer is physically contiguous in the host RAM. + * + * Therefore we only need to check the first Xen page to know if we + * require a bounce buffer because the device doesn't support coherent + * memory and we are not able to flush the cache. + */ + return (!hypercall_cflush && (xen_pfn != bfn) && + !is_device_dma_coherent(dev)); } int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 887596c67b12..0ed01f2d5ee4 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -93,8 +93,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, for (i = 0; i < count; i++) { if (map_ops[i].status) continue; - set_phys_to_machine(map_ops[i].host_addr >> PAGE_SHIFT, - map_ops[i].dev_bus_addr >> PAGE_SHIFT); + set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, + map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT); } return 0; @@ -108,7 +108,7 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, int i; for (i = 0; i < count; i++) { - set_phys_to_machine(unmap_ops[i].host_addr >> PAGE_SHIFT, + set_phys_to_machine(unmap_ops[i].host_addr >> XEN_PAGE_SHIFT, INVALID_P2M_ENTRY); } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 07d1811aa03f..851fe11c6069 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -48,6 +48,7 @@ config ARM64 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK @@ -75,6 +76,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS + select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA @@ -169,10 +171,12 @@ config FIX_EARLYCON_MEM config PGTABLE_LEVELS int + default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36 default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 - default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48 + default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47 + default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48 source "init/Kconfig" @@ -348,6 +352,33 @@ config ARM64_ERRATUM_843419 If unsure, say Y. +config CAVIUM_ERRATUM_22375 + bool "Cavium erratum 22375, 24313" + default y + help + Enable workaround for erratum 22375, 24313. + + This implements two gicv3-its errata workarounds for ThunderX. Both + with small impact affecting only ITS table allocation. + + erratum 22375: only alloc 8MB table size + erratum 24313: ignore memory access type + + The fixes are in ITS initialization and basically ignore memory access + type and table size provided by the TYPER and BASER registers. + + If unsure, say Y. + +config CAVIUM_ERRATUM_23154 + bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed" + default y + help + The gicv3 of ThunderX requires a modified version for + reading the IAR status to ensure data synchronization + (access to icc_iar1_el1 is not sync'ed before and after). + + If unsure, say Y. + endmenu @@ -362,25 +393,37 @@ config ARM64_4K_PAGES help This feature enables 4KB pages support. +config ARM64_16K_PAGES + bool "16KB" + help + The system will use 16KB pages support. AArch32 emulation + requires applications compiled with 16K (or a multiple of 16K) + aligned segments. + config ARM64_64K_PAGES bool "64KB" help This feature enables 64KB pages support (4KB by default) allowing only two levels of page tables and faster TLB - look-up. AArch32 emulation is not available when this feature - is enabled. + look-up. AArch32 emulation requires applications compiled + with 64K aligned segments. endchoice choice prompt "Virtual address space size" default ARM64_VA_BITS_39 if ARM64_4K_PAGES + default ARM64_VA_BITS_47 if ARM64_16K_PAGES default ARM64_VA_BITS_42 if ARM64_64K_PAGES help Allows choosing one of multiple possible virtual address space sizes. The level of translation table is determined by a combination of page size and virtual address space size. +config ARM64_VA_BITS_36 + bool "36-bit" if EXPERT + depends on ARM64_16K_PAGES + config ARM64_VA_BITS_39 bool "39-bit" depends on ARM64_4K_PAGES @@ -389,6 +432,10 @@ config ARM64_VA_BITS_42 bool "42-bit" depends on ARM64_64K_PAGES +config ARM64_VA_BITS_47 + bool "47-bit" + depends on ARM64_16K_PAGES + config ARM64_VA_BITS_48 bool "48-bit" @@ -396,8 +443,10 @@ endchoice config ARM64_VA_BITS int + default 36 if ARM64_VA_BITS_36 default 39 if ARM64_VA_BITS_39 default 42 if ARM64_VA_BITS_42 + default 47 if ARM64_VA_BITS_47 default 48 if ARM64_VA_BITS_48 config CPU_BIG_ENDIAN @@ -427,15 +476,13 @@ config NR_CPUS config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" + select GENERIC_IRQ_MIGRATION help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. source kernel/Kconfig.preempt - -config HZ - int - default 100 +source kernel/Kconfig.hz config ARCH_HAS_HOLES_MEMORYMODEL def_bool y if SPARSEMEM @@ -454,12 +501,8 @@ config HAVE_ARCH_PFN_VALID def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM config HW_PERF_EVENTS - bool "Enable hardware performance counter support for perf events" - depends on PERF_EVENTS - default y - help - Enable hardware performance counter support for perf events. If - disabled, perf events will use software events only. + def_bool y + depends on ARM_PMU config SYS_SUPPORTS_HUGETLBFS def_bool y @@ -468,7 +511,7 @@ config ARCH_WANT_GENERAL_HUGETLB def_bool y config ARCH_WANT_HUGE_PMD_SHARE - def_bool y if !ARM64_64K_PAGES + def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) config HAVE_ARCH_TRANSPARENT_HUGEPAGE def_bool y @@ -505,7 +548,25 @@ config XEN config FORCE_MAX_ZONEORDER int default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) + default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE) default "11" + help + The kernel memory allocator divides physically contiguous memory + blocks into "zones", where each zone is a power of two number of + pages. This option selects the largest power of two that the kernel + keeps in the memory allocator. If you need to allocate very large + blocks of physically contiguous memory, then you may need to + increase this value. + + This config option is actually maximum order plus one. For example, + a value of 11 means that the largest free memory block is 2^10 pages. + + We make sure that we can allocate upto a HugePage size for each configuration. + Hence we have : + MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2 + + However for 4K, we choose a higher default value, 11 as opposed to 10, giving us + 4M allocations matching the default size used by generic code. menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" @@ -680,7 +741,7 @@ source "fs/Kconfig.binfmt" config COMPAT bool "Kernel support for 32-bit EL0" - depends on !ARM64_64K_PAGES || EXPERT + depends on ARM64_4K_PAGES || EXPERT select COMPAT_BINFMT_ELF select HAVE_UID16 select OLD_SIGSUSPEND3 @@ -691,9 +752,9 @@ config COMPAT the user helper functions, VFP support and the ptrace interface are handled appropriately by the kernel. - If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you - will only be able to execute AArch32 binaries that were compiled with - 64k aligned segments. + If you use a page size other than 4KB (i.e, 16KB or 64KB), please be aware + that you will only be able to execute AArch32 binaries that were compiled + with page size aligned segments. If you want to execute 32-bit userspace applications, say Y. diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index d6285ef9b5f9..c24d6adc0420 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -77,7 +77,7 @@ config DEBUG_RODATA If in doubt, say Y config DEBUG_ALIGN_RODATA - depends on DEBUG_RODATA && !ARM64_64K_PAGES + depends on DEBUG_RODATA && ARM64_4K_PAGES bool "Align linker sections up to SECTION_SIZE" help If this option is enabled, sections that may potentially be marked as diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index f9914d7c1bb0..cd822d8454c0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -42,7 +42,7 @@ endif CHECKFLAGS += -D__aarch64__ ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) -CFLAGS_MODULE += -mcmodel=large +KBUILD_CFLAGS_MODULE += -mcmodel=large endif # Default value @@ -55,6 +55,13 @@ else TEXT_OFFSET := 0x00080000 endif +# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61) +# in 32-bit arithmetic +KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \ + (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \ + + (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \ + - (1 << (64 - 32 - 3)) )) ) + export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index d831bc2ac204..d6c9630a5c20 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -207,6 +207,17 @@ clock-output-names = "xge0clk"; }; + xge1clk: xge1clk@1f62c000 { + compatible = "apm,xgene-device-clock"; + status = "disabled"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f62c000 0x0 0x1000>; + reg-names = "csr-reg"; + csr-mask = <0x3>; + clock-output-names = "xge1clk"; + }; + sataphy1clk: sataphy1clk@1f21c000 { compatible = "apm,xgene-device-clock"; #clock-cells = <1>; @@ -477,6 +488,16 @@ reg = <0x0 0x7c600000 0x0 0x200000>; pmd-controller = <3>; }; + + edacl3@7e600000 { + compatible = "apm,xgene-edac-l3"; + reg = <0x0 0x7e600000 0x0 0x1000>; + }; + + edacsoc@7e930000 { + compatible = "apm,xgene-edac-soc-v1"; + reg = <0x0 0x7e930000 0x0 0x1000>; + }; }; pcie0: pcie@1f2b0000 { @@ -816,6 +837,23 @@ phy-connection-type = "xgmii"; }; + xgenet1: ethernet@1f620000 { + compatible = "apm,xgene1-xgenet"; + status = "disabled"; + reg = <0x0 0x1f620000 0x0 0xd100>, + <0x0 0x1f600000 0x0 0Xc300>, + <0x0 0x18000000 0x0 0X8000>; + reg-names = "enet_csr", "ring_csr", "ring_cmd"; + interrupts = <0x0 0x6C 0x4>, + <0x0 0x6D 0x4>; + port-id = <1>; + dma-coherent; + clocks = <&xge1clk 0>; + /* mac address will be overwritten by the bootloader */ + local-mac-address = [00 00 00 00 00 00]; + phy-connection-type = "xgmii"; + }; + rng: rng@10520000 { compatible = "apm,xgene-rng"; reg = <0x0 0x10520000 0x0 0x100>; diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi index 637e046f0e36..3c386680357e 100644 --- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi @@ -61,42 +61,42 @@ button@1 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <116>; label = "POWER"; gpios = <&iofpga_gpio0 0 0x4>; }; button@2 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <102>; label = "HOME"; gpios = <&iofpga_gpio0 1 0x4>; }; button@3 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <152>; label = "RLOCK"; gpios = <&iofpga_gpio0 2 0x4>; }; button@4 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <115>; label = "VOL+"; gpios = <&iofpga_gpio0 3 0x4>; }; button@5 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <114>; label = "VOL-"; gpios = <&iofpga_gpio0 4 0x4>; }; button@6 { debounce_interval = <50>; - wakeup = <1>; + wakeup-source; linux,code = <99>; label = "NMI"; gpios = <&iofpga_gpio0 5 0x4>; diff --git a/arch/arm64/boot/dts/arm/juno-r1.dts b/arch/arm64/boot/dts/arm/juno-r1.dts index c62751153a4f..734e1272b19f 100644 --- a/arch/arm64/boot/dts/arm/juno-r1.dts +++ b/arch/arm64/boot/dts/arm/juno-r1.dts @@ -91,17 +91,21 @@ }; }; - pmu { - compatible = "arm,armv8-pmuv3"; + pmu_a57 { + compatible = "arm,cortex-a57-pmu"; interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>; + interrupt-affinity = <&A57_0>, + <&A57_1>; + }; + + pmu_a53 { + compatible = "arm,cortex-a53-pmu"; + interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; - interrupt-affinity = <&A57_0>, - <&A57_1>, - <&A53_0>, + interrupt-affinity = <&A53_0>, <&A53_1>, <&A53_2>, <&A53_3>; diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts index d7cbdd482a61..ffa05aeab3c7 100644 --- a/arch/arm64/boot/dts/arm/juno.dts +++ b/arch/arm64/boot/dts/arm/juno.dts @@ -91,17 +91,21 @@ }; }; - pmu { - compatible = "arm,armv8-pmuv3"; + pmu_a57 { + compatible = "arm,cortex-a57-pmu"; interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>; + interrupt-affinity = <&A57_0>, + <&A57_1>; + }; + + pmu_a53 { + compatible = "arm,cortex-a53-pmu"; + interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; - interrupt-affinity = <&A57_0>, - <&A57_1>, - <&A53_0>, + interrupt-affinity = <&A53_0>, <&A53_1>, <&A53_2>, <&A53_3>; diff --git a/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi new file mode 100644 index 000000000000..606dd5a05c2d --- /dev/null +++ b/arch/arm64/boot/dts/hisilicon/hip05_hns.dtsi @@ -0,0 +1,191 @@ +soc0: soc@000000000 { + #address-cells = <2>; + #size-cells = <2>; + device_type = "soc"; + compatible = "simple-bus"; + ranges = <0x0 0x0 0x0 0x0 0x1 0x0>; + chip-id = <0>; + + soc0_mdio0: mdio@803c0000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "hisilicon,hns-mdio"; + reg = <0x0 0x803c0000 0x0 0x10000 + 0x0 0x80000000 0x0 0x10000>; + + soc0_phy0: ethernet-phy@0 { + reg = <0x0>; + compatible = "ethernet-phy-ieee802.3-c22"; + }; + soc0_phy1: ethernet-phy@1 { + reg = <0x1>; + compatible = "ethernet-phy-ieee802.3-c22"; + }; + }; + + dsa: dsa@c7000000 { + compatible = "hisilicon,hns-dsaf-v1"; + dsa_name = "dsaf0"; + mode = "6port-16rss"; + interrupt-parent = <&mbigen_dsa>; + + reg = <0x0 0xC0000000 0x0 0x420000 + 0x0 0xC2000000 0x0 0x300000 + 0x0 0xc5000000 0x0 0x890000 + 0x0 0xc7000000 0x0 0x60000 + >; + + phy-handle = <0 0 0 0 &soc0_phy0 &soc0_phy1 0 0>; + interrupts = < + /* [14] ge fifo err 8 / xge 6**/ + 149 0x4 150 0x4 151 0x4 152 0x4 + 153 0x4 154 0x4 26 0x4 27 0x4 + 155 0x4 156 0x4 157 0x4 158 0x4 159 0x4 160 0x4 + /* [12] rcb com 4*3**/ + 0x6 0x4 0x7 0x4 0x8 0x4 0x9 0x4 + 16 0x4 17 0x4 18 0x4 19 0x4 + 22 0x4 23 0x4 24 0x4 25 0x4 + /* [8] ppe tnl 0-7***/ + 0x0 0x4 0x1 0x4 0x2 0x4 0x3 0x4 + 0x4 0x4 0x5 0x4 12 0x4 13 0x4 + /* [21] dsaf event int 3+18**/ + 128 0x4 129 0x4 130 0x4 + 0x83 0x4 0x84 0x4 0x85 0x4 0x86 0x4 0x87 0x4 0x88 0x4 + 0x89 0x4 0x8a 0x4 0x8b 0x4 0x8c 0x4 0x8d 0x4 0x8e 0x4 + 0x8f 0x4 0x90 0x4 0x91 0x4 0x92 0x4 0x93 0x4 0x94 0x4 + /* [4] debug rcb 2*2*/ + 0xe 0x1 0xf 0x1 0x14 0x1 0x15 0x1 + /* [256] sevice rcb 2*128*/ + 0x180 0x1 0x181 0x1 0x182 0x1 0x183 0x1 + 0x184 0x1 0x185 0x1 0x186 0x1 0x187 0x1 + 0x188 0x1 0x189 0x1 0x18a 0x1 0x18b 0x1 + 0x18c 0x1 0x18d 0x1 0x18e 0x1 0x18f 0x1 + 0x190 0x1 0x191 0x1 0x192 0x1 0x193 0x1 + 0x194 0x1 0x195 0x1 0x196 0x1 0x197 0x1 + 0x198 0x1 0x199 0x1 0x19a 0x1 0x19b 0x1 + 0x19c 0x1 0x19d 0x1 0x19e 0x1 0x19f 0x1 + 0x1a0 0x1 0x1a1 0x1 0x1a2 0x1 0x1a3 0x1 + 0x1a4 0x1 0x1a5 0x1 0x1a6 0x1 0x1a7 0x1 + 0x1a8 0x1 0x1a9 0x1 0x1aa 0x1 0x1ab 0x1 + 0x1ac 0x1 0x1ad 0x1 0x1ae 0x1 0x1af 0x1 + 0x1b0 0x1 0x1b1 0x1 0x1b2 0x1 0x1b3 0x1 + 0x1b4 0x1 0x1b5 0x1 0x1b6 0x1 0x1b7 0x1 + 0x1b8 0x1 0x1b9 0x1 0x1ba 0x1 0x1bb 0x1 + 0x1bc 0x1 0x1bd 0x1 0x1be 0x1 0x1bf 0x1 + 0x1c0 0x1 0x1c1 0x1 0x1c2 0x1 0x1c3 0x1 + 0x1c4 0x1 0x1c5 0x1 0x1c6 0x1 0x1c7 0x1 + 0x1c8 0x1 0x1c9 0x1 0x1ca 0x1 0x1cb 0x1 + 0x1cc 0x1 0x1cd 0x1 0x1ce 0x1 0x1cf 0x1 + 0x1d0 0x1 0x1d1 0x1 0x1d2 0x1 0x1d3 0x1 + 0x1d4 0x1 0x1d5 0x1 0x1d6 0x1 0x1d7 0x1 + 0x1d8 0x1 0x1d9 0x1 0x1da 0x1 0x1db 0x1 + 0x1dc 0x1 0x1dd 0x1 0x1de 0x1 0x1df 0x1 + 0x1e0 0x1 0x1e1 0x1 0x1e2 0x1 0x1e3 0x1 + 0x1e4 0x1 0x1e5 0x1 0x1e6 0x1 0x1e7 0x1 + 0x1e8 0x1 0x1e9 0x1 0x1ea 0x1 0x1eb 0x1 + 0x1ec 0x1 0x1ed 0x1 0x1ee 0x1 0x1ef 0x1 + 0x1f0 0x1 0x1f1 0x1 0x1f2 0x1 0x1f3 0x1 + 0x1f4 0x1 0x1f5 0x1 0x1f6 0x1 0x1f7 0x1 + 0x1f8 0x1 0x1f9 0x1 0x1fa 0x1 0x1fb 0x1 + 0x1fc 0x1 0x1fd 0x1 0x1fe 0x1 0x1ff 0x1 + 0x200 0x1 0x201 0x1 0x202 0x1 0x203 0x1 + 0x204 0x1 0x205 0x1 0x206 0x1 0x207 0x1 + 0x208 0x1 0x209 0x1 0x20a 0x1 0x20b 0x1 + 0x20c 0x1 0x20d 0x1 0x20e 0x1 0x20f 0x1 + 0x210 0x1 0x211 0x1 0x212 0x1 0x213 0x1 + 0x214 0x1 0x215 0x1 0x216 0x1 0x217 0x1 + 0x218 0x1 0x219 0x1 0x21a 0x1 0x21b 0x1 + 0x21c 0x1 0x21d 0x1 0x21e 0x1 0x21f 0x1 + 0x220 0x1 0x221 0x1 0x222 0x1 0x223 0x1 + 0x224 0x1 0x225 0x1 0x226 0x1 0x227 0x1 + 0x228 0x1 0x229 0x1 0x22a 0x1 0x22b 0x1 + 0x22c 0x1 0x22d 0x1 0x22e 0x1 0x22f 0x1 + 0x230 0x1 0x231 0x1 0x232 0x1 0x233 0x1 + 0x234 0x1 0x235 0x1 0x236 0x1 0x237 0x1 + 0x238 0x1 0x239 0x1 0x23a 0x1 0x23b 0x1 + 0x23c 0x1 0x23d 0x1 0x23e 0x1 0x23f 0x1 + 0x240 0x1 0x241 0x1 0x242 0x1 0x243 0x1 + 0x244 0x1 0x245 0x1 0x246 0x1 0x247 0x1 + 0x248 0x1 0x249 0x1 0x24a 0x1 0x24b 0x1 + 0x24c 0x1 0x24d 0x1 0x24e 0x1 0x24f 0x1 + 0x250 0x1 0x251 0x1 0x252 0x1 0x253 0x1 + 0x254 0x1 0x255 0x1 0x256 0x1 0x257 0x1 + 0x258 0x1 0x259 0x1 0x25a 0x1 0x25b 0x1 + 0x25c 0x1 0x25d 0x1 0x25e 0x1 0x25f 0x1 + 0x260 0x1 0x261 0x1 0x262 0x1 0x263 0x1 + 0x264 0x1 0x265 0x1 0x266 0x1 0x267 0x1 + 0x268 0x1 0x269 0x1 0x26a 0x1 0x26b 0x1 + 0x26c 0x1 0x26d 0x1 0x26e 0x1 0x26f 0x1 + 0x270 0x1 0x271 0x1 0x272 0x1 0x273 0x1 + 0x274 0x1 0x275 0x1 0x276 0x1 0x277 0x1 + 0x278 0x1 0x279 0x1 0x27a 0x1 0x27b 0x1 + 0x27c 0x1 0x27d 0x1 0x27e 0x1 0x27f 0x1>; + buf-size = <4096>; + desc-num = <1024>; + dma-coherent; + }; + + eth0: ethernet@0{ + compatible = "hisilicon,hns-nic-v1"; + ae-name = "dsaf0"; + port-id = <0>; + local-mac-address = [00 00 00 01 00 58]; + status = "disabled"; + dma-coherent; + }; + eth1: ethernet@1{ + compatible = "hisilicon,hns-nic-v1"; + ae-name = "dsaf0"; + port-id = <1>; + status = "disabled"; + dma-coherent; + }; + eth2: ethernet@2{ + compatible = "hisilicon,hns-nic-v1"; + ae-name = "dsaf0"; + port-id = <2>; + local-mac-address = [00 00 00 01 00 5a]; + status = "disabled"; + dma-coherent; + }; + eth3: ethernet@3{ + compatible = "hisilicon,hns-nic-v1"; + ae-name = "dsaf0"; + port-id = <3>; + local-mac-address = [00 00 00 01 00 5b]; + status = "disabled"; + dma-coherent; + }; + eth4: ethernet@4{ + compatible = "hisilicon,hns-nic-v1"; + ae-name = "dsaf0"; + port-id = <4>; + local-mac-address = [00 00 00 01 00 5c]; + status = "disabled"; + dma-coherent; + }; + eth5: ethernet@5{ + compatible = "hisilicon,hns-nic-v1"; + ae-name = "dsaf0"; + port-id = <5>; + local-mac-address = [00 00 00 01 00 5d]; + status = "disabled"; + dma-coherent; + }; + eth6: ethernet@6{ + compatible = "hisilicon,hns-nic-v1"; + ae-name = "dsaf0"; + port-id = <6>; + local-mac-address = [00 00 00 01 00 5e]; + status = "disabled"; + dma-coherent; + }; + eth7: ethernet@7{ + compatible = "hisilicon,hns-nic-v1"; + ae-name = "dsaf0"; + port-id = <7>; + local-mac-address = [00 00 00 01 00 5f]; + status = "disabled"; + dma-coherent; + }; +}; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 34d71dd86781..5f760347aee2 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -51,6 +51,7 @@ CONFIG_PCI=y CONFIG_PCI_MSI=y CONFIG_PCI_XGENE=y CONFIG_SMP=y +CONFIG_SCHED_MC=y CONFIG_PREEMPT=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y @@ -109,6 +110,10 @@ CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_8250_MT6577=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_SAMSUNG=y +CONFIG_SERIAL_SAMSUNG_UARTS_4=y +CONFIG_SERIAL_SAMSUNG_UARTS=4 +CONFIG_SERIAL_SAMSUNG_CONSOLE=y CONFIG_SERIAL_MSM=y CONFIG_SERIAL_MSM_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y @@ -145,6 +150,10 @@ CONFIG_MMC_ARMMMCI=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SPI=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_IDMAC=y +CONFIG_MMC_DW_PLTFM=y +CONFIG_MMC_DW_EXYNOS=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_SYSCON=y diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 208cec08a74f..caafd63b8092 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,7 +12,6 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H -#include <linux/irqchip/arm-gic-acpi.h> #include <linux/mm.h> #include <linux/psci.h> @@ -92,4 +91,9 @@ static inline const char *acpi_get_enable_method(int cpu) { return acpi_psci_present() ? "psci" : NULL; } + +#ifdef CONFIG_ACPI_APEI +pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr); +#endif + #endif /*_ASM_ACPI_H*/ diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h new file mode 100644 index 000000000000..030cdcb46c6b --- /dev/null +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -0,0 +1,170 @@ +/* + * arch/arm64/include/asm/arch_gicv3.h + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_ARCH_GICV3_H +#define __ASM_ARCH_GICV3_H + +#include <asm/sysreg.h> + +#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) +#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) +#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) +#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) +#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) +#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) + +#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) + +/* + * System register definitions + */ +#define ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) +#define ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) +#define ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) +#define ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) +#define ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) +#define ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) + +#define __LR0_EL2(x) sys_reg(3, 4, 12, 12, x) +#define __LR8_EL2(x) sys_reg(3, 4, 12, 13, x) + +#define ICH_LR0_EL2 __LR0_EL2(0) +#define ICH_LR1_EL2 __LR0_EL2(1) +#define ICH_LR2_EL2 __LR0_EL2(2) +#define ICH_LR3_EL2 __LR0_EL2(3) +#define ICH_LR4_EL2 __LR0_EL2(4) +#define ICH_LR5_EL2 __LR0_EL2(5) +#define ICH_LR6_EL2 __LR0_EL2(6) +#define ICH_LR7_EL2 __LR0_EL2(7) +#define ICH_LR8_EL2 __LR8_EL2(0) +#define ICH_LR9_EL2 __LR8_EL2(1) +#define ICH_LR10_EL2 __LR8_EL2(2) +#define ICH_LR11_EL2 __LR8_EL2(3) +#define ICH_LR12_EL2 __LR8_EL2(4) +#define ICH_LR13_EL2 __LR8_EL2(5) +#define ICH_LR14_EL2 __LR8_EL2(6) +#define ICH_LR15_EL2 __LR8_EL2(7) + +#define __AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) +#define ICH_AP0R0_EL2 __AP0Rx_EL2(0) +#define ICH_AP0R1_EL2 __AP0Rx_EL2(1) +#define ICH_AP0R2_EL2 __AP0Rx_EL2(2) +#define ICH_AP0R3_EL2 __AP0Rx_EL2(3) + +#define __AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) +#define ICH_AP1R0_EL2 __AP1Rx_EL2(0) +#define ICH_AP1R1_EL2 __AP1Rx_EL2(1) +#define ICH_AP1R2_EL2 __AP1Rx_EL2(2) +#define ICH_AP1R3_EL2 __AP1Rx_EL2(3) + +#ifndef __ASSEMBLY__ + +#include <linux/stringify.h> + +/* + * Low-level accessors + * + * These system registers are 32 bits, but we make sure that the compiler + * sets the GP register's most significant bits to 0 with an explicit cast. + */ + +static inline void gic_write_eoir(u32 irq) +{ + asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" ((u64)irq)); + isb(); +} + +static inline void gic_write_dir(u32 irq) +{ + asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" ((u64)irq)); + isb(); +} + +static inline u64 gic_read_iar_common(void) +{ + u64 irqstat; + + asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); + return irqstat; +} + +/* + * Cavium ThunderX erratum 23154 + * + * The gicv3 of ThunderX requires a modified version for reading the + * IAR status to ensure data synchronization (access to icc_iar1_el1 + * is not sync'ed before and after). + */ +static inline u64 gic_read_iar_cavium_thunderx(void) +{ + u64 irqstat; + + asm volatile( + "nop;nop;nop;nop\n\t" + "nop;nop;nop;nop\n\t" + "mrs_s %0, " __stringify(ICC_IAR1_EL1) "\n\t" + "nop;nop;nop;nop" + : "=r" (irqstat)); + mb(); + + return irqstat; +} + +static inline void gic_write_pmr(u32 val) +{ + asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" ((u64)val)); +} + +static inline void gic_write_ctlr(u32 val) +{ + asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" ((u64)val)); + isb(); +} + +static inline void gic_write_grpen1(u32 val) +{ + asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" ((u64)val)); + isb(); +} + +static inline void gic_write_sgi1r(u64 val) +{ + asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); +} + +static inline u32 gic_read_sre(void) +{ + u64 val; + + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + return val; +} + +static inline void gic_write_sre(u32 val) +{ + asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" ((u64)val)); + isb(); +} + +#define gic_read_typer(c) readq_relaxed(c) +#define gic_write_irouter(v, c) writeq_relaxed(v, c) + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_ARCH_GICV3_H */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index b51f2cc22ca9..12eff928ef8b 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -193,4 +193,15 @@ lr .req x30 // link register str \src, [\tmp, :lo12:\sym] .endm +/* + * Annotate a function as position independent, i.e., safe to be called before + * the kernel virtual mapping is activated. + */ +#define ENDPIPROC(x) \ + .globl __pi_##x; \ + .type __pi_##x, %function; \ + .set __pi_##x, x; \ + .size __pi_##x, . - x; \ + ENDPROC(x) + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 35a67783cfa0..f3a3586a421c 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -54,14 +54,43 @@ #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) READ_ONCE((v)->counter) -#define atomic_set(v, i) (((v)->counter) = (i)) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) + +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_add_return_acquire atomic_add_return_acquire +#define atomic_add_return_release atomic_add_return_release +#define atomic_add_return atomic_add_return + +#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v)) +#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v)) +#define atomic_inc_return_release(v) atomic_add_return_release(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_sub_return_acquire atomic_sub_return_acquire +#define atomic_sub_return_release atomic_sub_return_release +#define atomic_sub_return atomic_sub_return + +#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v)) +#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v)) +#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) +#define atomic_dec_return(v) atomic_sub_return(1, (v)) + +#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) +#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) +#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) #define atomic_xchg(v, new) xchg(&((v)->counter), (new)) + +#define atomic_cmpxchg_relaxed(v, old, new) \ + cmpxchg_relaxed(&((v)->counter), (old), (new)) +#define atomic_cmpxchg_acquire(v, old, new) \ + cmpxchg_acquire(&((v)->counter), (old), (new)) +#define atomic_cmpxchg_release(v, old, new) \ + cmpxchg_release(&((v)->counter), (old), (new)) #define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new)) #define atomic_inc(v) atomic_add(1, (v)) #define atomic_dec(v) atomic_sub(1, (v)) -#define atomic_inc_return(v) atomic_add_return(1, (v)) -#define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) #define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) #define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) @@ -75,13 +104,39 @@ #define ATOMIC64_INIT ATOMIC_INIT #define atomic64_read atomic_read #define atomic64_set atomic_set + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_add_return_acquire atomic64_add_return_acquire +#define atomic64_add_return_release atomic64_add_return_release +#define atomic64_add_return atomic64_add_return + +#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v)) +#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v)) +#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1, (v)) + +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_sub_return_acquire atomic64_sub_return_acquire +#define atomic64_sub_return_release atomic64_sub_return_release +#define atomic64_sub_return atomic64_sub_return + +#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v)) +#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v)) +#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) + +#define atomic64_xchg_relaxed atomic_xchg_relaxed +#define atomic64_xchg_acquire atomic_xchg_acquire +#define atomic64_xchg_release atomic_xchg_release #define atomic64_xchg atomic_xchg + +#define atomic64_cmpxchg_relaxed atomic_cmpxchg_relaxed +#define atomic64_cmpxchg_acquire atomic_cmpxchg_acquire +#define atomic64_cmpxchg_release atomic_cmpxchg_release #define atomic64_cmpxchg atomic_cmpxchg #define atomic64_inc(v) atomic64_add(1, (v)) #define atomic64_dec(v) atomic64_sub(1, (v)) -#define atomic64_inc_return(v) atomic64_add_return(1, (v)) -#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) #define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index b3b5c4ae3800..74d0b8eb0799 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -55,40 +55,47 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ } \ __LL_SC_EXPORT(atomic_##op); -#define ATOMIC_OP_RETURN(op, asm_op) \ +#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ __LL_SC_INLINE int \ -__LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \ +__LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ { \ unsigned long tmp; \ int result; \ \ - asm volatile("// atomic_" #op "_return\n" \ + asm volatile("// atomic_" #op "_return" #name "\n" \ " prfm pstl1strm, %2\n" \ -"1: ldxr %w0, %2\n" \ +"1: ld" #acq "xr %w0, %2\n" \ " " #asm_op " %w0, %w0, %w3\n" \ -" stlxr %w1, %w0, %2\n" \ -" cbnz %w1, 1b" \ +" st" #rel "xr %w1, %w0, %2\n" \ +" cbnz %w1, 1b\n" \ +" " #mb \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "Ir" (i) \ - : "memory"); \ + : cl); \ \ - smp_mb(); \ return result; \ } \ -__LL_SC_EXPORT(atomic_##op##_return); +__LL_SC_EXPORT(atomic_##op##_return##name); + +#define ATOMIC_OPS(...) \ + ATOMIC_OP(__VA_ARGS__) \ + ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__) -#define ATOMIC_OPS(op, asm_op) \ - ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN(op, asm_op) +#define ATOMIC_OPS_RLX(...) \ + ATOMIC_OPS(__VA_ARGS__) \ + ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OPS(add, add) -ATOMIC_OPS(sub, sub) +ATOMIC_OPS_RLX(add, add) +ATOMIC_OPS_RLX(sub, sub) ATOMIC_OP(and, and) ATOMIC_OP(andnot, bic) ATOMIC_OP(or, orr) ATOMIC_OP(xor, eor) +#undef ATOMIC_OPS_RLX #undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -111,40 +118,47 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ } \ __LL_SC_EXPORT(atomic64_##op); -#define ATOMIC64_OP_RETURN(op, asm_op) \ +#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ __LL_SC_INLINE long \ -__LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \ +__LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ { \ long result; \ unsigned long tmp; \ \ - asm volatile("// atomic64_" #op "_return\n" \ + asm volatile("// atomic64_" #op "_return" #name "\n" \ " prfm pstl1strm, %2\n" \ -"1: ldxr %0, %2\n" \ +"1: ld" #acq "xr %0, %2\n" \ " " #asm_op " %0, %0, %3\n" \ -" stlxr %w1, %0, %2\n" \ -" cbnz %w1, 1b" \ +" st" #rel "xr %w1, %0, %2\n" \ +" cbnz %w1, 1b\n" \ +" " #mb \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "Ir" (i) \ - : "memory"); \ + : cl); \ \ - smp_mb(); \ return result; \ } \ -__LL_SC_EXPORT(atomic64_##op##_return); +__LL_SC_EXPORT(atomic64_##op##_return##name); + +#define ATOMIC64_OPS(...) \ + ATOMIC64_OP(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) -#define ATOMIC64_OPS(op, asm_op) \ - ATOMIC64_OP(op, asm_op) \ - ATOMIC64_OP_RETURN(op, asm_op) +#define ATOMIC64_OPS_RLX(...) \ + ATOMIC64_OPS(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OPS(add, add) -ATOMIC64_OPS(sub, sub) +ATOMIC64_OPS_RLX(add, add) +ATOMIC64_OPS_RLX(sub, sub) ATOMIC64_OP(and, and) ATOMIC64_OP(andnot, bic) ATOMIC64_OP(or, orr) ATOMIC64_OP(xor, eor) +#undef ATOMIC64_OPS_RLX #undef ATOMIC64_OPS #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP @@ -172,7 +186,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) } __LL_SC_EXPORT(atomic64_dec_if_positive); -#define __CMPXCHG_CASE(w, sz, name, mb, rel, cl) \ +#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \ __LL_SC_INLINE unsigned long \ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ unsigned long old, \ @@ -182,7 +196,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ \ asm volatile( \ " prfm pstl1strm, %[v]\n" \ - "1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \ + "1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ " cbnz %" #w "[tmp], 2f\n" \ " st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \ @@ -199,14 +213,22 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ } \ __LL_SC_EXPORT(__cmpxchg_case_##name); -__CMPXCHG_CASE(w, b, 1, , , ) -__CMPXCHG_CASE(w, h, 2, , , ) -__CMPXCHG_CASE(w, , 4, , , ) -__CMPXCHG_CASE( , , 8, , , ) -__CMPXCHG_CASE(w, b, mb_1, dmb ish, l, "memory") -__CMPXCHG_CASE(w, h, mb_2, dmb ish, l, "memory") -__CMPXCHG_CASE(w, , mb_4, dmb ish, l, "memory") -__CMPXCHG_CASE( , , mb_8, dmb ish, l, "memory") +__CMPXCHG_CASE(w, b, 1, , , , ) +__CMPXCHG_CASE(w, h, 2, , , , ) +__CMPXCHG_CASE(w, , 4, , , , ) +__CMPXCHG_CASE( , , 8, , , , ) +__CMPXCHG_CASE(w, b, acq_1, , a, , "memory") +__CMPXCHG_CASE(w, h, acq_2, , a, , "memory") +__CMPXCHG_CASE(w, , acq_4, , a, , "memory") +__CMPXCHG_CASE( , , acq_8, , a, , "memory") +__CMPXCHG_CASE(w, b, rel_1, , , l, "memory") +__CMPXCHG_CASE(w, h, rel_2, , , l, "memory") +__CMPXCHG_CASE(w, , rel_4, , , l, "memory") +__CMPXCHG_CASE( , , rel_8, , , l, "memory") +__CMPXCHG_CASE(w, b, mb_1, dmb ish, , l, "memory") +__CMPXCHG_CASE(w, h, mb_2, dmb ish, , l, "memory") +__CMPXCHG_CASE(w, , mb_4, dmb ish, , l, "memory") +__CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory") #undef __CMPXCHG_CASE diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 55d740e63459..1fce7908e690 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -75,24 +75,32 @@ static inline void atomic_add(int i, atomic_t *v) : "x30"); } -static inline int atomic_add_return(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ +static inline int atomic_add_return##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(add_return##name), \ + /* LSE atomics */ \ + " ldadd" #mb " %w[i], w30, %[v]\n" \ + " add %w[i], %w[i], w30") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return w0; \ +} - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(add_return), - /* LSE atomics */ - " ldaddal %w[i], w30, %[v]\n" - " add %w[i], %w[i], w30") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : "x30", "memory"); +ATOMIC_OP_ADD_RETURN(_relaxed, ) +ATOMIC_OP_ADD_RETURN(_acquire, a, "memory") +ATOMIC_OP_ADD_RETURN(_release, l, "memory") +ATOMIC_OP_ADD_RETURN( , al, "memory") - return w0; -} +#undef ATOMIC_OP_ADD_RETURN static inline void atomic_and(int i, atomic_t *v) { @@ -128,27 +136,34 @@ static inline void atomic_sub(int i, atomic_t *v) : "x30"); } -static inline int atomic_sub_return(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(sub_return) - " nop", - /* LSE atomics */ - " neg %w[i], %w[i]\n" - " ldaddal %w[i], w30, %[v]\n" - " add %w[i], %w[i], w30") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : "x30", "memory"); - - return w0; +#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \ +static inline int atomic_sub_return##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(sub_return##name) \ + " nop", \ + /* LSE atomics */ \ + " neg %w[i], %w[i]\n" \ + " ldadd" #mb " %w[i], w30, %[v]\n" \ + " add %w[i], %w[i], w30") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return w0; \ } +ATOMIC_OP_SUB_RETURN(_relaxed, ) +ATOMIC_OP_SUB_RETURN(_acquire, a, "memory") +ATOMIC_OP_SUB_RETURN(_release, l, "memory") +ATOMIC_OP_SUB_RETURN( , al, "memory") + +#undef ATOMIC_OP_SUB_RETURN #undef __LL_SC_ATOMIC #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) @@ -201,24 +216,32 @@ static inline void atomic64_add(long i, atomic64_t *v) : "x30"); } -static inline long atomic64_add_return(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ +static inline long atomic64_add_return##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(add_return##name), \ + /* LSE atomics */ \ + " ldadd" #mb " %[i], x30, %[v]\n" \ + " add %[i], %[i], x30") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return x0; \ +} - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(add_return), - /* LSE atomics */ - " ldaddal %[i], x30, %[v]\n" - " add %[i], %[i], x30") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : "x30", "memory"); +ATOMIC64_OP_ADD_RETURN(_relaxed, ) +ATOMIC64_OP_ADD_RETURN(_acquire, a, "memory") +ATOMIC64_OP_ADD_RETURN(_release, l, "memory") +ATOMIC64_OP_ADD_RETURN( , al, "memory") - return x0; -} +#undef ATOMIC64_OP_ADD_RETURN static inline void atomic64_and(long i, atomic64_t *v) { @@ -254,26 +277,34 @@ static inline void atomic64_sub(long i, atomic64_t *v) : "x30"); } -static inline long atomic64_sub_return(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ +static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(sub_return##name) \ + " nop", \ + /* LSE atomics */ \ + " neg %[i], %[i]\n" \ + " ldadd" #mb " %[i], x30, %[v]\n" \ + " add %[i], %[i], x30") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return x0; \ +} - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(sub_return) - " nop", - /* LSE atomics */ - " neg %[i], %[i]\n" - " ldaddal %[i], x30, %[v]\n" - " add %[i], %[i], x30") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : "x30", "memory"); +ATOMIC64_OP_SUB_RETURN(_relaxed, ) +ATOMIC64_OP_SUB_RETURN(_acquire, a, "memory") +ATOMIC64_OP_SUB_RETURN(_release, l, "memory") +ATOMIC64_OP_SUB_RETURN( , al, "memory") - return x0; -} +#undef ATOMIC64_OP_SUB_RETURN static inline long atomic64_dec_if_positive(atomic64_t *v) { @@ -333,14 +364,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ return x0; \ } -__CMPXCHG_CASE(w, b, 1, ) -__CMPXCHG_CASE(w, h, 2, ) -__CMPXCHG_CASE(w, , 4, ) -__CMPXCHG_CASE(x, , 8, ) -__CMPXCHG_CASE(w, b, mb_1, al, "memory") -__CMPXCHG_CASE(w, h, mb_2, al, "memory") -__CMPXCHG_CASE(w, , mb_4, al, "memory") -__CMPXCHG_CASE(x, , mb_8, al, "memory") +__CMPXCHG_CASE(w, b, 1, ) +__CMPXCHG_CASE(w, h, 2, ) +__CMPXCHG_CASE(w, , 4, ) +__CMPXCHG_CASE(x, , 8, ) +__CMPXCHG_CASE(w, b, acq_1, a, "memory") +__CMPXCHG_CASE(w, h, acq_2, a, "memory") +__CMPXCHG_CASE(w, , acq_4, a, "memory") +__CMPXCHG_CASE(x, , acq_8, a, "memory") +__CMPXCHG_CASE(w, b, rel_1, l, "memory") +__CMPXCHG_CASE(w, h, rel_2, l, "memory") +__CMPXCHG_CASE(w, , rel_4, l, "memory") +__CMPXCHG_CASE(x, , rel_8, l, "memory") +__CMPXCHG_CASE(w, b, mb_1, al, "memory") +__CMPXCHG_CASE(w, h, mb_2, al, "memory") +__CMPXCHG_CASE(w, , mb_4, al, "memory") +__CMPXCHG_CASE(x, , mb_8, al, "memory") #undef __LL_SC_CMPXCHG #undef __CMPXCHG_CASE diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index bde449936e2f..5082b30bc2c0 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -18,7 +18,7 @@ #include <asm/cachetype.h> -#define L1_CACHE_SHIFT 6 +#define L1_CACHE_SHIFT 7 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) /* diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index c75b8d027eb1..54efedaf331f 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -115,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +static inline void __local_flush_icache_all(void) +{ + asm("ic iallu"); + dsb(nsh); + isb(); +} + static inline void __flush_icache_all(void) { asm("ic ialluis"); diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h index da2fc9e3cedd..f5588692f1d4 100644 --- a/arch/arm64/include/asm/cachetype.h +++ b/arch/arm64/include/asm/cachetype.h @@ -34,8 +34,8 @@ #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) -#define ICACHEF_ALIASING BIT(0) -#define ICACHEF_AIVIVT BIT(1) +#define ICACHEF_ALIASING 0 +#define ICACHEF_AIVIVT 1 extern unsigned long __icache_flags; diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 899e9f1d19e4..9ea611ea69df 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -25,154 +25,151 @@ #include <asm/barrier.h> #include <asm/lse.h> -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size) -{ - unsigned long ret, tmp; - - switch (size) { - case 1: - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " prfm pstl1strm, %2\n" - "1: ldxrb %w0, %2\n" - " stlxrb %w1, %w3, %2\n" - " cbnz %w1, 1b\n" - " dmb ish", - /* LSE atomics */ - " nop\n" - " nop\n" - " swpalb %w3, %w0, %2\n" - " nop\n" - " nop") - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) - : "r" (x) - : "memory"); - break; - case 2: - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " prfm pstl1strm, %2\n" - "1: ldxrh %w0, %2\n" - " stlxrh %w1, %w3, %2\n" - " cbnz %w1, 1b\n" - " dmb ish", - /* LSE atomics */ - " nop\n" - " nop\n" - " swpalh %w3, %w0, %2\n" - " nop\n" - " nop") - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) - : "r" (x) - : "memory"); - break; - case 4: - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " prfm pstl1strm, %2\n" - "1: ldxr %w0, %2\n" - " stlxr %w1, %w3, %2\n" - " cbnz %w1, 1b\n" - " dmb ish", - /* LSE atomics */ - " nop\n" - " nop\n" - " swpal %w3, %w0, %2\n" - " nop\n" - " nop") - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) - : "r" (x) - : "memory"); - break; - case 8: - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " stlxr %w1, %3, %2\n" - " cbnz %w1, 1b\n" - " dmb ish", - /* LSE atomics */ - " nop\n" - " nop\n" - " swpal %3, %0, %2\n" - " nop\n" - " nop") - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) - : "r" (x) - : "memory"); - break; - default: - BUILD_BUG(); - } - - return ret; +/* + * We need separate acquire parameters for ll/sc and lse, since the full + * barrier case is generated as release+dmb for the former and + * acquire+release for the latter. + */ +#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl) \ +static inline unsigned long __xchg_case_##name(unsigned long x, \ + volatile void *ptr) \ +{ \ + unsigned long ret, tmp; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " prfm pstl1strm, %2\n" \ + "1: ld" #acq "xr" #sz "\t%" #w "0, %2\n" \ + " st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n" \ + " cbnz %w1, 1b\n" \ + " " #mb, \ + /* LSE atomics */ \ + " nop\n" \ + " nop\n" \ + " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \ + " nop\n" \ + " " #nop_lse) \ + : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \ + : "r" (x) \ + : cl); \ + \ + return ret; \ } -#define xchg(ptr,x) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \ - __ret; \ +__XCHG_CASE(w, b, 1, , , , , , ) +__XCHG_CASE(w, h, 2, , , , , , ) +__XCHG_CASE(w, , 4, , , , , , ) +__XCHG_CASE( , , 8, , , , , , ) +__XCHG_CASE(w, b, acq_1, , , a, a, , "memory") +__XCHG_CASE(w, h, acq_2, , , a, a, , "memory") +__XCHG_CASE(w, , acq_4, , , a, a, , "memory") +__XCHG_CASE( , , acq_8, , , a, a, , "memory") +__XCHG_CASE(w, b, rel_1, , , , , l, "memory") +__XCHG_CASE(w, h, rel_2, , , , , l, "memory") +__XCHG_CASE(w, , rel_4, , , , , l, "memory") +__XCHG_CASE( , , rel_8, , , , , l, "memory") +__XCHG_CASE(w, b, mb_1, dmb ish, nop, , a, l, "memory") +__XCHG_CASE(w, h, mb_2, dmb ish, nop, , a, l, "memory") +__XCHG_CASE(w, , mb_4, dmb ish, nop, , a, l, "memory") +__XCHG_CASE( , , mb_8, dmb ish, nop, , a, l, "memory") + +#undef __XCHG_CASE + +#define __XCHG_GEN(sfx) \ +static inline unsigned long __xchg##sfx(unsigned long x, \ + volatile void *ptr, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __xchg_case##sfx##_1(x, ptr); \ + case 2: \ + return __xchg_case##sfx##_2(x, ptr); \ + case 4: \ + return __xchg_case##sfx##_4(x, ptr); \ + case 8: \ + return __xchg_case##sfx##_8(x, ptr); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ +} + +__XCHG_GEN() +__XCHG_GEN(_acq) +__XCHG_GEN(_rel) +__XCHG_GEN(_mb) + +#undef __XCHG_GEN + +#define __xchg_wrapper(sfx, ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret; \ }) -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - switch (size) { - case 1: - return __cmpxchg_case_1(ptr, (u8)old, new); - case 2: - return __cmpxchg_case_2(ptr, (u16)old, new); - case 4: - return __cmpxchg_case_4(ptr, old, new); - case 8: - return __cmpxchg_case_8(ptr, old, new); - default: - BUILD_BUG(); - } - - unreachable(); +/* xchg */ +#define xchg_relaxed(...) __xchg_wrapper( , __VA_ARGS__) +#define xchg_acquire(...) __xchg_wrapper(_acq, __VA_ARGS__) +#define xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__) +#define xchg(...) __xchg_wrapper( _mb, __VA_ARGS__) + +#define __CMPXCHG_GEN(sfx) \ +static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \ + unsigned long old, \ + unsigned long new, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __cmpxchg_case##sfx##_1(ptr, (u8)old, new); \ + case 2: \ + return __cmpxchg_case##sfx##_2(ptr, (u16)old, new); \ + case 4: \ + return __cmpxchg_case##sfx##_4(ptr, old, new); \ + case 8: \ + return __cmpxchg_case##sfx##_8(ptr, old, new); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ } -static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - switch (size) { - case 1: - return __cmpxchg_case_mb_1(ptr, (u8)old, new); - case 2: - return __cmpxchg_case_mb_2(ptr, (u16)old, new); - case 4: - return __cmpxchg_case_mb_4(ptr, old, new); - case 8: - return __cmpxchg_case_mb_8(ptr, old, new); - default: - BUILD_BUG(); - } - - unreachable(); -} +__CMPXCHG_GEN() +__CMPXCHG_GEN(_acq) +__CMPXCHG_GEN(_rel) +__CMPXCHG_GEN(_mb) -#define cmpxchg(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \ - sizeof(*(ptr))); \ - __ret; \ -}) +#undef __CMPXCHG_GEN -#define cmpxchg_local(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr))); \ - __ret; \ +#define __cmpxchg_wrapper(sfx, ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg##sfx((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ }) +/* cmpxchg */ +#define cmpxchg_relaxed(...) __cmpxchg_wrapper( , __VA_ARGS__) +#define cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__) +#define cmpxchg_release(...) __cmpxchg_wrapper(_rel, __VA_ARGS__) +#define cmpxchg(...) __cmpxchg_wrapper( _mb, __VA_ARGS__) +#define cmpxchg_local cmpxchg_relaxed + +/* cmpxchg64 */ +#define cmpxchg64_relaxed cmpxchg_relaxed +#define cmpxchg64_acquire cmpxchg_acquire +#define cmpxchg64_release cmpxchg_release +#define cmpxchg64 cmpxchg +#define cmpxchg64_local cmpxchg_local + +/* cmpxchg_double */ #define system_has_cmpxchg_double() 1 #define __cmpxchg_double_check(ptr1, ptr2) \ @@ -202,6 +199,7 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) +/* this_cpu_cmpxchg */ #define _protect_cmpxchg_local(pcp, o, n) \ ({ \ typeof(*raw_cpu_ptr(&(pcp))) __ret; \ @@ -227,9 +225,4 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) -#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) -#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) - -#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n)) - #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 8e797b2fcc01..b5e9cee4b5f8 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -63,4 +63,8 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); void cpuinfo_store_cpu(void); void __init cpuinfo_store_boot_cpu(void); +void __init init_cpu_features(struct cpuinfo_arm64 *info); +void update_cpu_features(int cpu, struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot); + #endif /* __ASM_CPU_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 171570702bb8..11d5bb0fdd54 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -10,6 +10,7 @@ #define __ASM_CPUFEATURE_H #include <asm/hwcap.h> +#include <asm/sysreg.h> /* * In the arm64 world (as in the ARM world), elf_hwcap is used both internally @@ -27,18 +28,50 @@ #define ARM64_HAS_SYSREG_GIC_CPUIF 3 #define ARM64_HAS_PAN 4 #define ARM64_HAS_LSE_ATOMICS 5 +#define ARM64_WORKAROUND_CAVIUM_23154 6 -#define ARM64_NCAPS 6 +#define ARM64_NCAPS 7 #ifndef __ASSEMBLY__ #include <linux/kernel.h> +/* CPU feature register tracking */ +enum ftr_type { + FTR_EXACT, /* Use a predefined safe value */ + FTR_LOWER_SAFE, /* Smaller value is safe */ + FTR_HIGHER_SAFE,/* Bigger value is safe */ +}; + +#define FTR_STRICT true /* SANITY check strict matching required */ +#define FTR_NONSTRICT false /* SANITY check ignored */ + +struct arm64_ftr_bits { + bool strict; /* CPU Sanity check: strict matching required ? */ + enum ftr_type type; + u8 shift; + u8 width; + s64 safe_val; /* safe value for discrete features */ +}; + +/* + * @arm64_ftr_reg - Feature register + * @strict_mask Bits which should match across all CPUs for sanity. + * @sys_val Safe value across the CPUs (system view) + */ +struct arm64_ftr_reg { + u32 sys_id; + const char *name; + u64 strict_mask; + u64 sys_val; + struct arm64_ftr_bits *ftr_bits; +}; + struct arm64_cpu_capabilities { const char *desc; u16 capability; bool (*matches)(const struct arm64_cpu_capabilities *); - void (*enable)(void); + void (*enable)(void *); /* Called on all active CPUs */ union { struct { /* To be used for erratum handling only */ u32 midr_model; @@ -46,8 +79,11 @@ struct arm64_cpu_capabilities { }; struct { /* Feature register checking */ + u32 sys_reg; int field_pos; int min_field_value; + int hwcap_type; + unsigned long hwcap; }; }; }; @@ -75,19 +111,59 @@ static inline void cpus_set_cap(unsigned int num) __set_bit(num, cpu_hwcaps); } -static inline int __attribute_const__ cpuid_feature_extract_field(u64 features, - int field) +static inline int __attribute_const__ +cpuid_feature_extract_field_width(u64 features, int field, int width) +{ + return (s64)(features << (64 - width - field)) >> (64 - width); +} + +static inline int __attribute_const__ +cpuid_feature_extract_field(u64 features, int field) +{ + return cpuid_feature_extract_field_width(features, field, 4); +} + +static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp) +{ + return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); +} + +static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val) +{ + return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width); +} + +static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) { - return (s64)(features << (64 - 4 - field)) >> (64 - 4); + return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 || + cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; } +void __init setup_cpu_features(void); -void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, +void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void check_local_cpu_errata(void); -void check_local_cpu_features(void); -bool cpu_supports_mixed_endian_el0(void); -bool system_supports_mixed_endian_el0(void); + +#ifdef CONFIG_HOTPLUG_CPU +void verify_local_cpu_capabilities(void); +#else +static inline void verify_local_cpu_capabilities(void) +{ +} +#endif + +u64 read_system_reg(u32 id); + +static inline bool cpu_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); +} + +static inline bool system_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); +} #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ee6403df9fe4..1a5949364ed0 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -62,24 +62,18 @@ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) -#define ARM_CPU_IMP_ARM 0x41 -#define ARM_CPU_IMP_APM 0x50 +#define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_APM 0x50 +#define ARM_CPU_IMP_CAVIUM 0x43 -#define ARM_CPU_PART_AEM_V8 0xD0F -#define ARM_CPU_PART_FOUNDATION 0xD00 -#define ARM_CPU_PART_CORTEX_A57 0xD07 -#define ARM_CPU_PART_CORTEX_A53 0xD03 +#define ARM_CPU_PART_AEM_V8 0xD0F +#define ARM_CPU_PART_FOUNDATION 0xD00 +#define ARM_CPU_PART_CORTEX_A57 0xD07 +#define ARM_CPU_PART_CORTEX_A53 0xD03 -#define APM_CPU_PART_POTENZA 0x000 +#define APM_CPU_PART_POTENZA 0x000 -#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 -#define ID_AA64MMFR0_BIGENDEL0_MASK (0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT) -#define ID_AA64MMFR0_BIGENDEL0(mmfr0) \ - (((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT) -#define ID_AA64MMFR0_BIGEND_SHIFT 8 -#define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT) -#define ID_AA64MMFR0_BIGEND(mmfr0) \ - (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) +#define CAVIUM_CPU_PART_THUNDERX 0x0A1 #ifndef __ASSEMBLY__ @@ -112,12 +106,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void) { return read_cpuid(CTR_EL0); } - -static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) -{ - return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) || - (ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1); -} #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/dcc.h b/arch/arm64/include/asm/dcc.h new file mode 100644 index 000000000000..65e0190e97c8 --- /dev/null +++ b/arch/arm64/include/asm/dcc.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2014-2015 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A call to __dcc_getchar() or __dcc_putchar() is typically followed by + * a call to __dcc_getstatus(). We want to make sure that the CPU does + * not speculative read the DCC status before executing the read or write + * instruction. That's what the ISBs are for. + * + * The 'volatile' ensures that the compiler does not cache the status bits, + * and instead reads the DCC register every time. + */ +#ifndef __ASM_DCC_H +#define __ASM_DCC_H + +#include <asm/barrier.h> + +static inline u32 __dcc_getstatus(void) +{ + u32 ret; + + asm volatile("mrs %0, mdccsr_el0" : "=r" (ret)); + + return ret; +} + +static inline char __dcc_getchar(void) +{ + char c; + + asm volatile("mrs %0, dbgdtrrx_el0" : "=r" (c)); + isb(); + + return c; +} + +static inline void __dcc_putchar(char c) +{ + /* + * The typecast is to make absolutely certain that 'c' is + * zero-extended. + */ + asm volatile("msr dbgdtrtx_el0, %0" + : : "r" ((unsigned long)(unsigned char)c)); + isb(); +} + +#endif diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index cfdb34bedbcd..54d0ead41afc 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -54,16 +54,15 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return __generic_dma_ops(dev); } -static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - struct iommu_ops *iommu, bool coherent) -{ - if (!acpi_disabled && !dev->archdata.dma_ops) - dev->archdata.dma_ops = dma_ops; - - dev->archdata.dma_coherent = coherent; -} +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu, bool coherent); #define arch_setup_dma_ops arch_setup_dma_ops +#ifdef CONFIG_IOMMU_DMA +void arch_teardown_dma_ops(struct device *dev); +#define arch_teardown_dma_ops arch_teardown_dma_ops +#endif + /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 8b9884c726ad..309704544d22 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -17,6 +17,7 @@ #ifndef __ASSEMBLY__ #include <linux/kernel.h> +#include <linux/sizes.h> #include <asm/boot.h> #include <asm/page.h> @@ -55,11 +56,7 @@ enum fixed_addresses { * Temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. */ -#ifdef CONFIG_ARM64_64K_PAGES -#define NR_FIX_BTMAPS 4 -#else -#define NR_FIX_BTMAPS 64 -#endif +#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) #define FIX_BTMAPS_SLOTS 7 #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 4c47cb2fbb52..e54415ec6935 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -17,6 +17,7 @@ #define __ASM_HW_BREAKPOINT_H #include <asm/cputype.h> +#include <asm/cpufeature.h> #ifdef __KERNEL__ @@ -137,13 +138,17 @@ extern struct pmu perf_ops_bp; /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { - return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; + return 1 + + cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + ID_AA64DFR0_BRPS_SHIFT); } /* Determine number of WRP registers available. */ static inline int get_num_wrps(void) { - return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; + return 1 + + cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + ID_AA64DFR0_WRPS_SHIFT); } #endif /* __KERNEL__ */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 0ad735166d9f..400b80b49595 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -52,6 +52,14 @@ extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; #endif +enum { + CAP_HWCAP = 1, +#ifdef CONFIG_COMPAT + CAP_COMPAT_HWCAP, + CAP_COMPAT_HWCAP2, +#endif +}; + extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index bbb251b14746..23eb450b820b 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -1,24 +1,10 @@ #ifndef __ASM_IRQ_H #define __ASM_IRQ_H -#include <linux/irqchip/arm-gic-acpi.h> - #include <asm-generic/irq.h> struct pt_regs; -extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); -static inline void acpi_irq_init(void) -{ - /* - * Hardcode ACPI IRQ chip initialization to GICv2 for now. - * Proper irqchip infrastructure will be implemented along with - * incoming GICv2m|GICv3|ITS bits. - */ - acpi_gic_init(); -} -#define acpi_irq_init acpi_irq_init - #endif diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h new file mode 100644 index 000000000000..2774fa384c47 --- /dev/null +++ b/arch/arm64/include/asm/kasan.h @@ -0,0 +1,38 @@ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_KASAN + +#include <linux/linkage.h> +#include <asm/memory.h> + +/* + * KASAN_SHADOW_START: beginning of the kernel virtual addresses. + * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses. + */ +#define KASAN_SHADOW_START (VA_START) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3))) + +/* + * This value is used to map an address to the corresponding shadow + * address by the following formula: + * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; + * + * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END] + * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET + * should satisfy the following equation: + * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61) + */ +#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) + +void kasan_init(void); +asmlinkage void kasan_early_init(void); + +#else +static inline void kasan_init(void) { } +#endif + +#endif +#endif diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h new file mode 100644 index 000000000000..a459714ee29e --- /dev/null +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -0,0 +1,83 @@ +/* + * Kernel page table mapping + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_KERNEL_PGTABLE_H +#define __ASM_KERNEL_PGTABLE_H + + +/* + * The linear mapping and the start of memory are both 2M aligned (per + * the arm64 booting.txt requirements). Hence we can use section mapping + * with 4K (section size = 2M) but not with 16K (section size = 32M) or + * 64K (section size = 512M). + */ +#ifdef CONFIG_ARM64_4K_PAGES +#define ARM64_SWAPPER_USES_SECTION_MAPS 1 +#else +#define ARM64_SWAPPER_USES_SECTION_MAPS 0 +#endif + +/* + * The idmap and swapper page tables need some space reserved in the kernel + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) + * map the kernel. With the 64K page configuration, swapper and idmap need to + * map to pte level. The swapper also maps the FDT (see __create_page_tables + * for more information). Note that the number of ID map translation levels + * could be increased on the fly if system RAM is out of reach for the default + * VA range, so pages required to map highest possible PA are reserved in all + * cases. + */ +#if ARM64_SWAPPER_USES_SECTION_MAPS +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) +#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1) +#else +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) +#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT)) +#endif + +#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) +#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) + +/* Initial memory map size */ +#if ARM64_SWAPPER_USES_SECTION_MAPS +#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT +#define SWAPPER_BLOCK_SIZE SECTION_SIZE +#define SWAPPER_TABLE_SHIFT PUD_SHIFT +#else +#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT +#define SWAPPER_BLOCK_SIZE PAGE_SIZE +#define SWAPPER_TABLE_SHIFT PMD_SHIFT +#endif + +/* The size of the initial kernel direct mapping */ +#define SWAPPER_INIT_MAP_SIZE (_AC(1, UL) << SWAPPER_TABLE_SHIFT) + +/* + * Initial memory map attributes. + */ +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#if ARM64_SWAPPER_USES_SECTION_MAPS +#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#else +#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#endif + + +#endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 9694f2654593..5e6857b6bdc4 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -200,4 +200,20 @@ /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) +#define kvm_arm_exception_type \ + {0, "IRQ" }, \ + {1, "TRAP" } + +#define ECN(x) { ESR_ELx_EC_##x, #x } + +#define kvm_arm_exception_class \ + ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \ + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \ + ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \ + ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ + ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ + ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ + ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ + ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) + #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ed039688c221..a35ce7266aac 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -149,7 +149,10 @@ struct kvm_vcpu_arch { u32 mdscr_el1; } guest_debug_preserved; - /* Don't run the guest */ + /* vcpu power-off state */ + bool power_off; + + /* Don't run the guest (internal implementation need) */ bool pause; /* IO related fields */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 6b4c3ad75a2a..853953cd1f08 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -42,12 +42,14 @@ * PAGE_OFFSET - the virtual address of the start of the kernel image (top * (VA_BITS - 1)) * VA_BITS - the maximum number of bits for virtual addresses. + * VA_START - the first kernel virtual address. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) +#define VA_START (UL(0xffffffffffffffff) << VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) @@ -68,10 +70,6 @@ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) -#if TASK_SIZE_64 > MODULES_VADDR -#error Top of 64-bit user space clashes with start of module space -#endif - /* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h @@ -94,6 +92,7 @@ #define MT_DEVICE_GRE 2 #define MT_NORMAL_NC 3 #define MT_NORMAL 4 +#define MT_NORMAL_WT 5 /* * Memory types for Stage-2 translation diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 030208767185..990124a67eeb 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,15 +17,16 @@ #define __ASM_MMU_H typedef struct { - unsigned int id; - raw_spinlock_t id_lock; - void *vdso; + atomic64_t id; + void *vdso; } mm_context_t; -#define INIT_MM_CONTEXT(name) \ - .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), - -#define ASID(mm) ((mm)->context.id & 0xffff) +/* + * This macro is only used by the TLBI code, which cannot race with an + * ASID change and therefore doesn't need to reload the counter using + * atomic64_read. + */ +#define ASID(mm) ((mm)->context.id.counter & 0xffff) extern void paging_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 8ec41e5f56f0..c0e87898ba96 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -28,13 +28,6 @@ #include <asm/cputype.h> #include <asm/pgtable.h> -#define MAX_ASID_BITS 16 - -extern unsigned int cpu_last_asid; - -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void __new_context(struct mm_struct *mm); - #ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) { @@ -77,96 +70,38 @@ static inline bool __cpu_uses_extended_idmap(void) unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); } -static inline void __cpu_set_tcr_t0sz(u64 t0sz) -{ - unsigned long tcr; - - if (__cpu_uses_extended_idmap()) - asm volatile ( - " mrs %0, tcr_el1 ;" - " bfi %0, %1, %2, %3 ;" - " msr tcr_el1, %0 ;" - " isb" - : "=&r" (tcr) - : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); -} - -/* - * Set TCR.T0SZ to the value appropriate for activating the identity map. - */ -static inline void cpu_set_idmap_tcr_t0sz(void) -{ - __cpu_set_tcr_t0sz(idmap_t0sz); -} - /* * Set TCR.T0SZ to its default value (based on VA_BITS) */ static inline void cpu_set_default_tcr_t0sz(void) { - __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)); -} - -static inline void switch_new_context(struct mm_struct *mm) -{ - unsigned long flags; - - __new_context(mm); + unsigned long tcr; - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); -} + if (!__cpu_uses_extended_idmap()) + return; -static inline void check_and_switch_context(struct mm_struct *mm, - struct task_struct *tsk) -{ - /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. - */ - cpu_set_reserved_ttbr0(); - - if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) - /* - * The ASID is from the current generation, just switch to the - * new pgd. This condition is only true for calls from - * context_switch() and interrupts are already disabled. - */ - cpu_switch_mm(mm->pgd, mm); - else if (irqs_disabled()) - /* - * Defer the new ASID allocation until after the context - * switch critical region since __new_context() cannot be - * called with interrupts disabled. - */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); - else - /* - * That is a direct call to switch_mm() or activate_mm() with - * interrupts enabled and a new context. - */ - switch_new_context(mm); + asm volatile ( + " mrs %0, tcr_el1 ;" + " bfi %0, %1, %2, %3 ;" + " msr tcr_el1, %0 ;" + " isb" + : "=&r" (tcr) + : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); } -#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) +/* + * It would be nice to return ASIDs back to the allocator, but unfortunately + * that introduces a race with a generation rollover where we could erroneously + * free an ASID allocated in a future generation. We could workaround this by + * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap), + * but we'd then need to make sure that we didn't dirty any TLBs afterwards. + * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you + * take CPU migration into account. + */ #define destroy_context(mm) do { } while(0) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); -#define finish_arch_post_lock_switch \ - finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) -{ - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) { - struct mm_struct *mm = current->mm; - unsigned long flags; - - __new_context(mm); - - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); - } -} +#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) /* * This is called when "tsk" is about to enter lazy TLB mode. @@ -194,6 +129,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); + if (prev == next) + return; + /* * init_mm.pgd does not contain any user mappings and it is always * active for kernel addresses in TTBR1. Just set the reserved TTBR0. @@ -203,8 +141,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, return; } - if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) - check_and_switch_context(next, tsk); + check_and_switch_context(next, cpu); } #define deactivate_mm(tsk,mm) do { } while (0) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 7d9c7e4a424b..9b2f5a9d019d 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -20,31 +20,22 @@ #define __ASM_PAGE_H /* PAGE_SHIFT determines the page size */ +/* CONT_SHIFT determines the number of pages which can be tracked together */ #ifdef CONFIG_ARM64_64K_PAGES #define PAGE_SHIFT 16 +#define CONT_SHIFT 5 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define PAGE_SHIFT 14 +#define CONT_SHIFT 7 #else #define PAGE_SHIFT 12 +#define CONT_SHIFT 4 #endif -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -/* - * The idmap and swapper page tables need some space reserved in the kernel - * image. Both require pgd, pud (4 levels only) and pmd tables to (section) - * map the kernel. With the 64K page configuration, swapper and idmap need to - * map to pte level. The swapper also maps the FDT (see __create_page_tables - * for more information). Note that the number of ID map translation levels - * could be increased on the fly if system RAM is out of reach for the default - * VA range, so 3 pages are reserved in all cases. - */ -#ifdef CONFIG_ARM64_64K_PAGES -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) -#else -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) -#endif - -#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) -#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) +#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) +#define CONT_MASK (~(CONT_SIZE-1)) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 76420568d66a..c15053902942 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,6 +27,7 @@ #define check_pgt_cache() do { } while (0) #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 24154b055835..d6739e836f7b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -16,13 +16,46 @@ #ifndef __ASM_PGTABLE_HWDEF_H #define __ASM_PGTABLE_HWDEF_H +/* + * Number of page-table levels required to address 'va_bits' wide + * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) + * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence: + * + * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3)) + * + * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d)) + * + * We cannot include linux/kernel.h which defines DIV_ROUND_UP here + * due to build issues. So we open code DIV_ROUND_UP here: + * + * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3)) + * + * which gets simplified as : + */ +#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3)) + +/* + * Size mapped by an entry at level n ( 0 <= n <= 3) + * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits + * in the final page. The maximum number of translation levels supported by + * the architecture is 4. Hence, starting at at level n, we have further + * ((4 - n) - 1) levels of translation excluding the offset within the page. + * So, the total number of bits mapped by an entry at level n is : + * + * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT + * + * Rearranging it a bit we get : + * (4 - n) * (PAGE_SHIFT - 3) + 3 + */ +#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3) + #define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) /* * PMD_SHIFT determines the size a level 2 page table entry can map. */ #if CONFIG_PGTABLE_LEVELS > 2 -#define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3) +#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) #define PTRS_PER_PMD PTRS_PER_PTE @@ -32,7 +65,7 @@ * PUD_SHIFT determines the size a level 1 page table entry can map. */ #if CONFIG_PGTABLE_LEVELS > 3 -#define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3) +#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) #define PTRS_PER_PUD PTRS_PER_PTE @@ -42,7 +75,7 @@ * PGDIR_SHIFT determines the size a top-level page table entry can map * (depending on the configuration, this level can be 0, 1 or 2). */ -#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3) +#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) @@ -55,6 +88,13 @@ #define SECTION_MASK (~(SECTION_SIZE-1)) /* + * Contiguous page definitions. + */ +#define CONT_PTES (_AC(1, UL) << CONT_SHIFT) +/* the the numerical offset of the PTE within a range of CONT_PTES */ +#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) + +/* * Hardware page table definitions. * * Level 1 descriptor (PUD). @@ -83,6 +123,7 @@ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) +#define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52) #define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53) #define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54) @@ -105,6 +146,7 @@ #define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */ #define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */ +#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b0329be95cb1..f3acf421ded4 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -41,7 +41,14 @@ * fixed mappings and modules */ #define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) -#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) + +#ifndef CONFIG_KASAN +#define VMALLOC_START (VA_START) +#else +#include <asm/kasan.h> +#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K) +#endif + #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) @@ -60,8 +67,10 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT)) #define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) @@ -72,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) #define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) @@ -79,7 +89,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) @@ -140,6 +150,7 @@ extern struct page *empty_zero_page; #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) +#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) @@ -202,6 +213,16 @@ static inline pte_t pte_mkspecial(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_SPECIAL)); } +static inline pte_t pte_mkcont(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_CONT)); +} + +static inline pte_t pte_mknoncont(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_CONT)); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; @@ -496,7 +517,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = pte_mkdirty(pte); @@ -646,14 +667,17 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { /* - * set_pte() does not have a DSB for user mappings, so make sure that - * the page table write is visible. + * We don't do anything here, so there's a very small chance of + * us retaking a user fault which we just fixed up. The alternative + * is doing a dsb(ishst), but that penalises the fastpath. */ - dsb(ishst); } #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) +#define kc_vaddr_to_offset(v) ((v) & ~VA_START) +#define kc_offset_to_vaddr(o) ((o) | VA_START) + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h deleted file mode 100644 index b7710a59672c..000000000000 --- a/arch/arm64/include/asm/pmu.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Based on arch/arm/include/asm/pmu.h - * - * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_PMU_H -#define __ASM_PMU_H - -#ifdef CONFIG_HW_PERF_EVENTS - -/* The events for a given PMU register set. */ -struct pmu_hw_events { - /* - * The events that are active on the PMU for the given index. - */ - struct perf_event **events; - - /* - * A 1 bit for an index indicates that the counter is being used for - * an event. A 0 means that the counter can be used. - */ - unsigned long *used_mask; - - /* - * Hardware lock to serialize accesses to PMU registers. Needed for the - * read/modify/write sequences. - */ - raw_spinlock_t pmu_lock; -}; - -struct arm_pmu { - struct pmu pmu; - cpumask_t active_irqs; - int *irq_affinity; - const char *name; - irqreturn_t (*handle_irq)(int irq_num, void *dev); - void (*enable)(struct hw_perf_event *evt, int idx); - void (*disable)(struct hw_perf_event *evt, int idx); - int (*get_event_idx)(struct pmu_hw_events *hw_events, - struct hw_perf_event *hwc); - int (*set_event_filter)(struct hw_perf_event *evt, - struct perf_event_attr *attr); - u32 (*read_counter)(int idx); - void (*write_counter)(int idx, u32 val); - void (*start)(void); - void (*stop)(void); - void (*reset)(void *); - int (*map_event)(struct perf_event *event); - int num_events; - atomic_t active_events; - struct mutex reserve_mutex; - u64 max_period; - struct platform_device *plat_device; - struct pmu_hw_events *(*get_hw_events)(void); -}; - -#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) - -int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type); - -u64 armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); - -int armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); - -#endif /* CONFIG_HW_PERF_EVENTS */ -#endif /* __ASM_PMU_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 98f32355dc97..4acb7ca94fcd 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x) #endif -void cpu_enable_pan(void); +void cpu_enable_pan(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 536274ed292e..e9e5467e0bf4 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -83,14 +83,14 @@ #define compat_sp regs[13] #define compat_lr regs[14] #define compat_sp_hyp regs[15] -#define compat_sp_irq regs[16] -#define compat_lr_irq regs[17] -#define compat_sp_svc regs[18] -#define compat_lr_svc regs[19] -#define compat_sp_abt regs[20] -#define compat_lr_abt regs[21] -#define compat_sp_und regs[22] -#define compat_lr_und regs[23] +#define compat_lr_irq regs[16] +#define compat_sp_irq regs[17] +#define compat_lr_svc regs[18] +#define compat_sp_svc regs[19] +#define compat_lr_abt regs[20] +#define compat_sp_abt regs[21] +#define compat_lr_und regs[22] +#define compat_sp_und regs[23] #define compat_r8_fiq regs[24] #define compat_r9_fiq regs[25] #define compat_r10_fiq regs[26] diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 64d2d4884a9d..2eb714c4639f 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t); #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *, const void *, __kernel_size_t); +extern void *__memcpy(void *, const void *, __kernel_size_t); #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *, const void *, __kernel_size_t); +extern void *__memmove(void *, const void *, __kernel_size_t); #define __HAVE_ARCH_MEMCHR extern void *memchr(const void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMSET extern void *memset(void *, int, __kernel_size_t); +extern void *__memset(void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMCMP extern int memcmp(const void *, const void *, size_t); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) +#endif + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index a7f3d4b2514d..d48ab5b41f52 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -22,9 +22,6 @@ #include <asm/opcodes.h> -#define SCTLR_EL1_CP15BEN (0x1 << 5) -#define SCTLR_EL1_SED (0x1 << 8) - /* * ARMv8 ARM reserves the following encoding for system registers: * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", @@ -38,12 +35,162 @@ #define sys_reg(op0, op1, crn, crm, op2) \ ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define SCTLR_EL1_SPAN (1 << 23) +#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) +#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) +#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) + +#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) +#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) +#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) +#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) +#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) +#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) + +#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) +#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) +#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) +#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) +#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) +#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) +#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) + +#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) +#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) +#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) + +#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) +#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) + +#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) +#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) + +#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) +#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) + +#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) +#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) + +#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) +#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) + +#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ (!!x)<<8 | 0x1f) +/* SCTLR_EL1 */ +#define SCTLR_EL1_CP15BEN (0x1 << 5) +#define SCTLR_EL1_SED (0x1 << 8) +#define SCTLR_EL1_SPAN (0x1 << 23) + + +/* id_aa64isar0 */ +#define ID_AA64ISAR0_RDM_SHIFT 28 +#define ID_AA64ISAR0_ATOMICS_SHIFT 20 +#define ID_AA64ISAR0_CRC32_SHIFT 16 +#define ID_AA64ISAR0_SHA2_SHIFT 12 +#define ID_AA64ISAR0_SHA1_SHIFT 8 +#define ID_AA64ISAR0_AES_SHIFT 4 + +/* id_aa64pfr0 */ +#define ID_AA64PFR0_GIC_SHIFT 24 +#define ID_AA64PFR0_ASIMD_SHIFT 20 +#define ID_AA64PFR0_FP_SHIFT 16 +#define ID_AA64PFR0_EL3_SHIFT 12 +#define ID_AA64PFR0_EL2_SHIFT 8 +#define ID_AA64PFR0_EL1_SHIFT 4 +#define ID_AA64PFR0_EL0_SHIFT 0 + +#define ID_AA64PFR0_FP_NI 0xf +#define ID_AA64PFR0_FP_SUPPORTED 0x0 +#define ID_AA64PFR0_ASIMD_NI 0xf +#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 +#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 + +/* id_aa64mmfr0 */ +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 +#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_SNSMEM_SHIFT 12 +#define ID_AA64MMFR0_BIGENDEL_SHIFT 8 +#define ID_AA64MMFR0_ASID_SHIFT 4 +#define ID_AA64MMFR0_PARANGE_SHIFT 0 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 + +/* id_aa64mmfr1 */ +#define ID_AA64MMFR1_PAN_SHIFT 20 +#define ID_AA64MMFR1_LOR_SHIFT 16 +#define ID_AA64MMFR1_HPD_SHIFT 12 +#define ID_AA64MMFR1_VHE_SHIFT 8 +#define ID_AA64MMFR1_VMIDBITS_SHIFT 4 +#define ID_AA64MMFR1_HADBS_SHIFT 0 + +/* id_aa64dfr0 */ +#define ID_AA64DFR0_CTX_CMPS_SHIFT 28 +#define ID_AA64DFR0_WRPS_SHIFT 20 +#define ID_AA64DFR0_BRPS_SHIFT 12 +#define ID_AA64DFR0_PMUVER_SHIFT 8 +#define ID_AA64DFR0_TRACEVER_SHIFT 4 +#define ID_AA64DFR0_DEBUGVER_SHIFT 0 + +#define ID_ISAR5_RDM_SHIFT 24 +#define ID_ISAR5_CRC32_SHIFT 16 +#define ID_ISAR5_SHA2_SHIFT 12 +#define ID_ISAR5_SHA1_SHIFT 8 +#define ID_ISAR5_AES_SHIFT 4 +#define ID_ISAR5_SEVL_SHIFT 0 + +#define MVFR0_FPROUND_SHIFT 28 +#define MVFR0_FPSHVEC_SHIFT 24 +#define MVFR0_FPSQRT_SHIFT 20 +#define MVFR0_FPDIVIDE_SHIFT 16 +#define MVFR0_FPTRAP_SHIFT 12 +#define MVFR0_FPDP_SHIFT 8 +#define MVFR0_FPSP_SHIFT 4 +#define MVFR0_SIMD_SHIFT 0 + +#define MVFR1_SIMDFMAC_SHIFT 28 +#define MVFR1_FPHP_SHIFT 24 +#define MVFR1_SIMDHP_SHIFT 20 +#define MVFR1_SIMDSP_SHIFT 16 +#define MVFR1_SIMDINT_SHIFT 12 +#define MVFR1_SIMDLS_SHIFT 8 +#define MVFR1_FPDNAN_SHIFT 4 +#define MVFR1_FPFTZ_SHIFT 0 + + +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 + +#if defined(CONFIG_ARM64_4K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED +#elif defined(CONFIG_ARM64_64K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED +#endif + #ifdef __ASSEMBLY__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index dcd06d18a42a..90c7ff233735 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -23,8 +23,10 @@ #include <linux/compiler.h> -#ifndef CONFIG_ARM64_64K_PAGES +#ifdef CONFIG_ARM64_4K_PAGES #define THREAD_SIZE_ORDER 2 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define THREAD_SIZE_ORDER 0 #endif #define THREAD_SIZE 16384 @@ -111,7 +113,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 20 #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ -#define TIF_SWITCH_MM 23 /* deferred switch_mm */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index d6e6b6660380..ffdaea7954bb 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -37,17 +37,21 @@ static inline void __tlb_remove_table(void *_table) static inline void tlb_flush(struct mmu_gather *tlb) { - if (tlb->fullmm) { - flush_tlb_mm(tlb->mm); - } else { - struct vm_area_struct vma = { .vm_mm = tlb->mm, }; - /* - * The intermediate page table levels are already handled by - * the __(pte|pmd|pud)_free_tlb() functions, so last level - * TLBI is sufficient here. - */ - __flush_tlb_range(&vma, tlb->start, tlb->end, true); - } + struct vm_area_struct vma = { .vm_mm = tlb->mm, }; + + /* + * The ASID allocator will either invalidate the ASID or mark + * it as used. + */ + if (tlb->fullmm) + return; + + /* + * The intermediate page table levels are already handled by + * the __(pte|pmd|pud)_free_tlb() functions, so last level + * TLBI is sufficient here. + */ + __flush_tlb_range(&vma, tlb->start, tlb->end, true); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 7bd2da021658..b460ae28e346 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -63,6 +63,14 @@ * only require the D-TLB to be invalidated. * - kaddr - Kernel virtual memory address */ +static inline void local_flush_tlb_all(void) +{ + dsb(nshst); + asm("tlbi vmalle1"); + dsb(nsh); + isb(); +} + static inline void flush_tlb_all(void) { dsb(ishst); @@ -73,7 +81,7 @@ static inline void flush_tlb_all(void) static inline void flush_tlb_mm(struct mm_struct *mm) { - unsigned long asid = (unsigned long)ASID(mm) << 48; + unsigned long asid = ASID(mm) << 48; dsb(ishst); asm("tlbi aside1is, %0" : : "r" (asid)); @@ -83,8 +91,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | - ((unsigned long)ASID(vma->vm_mm) << 48); + unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48); dsb(ishst); asm("tlbi vale1is, %0" : : "r" (addr)); @@ -101,7 +108,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, bool last_level) { - unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; + unsigned long asid = ASID(vma->vm_mm) << 48; unsigned long addr; if ((end - start) > MAX_TLB_RANGE) { @@ -154,9 +161,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end static inline void __flush_tlb_pgtable(struct mm_struct *mm, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48); + unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); - dsb(ishst); asm("tlbi vae1is, %0" : : "r" (addr)); dsb(ish); } diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 3bc498c250dc..41e58fe3c041 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 388 +#define __NR_compat_syscalls 390 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index cef934a90f17..5b925b761a2a 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -797,3 +797,12 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create) __SYSCALL(__NR_bpf, sys_bpf) #define __NR_execveat 387 __SYSCALL(__NR_execveat, compat_sys_execveat) +#define __NR_userfaultfd 388 +__SYSCALL(__NR_userfaultfd, sys_userfaultfd) +#define __NR_membarrier 389 +__SYSCALL(__NR_membarrier, sys_membarrier) + +/* + * Please add new compat syscalls above this comment and update + * __NR_compat_syscalls in asm/unistd.h. + */ diff --git a/arch/arm64/include/uapi/asm/signal.h b/arch/arm64/include/uapi/asm/signal.h index 8d1e7236431b..991bf5db2ca1 100644 --- a/arch/arm64/include/uapi/asm/signal.h +++ b/arch/arm64/include/uapi/asm/signal.h @@ -19,6 +19,9 @@ /* Required for AArch32 compatibility. */ #define SA_RESTORER 0x04000000 +#define MINSIGSTKSZ 5120 +#define SIGSTKSZ 16384 + #include <asm-generic/signal.h> #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 22dc9bc781be..474691f8b13a 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -4,7 +4,6 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) CFLAGS_REMOVE_ftrace.o = -pg @@ -20,6 +19,12 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o +extra-$(CONFIG_EFI) := efi-entry.o + +OBJCOPYFLAGS := --prefix-symbols=__efistub_ +$(obj)/%.stub.o: $(obj)/%.o FORCE + $(call if_changed,objcopy) + arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o entry32.o \ ../../arm/kernel/opcodes.o @@ -32,7 +37,7 @@ arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_KGDB) += kgdb.o -arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o +arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o @@ -40,7 +45,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) head-y := head.o -extra-y := $(head-y) vmlinux.lds +extra-y += $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 19de7537e7d3..d1ce8e2f98b9 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -29,6 +29,11 @@ #include <asm/cpu_ops.h> #include <asm/smp_plat.h> +#ifdef CONFIG_ACPI_APEI +# include <linux/efi.h> +# include <asm/pgtable.h> +#endif + int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; EXPORT_SYMBOL(acpi_disabled); @@ -206,27 +211,26 @@ void __init acpi_boot_table_init(void) } } -void __init acpi_gic_init(void) +#ifdef CONFIG_ACPI_APEI +pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) { - struct acpi_table_header *table; - acpi_status status; - acpi_size tbl_size; - int err; - - if (acpi_disabled) - return; - - status = acpi_get_table_with_size(ACPI_SIG_MADT, 0, &table, &tbl_size); - if (ACPI_FAILURE(status)) { - const char *msg = acpi_format_exception(status); - - pr_err("Failed to get MADT table, %s\n", msg); - return; - } + /* + * According to "Table 8 Map: EFI memory types to AArch64 memory + * types" of UEFI 2.5 section 2.3.6.1, each EFI memory type is + * mapped to a corresponding MAIR attribute encoding. + * The EFI memory attribute advises all possible capabilities + * of a memory region. We use the most efficient capability. + */ - err = gic_v2_acpi_init(table); - if (err) - pr_err("Failed to initialize GIC IRQ controller"); + u64 attr; - early_acpi_os_unmap_memory((char *)table, tbl_size); + attr = efi_mem_attributes(addr); + if (attr & EFI_MEMORY_WB) + return PAGE_KERNEL; + if (attr & EFI_MEMORY_WT) + return __pgprot(PROT_NORMAL_WT); + if (attr & EFI_MEMORY_WC) + return __pgprot(PROT_NORMAL_NC); + return __pgprot(PROT_DEVICE_nGnRnE); } +#endif diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index a85843ddbde8..3b6d8cc9dfe0 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -51,6 +51,9 @@ EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(memcmp); diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index bcee7abac68e..937f5e58a4d3 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -284,21 +284,23 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) __asm__ __volatile__( \ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ - " mov %w2, %w1\n" \ - "0: ldxr"B" %w1, [%3]\n" \ - "1: stxr"B" %w0, %w2, [%3]\n" \ + "0: ldxr"B" %w2, [%3]\n" \ + "1: stxr"B" %w0, %w1, [%3]\n" \ " cbz %w0, 2f\n" \ " mov %w0, %w4\n" \ + " b 3f\n" \ "2:\n" \ + " mov %w1, %w2\n" \ + "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ - "3: mov %w0, %w5\n" \ - " b 2b\n" \ + "4: mov %w0, %w5\n" \ + " b 3b\n" \ " .popsection" \ " .pushsection __ex_table,\"a\"\n" \ " .align 3\n" \ - " .quad 0b, 3b\n" \ - " .quad 1b, 3b\n" \ + " .quad 0b, 4b\n" \ + " .quad 1b, 4b\n" \ " .popsection\n" \ ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 8d89cf8dae55..25de8b244961 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -60,7 +60,7 @@ int main(void) DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); - DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); + DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); BLANK(); DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6ffd91438560..24926f2504f7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -23,6 +23,7 @@ #define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) +#define MIDR_THUNDERX MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ MIDR_ARCHITECTURE_MASK) @@ -82,11 +83,19 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04), }, #endif +#ifdef CONFIG_CAVIUM_ERRATUM_23154 + { + /* Cavium ThunderX, pass 1.x */ + .desc = "Cavium erratum 23154", + .capability = ARM64_WORKAROUND_CAVIUM_23154, + MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01), + }, +#endif { } }; void check_local_cpu_errata(void) { - check_cpu_capabilities(arm64_errata, "enabling workaround for"); + update_cpu_capabilities(arm64_errata, "enabling workaround for"); } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3c9aed32f70b..52f0d7a5a1c2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -16,12 +16,569 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#define pr_fmt(fmt) "alternatives: " fmt +#define pr_fmt(fmt) "CPU features: " fmt +#include <linux/bsearch.h> +#include <linux/sort.h> #include <linux/types.h> #include <asm/cpu.h> #include <asm/cpufeature.h> +#include <asm/cpu_ops.h> #include <asm/processor.h> +#include <asm/sysreg.h> + +unsigned long elf_hwcap __read_mostly; +EXPORT_SYMBOL_GPL(elf_hwcap); + +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT \ + (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ + COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_LPAE) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; +#endif + +DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); + +#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + { \ + .strict = STRICT, \ + .type = TYPE, \ + .shift = SHIFT, \ + .width = WIDTH, \ + .safe_val = SAFE_VAL, \ + } + +#define ARM64_FTR_END \ + { \ + .width = 0, \ + } + +static struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), + /* Linux doesn't care about the EL3 */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), + /* Linux shouldn't care about secure memory */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0), + /* + * Differing PARange is fine as long as all peripherals and memory are mapped + * within the minimum PARange of all CPUs + */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_ctr[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + /* + * Linux can handle differing I-cache policies. Userspace JITs will + * make use of *minLine + */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_mmfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), /* InnerShr */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_mvfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_dczid[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ + ARM64_FTR_END, +}; + + +static struct arm64_ftr_bits ftr_id_isar5[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_mmfr4[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_pfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */ + ARM64_FTR_END, +}; + +/* + * Common ftr bits for a 32bit register with all hidden, strict + * attributes, with 4bit feature fields and a default safe value of + * 0. Covers the following 32bit registers: + * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] + */ +static struct arm64_ftr_bits ftr_generic_32bits[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_generic[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_generic32[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_aa64raz[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), + ARM64_FTR_END, +}; + +#define ARM64_FTR_REG(id, table) \ + { \ + .sys_id = id, \ + .name = #id, \ + .ftr_bits = &((table)[0]), \ + } + +static struct arm64_ftr_reg arm64_ftr_regs[] = { + + /* Op1 = 0, CRn = 0, CRm = 1 */ + ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), + ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0), + ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits), + + /* Op1 = 0, CRn = 0, CRm = 2 */ + ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5), + ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4), + + /* Op1 = 0, CRn = 0, CRm = 3 */ + ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), + + /* Op1 = 0, CRn = 0, CRm = 4 */ + ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz), + + /* Op1 = 0, CRn = 0, CRm = 5 */ + ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), + ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic), + + /* Op1 = 0, CRn = 0, CRm = 6 */ + ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), + ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz), + + /* Op1 = 0, CRn = 0, CRm = 7 */ + ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), + ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + + /* Op1 = 3, CRn = 0, CRm = 0 */ + ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr), + ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), + + /* Op1 = 3, CRn = 14, CRm = 0 */ + ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32), +}; + +static int search_cmp_ftr_reg(const void *id, const void *regp) +{ + return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id; +} + +/* + * get_arm64_ftr_reg - Lookup a feature register entry using its + * sys_reg() encoding. With the array arm64_ftr_regs sorted in the + * ascending order of sys_id , we use binary search to find a matching + * entry. + * + * returns - Upon success, matching ftr_reg entry for id. + * - NULL on failure. It is upto the caller to decide + * the impact of a failure. + */ +static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) +{ + return bsearch((const void *)(unsigned long)sys_id, + arm64_ftr_regs, + ARRAY_SIZE(arm64_ftr_regs), + sizeof(arm64_ftr_regs[0]), + search_cmp_ftr_reg); +} + +static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val) +{ + u64 mask = arm64_ftr_mask(ftrp); + + reg &= ~mask; + reg |= (ftr_val << ftrp->shift) & mask; + return reg; +} + +static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur) +{ + s64 ret = 0; + + switch (ftrp->type) { + case FTR_EXACT: + ret = ftrp->safe_val; + break; + case FTR_LOWER_SAFE: + ret = new < cur ? new : cur; + break; + case FTR_HIGHER_SAFE: + ret = new > cur ? new : cur; + break; + default: + BUG(); + } + + return ret; +} + +static int __init sort_cmp_ftr_regs(const void *a, const void *b) +{ + return ((const struct arm64_ftr_reg *)a)->sys_id - + ((const struct arm64_ftr_reg *)b)->sys_id; +} + +static void __init swap_ftr_regs(void *a, void *b, int size) +{ + struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a; + *(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b; + *(struct arm64_ftr_reg *)b = tmp; +} + +static void __init sort_ftr_regs(void) +{ + /* Keep the array sorted so that we can do the binary search */ + sort(arm64_ftr_regs, + ARRAY_SIZE(arm64_ftr_regs), + sizeof(arm64_ftr_regs[0]), + sort_cmp_ftr_regs, + swap_ftr_regs); +} + +/* + * Initialise the CPU feature register from Boot CPU values. + * Also initiliases the strict_mask for the register. + */ +static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) +{ + u64 val = 0; + u64 strict_mask = ~0x0ULL; + struct arm64_ftr_bits *ftrp; + struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); + + BUG_ON(!reg); + + for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { + s64 ftr_new = arm64_ftr_value(ftrp, new); + + val = arm64_ftr_set_value(ftrp, val, ftr_new); + if (!ftrp->strict) + strict_mask &= ~arm64_ftr_mask(ftrp); + } + reg->sys_val = val; + reg->strict_mask = strict_mask; +} + +void __init init_cpu_features(struct cpuinfo_arm64 *info) +{ + /* Before we start using the tables, make sure it is sorted */ + sort_ftr_regs(); + + init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr); + init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid); + init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq); + init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0); + init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); + init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); + init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); + init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); + init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); + init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); + init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); + init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); + init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); + init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); + init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); + init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); + init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); + init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); + init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); + init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); + init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); + init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); + init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); + init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); + init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); + init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); +} + +static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) +{ + struct arm64_ftr_bits *ftrp; + + for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { + s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val); + s64 ftr_new = arm64_ftr_value(ftrp, new); + + if (ftr_cur == ftr_new) + continue; + /* Find a safe value */ + ftr_new = arm64_ftr_safe_value(ftrp, ftr_new, ftr_cur); + reg->sys_val = arm64_ftr_set_value(ftrp, reg->sys_val, ftr_new); + } + +} + +static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot) +{ + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); + + BUG_ON(!regp); + update_cpu_ftr_reg(regp, val); + if ((boot & regp->strict_mask) == (val & regp->strict_mask)) + return 0; + pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016llx, CPU%d: %#016llx\n", + regp->name, boot, cpu, val); + return 1; +} + +/* + * Update system wide CPU feature registers with the values from a + * non-boot CPU. Also performs SANITY checks to make sure that there + * aren't any insane variations from that of the boot CPU. + */ +void update_cpu_features(int cpu, + struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot) +{ + int taint = 0; + + /* + * The kernel can handle differing I-cache policies, but otherwise + * caches should look identical. Userspace JITs will make use of + * *minLine. + */ + taint |= check_update_ftr_reg(SYS_CTR_EL0, cpu, + info->reg_ctr, boot->reg_ctr); + + /* + * Userspace may perform DC ZVA instructions. Mismatched block sizes + * could result in too much or too little memory being zeroed if a + * process is preempted and migrated between CPUs. + */ + taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu, + info->reg_dczid, boot->reg_dczid); + + /* If different, timekeeping will be broken (especially with KVM) */ + taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu, + info->reg_cntfrq, boot->reg_cntfrq); + + /* + * The kernel uses self-hosted debug features and expects CPUs to + * support identical debug features. We presently need CTX_CMPs, WRPs, + * and BRPs to be identical. + * ID_AA64DFR1 is currently RES0. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64DFR0_EL1, cpu, + info->reg_id_aa64dfr0, boot->reg_id_aa64dfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64DFR1_EL1, cpu, + info->reg_id_aa64dfr1, boot->reg_id_aa64dfr1); + /* + * Even in big.LITTLE, processors should be identical instruction-set + * wise. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR0_EL1, cpu, + info->reg_id_aa64isar0, boot->reg_id_aa64isar0); + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu, + info->reg_id_aa64isar1, boot->reg_id_aa64isar1); + + /* + * Differing PARange support is fine as long as all peripherals and + * memory are mapped within the minimum PARange of all CPUs. + * Linux should not care about secure memory. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR0_EL1, cpu, + info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu, + info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); + + /* + * EL3 is not our concern. + * ID_AA64PFR1 is currently RES0. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, + info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu, + info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1); + + /* + * If we have AArch32, we care about 32-bit features for compat. These + * registers should be RES0 otherwise. + */ + taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, + info->reg_id_dfr0, boot->reg_id_dfr0); + taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, + info->reg_id_isar0, boot->reg_id_isar0); + taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, + info->reg_id_isar1, boot->reg_id_isar1); + taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, + info->reg_id_isar2, boot->reg_id_isar2); + taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, + info->reg_id_isar3, boot->reg_id_isar3); + taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, + info->reg_id_isar4, boot->reg_id_isar4); + taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, + info->reg_id_isar5, boot->reg_id_isar5); + + /* + * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and + * ACTLR formats could differ across CPUs and therefore would have to + * be trapped for virtualization anyway. + */ + taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, + info->reg_id_mmfr0, boot->reg_id_mmfr0); + taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, + info->reg_id_mmfr1, boot->reg_id_mmfr1); + taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, + info->reg_id_mmfr2, boot->reg_id_mmfr2); + taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, + info->reg_id_mmfr3, boot->reg_id_mmfr3); + taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, + info->reg_id_pfr0, boot->reg_id_pfr0); + taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, + info->reg_id_pfr1, boot->reg_id_pfr1); + taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, + info->reg_mvfr0, boot->reg_mvfr0); + taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, + info->reg_mvfr1, boot->reg_mvfr1); + taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, + info->reg_mvfr2, boot->reg_mvfr2); + + /* + * Mismatched CPU features are a recipe for disaster. Don't even + * pretend to support them. + */ + WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC, + "Unsupported CPU feature variation.\n"); +} + +u64 read_system_reg(u32 id) +{ + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); + + /* We shouldn't get a request for an unsupported register */ + BUG_ON(!regp); + return regp->sys_val; +} + +#include <linux/irqchip/arm-gic-v3.h> static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) @@ -31,34 +588,46 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) return val >= entry->min_field_value; } -#define __ID_FEAT_CHK(reg) \ -static bool __maybe_unused \ -has_##reg##_feature(const struct arm64_cpu_capabilities *entry) \ -{ \ - u64 val; \ - \ - val = read_cpuid(reg##_el1); \ - return feature_matches(val, entry); \ +static bool +has_cpuid_feature(const struct arm64_cpu_capabilities *entry) +{ + u64 val; + + val = read_system_reg(entry->sys_reg); + return feature_matches(val, entry); } -__ID_FEAT_CHK(id_aa64pfr0); -__ID_FEAT_CHK(id_aa64mmfr1); -__ID_FEAT_CHK(id_aa64isar0); +static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry) +{ + bool has_sre; + + if (!has_cpuid_feature(entry)) + return false; + + has_sre = gic_enable_sre(); + if (!has_sre) + pr_warn_once("%s present but disabled by higher exception level\n", + entry->desc); + + return has_sre; +} static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .matches = has_id_aa64pfr0_feature, - .field_pos = 24, + .matches = has_useable_gicv3_cpuif, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .field_pos = ID_AA64PFR0_GIC_SHIFT, .min_field_value = 1, }, #ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, - .matches = has_id_aa64mmfr1_feature, - .field_pos = 20, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .field_pos = ID_AA64MMFR1_PAN_SHIFT, .min_field_value = 1, .enable = cpu_enable_pan, }, @@ -67,15 +636,101 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, - .matches = has_id_aa64isar0_feature, - .field_pos = 20, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, .min_field_value = 2, }, #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ {}, }; -void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, +#define HWCAP_CAP(reg, field, min_value, type, cap) \ + { \ + .desc = #cap, \ + .matches = has_cpuid_feature, \ + .sys_reg = reg, \ + .field_pos = field, \ + .min_field_value = min_value, \ + .hwcap_type = type, \ + .hwcap = cap, \ + } + +static const struct arm64_cpu_capabilities arm64_hwcaps[] = { + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD), +#ifdef CONFIG_COMPAT + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), +#endif + {}, +}; + +static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +{ + switch (cap->hwcap_type) { + case CAP_HWCAP: + elf_hwcap |= cap->hwcap; + break; +#ifdef CONFIG_COMPAT + case CAP_COMPAT_HWCAP: + compat_elf_hwcap |= (u32)cap->hwcap; + break; + case CAP_COMPAT_HWCAP2: + compat_elf_hwcap2 |= (u32)cap->hwcap; + break; +#endif + default: + WARN_ON(1); + break; + } +} + +/* Check if we have a particular HWCAP enabled */ +static bool cpus_have_hwcap(const struct arm64_cpu_capabilities *cap) +{ + bool rc; + + switch (cap->hwcap_type) { + case CAP_HWCAP: + rc = (elf_hwcap & cap->hwcap) != 0; + break; +#ifdef CONFIG_COMPAT + case CAP_COMPAT_HWCAP: + rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0; + break; + case CAP_COMPAT_HWCAP2: + rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0; + break; +#endif + default: + WARN_ON(1); + rc = false; + } + + return rc; +} + +static void setup_cpu_hwcaps(void) +{ + int i; + const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; + + for (i = 0; hwcaps[i].desc; i++) + if (hwcaps[i].matches(&hwcaps[i])) + cap_set_hwcap(&hwcaps[i]); +} + +void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { int i; @@ -88,15 +743,178 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, pr_info("%s %s\n", info, caps[i].desc); cpus_set_cap(caps[i].capability); } +} + +/* + * Run through the enabled capabilities and enable() it on all active + * CPUs + */ +static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +{ + int i; + + for (i = 0; caps[i].desc; i++) + if (caps[i].enable && cpus_have_cap(caps[i].capability)) + on_each_cpu(caps[i].enable, NULL, true); +} + +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Flag to indicate if we have computed the system wide + * capabilities based on the boot time active CPUs. This + * will be used to determine if a new booting CPU should + * go through the verification process to make sure that it + * supports the system capabilities, without using a hotplug + * notifier. + */ +static bool sys_caps_initialised; + +static inline void set_sys_caps_initialised(void) +{ + sys_caps_initialised = true; +} + +/* + * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + */ +static u64 __raw_read_system_reg(u32 sys_id) +{ + switch (sys_id) { + case SYS_ID_PFR0_EL1: return (u64)read_cpuid(ID_PFR0_EL1); + case SYS_ID_PFR1_EL1: return (u64)read_cpuid(ID_PFR1_EL1); + case SYS_ID_DFR0_EL1: return (u64)read_cpuid(ID_DFR0_EL1); + case SYS_ID_MMFR0_EL1: return (u64)read_cpuid(ID_MMFR0_EL1); + case SYS_ID_MMFR1_EL1: return (u64)read_cpuid(ID_MMFR1_EL1); + case SYS_ID_MMFR2_EL1: return (u64)read_cpuid(ID_MMFR2_EL1); + case SYS_ID_MMFR3_EL1: return (u64)read_cpuid(ID_MMFR3_EL1); + case SYS_ID_ISAR0_EL1: return (u64)read_cpuid(ID_ISAR0_EL1); + case SYS_ID_ISAR1_EL1: return (u64)read_cpuid(ID_ISAR1_EL1); + case SYS_ID_ISAR2_EL1: return (u64)read_cpuid(ID_ISAR2_EL1); + case SYS_ID_ISAR3_EL1: return (u64)read_cpuid(ID_ISAR3_EL1); + case SYS_ID_ISAR4_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); + case SYS_MVFR0_EL1: return (u64)read_cpuid(MVFR0_EL1); + case SYS_MVFR1_EL1: return (u64)read_cpuid(MVFR1_EL1); + case SYS_MVFR2_EL1: return (u64)read_cpuid(MVFR2_EL1); + + case SYS_ID_AA64PFR0_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64DFR0_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64MMFR0_EL1: return (u64)read_cpuid(ID_AA64MMFR0_EL1); + case SYS_ID_AA64MMFR1_EL1: return (u64)read_cpuid(ID_AA64MMFR1_EL1); + case SYS_ID_AA64ISAR0_EL1: return (u64)read_cpuid(ID_AA64ISAR0_EL1); + case SYS_ID_AA64ISAR1_EL1: return (u64)read_cpuid(ID_AA64ISAR1_EL1); - /* second pass allows enable() to consider interacting capabilities */ + case SYS_CNTFRQ_EL0: return (u64)read_cpuid(CNTFRQ_EL0); + case SYS_CTR_EL0: return (u64)read_cpuid(CTR_EL0); + case SYS_DCZID_EL0: return (u64)read_cpuid(DCZID_EL0); + default: + BUG(); + return 0; + } +} + +/* + * Park the CPU which doesn't have the capability as advertised + * by the system. + */ +static void fail_incapable_cpu(char *cap_type, + const struct arm64_cpu_capabilities *cap) +{ + int cpu = smp_processor_id(); + + pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc); + /* Mark this CPU absent */ + set_cpu_present(cpu, 0); + + /* Check if we can park ourselves */ + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die) + cpu_ops[cpu]->cpu_die(cpu); + asm( + "1: wfe\n" + " wfi\n" + " b 1b"); +} + +/* + * Run through the enabled system capabilities and enable() it on this CPU. + * The capabilities were decided based on the available CPUs at the boot time. + * Any new CPU should match the system wide status of the capability. If the + * new CPU doesn't have a capability which the system now has enabled, we + * cannot do anything to fix it up and could cause unexpected failures. So + * we park the CPU. + */ +void verify_local_cpu_capabilities(void) +{ + int i; + const struct arm64_cpu_capabilities *caps; + + /* + * If we haven't computed the system capabilities, there is nothing + * to verify. + */ + if (!sys_caps_initialised) + return; + + caps = arm64_features; for (i = 0; caps[i].desc; i++) { - if (cpus_have_cap(caps[i].capability) && caps[i].enable) - caps[i].enable(); + if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg) + continue; + /* + * If the new CPU misses an advertised feature, we cannot proceed + * further, park the cpu. + */ + if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) + fail_incapable_cpu("arm64_features", &caps[i]); + if (caps[i].enable) + caps[i].enable(NULL); } + + for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) { + if (!cpus_have_hwcap(&caps[i])) + continue; + if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) + fail_incapable_cpu("arm64_hwcaps", &caps[i]); + } +} + +#else /* !CONFIG_HOTPLUG_CPU */ + +static inline void set_sys_caps_initialised(void) +{ +} + +#endif /* CONFIG_HOTPLUG_CPU */ + +static void setup_feature_capabilities(void) +{ + update_cpu_capabilities(arm64_features, "detected feature:"); + enable_cpu_capabilities(arm64_features); } -void check_local_cpu_features(void) +void __init setup_cpu_features(void) { - check_cpu_capabilities(arm64_features, "detected feature:"); + u32 cwg; + int cls; + + /* Set the CPU feature capabilies */ + setup_feature_capabilities(); + setup_cpu_hwcaps(); + + /* Advertise that we have computed the system capabilities */ + set_sys_caps_initialised(); + + /* + * Check for sane CTR_EL0.CWG value. + */ + cwg = cache_type_cwg(); + cls = cache_line_size(); + if (!cwg) + pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", + cls); + if (L1_CACHE_BYTES < cls) + pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", + L1_CACHE_BYTES, cls); } diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 75d5a867e7fb..706679d0a0b4 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -24,8 +24,11 @@ #include <linux/bug.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/personality.h> #include <linux/preempt.h> #include <linux/printk.h> +#include <linux/seq_file.h> +#include <linux/sched.h> #include <linux/smp.h> /* @@ -35,7 +38,6 @@ */ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; -static bool mixed_endian_el0 = true; static char *icache_policy_str[] = { [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", @@ -46,157 +48,148 @@ static char *icache_policy_str[] = { unsigned long __icache_flags; -static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) +static const char *const hwcap_str[] = { + "fp", + "asimd", + "evtstrm", + "aes", + "pmull", + "sha1", + "sha2", + "crc32", + "atomics", + NULL +}; + +#ifdef CONFIG_COMPAT +static const char *const compat_hwcap_str[] = { + "swp", + "half", + "thumb", + "26bit", + "fastmult", + "fpa", + "vfp", + "edsp", + "java", + "iwmmxt", + "crunch", + "thumbee", + "neon", + "vfpv3", + "vfpv3d16", + "tls", + "vfpv4", + "idiva", + "idivt", + "vfpd32", + "lpae", + "evtstrm" +}; + +static const char *const compat_hwcap2_str[] = { + "aes", + "pmull", + "sha1", + "sha2", + "crc32", + NULL +}; +#endif /* CONFIG_COMPAT */ + +static int c_show(struct seq_file *m, void *v) { - unsigned int cpu = smp_processor_id(); - u32 l1ip = CTR_L1IP(info->reg_ctr); + int i, j; + + for_each_online_cpu(i) { + struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); + u32 midr = cpuinfo->reg_midr; - if (l1ip != ICACHE_POLICY_PIPT) { /* - * VIPT caches are non-aliasing if the VA always equals the PA - * in all bit positions that are covered by the index. This is - * the case if the size of a way (# of sets * line size) does - * not exceed PAGE_SIZE. + * glibc reads /proc/cpuinfo to determine the number of + * online processors, looking for lines beginning with + * "processor". Give glibc what it expects. */ - u32 waysize = icache_get_numsets() * icache_get_linesize(); + seq_printf(m, "processor\t: %d\n", i); - if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE) - set_bit(ICACHEF_ALIASING, &__icache_flags); + /* + * Dump out the common processor features in a single line. + * Userspace should read the hwcaps with getauxval(AT_HWCAP) + * rather than attempting to parse this, but there's a body of + * software which does already (at least for 32-bit). + */ + seq_puts(m, "Features\t:"); + if (personality(current->personality) == PER_LINUX32) { +#ifdef CONFIG_COMPAT + for (j = 0; compat_hwcap_str[j]; j++) + if (compat_elf_hwcap & (1 << j)) + seq_printf(m, " %s", compat_hwcap_str[j]); + + for (j = 0; compat_hwcap2_str[j]; j++) + if (compat_elf_hwcap2 & (1 << j)) + seq_printf(m, " %s", compat_hwcap2_str[j]); +#endif /* CONFIG_COMPAT */ + } else { + for (j = 0; hwcap_str[j]; j++) + if (elf_hwcap & (1 << j)) + seq_printf(m, " %s", hwcap_str[j]); + } + seq_puts(m, "\n"); + + seq_printf(m, "CPU implementer\t: 0x%02x\n", + MIDR_IMPLEMENTOR(midr)); + seq_printf(m, "CPU architecture: 8\n"); + seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr)); + seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr)); + seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr)); } - if (l1ip == ICACHE_POLICY_AIVIVT) - set_bit(ICACHEF_AIVIVT, &__icache_flags); - pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); -} - -bool cpu_supports_mixed_endian_el0(void) -{ - return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); -} - -bool system_supports_mixed_endian_el0(void) -{ - return mixed_endian_el0; + return 0; } -static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info) +static void *c_start(struct seq_file *m, loff_t *pos) { - mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0); + return *pos < 1 ? (void *)1 : NULL; } -static void update_cpu_features(struct cpuinfo_arm64 *info) +static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - update_mixed_endian_el0_support(info); + ++*pos; + return NULL; } -static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu) +static void c_stop(struct seq_file *m, void *v) { - if ((boot & mask) == (cur & mask)) - return 0; - - pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n", - name, (unsigned long)boot, cpu, (unsigned long)cur); - - return 1; } -#define CHECK_MASK(field, mask, boot, cur, cpu) \ - check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu) - -#define CHECK(field, boot, cur, cpu) \ - CHECK_MASK(field, ~0ULL, boot, cur, cpu) +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show +}; -/* - * Verify that CPUs don't have unexpected differences that will cause problems. - */ -static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) +static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) { unsigned int cpu = smp_processor_id(); - struct cpuinfo_arm64 *boot = &boot_cpu_data; - unsigned int diff = 0; - - /* - * The kernel can handle differing I-cache policies, but otherwise - * caches should look identical. Userspace JITs will make use of - * *minLine. - */ - diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu); - - /* - * Userspace may perform DC ZVA instructions. Mismatched block sizes - * could result in too much or too little memory being zeroed if a - * process is preempted and migrated between CPUs. - */ - diff |= CHECK(dczid, boot, cur, cpu); - - /* If different, timekeeping will be broken (especially with KVM) */ - diff |= CHECK(cntfrq, boot, cur, cpu); - - /* - * The kernel uses self-hosted debug features and expects CPUs to - * support identical debug features. We presently need CTX_CMPs, WRPs, - * and BRPs to be identical. - * ID_AA64DFR1 is currently RES0. - */ - diff |= CHECK(id_aa64dfr0, boot, cur, cpu); - diff |= CHECK(id_aa64dfr1, boot, cur, cpu); - - /* - * Even in big.LITTLE, processors should be identical instruction-set - * wise. - */ - diff |= CHECK(id_aa64isar0, boot, cur, cpu); - diff |= CHECK(id_aa64isar1, boot, cur, cpu); - - /* - * Differing PARange support is fine as long as all peripherals and - * memory are mapped within the minimum PARange of all CPUs. - * Linux should not care about secure memory. - * ID_AA64MMFR1 is currently RES0. - */ - diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu); - diff |= CHECK(id_aa64mmfr1, boot, cur, cpu); - - /* - * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. - */ - diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu); - diff |= CHECK(id_aa64pfr1, boot, cur, cpu); + u32 l1ip = CTR_L1IP(info->reg_ctr); - /* - * If we have AArch32, we care about 32-bit features for compat. These - * registers should be RES0 otherwise. - */ - diff |= CHECK(id_dfr0, boot, cur, cpu); - diff |= CHECK(id_isar0, boot, cur, cpu); - diff |= CHECK(id_isar1, boot, cur, cpu); - diff |= CHECK(id_isar2, boot, cur, cpu); - diff |= CHECK(id_isar3, boot, cur, cpu); - diff |= CHECK(id_isar4, boot, cur, cpu); - diff |= CHECK(id_isar5, boot, cur, cpu); - /* - * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and - * ACTLR formats could differ across CPUs and therefore would have to - * be trapped for virtualization anyway. - */ - diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu); - diff |= CHECK(id_mmfr1, boot, cur, cpu); - diff |= CHECK(id_mmfr2, boot, cur, cpu); - diff |= CHECK(id_mmfr3, boot, cur, cpu); - diff |= CHECK(id_pfr0, boot, cur, cpu); - diff |= CHECK(id_pfr1, boot, cur, cpu); + if (l1ip != ICACHE_POLICY_PIPT) { + /* + * VIPT caches are non-aliasing if the VA always equals the PA + * in all bit positions that are covered by the index. This is + * the case if the size of a way (# of sets * line size) does + * not exceed PAGE_SIZE. + */ + u32 waysize = icache_get_numsets() * icache_get_linesize(); - diff |= CHECK(mvfr0, boot, cur, cpu); - diff |= CHECK(mvfr1, boot, cur, cpu); - diff |= CHECK(mvfr2, boot, cur, cpu); + if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE) + set_bit(ICACHEF_ALIASING, &__icache_flags); + } + if (l1ip == ICACHE_POLICY_AIVIVT) + set_bit(ICACHEF_AIVIVT, &__icache_flags); - /* - * Mismatched CPU features are a recipe for disaster. Don't even - * pretend to support them. - */ - WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC, - "Unsupported CPU feature variation.\n"); + pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) @@ -236,15 +229,13 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) cpuinfo_detect_icache_policy(info); check_local_cpu_errata(); - check_local_cpu_features(); - update_cpu_features(info); } void cpuinfo_store_cpu(void) { struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data); __cpuinfo_store_cpu(info); - cpuinfo_sanity_check(info); + update_cpu_features(smp_processor_id(), info, &boot_cpu_data); } void __init cpuinfo_store_boot_cpu(void) @@ -253,4 +244,5 @@ void __init cpuinfo_store_boot_cpu(void) __cpuinfo_store_cpu(info); boot_cpu_data = *info; + init_cpu_features(&boot_cpu_data); } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index cebf78661a55..8aee3aeec3e6 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -26,14 +26,16 @@ #include <linux/stat.h> #include <linux/uaccess.h> -#include <asm/debug-monitors.h> +#include <asm/cpufeature.h> #include <asm/cputype.h> +#include <asm/debug-monitors.h> #include <asm/system_misc.h> /* Determine debug architecture. */ u8 debug_monitors_arch(void) { - return read_cpuid(ID_AA64DFR0_EL1) & 0xf; + return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + ID_AA64DFR0_DEBUGVER_SHIFT); } /* @@ -58,7 +60,7 @@ static u32 mdscr_read(void) * Allow root to disable self-hosted debug from userspace. * This is useful if you want to connect an external JTAG debugger. */ -static u32 debug_enabled = 1; +static bool debug_enabled = true; static int create_debug_debugfs_entry(void) { @@ -69,7 +71,7 @@ fs_initcall(create_debug_debugfs_entry); static int __init early_debug_disable(char *buf) { - debug_enabled = 0; + debug_enabled = false; return 0; } @@ -201,7 +203,7 @@ void unregister_step_hook(struct step_hook *hook) } /* - * Call registered single step handers + * Call registered single step handlers * There is no Syndrome info to check for determining the handler. * So we call all the registered handlers, until the right handler is * found which returns zero. @@ -271,20 +273,21 @@ static int single_step_handler(unsigned long addr, unsigned int esr, * Use reader/writer locks instead of plain spinlock. */ static LIST_HEAD(break_hook); -static DEFINE_RWLOCK(break_hook_lock); +static DEFINE_SPINLOCK(break_hook_lock); void register_break_hook(struct break_hook *hook) { - write_lock(&break_hook_lock); - list_add(&hook->node, &break_hook); - write_unlock(&break_hook_lock); + spin_lock(&break_hook_lock); + list_add_rcu(&hook->node, &break_hook); + spin_unlock(&break_hook_lock); } void unregister_break_hook(struct break_hook *hook) { - write_lock(&break_hook_lock); - list_del(&hook->node); - write_unlock(&break_hook_lock); + spin_lock(&break_hook_lock); + list_del_rcu(&hook->node); + spin_unlock(&break_hook_lock); + synchronize_rcu(); } static int call_break_hook(struct pt_regs *regs, unsigned int esr) @@ -292,11 +295,11 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) struct break_hook *hook; int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; - read_lock(&break_hook_lock); - list_for_each_entry(hook, &break_hook, node) + rcu_read_lock(); + list_for_each_entry_rcu(hook, &break_hook, node) if ((esr & hook->esr_mask) == hook->esr_val) fn = hook->fn; - read_unlock(&break_hook_lock); + rcu_read_unlock(); return fn ? fn(regs, esr) : DBG_HOOK_ERROR; } diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 8ce9b0577442..a773db92908b 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -29,7 +29,7 @@ * we want to be. The kernel image wants to be placed at TEXT_OFFSET * from start of RAM. */ -ENTRY(efi_stub_entry) +ENTRY(entry) /* * Create a stack frame to save FP/LR with extra space * for image_addr variable passed to efi_entry(). @@ -86,8 +86,8 @@ ENTRY(efi_stub_entry) * entries for the VA range of the current image, so no maintenance is * necessary. */ - adr x0, efi_stub_entry - adr x1, efi_stub_entry_end + adr x0, entry + adr x1, entry_end sub x1, x1, x0 bl __flush_dcache_area @@ -120,5 +120,5 @@ efi_load_fail: ldp x29, x30, [sp], #32 ret -efi_stub_entry_end: -ENDPROC(efi_stub_entry) +entry_end: +ENDPROC(entry) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c deleted file mode 100644 index 816120ece6bc..000000000000 --- a/arch/arm64/kernel/efi-stub.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2013, 2014 Linaro Ltd; <roy.franz@linaro.org> - * - * This file implements the EFI boot stub for the arm64 kernel. - * Adapted from ARM version by Mark Salter <msalter@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ -#include <linux/efi.h> -#include <asm/efi.h> -#include <asm/sections.h> - -efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, - unsigned long *image_addr, - unsigned long *image_size, - unsigned long *reserve_addr, - unsigned long *reserve_size, - unsigned long dram_base, - efi_loaded_image_t *image) -{ - efi_status_t status; - unsigned long kernel_size, kernel_memsize = 0; - unsigned long nr_pages; - void *old_image_addr = (void *)*image_addr; - - /* Relocate the image, if required. */ - kernel_size = _edata - _text; - if (*image_addr != (dram_base + TEXT_OFFSET)) { - kernel_memsize = kernel_size + (_end - _edata); - - /* - * First, try a straight allocation at the preferred offset. - * This will work around the issue where, if dram_base == 0x0, - * efi_low_alloc() refuses to allocate at 0x0 (to prevent the - * address of the allocation to be mistaken for a FAIL return - * value or a NULL pointer). It will also ensure that, on - * platforms where the [dram_base, dram_base + TEXT_OFFSET) - * interval is partially occupied by the firmware (like on APM - * Mustang), we can still place the kernel at the address - * 'dram_base + TEXT_OFFSET'. - */ - *image_addr = *reserve_addr = dram_base + TEXT_OFFSET; - nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) / - EFI_PAGE_SIZE; - status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, nr_pages, - (efi_physical_addr_t *)reserve_addr); - if (status != EFI_SUCCESS) { - kernel_memsize += TEXT_OFFSET; - status = efi_low_alloc(sys_table_arg, kernel_memsize, - SZ_2M, reserve_addr); - - if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); - return status; - } - *image_addr = *reserve_addr + TEXT_OFFSET; - } - memcpy((void *)*image_addr, old_image_addr, kernel_size); - *reserve_size = kernel_memsize; - } - - - return EFI_SUCCESS; -} diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index e8ca6eaedd02..de46b50f4cdf 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -48,18 +48,8 @@ static struct mm_struct efi_mm = { .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), - INIT_MM_CONTEXT(efi_mm) }; -static int uefi_debug __initdata; -static int __init uefi_debug_setup(char *str) -{ - uefi_debug = 1; - - return 0; -} -early_param("uefi_debug", uefi_debug_setup); - static int __init is_normal_ram(efi_memory_desc_t *md) { if (md->attribute & EFI_MEMORY_WB) @@ -171,14 +161,14 @@ static __init void reserve_regions(void) efi_memory_desc_t *md; u64 paddr, npages, size; - if (uefi_debug) + if (efi_enabled(EFI_DBG)) pr_info("Processing EFI memory map:\n"); for_each_efi_memory_desc(&memmap, md) { paddr = md->phys_addr; npages = md->num_pages; - if (uefi_debug) { + if (efi_enabled(EFI_DBG)) { char buf[64]; pr_info(" 0x%012llx-0x%012llx %s", @@ -194,11 +184,11 @@ static __init void reserve_regions(void) if (is_reserve_region(md)) { memblock_reserve(paddr, size); - if (uefi_debug) + if (efi_enabled(EFI_DBG)) pr_cont("*"); } - if (uefi_debug) + if (efi_enabled(EFI_DBG)) pr_cont("\n"); } @@ -210,14 +200,14 @@ void __init efi_init(void) struct efi_fdt_params params; /* Grab UEFI information placed in FDT by stub */ - if (!efi_get_fdt_params(¶ms, uefi_debug)) + if (!efi_get_fdt_params(¶ms)) return; efi_system_table = params.system_table; memblock_reserve(params.mmap & PAGE_MASK, PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); - memmap.phys_map = (void *)params.mmap; + memmap.phys_map = params.mmap; memmap.map = early_memremap(params.mmap, params.mmap_size); memmap.map_end = memmap.map + params.mmap_size; memmap.desc_size = params.desc_size; @@ -258,7 +248,8 @@ static bool __init efi_virtmap_init(void) */ if (!is_normal_ram(md)) prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE) + else if (md->type == EFI_RUNTIME_SERVICES_CODE || + !PAGE_ALIGNED(md->phys_addr)) prot = PAGE_KERNEL_EXEC; else prot = PAGE_KERNEL; @@ -290,7 +281,7 @@ static int __init arm64_enable_runtime_services(void) pr_info("Remapping and enabling EFI services.\n"); mapsize = memmap.map_end - memmap.map; - memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, + memmap.map = (__force void *)ioremap_cache(memmap.phys_map, mapsize); if (!memmap.map) { pr_err("Failed to remap EFI memory map\n"); @@ -343,9 +334,9 @@ static void efi_set_pgd(struct mm_struct *mm) else cpu_switch_mm(mm->pgd, mm); - flush_tlb_all(); + local_flush_tlb_all(); if (icache_is_aivivt()) - __flush_icache_all(); + __local_flush_icache_all(); } void efi_virtmap_load(void) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 08cafc518b9a..0f03a8fe2314 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -178,6 +178,24 @@ ENTRY(ftrace_stub) ENDPROC(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* save return value regs*/ + .macro save_return_regs + sub sp, sp, #64 + stp x0, x1, [sp] + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] + .endm + + /* restore return value regs*/ + .macro restore_return_regs + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + add sp, sp, #64 + .endm + /* * void ftrace_graph_caller(void) * @@ -204,11 +222,11 @@ ENDPROC(ftrace_graph_caller) * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. */ ENTRY(return_to_handler) - str x0, [sp, #-16]! + save_return_regs mov x0, x29 // parent's fp bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); mov x30, x0 // restore the original return address - ldr x0, [sp], #16 + restore_return_regs ret END(return_to_handler) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 4306c937b1ff..7ed3d75f6304 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -430,6 +430,8 @@ el0_sync_compat: b.eq el0_fpsimd_acc cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception b.eq el0_fpsimd_exc + cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception + b.eq el0_sp_pc cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index c56956a16d3f..4c46c54a3ad7 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -332,21 +332,15 @@ static inline void fpsimd_hotplug_init(void) { } */ static int __init fpsimd_init(void) { - u64 pfr = read_cpuid(ID_AA64PFR0_EL1); - - if (pfr & (0xf << 16)) { + if (elf_hwcap & HWCAP_FP) { + fpsimd_pm_init(); + fpsimd_hotplug_init(); + } else { pr_notice("Floating-point is not implemented\n"); - return 0; } - elf_hwcap |= HWCAP_FP; - if (pfr & (0xf << 20)) + if (!(elf_hwcap & HWCAP_ASIMD)) pr_notice("Advanced SIMD is not implemented\n"); - else - elf_hwcap |= HWCAP_ASIMD; - - fpsimd_pm_init(); - fpsimd_hotplug_init(); return 0; } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 90d09eddd5b2..23cfc08fc8ba 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -29,11 +29,13 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/cputype.h> +#include <asm/kernel-pgtable.h> #include <asm/memory.h> -#include <asm/thread_info.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/page.h> +#include <asm/sysreg.h> +#include <asm/thread_info.h> #include <asm/virt.h> #define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) @@ -46,32 +48,10 @@ #error TEXT_OFFSET must be less than 2MB #endif -#ifdef CONFIG_ARM64_64K_PAGES -#define BLOCK_SHIFT PAGE_SHIFT -#define BLOCK_SIZE PAGE_SIZE -#define TABLE_SHIFT PMD_SHIFT -#else -#define BLOCK_SHIFT SECTION_SHIFT -#define BLOCK_SIZE SECTION_SIZE -#define TABLE_SHIFT PUD_SHIFT -#endif - #define KERNEL_START _text #define KERNEL_END _end /* - * Initial memory map attributes. - */ -#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED -#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S - -#ifdef CONFIG_ARM64_64K_PAGES -#define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS -#else -#define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS -#endif - -/* * Kernel startup entry point. * --------------------------- * @@ -120,8 +100,8 @@ efi_head: #endif #ifdef CONFIG_EFI - .globl stext_offset - .set stext_offset, stext - efi_head + .globl __efistub_stext_offset + .set __efistub_stext_offset, stext - efi_head .align 3 pe_header: .ascii "PE" @@ -144,8 +124,8 @@ optional_header: .long _end - stext // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData - .long efi_stub_entry - efi_head // AddressOfEntryPoint - .long stext_offset // BaseOfCode + .long __efistub_entry - efi_head // AddressOfEntryPoint + .long __efistub_stext_offset // BaseOfCode extra_header_fields: .quad 0 // ImageBase @@ -162,7 +142,7 @@ extra_header_fields: .long _end - efi_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long stext_offset // SizeOfHeaders + .long __efistub_stext_offset // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -207,9 +187,9 @@ section_table: .byte 0 .byte 0 // end of 0 padding of section name .long _end - stext // VirtualSize - .long stext_offset // VirtualAddress + .long __efistub_stext_offset // VirtualAddress .long _edata - stext // SizeOfRawData - .long stext_offset // PointerToRawData + .long __efistub_stext_offset // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) @@ -292,8 +272,11 @@ ENDPROC(preserve_boot_args) */ .macro create_pgd_entry, tbl, virt, tmp1, tmp2 create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 -#if SWAPPER_PGTABLE_LEVELS == 3 - create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 +#if SWAPPER_PGTABLE_LEVELS > 3 + create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2 +#endif +#if SWAPPER_PGTABLE_LEVELS > 2 + create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 #endif .endm @@ -305,15 +288,15 @@ ENDPROC(preserve_boot_args) * Corrupts: phys, start, end, pstate */ .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #BLOCK_SHIFT - lsr \start, \start, #BLOCK_SHIFT + lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT + lsr \start, \start, #SWAPPER_BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - lsr \end, \end, #BLOCK_SHIFT + orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry + lsr \end, \end, #SWAPPER_BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index 9999: str \phys, [\tbl, \start, lsl #3] // store the entry add \start, \start, #1 // next entry - add \phys, \phys, #BLOCK_SIZE // next block + add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block cmp \start, \end b.ls 9999b .endm @@ -350,7 +333,7 @@ __create_page_tables: cmp x0, x6 b.lo 1b - ldr x7, =MM_MMUFLAGS + ldr x7, =SWAPPER_MM_MMUFLAGS /* * Create the identity mapping. @@ -444,6 +427,9 @@ __mmap_switched: str_l x21, __fdt_pointer, x5 // Save FDT pointer str_l x24, memstart_addr, x6 // Save PHYS_OFFSET mov x29, #0 +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif b start_kernel ENDPROC(__mmap_switched) @@ -498,6 +484,8 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 msr_s ICC_SRE_EL2, x0 isb // Make sure SRE is now set + mrs_s x0, ICC_SRE_EL2 // Read SRE back, + tbz x0, #0, 3f // and check that it sticks msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults 3: @@ -628,10 +616,17 @@ ENDPROC(__secondary_switched) * x0 = SCTLR_EL1 value for turning on the MMU. * x27 = *virtual* address to jump to upon completion * - * other registers depend on the function called upon completion + * Other registers depend on the function called upon completion. + * + * Checks if the selected granule size is supported by the CPU. + * If it isn't, park the CPU */ .section ".idmap.text", "ax" __enable_mmu: + mrs x1, ID_AA64MMFR0_EL1 + ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED + b.ne __no_granule_support ldr x5, =vectors msr vbar_el1, x5 msr ttbr0_el1, x25 // load TTBR0 @@ -649,3 +644,8 @@ __enable_mmu: isb br x27 ENDPROC(__enable_mmu) + +__no_granule_support: + wfe + b __no_granule_support +ENDPROC(__no_granule_support) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index bba85c8f8037..b45c95d34b83 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -28,6 +28,7 @@ #include <linux/ptrace.h> #include <linux/smp.h> +#include <asm/compat.h> #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/hw_breakpoint.h> @@ -163,6 +164,20 @@ enum hw_breakpoint_ops { HW_BREAKPOINT_RESTORE }; +static int is_compat_bp(struct perf_event *bp) +{ + struct task_struct *tsk = bp->hw.target; + + /* + * tsk can be NULL for per-cpu (non-ptrace) breakpoints. + * In this case, use the native interface, since we don't have + * the notion of a "compat CPU" and could end up relying on + * deprecated behaviour if we use unaligned watchpoints in + * AArch64 state. + */ + return tsk && is_compat_thread(task_thread_info(tsk)); +} + /** * hw_breakpoint_slot_setup - Find and setup a perf slot according to * operations @@ -420,7 +435,7 @@ static int arch_build_bp_info(struct perf_event *bp) * Watchpoints can be of length 1, 2, 4 or 8 bytes. */ if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { - if (is_compat_task()) { + if (is_compat_bp(bp)) { if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 && info->ctrl.len != ARM_BREAKPOINT_LEN_4) return -EINVAL; @@ -477,7 +492,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * AArch32 tasks expect some simple alignment fixups, so emulate * that here. */ - if (is_compat_task()) { + if (is_compat_bp(bp)) { if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; else diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 8fae0756e175..bc2abb8b1599 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -47,7 +47,10 @@ #define __HEAD_FLAG_BE 0 #endif -#define __HEAD_FLAGS (__HEAD_FLAG_BE << 0) +#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2) + +#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \ + (__HEAD_FLAG_PAGE_SIZE << 1)) /* * These will output as part of the Image header, which should be little-endian @@ -59,4 +62,37 @@ _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); +#ifdef CONFIG_EFI + +/* + * The EFI stub has its own symbol namespace prefixed by __efistub_, to + * isolate it from the kernel proper. The following symbols are legally + * accessed by the stub, so provide some aliases to make them accessible. + * Only include data symbols here, or text symbols of functions that are + * guaranteed to be safe when executed at another offset than they were + * linked at. The routines below are all implemented in assembler in a + * position independent manner + */ +__efistub_memcmp = __pi_memcmp; +__efistub_memchr = __pi_memchr; +__efistub_memcpy = __pi_memcpy; +__efistub_memmove = __pi_memmove; +__efistub_memset = __pi_memset; +__efistub_strlen = __pi_strlen; +__efistub_strcmp = __pi_strcmp; +__efistub_strncmp = __pi_strncmp; +__efistub___flush_dcache_area = __pi___flush_dcache_area; + +#ifdef CONFIG_KASAN +__efistub___memcpy = __pi_memcpy; +__efistub___memmove = __pi_memmove; +__efistub___memset = __pi_memset; +#endif + +__efistub__text = _text; +__efistub__end = _end; +__efistub__edata = _edata; + +#endif + #endif /* __ASM_IMAGE_H */ diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index f341866aa810..c08b9ad6f429 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -85,7 +85,7 @@ bool aarch64_insn_is_branch_imm(u32 insn) aarch64_insn_is_bcond(insn)); } -static DEFINE_SPINLOCK(patch_lock); +static DEFINE_RAW_SPINLOCK(patch_lock); static void __kprobes *patch_map(void *addr, int fixmap) { @@ -131,13 +131,13 @@ static int __kprobes __aarch64_insn_write(void *addr, u32 insn) unsigned long flags = 0; int ret; - spin_lock_irqsave(&patch_lock, flags); + raw_spin_lock_irqsave(&patch_lock, flags); waddr = patch_map(addr, FIX_TEXT_POKE0); ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); patch_unmap(FIX_TEXT_POKE0); - spin_unlock_irqrestore(&patch_lock, flags); + raw_spin_unlock_irqrestore(&patch_lock, flags); return ret; } diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 11dc3fd47853..9f17ec071ee0 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -27,7 +27,6 @@ #include <linux/init.h> #include <linux/irqchip.h> #include <linux/seq_file.h> -#include <linux/ratelimit.h> unsigned long irq_err_count; @@ -54,64 +53,3 @@ void __init init_IRQ(void) if (!handle_arch_irq) panic("No interrupt controller found."); } - -#ifdef CONFIG_HOTPLUG_CPU -static bool migrate_one_irq(struct irq_desc *desc) -{ - struct irq_data *d = irq_desc_get_irq_data(desc); - const struct cpumask *affinity = irq_data_get_affinity_mask(d); - struct irq_chip *c; - bool ret = false; - - /* - * If this is a per-CPU interrupt, or the affinity does not - * include this CPU, then we have nothing to do. - */ - if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) - return false; - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - affinity = cpu_online_mask; - ret = true; - } - - c = irq_data_get_irq_chip(d); - if (!c->irq_set_affinity) - pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) - cpumask_copy(irq_data_get_affinity_mask(d), affinity); - - return ret; -} - -/* - * The current CPU has been marked offline. Migrate IRQs off this CPU. - * If the affinity settings do not allow other CPUs, force them onto any - * available CPU. - * - * Note: we must iterate over all IRQs, whether they have an attached - * action structure or not, as we need to get chained interrupts too. - */ -void migrate_irqs(void) -{ - unsigned int i; - struct irq_desc *desc; - unsigned long flags; - - local_irq_save(flags); - - for_each_irq_desc(i, desc) { - bool affinity_broken; - - raw_spin_lock(&desc->lock); - affinity_broken = migrate_one_irq(desc); - raw_spin_unlock(&desc->lock); - - if (affinity_broken) - pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", - i, smp_processor_id()); - } - - local_irq_restore(flags); -} -#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 876eb8df50bf..f4bc779e62e8 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -21,6 +21,7 @@ #include <linux/bitops.h> #include <linux/elf.h> #include <linux/gfp.h> +#include <linux/kasan.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/moduleloader.h> @@ -34,9 +35,18 @@ void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, - NUMA_NO_NODE, __builtin_return_address(0)); + void *p; + + p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + NUMA_NO_NODE, __builtin_return_address(0)); + + if (p && (kasan_module_alloc(p, size) < 0)) { + vfree(p); + return NULL; + } + + return p; } enum aarch64_reloc_op { diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f9a74d4fff3b..5b1897e8ca24 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -18,651 +18,12 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#define pr_fmt(fmt) "hw perfevents: " fmt - -#include <linux/bitmap.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/export.h> -#include <linux/of_device.h> -#include <linux/perf_event.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/uaccess.h> -#include <asm/cputype.h> -#include <asm/irq.h> #include <asm/irq_regs.h> -#include <asm/pmu.h> - -/* - * ARMv8 supports a maximum of 32 events. - * The cycle counter is included in this total. - */ -#define ARMPMU_MAX_HWEVENTS 32 - -static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); -static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); -static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); - -#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) - -/* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *cpu_pmu; - -int -armpmu_get_max_events(void) -{ - int max_events = 0; - - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; - - return max_events; -} -EXPORT_SYMBOL_GPL(armpmu_get_max_events); - -int perf_num_counters(void) -{ - return armpmu_get_max_events(); -} -EXPORT_SYMBOL_GPL(perf_num_counters); - -#define HW_OP_UNSUPPORTED 0xFFFF - -#define C(_x) \ - PERF_COUNT_HW_CACHE_##_x - -#define CACHE_OP_UNSUPPORTED 0xFFFF - -#define PERF_MAP_ALL_UNSUPPORTED \ - [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED - -#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ -[0 ... C(MAX) - 1] = { \ - [0 ... C(OP_MAX) - 1] = { \ - [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ - }, \ -} - -static int -armpmu_map_cache_event(const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u64 config) -{ - unsigned int cache_type, cache_op, cache_result, ret; - - cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; - - if (ret == CACHE_OP_UNSUPPORTED) - return -ENOENT; - - return ret; -} - -static int -armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) -{ - int mapping; - - if (config >= PERF_COUNT_HW_MAX) - return -EINVAL; - - mapping = (*event_map)[config]; - return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; -} - -static int -armpmu_map_raw_event(u32 raw_event_mask, u64 config) -{ - return (int)(config & raw_event_mask); -} - -static int map_cpu_event(struct perf_event *event, - const unsigned (*event_map)[PERF_COUNT_HW_MAX], - const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u32 raw_event_mask) -{ - u64 config = event->attr.config; - - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - return armpmu_map_event(event_map, config); - case PERF_TYPE_HW_CACHE: - return armpmu_map_cache_event(cache_map, config); - case PERF_TYPE_RAW: - return armpmu_map_raw_event(raw_event_mask, config); - } - - return -ENOENT; -} - -int -armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - s64 left = local64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int ret = 0; - - if (unlikely(left <= -period)) { - left = period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - /* - * Limit the maximum period to prevent the counter value - * from overtaking the one we are about to program. In - * effect we are reducing max_period to account for - * interrupt latency (and we are being very conservative). - */ - if (left > (armpmu->max_period >> 1)) - left = armpmu->max_period >> 1; - - local64_set(&hwc->prev_count, (u64)-left); - - armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); - - perf_event_update_userpage(event); - - return ret; -} - -u64 -armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - u64 delta, prev_raw_count, new_raw_count; - -again: - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = armpmu->read_counter(idx); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - delta = (new_raw_count - prev_raw_count) & armpmu->max_period; - - local64_add(delta, &event->count); - local64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static void -armpmu_read(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - /* Don't read disabled counters! */ - if (hwc->idx < 0) - return; - - armpmu_event_update(event, hwc, hwc->idx); -} - -static void -armpmu_stop(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - - /* - * ARM pmu always has to update the counter, so ignore - * PERF_EF_UPDATE, see comments in armpmu_start(). - */ - if (!(hwc->state & PERF_HES_STOPPED)) { - armpmu->disable(hwc, hwc->idx); - barrier(); /* why? */ - armpmu_event_update(event, hwc, hwc->idx); - hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; - } -} - -static void -armpmu_start(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - - /* - * ARM pmu always has to reprogram the period, so ignore - * PERF_EF_RELOAD, see the comment below. - */ - if (flags & PERF_EF_RELOAD) - WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); - - hwc->state = 0; - /* - * Set the period again. Some counters can't be stopped, so when we - * were stopped we simply disabled the IRQ source and the counter - * may have been left counting. If we don't do this step then we may - * get an interrupt too soon or *way* too late if the overflow has - * happened since disabling. - */ - armpmu_event_set_period(event, hwc, hwc->idx); - armpmu->enable(hwc, hwc->idx); -} - -static void -armpmu_del(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - WARN_ON(idx < 0); - - armpmu_stop(event, PERF_EF_UPDATE); - hw_events->events[idx] = NULL; - clear_bit(idx, hw_events->used_mask); - - perf_event_update_userpage(event); -} - -static int -armpmu_add(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); - struct hw_perf_event *hwc = &event->hw; - int idx; - int err = 0; - - perf_pmu_disable(event->pmu); - - /* If we don't have a space for the counter then finish early. */ - idx = armpmu->get_event_idx(hw_events, hwc); - if (idx < 0) { - err = idx; - goto out; - } - - /* - * If there is an event in the counter we are going to use then make - * sure it is disabled. - */ - event->hw.idx = idx; - armpmu->disable(hwc, idx); - hw_events->events[idx] = event; - - hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; - if (flags & PERF_EF_START) - armpmu_start(event, PERF_EF_RELOAD); - - /* Propagate our changes to the userspace mapping. */ - perf_event_update_userpage(event); - -out: - perf_pmu_enable(event->pmu); - return err; -} - -static int -validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, - struct perf_event *event) -{ - struct arm_pmu *armpmu; - struct hw_perf_event fake_event = event->hw; - struct pmu *leader_pmu = event->group_leader->pmu; - - if (is_software_event(event)) - return 1; - - /* - * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The - * core perf code won't check that the pmu->ctx == leader->ctx - * until after pmu->event_init(event). - */ - if (event->pmu != pmu) - return 0; - - if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) - return 1; - - if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) - return 1; - - armpmu = to_arm_pmu(event->pmu); - return armpmu->get_event_idx(hw_events, &fake_event) >= 0; -} - -static int -validate_group(struct perf_event *event) -{ - struct perf_event *sibling, *leader = event->group_leader; - struct pmu_hw_events fake_pmu; - DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS); - - /* - * Initialise the fake PMU. We only need to populate the - * used_mask for the purposes of validation. - */ - memset(fake_used_mask, 0, sizeof(fake_used_mask)); - fake_pmu.used_mask = fake_used_mask; - - if (!validate_event(event->pmu, &fake_pmu, leader)) - return -EINVAL; - - list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(event->pmu, &fake_pmu, sibling)) - return -EINVAL; - } - - if (!validate_event(event->pmu, &fake_pmu, event)) - return -EINVAL; - - return 0; -} - -static void -armpmu_disable_percpu_irq(void *data) -{ - unsigned int irq = *(unsigned int *)data; - disable_percpu_irq(irq); -} - -static void -armpmu_release_hardware(struct arm_pmu *armpmu) -{ - int irq; - unsigned int i, irqs; - struct platform_device *pmu_device = armpmu->plat_device; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (!irqs) - return; - - irq = platform_get_irq(pmu_device, 0); - if (irq <= 0) - return; - - if (irq_is_percpu(irq)) { - on_each_cpu(armpmu_disable_percpu_irq, &irq, 1); - free_percpu_irq(irq, &cpu_hw_events); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - if (armpmu->irq_affinity) - cpu = armpmu->irq_affinity[i]; - - if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq > 0) - free_irq(irq, armpmu); - } - } -} - -static void -armpmu_enable_percpu_irq(void *data) -{ - unsigned int irq = *(unsigned int *)data; - enable_percpu_irq(irq, IRQ_TYPE_NONE); -} - -static int -armpmu_reserve_hardware(struct arm_pmu *armpmu) -{ - int err, irq; - unsigned int i, irqs; - struct platform_device *pmu_device = armpmu->plat_device; - - if (!pmu_device) - return -ENODEV; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (!irqs) { - pr_err("no irqs for PMUs defined\n"); - return -ENODEV; - } - - irq = platform_get_irq(pmu_device, 0); - if (irq <= 0) { - pr_err("failed to get valid irq for PMU device\n"); - return -ENODEV; - } - - if (irq_is_percpu(irq)) { - err = request_percpu_irq(irq, armpmu->handle_irq, - "arm-pmu", &cpu_hw_events); - - if (err) { - pr_err("unable to request percpu IRQ%d for ARM PMU counters\n", - irq); - armpmu_release_hardware(armpmu); - return err; - } - - on_each_cpu(armpmu_enable_percpu_irq, &irq, 1); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq <= 0) - continue; - - if (armpmu->irq_affinity) - cpu = armpmu->irq_affinity[i]; - - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - continue; - } - - err = request_irq(irq, armpmu->handle_irq, - IRQF_NOBALANCING | IRQF_NO_THREAD, - "arm-pmu", armpmu); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - armpmu_release_hardware(armpmu); - return err; - } - - cpumask_set_cpu(cpu, &armpmu->active_irqs); - } - } - - return 0; -} - -static void -hw_perf_event_destroy(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - atomic_t *active_events = &armpmu->active_events; - struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; - - if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { - armpmu_release_hardware(armpmu); - mutex_unlock(pmu_reserve_mutex); - } -} - -static int -event_requires_mode_exclusion(struct perf_event_attr *attr) -{ - return attr->exclude_idle || attr->exclude_user || - attr->exclude_kernel || attr->exclude_hv; -} - -static int -__hw_perf_event_init(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int mapping, err; - - mapping = armpmu->map_event(event); - - if (mapping < 0) { - pr_debug("event %x:%llx not supported\n", event->attr.type, - event->attr.config); - return mapping; - } - - /* - * We don't assign an index until we actually place the event onto - * hardware. Use -1 to signify that we haven't decided where to put it - * yet. For SMP systems, each core has it's own PMU so we can't do any - * clever allocation or constraints checking at this point. - */ - hwc->idx = -1; - hwc->config_base = 0; - hwc->config = 0; - hwc->event_base = 0; - - /* - * Check whether we need to exclude the counter from certain modes. - */ - if ((!armpmu->set_event_filter || - armpmu->set_event_filter(hwc, &event->attr)) && - event_requires_mode_exclusion(&event->attr)) { - pr_debug("ARM performance counters do not support mode exclusion\n"); - return -EPERM; - } - - /* - * Store the event encoding into the config_base field. - */ - hwc->config_base |= (unsigned long)mapping; - - if (!hwc->sample_period) { - /* - * For non-sampling runs, limit the sample_period to half - * of the counter width. That way, the new counter value - * is far less likely to overtake the previous one unless - * you have some serious IRQ latency issues. - */ - hwc->sample_period = armpmu->max_period >> 1; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - } - - err = 0; - if (event->group_leader != event) { - err = validate_group(event); - if (err) - return -EINVAL; - } - - return err; -} - -static int armpmu_event_init(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - int err = 0; - atomic_t *active_events = &armpmu->active_events; - - if (armpmu->map_event(event) == -ENOENT) - return -ENOENT; - - event->destroy = hw_perf_event_destroy; - - if (!atomic_inc_not_zero(active_events)) { - mutex_lock(&armpmu->reserve_mutex); - if (atomic_read(active_events) == 0) - err = armpmu_reserve_hardware(armpmu); - - if (!err) - atomic_inc(active_events); - mutex_unlock(&armpmu->reserve_mutex); - } - if (err) - return err; - - err = __hw_perf_event_init(event); - if (err) - hw_perf_event_destroy(event); - - return err; -} - -static void armpmu_enable(struct pmu *pmu) -{ - struct arm_pmu *armpmu = to_arm_pmu(pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); - - if (enabled) - armpmu->start(); -} - -static void armpmu_disable(struct pmu *pmu) -{ - struct arm_pmu *armpmu = to_arm_pmu(pmu); - armpmu->stop(); -} - -static void __init armpmu_init(struct arm_pmu *armpmu) -{ - atomic_set(&armpmu->active_events, 0); - mutex_init(&armpmu->reserve_mutex); - - armpmu->pmu = (struct pmu) { - .pmu_enable = armpmu_enable, - .pmu_disable = armpmu_disable, - .event_init = armpmu_event_init, - .add = armpmu_add, - .del = armpmu_del, - .start = armpmu_start, - .stop = armpmu_stop, - .read = armpmu_read, - }; -} - -int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type) -{ - armpmu_init(armpmu); - return perf_pmu_register(&armpmu->pmu, name, type); -} +#include <linux/of.h> +#include <linux/perf/arm_pmu.h> +#include <linux/platform_device.h> /* * ARMv8 PMUv3 Performance Events handling code. @@ -708,6 +69,21 @@ enum armv8_pmuv3_perf_types { ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D, }; +/* ARMv8 Cortex-A53 specific event types. */ +enum armv8_a53_pmu_perf_types { + ARMV8_A53_PERFCTR_PREFETCH_LINEFILL = 0xC2, +}; + +/* ARMv8 Cortex-A57 specific event types. */ +enum armv8_a57_perf_types { + ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD = 0x40, + ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST = 0x41, + ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD = 0x42, + ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST = 0x43, + ARMV8_A57_PERFCTR_DTLB_REFILL_LD = 0x4c, + ARMV8_A57_PERFCTR_DTLB_REFILL_ST = 0x4d, +}; + /* PMUv3 HW events mapping. */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, @@ -718,6 +94,28 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, }; +/* ARM Cortex-A53 HW events mapping. */ +static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, +}; + +static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, +}; + static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -734,12 +132,60 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, }; +static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, +}; + +static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, +}; + + /* * Perf Events' indices */ #define ARMV8_IDX_CYCLE_COUNTER 0 #define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_COUNTER_LAST (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) +#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ + (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) #define ARMV8_MAX_COUNTERS 32 #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) @@ -805,49 +251,34 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr) return pmovsr & ARMV8_OVERFLOWED_MASK; } -static inline int armv8pmu_counter_valid(int idx) +static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx) { - return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST; + return idx >= ARMV8_IDX_CYCLE_COUNTER && + idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu); } static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) { - int ret = 0; - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), idx); - } else { - counter = ARMV8_IDX_TO_COUNTER(idx); - ret = pmnc & BIT(counter); - } - - return ret; + return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } static inline int armv8pmu_select_counter(int idx) { - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u selecting wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); asm volatile("msr pmselr_el0, %0" :: "r" (counter)); isb(); return idx; } -static inline u32 armv8pmu_read_counter(int idx) +static inline u32 armv8pmu_read_counter(struct perf_event *event) { + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; u32 value = 0; - if (!armv8pmu_counter_valid(idx)) + if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) @@ -858,9 +289,13 @@ static inline u32 armv8pmu_read_counter(int idx) return value; } -static inline void armv8pmu_write_counter(int idx, u32 value) +static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) { - if (!armv8pmu_counter_valid(idx)) + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) @@ -879,65 +314,34 @@ static inline void armv8pmu_write_evtype(int idx, u32 val) static inline int armv8pmu_enable_counter(int idx) { - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u enabling wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter))); return idx; } static inline int armv8pmu_disable_counter(int idx) { - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u disabling wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter))); return idx; } static inline int armv8pmu_enable_intens(int idx) { - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter))); return idx; } static inline int armv8pmu_disable_intens(int idx) { - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter))); isb(); /* Clear the overflow flag in case an interrupt is pending. */ asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter))); isb(); + return idx; } @@ -955,10 +359,13 @@ static inline u32 armv8pmu_getreset_flags(void) return value; } -static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void armv8pmu_enable_event(struct perf_event *event) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + int idx = hwc->idx; /* * Enable counter and interrupt, and set the counter to count @@ -989,10 +396,13 @@ static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void armv8pmu_disable_event(struct perf_event *event) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + int idx = hwc->idx; /* * Disable counter and interrupt @@ -1016,7 +426,8 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) { u32 pmovsr; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; @@ -1036,7 +447,6 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); - cpuc = this_cpu_ptr(&cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -1053,13 +463,13 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } /* @@ -1074,10 +484,10 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void armv8pmu_start(void) +static void armv8pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Enable all counters */ @@ -1085,10 +495,10 @@ static void armv8pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void armv8pmu_stop(void) +static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Disable all counters */ @@ -1097,10 +507,12 @@ static void armv8pmu_stop(void) } static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { int idx; - unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV8_EVTYPE_EVENT; /* Always place a cycle counter into the cycle counter. */ if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) { @@ -1151,11 +563,14 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, static void armv8pmu_reset(void *info) { + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; u32 idx, nb_cnt = cpu_pmu->num_events; /* The counter and interrupt enable registers are unknown at reset. */ - for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) - armv8pmu_disable_event(NULL, idx); + for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv8pmu_disable_counter(idx); + armv8pmu_disable_intens(idx); + } /* Initialize & Reset PMNC: C and P bits. */ armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); @@ -1166,169 +581,104 @@ static void armv8pmu_reset(void *info) static int armv8_pmuv3_map_event(struct perf_event *event) { - return map_cpu_event(event, &armv8_pmuv3_perf_map, + return armpmu_map_event(event, &armv8_pmuv3_perf_map, &armv8_pmuv3_perf_cache_map, ARMV8_EVTYPE_EVENT); } -static struct arm_pmu armv8pmu = { - .handle_irq = armv8pmu_handle_irq, - .enable = armv8pmu_enable_event, - .disable = armv8pmu_disable_event, - .read_counter = armv8pmu_read_counter, - .write_counter = armv8pmu_write_counter, - .get_event_idx = armv8pmu_get_event_idx, - .start = armv8pmu_start, - .stop = armv8pmu_stop, - .reset = armv8pmu_reset, - .max_period = (1LLU << 32) - 1, -}; +static int armv8_a53_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv8_a53_perf_map, + &armv8_a53_perf_cache_map, + ARMV8_EVTYPE_EVENT); +} -static u32 __init armv8pmu_read_num_pmnc_events(void) +static int armv8_a57_map_event(struct perf_event *event) { - u32 nb_cnt; + return armpmu_map_event(event, &armv8_a57_perf_map, + &armv8_a57_perf_cache_map, + ARMV8_EVTYPE_EVENT); +} + +static void armv8pmu_read_num_pmnc_events(void *info) +{ + int *nb_cnt = info; /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK; + *nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK; - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; + /* Add the CPU cycles counter */ + *nb_cnt += 1; } -static struct arm_pmu *__init armv8_pmuv3_pmu_init(void) +static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu) { - armv8pmu.name = "arm/armv8-pmuv3"; - armv8pmu.map_event = armv8_pmuv3_map_event; - armv8pmu.num_events = armv8pmu_read_num_pmnc_events(); - armv8pmu.set_event_filter = armv8pmu_set_event_filter; - return &armv8pmu; + return smp_call_function_any(&arm_pmu->supported_cpus, + armv8pmu_read_num_pmnc_events, + &arm_pmu->num_events, 1); } -/* - * Ensure the PMU has sane values out of reset. - * This requires SMP to be available, so exists as a separate initcall. - */ -static int __init -cpu_pmu_reset(void) +static void armv8_pmu_init(struct arm_pmu *cpu_pmu) { - if (cpu_pmu && cpu_pmu->reset) - return on_each_cpu(cpu_pmu->reset, NULL, 1); - return 0; + cpu_pmu->handle_irq = armv8pmu_handle_irq, + cpu_pmu->enable = armv8pmu_enable_event, + cpu_pmu->disable = armv8pmu_disable_event, + cpu_pmu->read_counter = armv8pmu_read_counter, + cpu_pmu->write_counter = armv8pmu_write_counter, + cpu_pmu->get_event_idx = armv8pmu_get_event_idx, + cpu_pmu->start = armv8pmu_start, + cpu_pmu->stop = armv8pmu_stop, + cpu_pmu->reset = armv8pmu_reset, + cpu_pmu->max_period = (1LLU << 32) - 1, + cpu_pmu->set_event_filter = armv8pmu_set_event_filter; } -arch_initcall(cpu_pmu_reset); - -/* - * PMU platform driver and devicetree bindings. - */ -static const struct of_device_id armpmu_of_device_ids[] = { - {.compatible = "arm,armv8-pmuv3"}, - {}, -}; -static int armpmu_device_probe(struct platform_device *pdev) +static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) { - int i, irq, *irqs; - - if (!cpu_pmu) - return -ENODEV; - - /* Don't bother with PPIs; they're already affine */ - irq = platform_get_irq(pdev, 0); - if (irq >= 0 && irq_is_percpu(irq)) - goto out; - - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - if (!irqs) - return -ENOMEM; - - for (i = 0; i < pdev->num_resources; ++i) { - struct device_node *dn; - int cpu; - - dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", - i); - if (!dn) { - pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", - of_node_full_name(pdev->dev.of_node), i); - break; - } - - for_each_possible_cpu(cpu) - if (dn == of_cpu_device_node_get(cpu)) - break; - - if (cpu >= nr_cpu_ids) { - pr_warn("Failed to find logical CPU for %s\n", - dn->name); - of_node_put(dn); - break; - } - of_node_put(dn); - - irqs[i] = cpu; - } - - if (i == pdev->num_resources) - cpu_pmu->irq_affinity = irqs; - else - kfree(irqs); - -out: - cpu_pmu->plat_device = pdev; - return 0; + armv8_pmu_init(cpu_pmu); + cpu_pmu->name = "armv8_pmuv3"; + cpu_pmu->map_event = armv8_pmuv3_map_event; + return armv8pmu_probe_num_events(cpu_pmu); } -static struct platform_driver armpmu_driver = { - .driver = { - .name = "arm-pmu", - .of_match_table = armpmu_of_device_ids, - }, - .probe = armpmu_device_probe, -}; - -static int __init register_pmu_driver(void) +static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - return platform_driver_register(&armpmu_driver); + armv8_pmu_init(cpu_pmu); + cpu_pmu->name = "armv8_cortex_a53"; + cpu_pmu->map_event = armv8_a53_map_event; + return armv8pmu_probe_num_events(cpu_pmu); } -device_initcall(register_pmu_driver); -static struct pmu_hw_events *armpmu_get_cpu_events(void) +static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - return this_cpu_ptr(&cpu_hw_events); + armv8_pmu_init(cpu_pmu); + cpu_pmu->name = "armv8_cortex_a57"; + cpu_pmu->map_event = armv8_a57_map_event; + return armv8pmu_probe_num_events(cpu_pmu); } -static void __init cpu_pmu_init(struct arm_pmu *armpmu) -{ - int cpu; - for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); - events->events = per_cpu(hw_events, cpu); - events->used_mask = per_cpu(used_mask, cpu); - raw_spin_lock_init(&events->pmu_lock); - } - armpmu->get_hw_events = armpmu_get_cpu_events; -} +static const struct of_device_id armv8_pmu_of_device_ids[] = { + {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, + {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, + {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, + {}, +}; -static int __init init_hw_perf_events(void) +static int armv8_pmu_device_probe(struct platform_device *pdev) { - u64 dfr = read_cpuid(ID_AA64DFR0_EL1); - - switch ((dfr >> 8) & 0xf) { - case 0x1: /* PMUv3 */ - cpu_pmu = armv8_pmuv3_pmu_init(); - break; - } + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); +} - if (cpu_pmu) { - pr_info("enabled with %s PMU driver, %d counters available\n", - cpu_pmu->name, cpu_pmu->num_events); - cpu_pmu_init(cpu_pmu); - armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW); - } else { - pr_info("no hardware support available\n"); - } +static struct platform_driver armv8_pmu_driver = { + .driver = { + .name = "armv8-pmu", + .of_match_table = armv8_pmu_of_device_ids, + }, + .probe = armv8_pmu_device_probe, +}; - return 0; +static int __init register_armv8_pmu_driver(void) +{ + return platform_driver_register(&armv8_pmu_driver); } -early_initcall(init_hw_perf_events); - +device_initcall(register_armv8_pmu_driver); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 223b093c9440..f75b540bc3b4 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -44,6 +44,7 @@ #include <linux/hw_breakpoint.h> #include <linux/personality.h> #include <linux/notifier.h> +#include <trace/events/power.h> #include <asm/compat.h> #include <asm/cacheflush.h> @@ -75,8 +76,10 @@ void arch_cpu_idle(void) * This should do all the clock switching and wait for interrupt * tricks */ + trace_cpu_idle_rcuidle(1, smp_processor_id()); cpu_do_idle(); local_irq_enable(); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 6bab21f84a9f..8119479147db 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -28,7 +28,6 @@ #include <linux/console.h> #include <linux/cache.h> #include <linux/bootmem.h> -#include <linux/seq_file.h> #include <linux/screen_info.h> #include <linux/init.h> #include <linux/kexec.h> @@ -44,7 +43,6 @@ #include <linux/of_fdt.h> #include <linux/of_platform.h> #include <linux/efi.h> -#include <linux/personality.h> #include <linux/psci.h> #include <asm/acpi.h> @@ -54,6 +52,7 @@ #include <asm/elf.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> +#include <asm/kasan.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -64,23 +63,6 @@ #include <asm/efi.h> #include <asm/xen/hypervisor.h> -unsigned long elf_hwcap __read_mostly; -EXPORT_SYMBOL_GPL(elf_hwcap); - -#ifdef CONFIG_COMPAT -#define COMPAT_ELF_HWCAP_DEFAULT \ - (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ - COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ - COMPAT_HWCAP_LPAE) -unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; -unsigned int compat_elf_hwcap2 __read_mostly; -#endif - -DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); - phys_addr_t __fdt_pointer __initdata; /* @@ -195,104 +177,6 @@ static void __init smp_build_mpidr_hash(void) __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } -static void __init setup_processor(void) -{ - u64 features; - s64 block; - u32 cwg; - int cls; - - printk("CPU: AArch64 Processor [%08x] revision %d\n", - read_cpuid_id(), read_cpuid_id() & 15); - - sprintf(init_utsname()->machine, ELF_PLATFORM); - elf_hwcap = 0; - - cpuinfo_store_boot_cpu(); - - /* - * Check for sane CTR_EL0.CWG value. - */ - cwg = cache_type_cwg(); - cls = cache_line_size(); - if (!cwg) - pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", - cls); - if (L1_CACHE_BYTES < cls) - pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", - L1_CACHE_BYTES, cls); - - /* - * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. - * The blocks we test below represent incremental functionality - * for non-negative values. Negative values are reserved. - */ - features = read_cpuid(ID_AA64ISAR0_EL1); - block = cpuid_feature_extract_field(features, 4); - if (block > 0) { - switch (block) { - default: - case 2: - elf_hwcap |= HWCAP_PMULL; - case 1: - elf_hwcap |= HWCAP_AES; - case 0: - break; - } - } - - if (cpuid_feature_extract_field(features, 8) > 0) - elf_hwcap |= HWCAP_SHA1; - - if (cpuid_feature_extract_field(features, 12) > 0) - elf_hwcap |= HWCAP_SHA2; - - if (cpuid_feature_extract_field(features, 16) > 0) - elf_hwcap |= HWCAP_CRC32; - - block = cpuid_feature_extract_field(features, 20); - if (block > 0) { - switch (block) { - default: - case 2: - elf_hwcap |= HWCAP_ATOMICS; - case 1: - /* RESERVED */ - case 0: - break; - } - } - -#ifdef CONFIG_COMPAT - /* - * ID_ISAR5_EL1 carries similar information as above, but pertaining to - * the AArch32 32-bit execution state. - */ - features = read_cpuid(ID_ISAR5_EL1); - block = cpuid_feature_extract_field(features, 4); - if (block > 0) { - switch (block) { - default: - case 2: - compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; - case 1: - compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; - case 0: - break; - } - } - - if (cpuid_feature_extract_field(features, 8) > 0) - compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; - - if (cpuid_feature_extract_field(features, 12) > 0) - compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; - - if (cpuid_feature_extract_field(features, 16) > 0) - compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; -#endif -} - static void __init setup_machine_fdt(phys_addr_t dt_phys) { void *dt_virt = fixmap_remap_fdt(dt_phys); @@ -364,6 +248,8 @@ static void __init relocate_initrd(void) to_free = ram_end - orig_start; size = orig_end - orig_start; + if (!size) + return; /* initrd needs to be relocated completely inside linear mapping */ new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn), @@ -404,8 +290,9 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) { - setup_processor(); + pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); + sprintf(init_utsname()->machine, ELF_PLATFORM); init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; @@ -434,6 +321,9 @@ void __init setup_arch(char **cmdline_p) paging_init(); relocate_initrd(); + + kasan_init(); + request_standard_resources(); early_ioremap_reset(); @@ -491,124 +381,3 @@ static int __init topology_init(void) return 0; } subsys_initcall(topology_init); - -static const char *hwcap_str[] = { - "fp", - "asimd", - "evtstrm", - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - "atomics", - NULL -}; - -#ifdef CONFIG_COMPAT -static const char *compat_hwcap_str[] = { - "swp", - "half", - "thumb", - "26bit", - "fastmult", - "fpa", - "vfp", - "edsp", - "java", - "iwmmxt", - "crunch", - "thumbee", - "neon", - "vfpv3", - "vfpv3d16", - "tls", - "vfpv4", - "idiva", - "idivt", - "vfpd32", - "lpae", - "evtstrm" -}; - -static const char *compat_hwcap2_str[] = { - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - NULL -}; -#endif /* CONFIG_COMPAT */ - -static int c_show(struct seq_file *m, void *v) -{ - int i, j; - - for_each_online_cpu(i) { - struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); - u32 midr = cpuinfo->reg_midr; - - /* - * glibc reads /proc/cpuinfo to determine the number of - * online processors, looking for lines beginning with - * "processor". Give glibc what it expects. - */ - seq_printf(m, "processor\t: %d\n", i); - - /* - * Dump out the common processor features in a single line. - * Userspace should read the hwcaps with getauxval(AT_HWCAP) - * rather than attempting to parse this, but there's a body of - * software which does already (at least for 32-bit). - */ - seq_puts(m, "Features\t:"); - if (personality(current->personality) == PER_LINUX32) { -#ifdef CONFIG_COMPAT - for (j = 0; compat_hwcap_str[j]; j++) - if (compat_elf_hwcap & (1 << j)) - seq_printf(m, " %s", compat_hwcap_str[j]); - - for (j = 0; compat_hwcap2_str[j]; j++) - if (compat_elf_hwcap2 & (1 << j)) - seq_printf(m, " %s", compat_hwcap2_str[j]); -#endif /* CONFIG_COMPAT */ - } else { - for (j = 0; hwcap_str[j]; j++) - if (elf_hwcap & (1 << j)) - seq_printf(m, " %s", hwcap_str[j]); - } - seq_puts(m, "\n"); - - seq_printf(m, "CPU implementer\t: 0x%02x\n", - MIDR_IMPLEMENTOR(midr)); - seq_printf(m, "CPU architecture: 8\n"); - seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr)); - seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr)); - seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr)); - } - - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < 1 ? (void *)1 : NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return NULL; -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = c_show -}; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dbdaacddd9a5..2bbdc0e4fd14 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -142,22 +142,27 @@ asmlinkage void secondary_start_kernel(void) */ atomic_inc(&mm->mm_count); current->active_mm = mm; - cpumask_set_cpu(cpu, mm_cpumask(mm)); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - printk("CPU%u: Booted secondary processor\n", cpu); /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. */ cpu_set_reserved_ttbr0(); - flush_tlb_all(); + local_flush_tlb_all(); cpu_set_default_tcr_t0sz(); preempt_disable(); trace_hardirqs_off(); + /* + * If the system has established the capabilities, make sure + * this CPU ticks all of those. If it doesn't, the CPU will + * fail to come online. + */ + verify_local_cpu_capabilities(); + if (cpu_ops[cpu]->cpu_postboot) cpu_ops[cpu]->cpu_postboot(); @@ -178,6 +183,8 @@ asmlinkage void secondary_start_kernel(void) * the CPU migration code to notice that the CPU is online * before we continue. */ + pr_info("CPU%u: Booted secondary processor [%08x]\n", + cpu, read_cpuid_id()); set_cpu_online(cpu, true); complete(&cpu_running); @@ -232,12 +239,7 @@ int __cpu_disable(void) /* * OK - migrate IRQs away from this CPU */ - migrate_irqs(); - - /* - * Remove this CPU from the vm mask set of all processes. - */ - clear_tasks_mm_cpumask(cpu); + irq_migrate_all_off_this_cpu(); return 0; } @@ -325,12 +327,14 @@ static void __init hyp_mode_check(void) void __init smp_cpus_done(unsigned int max_cpus) { pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); + setup_cpu_features(); hyp_mode_check(); apply_alternatives_all(); } void __init smp_prepare_boot_cpu(void) { + cpuinfo_store_boot_cpu(); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 407991bf79f5..ccb6078ed9f2 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -48,11 +48,7 @@ int notrace unwind_frame(struct stackframe *frame) frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); - /* - * -4 here because we care about the PC at time of bl, - * not where the return will go. - */ - frame->pc = *(unsigned long *)(fp + 8) - 4; + frame->pc = *(unsigned long *)(fp + 8); return 0; } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 8297d502217e..40f7b33a22da 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -80,17 +80,21 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) if (ret == 0) { /* * We are resuming from reset with TTBR0_EL1 set to the - * idmap to enable the MMU; restore the active_mm mappings in - * TTBR0_EL1 unless the active_mm == &init_mm, in which case - * the thread entered cpu_suspend with TTBR0_EL1 set to - * reserved TTBR0 page tables and should be restored as such. + * idmap to enable the MMU; set the TTBR0 to the reserved + * page tables to prevent speculative TLB allocations, flush + * the local tlb and set the default tcr_el1.t0sz so that + * the TTBR0 address space set-up is properly restored. + * If the current active_mm != &init_mm we entered cpu_suspend + * with mappings in TTBR0 that must be restored, so we switch + * them back to complete the address space configuration + * restoration before returning. */ - if (mm == &init_mm) - cpu_set_reserved_ttbr0(); - else - cpu_switch_mm(mm->pgd, mm); + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + cpu_set_default_tcr_t0sz(); - flush_tlb_all(); + if (mm != &init_mm) + cpu_switch_mm(mm->pgd, mm); /* * Restore per-cpu offset before any kernel diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 149151fb42bb..13339b6ffc1a 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -67,16 +67,10 @@ void __init time_init(void) u32 arch_timer_rate; of_clk_init(NULL); - clocksource_of_init(); + clocksource_probe(); tick_setup_hrtimer_broadcast(); - /* - * Since ACPI or FDT will only one be available in the system, - * we can use acpi_generic_timer_init() here safely - */ - acpi_generic_timer_init(); - arch_timer_rate = arch_timer_get_rate(); if (!arch_timer_rate) panic("Unable to initialise architected timer.\n"); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f93aae5e4307..e9b9b5364393 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -103,12 +103,12 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, set_fs(fs); } -static void dump_backtrace_entry(unsigned long where, unsigned long stack) +static void dump_backtrace_entry(unsigned long where) { + /* + * Note that 'where' can have a physical address, but it's not handled. + */ print_ip_sym(where); - if (in_exception_text(where)) - dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs), false); } static void dump_instr(const char *lvl, struct pt_regs *regs) @@ -172,12 +172,17 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) pr_emerg("Call trace:\n"); while (1) { unsigned long where = frame.pc; + unsigned long stack; int ret; + dump_backtrace_entry(where); ret = unwind_frame(&frame); if (ret < 0) break; - dump_backtrace_entry(where, frame.sp); + stack = frame.sp; + if (in_exception_text(where)) + dump_mem("", "Exception stack", stack, + stack + sizeof(struct pt_regs), false); } } diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 98073332e2d0..1ee2c3937d4e 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -5,6 +5,7 @@ */ #include <asm-generic/vmlinux.lds.h> +#include <asm/kernel-pgtable.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> @@ -60,9 +61,12 @@ PECOFF_FILE_ALIGNMENT = 0x200; #define PECOFF_EDATA_PADDING #endif -#ifdef CONFIG_DEBUG_ALIGN_RODATA +#if defined(CONFIG_DEBUG_ALIGN_RODATA) #define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT); #define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO +#elif defined(CONFIG_DEBUG_RODATA) +#define ALIGN_DEBUG_RO . = ALIGN(1<<PAGE_SHIFT); +#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO #else #define ALIGN_DEBUG_RO #define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 5c7e920e4861..a5272c07d1cb 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -16,9 +16,13 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION +config KVM_ARM_VGIC_V3 + bool + config KVM bool "Kernel-based Virtual Machine (KVM) support" depends on OF + depends on !ARM64_16K_PAGES select MMU_NOTIFIER select PREEMPT_NOTIFIERS select ANON_INODES @@ -31,8 +35,11 @@ config KVM select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD + select KVM_ARM_VGIC_V3 ---help--- Support hosting virtualized guest machines. + We don't support KVM with 16K page tables yet, due to the multiple + levels of fake page tables. If unsure, say N. @@ -41,4 +48,6 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. +source drivers/vhost/Kconfig + endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index e5836138ec42..1599701ef044 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -880,6 +880,14 @@ __kvm_hyp_panic: bl __restore_sysregs + /* + * Make sure we have a valid host stack, and don't leave junk in the + * frame pointer that will give us a misleading host stack unwinding. + */ + ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] + msr sp_el1, x22 + mov x29, xzr + 1: adr x0, __hyp_panic_str adr x1, 2f ldp x2, x3, [x1] diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 91cf5350b328..f34745cb3d23 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -53,7 +53,7 @@ static bool cpu_has_32bit_el1(void) { u64 pfr0; - pfr0 = read_cpuid(ID_AA64PFR0_EL1); + pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); return !!(pfr0 & 0x20); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d03d3af17e7e..87a64e8db04c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -693,13 +693,13 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u64 dfr = read_cpuid(ID_AA64DFR0_EL1); - u64 pfr = read_cpuid(ID_AA64PFR0_EL1); - u32 el3 = !!((pfr >> 12) & 0xf); + u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1); + u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1); + u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT); - *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) | - (((dfr >> 12) & 0xf) << 24) | - (((dfr >> 28) & 0xf) << 20) | + *vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | + (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | + (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) | (6 << 16) | (el3 << 14) | (el3 << 12)); return true; } diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 1be9ef27be97..4699cd74f87e 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -18,6 +18,7 @@ #include <asm/alternative.h> #include <asm/assembler.h> +#include <asm/cache.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> @@ -31,49 +32,58 @@ * Returns: * x0 - bytes not copied */ + + .macro ldrb1 ptr, regB, val + USER(9998f, ldrb \ptr, [\regB], \val) + .endm + + .macro strb1 ptr, regB, val + strb \ptr, [\regB], \val + .endm + + .macro ldrh1 ptr, regB, val + USER(9998f, ldrh \ptr, [\regB], \val) + .endm + + .macro strh1 ptr, regB, val + strh \ptr, [\regB], \val + .endm + + .macro ldr1 ptr, regB, val + USER(9998f, ldr \ptr, [\regB], \val) + .endm + + .macro str1 ptr, regB, val + str \ptr, [\regB], \val + .endm + + .macro ldp1 ptr, regB, regC, val + USER(9998f, ldp \ptr, \regB, [\regC], \val) + .endm + + .macro stp1 ptr, regB, regC, val + stp \ptr, \regB, [\regC], \val + .endm + +end .req x5 ENTRY(__copy_from_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) - add x5, x1, x2 // upper user buffer boundary - subs x2, x2, #16 - b.mi 1f -0: -USER(9f, ldp x3, x4, [x1], #16) - subs x2, x2, #16 - stp x3, x4, [x0], #16 - b.pl 0b -1: adds x2, x2, #8 - b.mi 2f -USER(9f, ldr x3, [x1], #8 ) - sub x2, x2, #8 - str x3, [x0], #8 -2: adds x2, x2, #4 - b.mi 3f -USER(9f, ldr w3, [x1], #4 ) - sub x2, x2, #4 - str w3, [x0], #4 -3: adds x2, x2, #2 - b.mi 4f -USER(9f, ldrh w3, [x1], #2 ) - sub x2, x2, #2 - strh w3, [x0], #2 -4: adds x2, x2, #1 - b.mi 5f -USER(9f, ldrb w3, [x1] ) - strb w3, [x0] -5: mov x0, #0 + add end, x0, x2 +#include "copy_template.S" ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) + mov x0, #0 // Nothing to copy ret ENDPROC(__copy_from_user) .section .fixup,"ax" .align 2 -9: sub x2, x5, x1 - mov x3, x2 -10: strb wzr, [x0], #1 // zero remaining buffer space - subs x3, x3, #1 - b.ne 10b - mov x0, x2 // bytes not copied +9998: + sub x0, end, dst +9999: + strb wzr, [dst], #1 // zero remaining buffer space + cmp dst, end + b.lo 9999b ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 1b94661e22b3..81c8fc93c100 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -20,6 +20,7 @@ #include <asm/alternative.h> #include <asm/assembler.h> +#include <asm/cache.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> @@ -33,44 +34,52 @@ * Returns: * x0 - bytes not copied */ + .macro ldrb1 ptr, regB, val + USER(9998f, ldrb \ptr, [\regB], \val) + .endm + + .macro strb1 ptr, regB, val + USER(9998f, strb \ptr, [\regB], \val) + .endm + + .macro ldrh1 ptr, regB, val + USER(9998f, ldrh \ptr, [\regB], \val) + .endm + + .macro strh1 ptr, regB, val + USER(9998f, strh \ptr, [\regB], \val) + .endm + + .macro ldr1 ptr, regB, val + USER(9998f, ldr \ptr, [\regB], \val) + .endm + + .macro str1 ptr, regB, val + USER(9998f, str \ptr, [\regB], \val) + .endm + + .macro ldp1 ptr, regB, regC, val + USER(9998f, ldp \ptr, \regB, [\regC], \val) + .endm + + .macro stp1 ptr, regB, regC, val + USER(9998f, stp \ptr, \regB, [\regC], \val) + .endm + +end .req x5 ENTRY(__copy_in_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) - add x5, x0, x2 // upper user buffer boundary - subs x2, x2, #16 - b.mi 1f -0: -USER(9f, ldp x3, x4, [x1], #16) - subs x2, x2, #16 -USER(9f, stp x3, x4, [x0], #16) - b.pl 0b -1: adds x2, x2, #8 - b.mi 2f -USER(9f, ldr x3, [x1], #8 ) - sub x2, x2, #8 -USER(9f, str x3, [x0], #8 ) -2: adds x2, x2, #4 - b.mi 3f -USER(9f, ldr w3, [x1], #4 ) - sub x2, x2, #4 -USER(9f, str w3, [x0], #4 ) -3: adds x2, x2, #2 - b.mi 4f -USER(9f, ldrh w3, [x1], #2 ) - sub x2, x2, #2 -USER(9f, strh w3, [x0], #2 ) -4: adds x2, x2, #1 - b.mi 5f -USER(9f, ldrb w3, [x1] ) -USER(9f, strb w3, [x0] ) -5: mov x0, #0 + add end, x0, x2 +#include "copy_template.S" ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) + mov x0, #0 ret ENDPROC(__copy_in_user) .section .fixup,"ax" .align 2 -9: sub x0, x5, x0 // bytes not copied +9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S new file mode 100644 index 000000000000..410fbdb8163f --- /dev/null +++ b/arch/arm64/lib/copy_template.S @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + + +/* + * Copy a buffer from src to dest (alignment handled by the hardware) + * + * Parameters: + * x0 - dest + * x1 - src + * x2 - n + * Returns: + * x0 - dest + */ +dstin .req x0 +src .req x1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +dst .req x6 + +A_l .req x7 +A_h .req x8 +B_l .req x9 +B_h .req x10 +C_l .req x11 +C_h .req x12 +D_l .req x13 +D_h .req x14 + + mov dst, dstin + cmp count, #16 + /*When memory length is less than 16, the accessed are not aligned.*/ + b.lo .Ltiny15 + + neg tmp2, src + ands tmp2, tmp2, #15/* Bytes to reach alignment. */ + b.eq .LSrcAligned + sub count, count, tmp2 + /* + * Copy the leading memory data from src to dst in an increasing + * address order.By this way,the risk of overwritting the source + * memory data is eliminated when the distance between src and + * dst is less than 16. The memory accesses here are alignment. + */ + tbz tmp2, #0, 1f + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 +1: + tbz tmp2, #1, 2f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +2: + tbz tmp2, #2, 3f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +3: + tbz tmp2, #3, .LSrcAligned + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 + +.LSrcAligned: + cmp count, #64 + b.ge .Lcpy_over64 + /* + * Deal with small copies quickly by dropping straight into the + * exit block. + */ +.Ltail63: + /* + * Copy up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. + */ + ands tmp1, count, #0x30 + b.eq .Ltiny15 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +1: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +2: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +.Ltiny15: + /* + * Prefer to break one ldp/stp into several load/store to access + * memory in an increasing address order,rather than to load/store 16 + * bytes from (src-16) to (dst-16) and to backward the src to aligned + * address,which way is used in original cortex memcpy. If keeping + * the original memcpy process here, memmove need to satisfy the + * precondition that src address is at least 16 bytes bigger than dst + * address,otherwise some source data will be overwritten when memove + * call memcpy directly. To make memmove simpler and decouple the + * memcpy's dependency on memmove, withdrew the original process. + */ + tbz count, #3, 1f + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 +1: + tbz count, #2, 2f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +2: + tbz count, #1, 3f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +3: + tbz count, #0, .Lexitfunc + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 + + b .Lexitfunc + +.Lcpy_over64: + subs count, count, #128 + b.ge .Lcpy_body_large + /* + * Less than 128 bytes to copy, so handle 64 here and then jump + * to the tail. + */ + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + ldp1 D_l, D_h, src, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 + b .Lexitfunc + + /* + * Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. + */ + .p2align L1_CACHE_SHIFT +.Lcpy_body_large: + /* pre-get 64 bytes data. */ + ldp1 A_l, A_h, src, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + ldp1 D_l, D_h, src, #16 +1: + /* + * interlace the load of next 64 bytes data block with store of the last + * loaded 64 bytes data. + */ + stp1 A_l, A_h, dst, #16 + ldp1 A_l, A_h, src, #16 + stp1 B_l, B_h, dst, #16 + ldp1 B_l, B_h, src, #16 + stp1 C_l, C_h, dst, #16 + ldp1 C_l, C_h, src, #16 + stp1 D_l, D_h, dst, #16 + ldp1 D_l, D_h, src, #16 + subs count, count, #64 + b.ge 1b + stp1 A_l, A_h, dst, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 +.Lexitfunc: diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index a257b47e2dc4..7512bbbc07ac 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -18,6 +18,7 @@ #include <asm/alternative.h> #include <asm/assembler.h> +#include <asm/cache.h> #include <asm/cpufeature.h> #include <asm/sysreg.h> @@ -31,44 +32,52 @@ * Returns: * x0 - bytes not copied */ + .macro ldrb1 ptr, regB, val + ldrb \ptr, [\regB], \val + .endm + + .macro strb1 ptr, regB, val + USER(9998f, strb \ptr, [\regB], \val) + .endm + + .macro ldrh1 ptr, regB, val + ldrh \ptr, [\regB], \val + .endm + + .macro strh1 ptr, regB, val + USER(9998f, strh \ptr, [\regB], \val) + .endm + + .macro ldr1 ptr, regB, val + ldr \ptr, [\regB], \val + .endm + + .macro str1 ptr, regB, val + USER(9998f, str \ptr, [\regB], \val) + .endm + + .macro ldp1 ptr, regB, regC, val + ldp \ptr, \regB, [\regC], \val + .endm + + .macro stp1 ptr, regB, regC, val + USER(9998f, stp \ptr, \regB, [\regC], \val) + .endm + +end .req x5 ENTRY(__copy_to_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) - add x5, x0, x2 // upper user buffer boundary - subs x2, x2, #16 - b.mi 1f -0: - ldp x3, x4, [x1], #16 - subs x2, x2, #16 -USER(9f, stp x3, x4, [x0], #16) - b.pl 0b -1: adds x2, x2, #8 - b.mi 2f - ldr x3, [x1], #8 - sub x2, x2, #8 -USER(9f, str x3, [x0], #8 ) -2: adds x2, x2, #4 - b.mi 3f - ldr w3, [x1], #4 - sub x2, x2, #4 -USER(9f, str w3, [x0], #4 ) -3: adds x2, x2, #2 - b.mi 4f - ldrh w3, [x1], #2 - sub x2, x2, #2 -USER(9f, strh w3, [x0], #2 ) -4: adds x2, x2, #1 - b.mi 5f - ldrb w3, [x1] -USER(9f, strb w3, [x0] ) -5: mov x0, #0 + add end, x0, x2 +#include "copy_template.S" ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) + mov x0, #0 ret ENDPROC(__copy_to_user) .section .fixup,"ax" .align 2 -9: sub x0, x5, x0 // bytes not copied +9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 8636b7549163..4444c1d25f4b 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -41,4 +41,4 @@ ENTRY(memchr) ret 2: mov x0, #0 ret -ENDPROC(memchr) +ENDPIPROC(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index 6ea0776ba6de..ffbdec00327d 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -255,4 +255,4 @@ CPU_LE( rev data2, data2 ) .Lret0: mov result, #0 ret -ENDPROC(memcmp) +ENDPIPROC(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 8a9a96d3ddae..67613937711f 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -36,166 +36,42 @@ * Returns: * x0 - dest */ -dstin .req x0 -src .req x1 -count .req x2 -tmp1 .req x3 -tmp1w .req w3 -tmp2 .req x4 -tmp2w .req w4 -tmp3 .req x5 -tmp3w .req w5 -dst .req x6 + .macro ldrb1 ptr, regB, val + ldrb \ptr, [\regB], \val + .endm -A_l .req x7 -A_h .req x8 -B_l .req x9 -B_h .req x10 -C_l .req x11 -C_h .req x12 -D_l .req x13 -D_h .req x14 + .macro strb1 ptr, regB, val + strb \ptr, [\regB], \val + .endm -ENTRY(memcpy) - mov dst, dstin - cmp count, #16 - /*When memory length is less than 16, the accessed are not aligned.*/ - b.lo .Ltiny15 + .macro ldrh1 ptr, regB, val + ldrh \ptr, [\regB], \val + .endm - neg tmp2, src - ands tmp2, tmp2, #15/* Bytes to reach alignment. */ - b.eq .LSrcAligned - sub count, count, tmp2 - /* - * Copy the leading memory data from src to dst in an increasing - * address order.By this way,the risk of overwritting the source - * memory data is eliminated when the distance between src and - * dst is less than 16. The memory accesses here are alignment. - */ - tbz tmp2, #0, 1f - ldrb tmp1w, [src], #1 - strb tmp1w, [dst], #1 -1: - tbz tmp2, #1, 2f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -2: - tbz tmp2, #2, 3f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -3: - tbz tmp2, #3, .LSrcAligned - ldr tmp1, [src],#8 - str tmp1, [dst],#8 + .macro strh1 ptr, regB, val + strh \ptr, [\regB], \val + .endm -.LSrcAligned: - cmp count, #64 - b.ge .Lcpy_over64 - /* - * Deal with small copies quickly by dropping straight into the - * exit block. - */ -.Ltail63: - /* - * Copy up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. - */ - ands tmp1, count, #0x30 - b.eq .Ltiny15 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -1: - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -2: - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -.Ltiny15: - /* - * Prefer to break one ldp/stp into several load/store to access - * memory in an increasing address order,rather than to load/store 16 - * bytes from (src-16) to (dst-16) and to backward the src to aligned - * address,which way is used in original cortex memcpy. If keeping - * the original memcpy process here, memmove need to satisfy the - * precondition that src address is at least 16 bytes bigger than dst - * address,otherwise some source data will be overwritten when memove - * call memcpy directly. To make memmove simpler and decouple the - * memcpy's dependency on memmove, withdrew the original process. - */ - tbz count, #3, 1f - ldr tmp1, [src], #8 - str tmp1, [dst], #8 -1: - tbz count, #2, 2f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -2: - tbz count, #1, 3f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -3: - tbz count, #0, .Lexitfunc - ldrb tmp1w, [src] - strb tmp1w, [dst] + .macro ldr1 ptr, regB, val + ldr \ptr, [\regB], \val + .endm -.Lexitfunc: - ret + .macro str1 ptr, regB, val + str \ptr, [\regB], \val + .endm -.Lcpy_over64: - subs count, count, #128 - b.ge .Lcpy_body_large - /* - * Less than 128 bytes to copy, so handle 64 here and then jump - * to the tail. - */ - ldp A_l, A_h, [src],#16 - stp A_l, A_h, [dst],#16 - ldp B_l, B_h, [src],#16 - ldp C_l, C_h, [src],#16 - stp B_l, B_h, [dst],#16 - stp C_l, C_h, [dst],#16 - ldp D_l, D_h, [src],#16 - stp D_l, D_h, [dst],#16 + .macro ldp1 ptr, regB, regC, val + ldp \ptr, \regB, [\regC], \val + .endm - tst count, #0x3f - b.ne .Ltail63 - ret + .macro stp1 ptr, regB, regC, val + stp \ptr, \regB, [\regC], \val + .endm - /* - * Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. - */ - .p2align L1_CACHE_SHIFT -.Lcpy_body_large: - /* pre-get 64 bytes data. */ - ldp A_l, A_h, [src],#16 - ldp B_l, B_h, [src],#16 - ldp C_l, C_h, [src],#16 - ldp D_l, D_h, [src],#16 -1: - /* - * interlace the load of next 64 bytes data block with store of the last - * loaded 64 bytes data. - */ - stp A_l, A_h, [dst],#16 - ldp A_l, A_h, [src],#16 - stp B_l, B_h, [dst],#16 - ldp B_l, B_h, [src],#16 - stp C_l, C_h, [dst],#16 - ldp C_l, C_h, [src],#16 - stp D_l, D_h, [dst],#16 - ldp D_l, D_h, [src],#16 - subs count, count, #64 - b.ge 1b - stp A_l, A_h, [dst],#16 - stp B_l, B_h, [dst],#16 - stp C_l, C_h, [dst],#16 - stp D_l, D_h, [dst],#16 - - tst count, #0x3f - b.ne .Ltail63 + .weak memcpy +ENTRY(__memcpy) +ENTRY(memcpy) +#include "copy_template.S" ret -ENDPROC(memcpy) +ENDPIPROC(memcpy) +ENDPROC(__memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 57b19ea2dad4..a5a4459013b1 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -57,12 +57,14 @@ C_h .req x12 D_l .req x13 D_h .req x14 + .weak memmove +ENTRY(__memmove) ENTRY(memmove) cmp dstin, src - b.lo memcpy + b.lo __memcpy add tmp1, src, count cmp dstin, tmp1 - b.hs memcpy /* No overlap. */ + b.hs __memcpy /* No overlap. */ add dst, dstin, count add src, src, count @@ -194,4 +196,5 @@ ENTRY(memmove) tst count, #0x3f b.ne .Ltail63 ret -ENDPROC(memmove) +ENDPIPROC(memmove) +ENDPROC(__memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 7c72dfd36b63..f2670a9f218c 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -54,6 +54,8 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 + .weak memset +ENTRY(__memset) ENTRY(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 @@ -213,4 +215,5 @@ ENTRY(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -ENDPROC(memset) +ENDPIPROC(memset) +ENDPROC(__memset) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 42f828b06c59..471fe61760ef 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -231,4 +231,4 @@ CPU_BE( orr syndrome, diff, has_nul ) lsr data1, data1, #56 sub result, data1, data2, lsr #56 ret -ENDPROC(strcmp) +ENDPIPROC(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 987b68b9ce44..55ccc8e24c08 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -123,4 +123,4 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ csinv data1, data1, xzr, le csel data2, data2, data2a, le b .Lrealigned -ENDPROC(strlen) +ENDPIPROC(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index 0224cf5a5533..e267044761c6 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -307,4 +307,4 @@ CPU_BE( orr syndrome, diff, has_nul ) .Lret0: mov result, #0 ret -ENDPROC(strncmp) +ENDPIPROC(strncmp) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 773d37a14039..57f57fde5722 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,3 +4,6 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM64_PTDUMP) += dump.o + +obj-$(CONFIG_KASAN) += kasan_init.o +KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index eb48d5df4a0f..cfa44a6adc0a 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -98,7 +98,7 @@ ENTRY(__flush_dcache_area) b.lo 1b dsb sy ret -ENDPROC(__flush_dcache_area) +ENDPIPROC(__flush_dcache_area) /* * __inval_cache_range(start, end) @@ -131,7 +131,7 @@ __dma_inv_range: b.lo 2b dsb sy ret -ENDPROC(__inval_cache_range) +ENDPIPROC(__inval_cache_range) ENDPROC(__dma_inv_range) /* @@ -171,7 +171,7 @@ ENTRY(__dma_flush_range) b.lo 1b dsb sy ret -ENDPROC(__dma_flush_range) +ENDPIPROC(__dma_flush_range) /* * __dma_map_area(start, size, dir) @@ -184,7 +184,7 @@ ENTRY(__dma_map_area) cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_range b __dma_clean_range -ENDPROC(__dma_map_area) +ENDPIPROC(__dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -197,4 +197,4 @@ ENTRY(__dma_unmap_area) cmp w2, #DMA_TO_DEVICE b.ne __dma_inv_range ret -ENDPROC(__dma_unmap_area) +ENDPIPROC(__dma_unmap_area) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index d70ff14dbdbd..f636a2639f03 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -17,135 +17,185 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/init.h> +#include <linux/bitops.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/mm.h> -#include <linux/smp.h> -#include <linux/percpu.h> +#include <asm/cpufeature.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> -#include <asm/cachetype.h> -#define asid_bits(reg) \ - (((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8) +static u32 asid_bits; +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS) +static atomic64_t asid_generation; +static unsigned long *asid_map; -static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -unsigned int cpu_last_asid = ASID_FIRST_VERSION; +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; -/* - * We fork()ed a process, and we need a new context for the child to run in. - */ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - mm->context.id = 0; - raw_spin_lock_init(&mm->context.id_lock); -} +#define ASID_MASK (~GENMASK(asid_bits - 1, 0)) +#define ASID_FIRST_VERSION (1UL << asid_bits) +#define NUM_USER_ASIDS ASID_FIRST_VERSION -static void flush_context(void) +static void flush_context(unsigned int cpu) { - /* set the reserved TTBR0 before flushing the TLB */ - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - if (icache_is_aivivt()) - __flush_icache_all(); -} + int i; + u64 asid; -static void set_mm_context(struct mm_struct *mm, unsigned int asid) -{ - unsigned long flags; + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); /* - * Locking needed for multi-threaded applications where the same - * mm->context.id could be set from different CPUs during the - * broadcast. This function is also called via IPI so the - * mm->context.id_lock has to be IRQ-safe. + * Ensure the generation bump is observed before we xchg the + * active_asids. */ - raw_spin_lock_irqsave(&mm->context.id_lock, flags); - if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { + smp_wmb(); + + for_each_possible_cpu(i) { + asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); /* - * Old version of ASID found. Set the new one and reset - * mm_cpumask(mm). + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. */ - mm->context.id = asid; - cpumask_clear(mm_cpumask(mm)); + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); + per_cpu(reserved_asids, i) = asid; } - raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); - /* - * Set the mm_cpumask(mm) bit for the current CPU. - */ - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + cpumask_setall(&tlb_flush_pending); + + if (icache_is_aivivt()) + __flush_icache_all(); } -/* - * Reset the ASID on the current CPU. This function call is broadcast from the - * CPU handling the ASID rollover and holding cpu_asid_lock. - */ -static void reset_context(void *info) +static int is_reserved_asid(u64 asid) +{ + int cpu; + for_each_possible_cpu(cpu) + if (per_cpu(reserved_asids, cpu) == asid) + return 1; + return 0; +} + +static u64 new_context(struct mm_struct *mm, unsigned int cpu) { - unsigned int asid; - unsigned int cpu = smp_processor_id(); - struct mm_struct *mm = current->active_mm; + static u32 cur_idx = 1; + u64 asid = atomic64_read(&mm->context.id); + u64 generation = atomic64_read(&asid_generation); + + if (asid != 0) { + /* + * If our current ASID was active during a rollover, we + * can continue to use it and this was just a false alarm. + */ + if (is_reserved_asid(asid)) + return generation | (asid & ~ASID_MASK); + + /* + * We had a valid ASID in a previous life, so try to re-use + * it if possible. + */ + asid &= ~ASID_MASK; + if (!__test_and_set_bit(asid, asid_map)) + goto bump_gen; + } /* - * current->active_mm could be init_mm for the idle thread immediately - * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to - * the reserved value, so no need to reset any context. + * Allocate a free ASID. If we can't find one, take a note of the + * currently active ASIDs and mark the TLBs as requiring flushes. + * We always count from ASID #1, as we use ASID #0 when setting a + * reserved TTBR0 for the init_mm. */ - if (mm == &init_mm) - return; + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); + if (asid != NUM_USER_ASIDS) + goto set_asid; - smp_rmb(); - asid = cpu_last_asid + cpu; + /* We're out of ASIDs, so increment the global generation count */ + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); - flush_context(); - set_mm_context(mm, asid); + /* We have at least 1 ASID per CPU, so this will always succeed */ + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); - /* set the new ASID */ - cpu_switch_mm(mm->pgd, mm); +set_asid: + __set_bit(asid, asid_map); + cur_idx = asid; + +bump_gen: + asid |= generation; + return asid; } -void __new_context(struct mm_struct *mm) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) { - unsigned int asid; - unsigned int bits = asid_bits(); + unsigned long flags; + u64 asid; + + asid = atomic64_read(&mm->context.id); - raw_spin_lock(&cpu_asid_lock); /* - * Check the ASID again, in case the change was broadcast from another - * CPU before we acquired the lock. + * The memory ordering here is subtle. We rely on the control + * dependency between the generation read and the update of + * active_asids to ensure that we are synchronised with a + * parallel rollover (i.e. this pairs with the smp_wmb() in + * flush_context). */ - if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - raw_spin_unlock(&cpu_asid_lock); - return; + if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) + && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) + goto switch_mm_fastpath; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + asid = atomic64_read(&mm->context.id); + if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { + asid = new_context(mm, cpu); + atomic64_set(&mm->context.id, asid); } - /* - * At this point, it is guaranteed that the current mm (with an old - * ASID) isn't active on any other CPU since the ASIDs are changed - * simultaneously via IPI. - */ - asid = ++cpu_last_asid; - /* - * If we've used up all our ASIDs, we need to start a new version and - * flush the TLB. - */ - if (unlikely((asid & ((1 << bits) - 1)) == 0)) { - /* increment the ASID version */ - cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits); - if (cpu_last_asid == 0) - cpu_last_asid = ASID_FIRST_VERSION; - asid = cpu_last_asid + smp_processor_id(); - flush_context(); - smp_wmb(); - smp_call_function(reset_context, NULL, 1); - cpu_last_asid += NR_CPUS - 1; + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + local_flush_tlb_all(); + + atomic64_set(&per_cpu(active_asids, cpu), asid); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + +switch_mm_fastpath: + cpu_switch_mm(mm->pgd, mm); +} + +static int asids_init(void) +{ + int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4); + + switch (fld) { + default: + pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld); + /* Fallthrough */ + case 0: + asid_bits = 8; + break; + case 2: + asid_bits = 16; } - set_mm_context(mm, asid); - raw_spin_unlock(&cpu_asid_lock); + /* If we end up with more CPUs than ASIDs, expect things to crash */ + WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + atomic64_set(&asid_generation, ASID_FIRST_VERSION); + asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), + GFP_KERNEL); + if (!asid_map) + panic("Failed to allocate bitmap for %lu ASIDs\n", + NUM_USER_ASIDS); + + pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); + return 0; } +early_initcall(asids_init); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 99224dcebdc5..6320361d8d4c 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -533,3 +533,460 @@ static int __init dma_debug_do_init(void) return 0; } fs_initcall(dma_debug_do_init); + + +#ifdef CONFIG_IOMMU_DMA +#include <linux/dma-iommu.h> +#include <linux/platform_device.h> +#include <linux/amba/bus.h> + +/* Thankfully, all cache ops are by VA so we can ignore phys here */ +static void flush_page(struct device *dev, const void *virt, phys_addr_t phys) +{ + __dma_flush_range(virt, virt + PAGE_SIZE); +} + +static void *__iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + bool coherent = is_device_dma_coherent(dev); + int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent); + void *addr; + + if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n")) + return NULL; + /* + * Some drivers rely on this, and we probably don't want the + * possibility of stale kernel data being read by devices anyway. + */ + gfp |= __GFP_ZERO; + + if (gfp & __GFP_WAIT) { + struct page **pages; + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + + pages = iommu_dma_alloc(dev, size, gfp, ioprot, handle, + flush_page); + if (!pages) + return NULL; + + addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, + __builtin_return_address(0)); + if (!addr) + iommu_dma_free(dev, pages, size, handle); + } else { + struct page *page; + /* + * In atomic context we can't remap anything, so we'll only + * get the virtually contiguous buffer we need by way of a + * physically contiguous allocation. + */ + if (coherent) { + page = alloc_pages(gfp, get_order(size)); + addr = page ? page_address(page) : NULL; + } else { + addr = __alloc_from_pool(size, &page, gfp); + } + if (!addr) + return NULL; + + *handle = iommu_dma_map_page(dev, page, 0, size, ioprot); + if (iommu_dma_mapping_error(dev, *handle)) { + if (coherent) + __free_pages(page, get_order(size)); + else + __free_from_pool(addr, size); + addr = NULL; + } + } + return addr; +} + +static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + /* + * @cpu_addr will be one of 3 things depending on how it was allocated: + * - A remapped array of pages from iommu_dma_alloc(), for all + * non-atomic allocations. + * - A non-cacheable alias from the atomic pool, for atomic + * allocations by non-coherent devices. + * - A normal lowmem address, for atomic allocations by + * coherent devices. + * Hence how dodgy the below logic looks... + */ + if (__in_atomic_pool(cpu_addr, size)) { + iommu_dma_unmap_page(dev, handle, size, 0, NULL); + __free_from_pool(cpu_addr, size); + } else if (is_vmalloc_addr(cpu_addr)){ + struct vm_struct *area = find_vm_area(cpu_addr); + + if (WARN_ON(!area || !area->pages)) + return; + iommu_dma_free(dev, area->pages, size, &handle); + dma_common_free_remap(cpu_addr, size, VM_USERMAP); + } else { + iommu_dma_unmap_page(dev, handle, size, 0, NULL); + __free_pages(virt_to_page(cpu_addr), get_order(size)); + } +} + +static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + struct vm_struct *area; + int ret; + + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, + is_device_dma_coherent(dev)); + + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + area = find_vm_area(cpu_addr); + if (WARN_ON(!area || !area->pages)) + return -ENXIO; + + return iommu_dma_mmap(area->pages, size, vma); +} + +static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, struct dma_attrs *attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct vm_struct *area = find_vm_area(cpu_addr); + + if (WARN_ON(!area || !area->pages)) + return -ENXIO; + + return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size, + GFP_KERNEL); +} + +static void __iommu_sync_single_for_cpu(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + phys_addr_t phys; + + if (is_device_dma_coherent(dev)) + return; + + phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + __dma_unmap_area(phys_to_virt(phys), size, dir); +} + +static void __iommu_sync_single_for_device(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + phys_addr_t phys; + + if (is_device_dma_coherent(dev)) + return; + + phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + __dma_map_area(phys_to_virt(phys), size, dir); +} + +static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + bool coherent = is_device_dma_coherent(dev); + int prot = dma_direction_to_prot(dir, coherent); + dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); + + if (!iommu_dma_mapping_error(dev, dev_addr) && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_single_for_device(dev, dev_addr, size, dir); + + return dev_addr; +} + +static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_single_for_cpu(dev, dev_addr, size, dir); + + iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs); +} + +static void __iommu_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (is_device_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(sg_virt(sg), sg->length, dir); +} + +static void __iommu_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + if (is_device_dma_coherent(dev)) + return; + + for_each_sg(sgl, sg, nelems, i) + __dma_map_area(sg_virt(sg), sg->length, dir); +} + +static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + bool coherent = is_device_dma_coherent(dev); + + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_sg_for_device(dev, sgl, nelems, dir); + + return iommu_dma_map_sg(dev, sgl, nelems, + dma_direction_to_prot(dir, coherent)); +} + +static void __iommu_unmap_sg_attrs(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir); + + iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs); +} + +static struct dma_map_ops iommu_dma_ops = { + .alloc = __iommu_alloc_attrs, + .free = __iommu_free_attrs, + .mmap = __iommu_mmap_attrs, + .get_sgtable = __iommu_get_sgtable, + .map_page = __iommu_map_page, + .unmap_page = __iommu_unmap_page, + .map_sg = __iommu_map_sg_attrs, + .unmap_sg = __iommu_unmap_sg_attrs, + .sync_single_for_cpu = __iommu_sync_single_for_cpu, + .sync_single_for_device = __iommu_sync_single_for_device, + .sync_sg_for_cpu = __iommu_sync_sg_for_cpu, + .sync_sg_for_device = __iommu_sync_sg_for_device, + .dma_supported = iommu_dma_supported, + .mapping_error = iommu_dma_mapping_error, +}; + +/* + * TODO: Right now __iommu_setup_dma_ops() gets called too early to do + * everything it needs to - the device is only partially created and the + * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we + * need this delayed attachment dance. Once IOMMU probe ordering is sorted + * to move the arch_setup_dma_ops() call later, all the notifier bits below + * become unnecessary, and will go away. + */ +struct iommu_dma_notifier_data { + struct list_head list; + struct device *dev; + const struct iommu_ops *ops; + u64 dma_base; + u64 size; +}; +static LIST_HEAD(iommu_dma_masters); +static DEFINE_MUTEX(iommu_dma_notifier_lock); + +/* + * Temporarily "borrow" a domain feature flag to to tell if we had to resort + * to creating our own domain here, in case we need to clean it up again. + */ +#define __IOMMU_DOMAIN_FAKE_DEFAULT (1U << 31) + +static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, + u64 dma_base, u64 size) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + + /* + * Best case: The device is either part of a group which was + * already attached to a domain in a previous call, or it's + * been put in a default DMA domain by the IOMMU core. + */ + if (!domain) { + /* + * Urgh. The IOMMU core isn't going to do default domains + * for non-PCI devices anyway, until it has some means of + * abstracting the entirely implementation-specific + * sideband data/SoC topology/unicorn dust that may or + * may not differentiate upstream masters. + * So until then, HORRIBLE HACKS! + */ + domain = ops->domain_alloc(IOMMU_DOMAIN_DMA); + if (!domain) + goto out_no_domain; + + domain->ops = ops; + domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT; + + if (iommu_attach_device(domain, dev)) + goto out_put_domain; + } + + if (iommu_dma_init_domain(domain, dma_base, size)) + goto out_detach; + + dev->archdata.dma_ops = &iommu_dma_ops; + return true; + +out_detach: + iommu_detach_device(domain, dev); +out_put_domain: + if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT) + iommu_domain_free(domain); +out_no_domain: + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + dev_name(dev)); + return false; +} + +static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops, + u64 dma_base, u64 size) +{ + struct iommu_dma_notifier_data *iommudata; + + iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL); + if (!iommudata) + return; + + iommudata->dev = dev; + iommudata->ops = ops; + iommudata->dma_base = dma_base; + iommudata->size = size; + + mutex_lock(&iommu_dma_notifier_lock); + list_add(&iommudata->list, &iommu_dma_masters); + mutex_unlock(&iommu_dma_notifier_lock); +} + +static int __iommu_attach_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct iommu_dma_notifier_data *master, *tmp; + + if (action != BUS_NOTIFY_ADD_DEVICE) + return 0; + + mutex_lock(&iommu_dma_notifier_lock); + list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) { + if (do_iommu_attach(master->dev, master->ops, + master->dma_base, master->size)) { + list_del(&master->list); + kfree(master); + } + } + mutex_unlock(&iommu_dma_notifier_lock); + return 0; +} + +static int register_iommu_dma_ops_notifier(struct bus_type *bus) +{ + struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); + int ret; + + if (!nb) + return -ENOMEM; + /* + * The device must be attached to a domain before the driver probe + * routine gets a chance to start allocating DMA buffers. However, + * the IOMMU driver also needs a chance to configure the iommu_group + * via its add_device callback first, so we need to make the attach + * happen between those two points. Since the IOMMU core uses a bus + * notifier with default priority for add_device, do the same but + * with a lower priority to ensure the appropriate ordering. + */ + nb->notifier_call = __iommu_attach_notifier; + nb->priority = -100; + + ret = bus_register_notifier(bus, nb); + if (ret) { + pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n", + bus->name); + kfree(nb); + } + return ret; +} + +static int __init __iommu_dma_init(void) +{ + int ret; + + ret = iommu_dma_init(); + if (!ret) + ret = register_iommu_dma_ops_notifier(&platform_bus_type); + if (!ret) + ret = register_iommu_dma_ops_notifier(&amba_bustype); + return ret; +} +arch_initcall(__iommu_dma_init); + +static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *ops) +{ + struct iommu_group *group; + + if (!ops) + return; + /* + * TODO: As a concession to the future, we're ready to handle being + * called both early and late (i.e. after bus_add_device). Once all + * the platform bus code is reworked to call us late and the notifier + * junk above goes away, move the body of do_iommu_attach here. + */ + group = iommu_group_get(dev); + if (group) { + do_iommu_attach(dev, ops, dma_base, size); + iommu_group_put(group); + } else { + queue_iommu_attach(dev, ops, dma_base, size); + } +} + +void arch_teardown_dma_ops(struct device *dev) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + + if (domain) { + iommu_detach_device(domain, dev); + if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT) + iommu_domain_free(domain); + } + + dev->archdata.dma_ops = NULL; +} + +#else + +static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu) +{ } + +#endif /* CONFIG_IOMMU_DMA */ + +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu, bool coherent) +{ + if (!acpi_disabled && !dev->archdata.dma_ops) + dev->archdata.dma_ops = dma_ops; + + dev->archdata.dma_coherent = coherent; + __iommu_setup_dma_ops(dev, dma_base, size, iommu); +} diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index f3d6221cd5bd..5a22a119a74c 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -67,6 +67,12 @@ static struct addr_marker address_markers[] = { { -1, NULL }, }; +/* + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the pte entries. When the continuity is broken it then + * dumps out a description of the range. + */ struct pg_state { struct seq_file *seq; const struct addr_marker *marker; @@ -114,6 +120,16 @@ static const struct prot_bits pte_bits[] = { .set = "NG", .clear = " ", }, { + .mask = PTE_CONT, + .val = PTE_CONT, + .set = "CON", + .clear = " ", + }, { + .mask = PTE_TABLE_BIT, + .val = PTE_TABLE_BIT, + .set = " ", + .clear = "BLK", + }, { .mask = PTE_UXN, .val = PTE_UXN, .set = "UXN", @@ -198,7 +214,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, unsigned long delta; if (st->current_prot) { - seq_printf(st->seq, "0x%16lx-0x%16lx ", + seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr); delta = (addr - st->start_address) >> 10; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index aba9ead1384c..19211c4a8911 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -287,6 +287,7 @@ retry: * starvation. */ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; goto retry; } } @@ -555,7 +556,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, } #ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(void) +void cpu_enable_pan(void *__unused) { config_sctlr_el1(SCTLR_EL1_SPAN, 0); } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f5c0680d17d9..17bf39ac83ba 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -86,10 +86,10 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) memset(zone_size, 0, sizeof(zone_size)); /* 4GB maximum for 32-bit only capable devices */ - if (IS_ENABLED(CONFIG_ZONE_DMA)) { - max_dma = PFN_DOWN(arm64_dma_phys_limit); - zone_size[ZONE_DMA] = max_dma - min; - } +#ifdef CONFIG_ZONE_DMA + max_dma = PFN_DOWN(arm64_dma_phys_limit); + zone_size[ZONE_DMA] = max_dma - min; +#endif zone_size[ZONE_NORMAL] = max - max_dma; memcpy(zhole_size, zone_size, sizeof(zhole_size)); @@ -101,11 +101,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (start >= max) continue; - if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) { +#ifdef CONFIG_ZONE_DMA + if (start < max_dma) { unsigned long dma_end = min(end, max_dma); zhole_size[ZONE_DMA] -= dma_end - start; } - +#endif if (end > max_dma) { unsigned long normal_end = min(end, max); unsigned long normal_start = max(start, max_dma); @@ -298,6 +299,9 @@ void __init mem_init(void) #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) pr_notice("Virtual kernel memory layout:\n" +#ifdef CONFIG_KASAN + " kasan : 0x%16lx - 0x%16lx (%6ld GB)\n" +#endif " vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n" #ifdef CONFIG_SPARSEMEM_VMEMMAP " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" @@ -310,6 +314,9 @@ void __init mem_init(void) " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" " .text : 0x%p" " - 0x%p" " (%6ld KB)\n" " .data : 0x%p" " - 0x%p" " (%6ld KB)\n", +#ifdef CONFIG_KASAN + MLG(KASAN_SHADOW_START, KASAN_SHADOW_END), +#endif MLG(VMALLOC_START, VMALLOC_END), #ifdef CONFIG_SPARSEMEM_VMEMMAP MLG((unsigned long)vmemmap, diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c new file mode 100644 index 000000000000..cf038c7d9fa9 --- /dev/null +++ b/arch/arm64/mm/kasan_init.c @@ -0,0 +1,165 @@ +/* + * This file contains kasan initialization code for ARM64. + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) "kasan: " fmt +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/start_kernel.h> + +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); + +static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr, + unsigned long end) +{ + pte_t *pte; + unsigned long next; + + if (pmd_none(*pmd)) + pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + + pte = pte_offset_kernel(pmd, addr); + do { + next = addr + PAGE_SIZE; + set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page), + PAGE_KERNEL)); + } while (pte++, addr = next, addr != end && pte_none(*pte)); +} + +static void __init kasan_early_pmd_populate(pud_t *pud, + unsigned long addr, + unsigned long end) +{ + pmd_t *pmd; + unsigned long next; + + if (pud_none(*pud)) + pud_populate(&init_mm, pud, kasan_zero_pmd); + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + kasan_early_pte_populate(pmd, addr, next); + } while (pmd++, addr = next, addr != end && pmd_none(*pmd)); +} + +static void __init kasan_early_pud_populate(pgd_t *pgd, + unsigned long addr, + unsigned long end) +{ + pud_t *pud; + unsigned long next; + + if (pgd_none(*pgd)) + pgd_populate(&init_mm, pgd, kasan_zero_pud); + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + kasan_early_pmd_populate(pud, addr, next); + } while (pud++, addr = next, addr != end && pud_none(*pud)); +} + +static void __init kasan_map_early_shadow(void) +{ + unsigned long addr = KASAN_SHADOW_START; + unsigned long end = KASAN_SHADOW_END; + unsigned long next; + pgd_t *pgd; + + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, end); + kasan_early_pud_populate(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +asmlinkage void __init kasan_early_init(void) +{ + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); + kasan_map_early_shadow(); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + /* + * Remove references to kasan page tables from + * swapper_pg_dir. pgd_clear() can't be used + * here because it's nop on 2,3-level pagetable setups + */ + for (; start < end; start += PGDIR_SIZE) + set_pgd(pgd_offset_k(start), __pgd(0)); +} + +static void __init cpu_set_ttbr1(unsigned long ttbr1) +{ + asm( + " msr ttbr1_el1, %0\n" + " isb" + : + : "r" (ttbr1)); +} + +void __init kasan_init(void) +{ + struct memblock_region *reg; + + /* + * We are going to perform proper setup of shadow memory. + * At first we should unmap early shadow (clear_pgds() call bellow). + * However, instrumented code couldn't execute without shadow memory. + * tmp_pg_dir used to keep early shadow mapped until full shadow + * setup will be finished. + */ + memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); + cpu_set_ttbr1(__pa(tmp_pg_dir)); + flush_tlb_all(); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_mem_to_shadow((void *)MODULES_VADDR)); + + for_each_memblock(memory, reg) { + void *start = (void *)__phys_to_virt(reg->base); + void *end = (void *)__phys_to_virt(reg->base + reg->size); + + if (start >= end) + break; + + /* + * end + 1 here is intentional. We check several shadow bytes in + * advance to slightly speed up fastpath. In some rare cases + * we could cross boundary of mapped shadow, so we just map + * some more here. + */ + vmemmap_populate((unsigned long)kasan_mem_to_shadow(start), + (unsigned long)kasan_mem_to_shadow(end) + 1, + pfn_to_nid(virt_to_pfn(start))); + } + + memset(kasan_zero_page, 0, PAGE_SIZE); + cpu_set_ttbr1(__pa(swapper_pg_dir)); + flush_tlb_all(); + + /* At this point kasan is fully initialized. Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KernelAddressSanitizer initialized\n"); +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9211b8527f25..c2fa6b56613c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -32,6 +32,7 @@ #include <asm/cputype.h> #include <asm/fixmap.h> +#include <asm/kernel-pgtable.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/sizes.h> @@ -80,19 +81,55 @@ static void split_pmd(pmd_t *pmd, pte_t *pte) do { /* * Need to have the least restrictive permissions available - * permissions will be fixed up later + * permissions will be fixed up later. Default the new page + * range as contiguous ptes. */ - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT)); pfn++; } while (pte++, i++, i < PTRS_PER_PTE); } +/* + * Given a PTE with the CONT bit set, determine where the CONT range + * starts, and clear the entire range of PTE CONT bits. + */ +static void clear_cont_pte_range(pte_t *pte, unsigned long addr) +{ + int i; + + pte -= CONT_RANGE_OFFSET(addr); + for (i = 0; i < CONT_PTES; i++) { + set_pte(pte, pte_mknoncont(*pte)); + pte++; + } + flush_tlb_all(); +} + +/* + * Given a range of PTEs set the pfn and provided page protection flags + */ +static void __populate_init_pte(pte_t *pte, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot) +{ + unsigned long pfn = __phys_to_pfn(phys); + + do { + /* clear all the bits except the pfn, then apply the prot */ + set_pte(pte, pfn_pte(pfn, prot)); + pte++; + pfn++; + addr += PAGE_SIZE; + } while (addr != end); +} + static void alloc_init_pte(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long pfn, + unsigned long end, phys_addr_t phys, pgprot_t prot, void *(*alloc)(unsigned long size)) { pte_t *pte; + unsigned long next; if (pmd_none(*pmd) || pmd_sect(*pmd)) { pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); @@ -105,9 +142,27 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, pte = pte_offset_kernel(pmd, addr); do { - set_pte(pte, pfn_pte(pfn, prot)); - pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); + next = min(end, (addr + CONT_SIZE) & CONT_MASK); + if (((addr | next | phys) & ~CONT_MASK) == 0) { + /* a block of CONT_PTES */ + __populate_init_pte(pte, addr, next, phys, + prot | __pgprot(PTE_CONT)); + } else { + /* + * If the range being split is already inside of a + * contiguous range but this PTE isn't going to be + * contiguous, then we want to unmark the adjacent + * ranges, then update the portion of the range we + * are interrested in. + */ + clear_cont_pte_range(pte, addr); + __populate_init_pte(pte, addr, next, phys, prot); + } + + pte += (next - addr) >> PAGE_SHIFT; + phys += next - addr; + addr = next; + } while (addr != end); } void split_pud(pud_t *old_pud, pmd_t *pmd) @@ -168,8 +223,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, } } } else { - alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, alloc); + alloc_init_pte(pmd, addr, next, phys, prot, alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); @@ -353,14 +407,11 @@ static void __init map_mem(void) * memory addressable from the initial direct kernel mapping. * * The initial direct kernel mapping, located at swapper_pg_dir, gives - * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from - * PHYS_OFFSET (which must be aligned to 2MB as per - * Documentation/arm64/booting.txt). + * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps, + * memory starting from PHYS_OFFSET (which must be aligned to 2MB as + * per Documentation/arm64/booting.txt). */ - if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) - limit = PHYS_OFFSET + PMD_SIZE; - else - limit = PHYS_OFFSET + PUD_SIZE; + limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE; memblock_set_current_limit(limit); /* map all the memory banks */ @@ -371,21 +422,24 @@ static void __init map_mem(void) if (start >= end) break; -#ifndef CONFIG_ARM64_64K_PAGES - /* - * For the first memory bank align the start address and - * current memblock limit to prevent create_mapping() from - * allocating pte page tables from unmapped memory. - * When 64K pages are enabled, the pte page table for the - * first PGDIR_SIZE is already present in swapper_pg_dir. - */ - if (start < limit) - start = ALIGN(start, PMD_SIZE); - if (end < limit) { - limit = end & PMD_MASK; - memblock_set_current_limit(limit); + if (ARM64_SWAPPER_USES_SECTION_MAPS) { + /* + * For the first memory bank align the start address and + * current memblock limit to prevent create_mapping() from + * allocating pte page tables from unmapped memory. With + * the section maps, if the first block doesn't end on section + * size boundary, create_mapping() will try to allocate a pte + * page, which may be returned from an unmapped area. + * When section maps are not used, the pte page table for the + * current limit is already present in swapper_pg_dir. + */ + if (start < limit) + start = ALIGN(start, SECTION_SIZE); + if (end < limit) { + limit = end & SECTION_MASK; + memblock_set_current_limit(limit); + } } -#endif __map_memblock(start, end); } @@ -456,7 +510,7 @@ void __init paging_init(void) * point to zero page to avoid speculatively fetching new entries. */ cpu_set_reserved_ttbr0(); - flush_tlb_all(); + local_flush_tlb_all(); cpu_set_default_tcr_t0sz(); } @@ -498,12 +552,12 @@ int kern_addr_valid(unsigned long addr) return pfn_valid(pte_pfn(*pte)); } #ifdef CONFIG_SPARSEMEM_VMEMMAP -#ifdef CONFIG_ARM64_64K_PAGES +#if !ARM64_SWAPPER_USES_SECTION_MAPS int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { return vmemmap_populate_basepages(start, end, node); } -#else /* !CONFIG_ARM64_64K_PAGES */ +#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { unsigned long addr = start; @@ -638,7 +692,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); pgprot_t prot = PAGE_KERNEL | PTE_RDONLY; - int granularity, size, offset; + int size, offset; void *dt_virt; /* @@ -664,24 +718,15 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) */ BUILD_BUG_ON(dt_virt_base % SZ_2M); - if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) { - BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PMD_SHIFT != - __fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT); - - granularity = PAGE_SIZE; - } else { - BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PUD_SHIFT != - __fix_to_virt(FIX_BTMAP_BEGIN) >> PUD_SHIFT); - - granularity = PMD_SIZE; - } + BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT != + __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT); - offset = dt_phys % granularity; + offset = dt_phys % SWAPPER_BLOCK_SIZE; dt_virt = (void *)dt_virt_base + offset; /* map the first chunk so we can read the size from the header */ - create_mapping(round_down(dt_phys, granularity), dt_virt_base, - granularity, prot); + create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, + SWAPPER_BLOCK_SIZE, prot); if (fdt_check_header(dt_virt) != 0) return NULL; @@ -690,9 +735,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) if (size > MAX_FDT_SIZE) return NULL; - if (offset + size > granularity) - create_mapping(round_down(dt_phys, granularity), dt_virt_base, - round_up(offset + size, granularity), prot); + if (offset + size > SWAPPER_BLOCK_SIZE) + create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, + round_up(offset + size, SWAPPER_BLOCK_SIZE), prot); memblock_reserve(dt_phys, size); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index e47ed1c5dce1..3571c7309c5e 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -45,7 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages, int ret; struct page_change_data data; - if (!IS_ALIGNED(addr, PAGE_SIZE)) { + if (!PAGE_ALIGNED(addr)) { start &= PAGE_MASK; end = start + size; WARN_ON_ONCE(1); diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 71ca104f97bd..cb3ba1b812e7 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -28,8 +28,6 @@ #include "mm.h" -#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) - static struct kmem_cache *pgd_cache; pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index e4ee7bd8830a..cacecc4ad3e5 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -30,7 +30,9 @@ #ifdef CONFIG_ARM64_64K_PAGES #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K -#else +#elif defined(CONFIG_ARM64_16K_PAGES) +#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K +#else /* CONFIG_ARM64_4K_PAGES */ #define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K #endif @@ -130,7 +132,7 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) - mmid w1, x1 // get mm->context.id + mmid x1, x1 // get mm->context.id bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb @@ -146,8 +148,8 @@ ENDPROC(cpu_do_switch_mm) * value of the SCTLR_EL1 register. */ ENTRY(__cpu_setup) - tlbi vmalle1is // invalidate I + D TLBs - dsb ish + tlbi vmalle1 // Invalidate local TLB + dsb nsh mov x0, #3 << 20 msr cpacr_el1, x0 // Enable FP/ASIMD @@ -163,12 +165,14 @@ ENTRY(__cpu_setup) * DEVICE_GRE 010 00001100 * NORMAL_NC 011 01000100 * NORMAL 100 11111111 + * NORMAL_WT 101 10111011 */ ldr x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \ MAIR(0x04, MT_DEVICE_nGnRE) | \ MAIR(0x0c, MT_DEVICE_GRE) | \ MAIR(0x44, MT_NORMAL_NC) | \ - MAIR(0xff, MT_NORMAL) + MAIR(0xff, MT_NORMAL) | \ + MAIR(0xbb, MT_NORMAL_WT) msr mair_el1, x5 /* * Prepare SCTLR diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c047598b09e0..a44e5293c6f5 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -744,7 +744,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)ctx.image; - prog->jited = true; + prog->jited = 1; out: kfree(ctx.offset); } diff --git a/arch/avr32/boards/atngw100/mrmt.c b/arch/avr32/boards/atngw100/mrmt.c index 91146b416cdb..99b0a7984950 100644 --- a/arch/avr32/boards/atngw100/mrmt.c +++ b/arch/avr32/boards/atngw100/mrmt.c @@ -21,7 +21,6 @@ #include <linux/leds_pwm.h> #include <linux/input.h> #include <linux/gpio_keys.h> -#include <linux/atmel_serial.h> #include <linux/spi/spi.h> #include <linux/spi/ads7846.h> diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index f61f2dd67464..241b9b9729d8 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -20,4 +20,5 @@ generic-y += sections.h generic-y += topology.h generic-y += trace_clock.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index 97c9bdf83409..d74fd8ce980a 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -19,8 +19,8 @@ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic_set(v, i) (((v)->counter) = i) +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) #define ATOMIC_OP_RETURN(op, asm_op, asm_con) \ static inline int __atomic_##op##_return(int i, atomic_t *v) \ diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 61cd1e786a14..91d49c0a3118 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -46,4 +46,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index f17c4dc6050c..945544ec603e 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -59,4 +59,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/c6x/platforms/megamod-pic.c b/arch/c6x/platforms/megamod-pic.c index ddcb45d7dfa7..43afc03e4125 100644 --- a/arch/c6x/platforms/megamod-pic.c +++ b/arch/c6x/platforms/megamod-pic.c @@ -178,7 +178,7 @@ static void __init set_megamod_mux(struct megamod_pic *pic, int src, int output) static void __init parse_priority_map(struct megamod_pic *pic, int *mapping, int size) { - struct device_node *np = pic->irqhost->of_node; + struct device_node *np = irq_domain_get_of_node(pic->irqhost); const __be32 *map; int i, maplen; u32 val; diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 8da5653bd895..e086f9e93728 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -57,7 +57,6 @@ config CRIS select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_IRQ_SHOW select GENERIC_IOMAP - select GENERIC_CMOS_UPDATE select MODULES_USE_ELF_RELA select CLONE_BACKWARDS2 select OLD_SIGSUSPEND diff --git a/arch/cris/arch-v10/kernel/head.S b/arch/cris/arch-v10/kernel/head.S index 4a146e1749c9..a4877a421756 100644 --- a/arch/cris/arch-v10/kernel/head.S +++ b/arch/cris/arch-v10/kernel/head.S @@ -354,63 +354,6 @@ no_command_line: blo 1b nop -#ifdef CONFIG_BLK_DEV_ETRAXIDE - ;; disable ATA before enabling it in genconfig below - moveq 0,$r0 - move.d $r0,[R_ATA_CTRL_DATA] - move.d $r0,[R_ATA_TRANSFER_CNT] - move.d $r0,[R_ATA_CONFIG] -#if 0 - move.d R_PORT_G_DATA, $r1 - move.d $r0, [$r1]; assert ATA bus-reset - nop - nop - nop - nop - nop - nop - move.d 0x08000000,$r0 - move.d $r0,[$r1] -#endif -#endif - -#ifdef CONFIG_JULIETTE - ;; configure external DMA channel 0 before enabling it in genconfig - - moveq 0,$r0 - move.d $r0,[R_EXT_DMA_0_ADDR] - ; cnt enable, word size, output, stop, size 0 - move.d IO_STATE (R_EXT_DMA_0_CMD, cnt, enable) \ - | IO_STATE (R_EXT_DMA_0_CMD, rqpol, ahigh) \ - | IO_STATE (R_EXT_DMA_0_CMD, apol, ahigh) \ - | IO_STATE (R_EXT_DMA_0_CMD, rq_ack, burst) \ - | IO_STATE (R_EXT_DMA_0_CMD, wid, word) \ - | IO_STATE (R_EXT_DMA_0_CMD, dir, output) \ - | IO_STATE (R_EXT_DMA_0_CMD, run, stop) \ - | IO_FIELD (R_EXT_DMA_0_CMD, trf_count, 0),$r0 - move.d $r0,[R_EXT_DMA_0_CMD] - - ;; reset dma4 and wait for completion - - moveq IO_STATE (R_DMA_CH4_CMD, cmd, reset),$r0 - move.b $r0,[R_DMA_CH4_CMD] -1: move.b [R_DMA_CH4_CMD],$r0 - and.b IO_MASK (R_DMA_CH4_CMD, cmd),$r0 - cmp.b IO_STATE (R_DMA_CH4_CMD, cmd, reset),$r0 - beq 1b - nop - - ;; reset dma5 and wait for completion - - moveq IO_STATE (R_DMA_CH5_CMD, cmd, reset),$r0 - move.b $r0,[R_DMA_CH5_CMD] -1: move.b [R_DMA_CH5_CMD],$r0 - and.b IO_MASK (R_DMA_CH5_CMD, cmd),$r0 - cmp.b IO_STATE (R_DMA_CH5_CMD, cmd, reset),$r0 - beq 1b - nop -#endif - ;; Etrax product HW genconfig setup moveq 0,$r0 @@ -447,21 +390,6 @@ no_command_line: | IO_STATE (R_GEN_CONFIG, dma9, usb),$r0 -#if defined(CONFIG_ETRAX_DEF_R_PORT_G0_DIR_OUT) - or.d IO_STATE (R_GEN_CONFIG, g0dir, out),$r0 -#endif - -#if defined(CONFIG_ETRAX_DEF_R_PORT_G8_15_DIR_OUT) - or.d IO_STATE (R_GEN_CONFIG, g8_15dir, out),$r0 -#endif -#if defined(CONFIG_ETRAX_DEF_R_PORT_G16_23_DIR_OUT) - or.d IO_STATE (R_GEN_CONFIG, g16_23dir, out),$r0 -#endif - -#if defined(CONFIG_ETRAX_DEF_R_PORT_G24_DIR_OUT) - or.d IO_STATE (R_GEN_CONFIG, g24dir, out),$r0 -#endif - move.d $r0,[genconfig_shadow] ; init a shadow register of R_GEN_CONFIG move.d $r0,[R_GEN_CONFIG] @@ -500,19 +428,9 @@ no_command_line: ;; including their shadow registers move.b CONFIG_ETRAX_DEF_R_PORT_PA_DIR,$r0 -#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_PA7) - or.b IO_STATE (R_PORT_PA_DIR, dir7, output),$r0 -#endif move.b $r0,[port_pa_dir_shadow] move.b $r0,[R_PORT_PA_DIR] move.b CONFIG_ETRAX_DEF_R_PORT_PA_DATA,$r0 -#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_PA7) -#if defined(CONFIG_BLUETOOTH_RESET_ACTIVE_HIGH) - and.b ~(1 << 7),$r0 -#else - or.b (1 << 7),$r0 -#endif -#endif move.b $r0,[port_pa_data_shadow] move.b $r0,[R_PORT_PA_DATA] @@ -520,19 +438,9 @@ no_command_line: move.b $r0,[port_pb_config_shadow] move.b $r0,[R_PORT_PB_CONFIG] move.b CONFIG_ETRAX_DEF_R_PORT_PB_DIR,$r0 -#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_PB5) - or.b IO_STATE (R_PORT_PB_DIR, dir5, output),$r0 -#endif move.b $r0,[port_pb_dir_shadow] move.b $r0,[R_PORT_PB_DIR] move.b CONFIG_ETRAX_DEF_R_PORT_PB_DATA,$r0 -#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_PB5) -#if defined(CONFIG_BLUETOOTH_RESET_ACTIVE_HIGH) - and.b ~(1 << 5),$r0 -#else - or.b (1 << 5),$r0 -#endif -#endif move.b $r0,[port_pb_data_shadow] move.b $r0,[R_PORT_PB_DATA] @@ -541,20 +449,6 @@ no_command_line: move.d $r0, [R_PORT_PB_I2C] moveq 0,$r0 -#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_G10) -#if defined(CONFIG_BLUETOOTH_RESET_ACTIVE_HIGH) - and.d ~(1 << 10),$r0 -#else - or.d (1 << 10),$r0 -#endif -#endif -#if defined(CONFIG_BLUETOOTH) && defined(CONFIG_BLUETOOTH_RESET_G11) -#if defined(CONFIG_BLUETOOTH_RESET_ACTIVE_HIGH) - and.d ~(1 << 11),$r0 -#else - or.d (1 << 11),$r0 -#endif -#endif move.d $r0,[port_g_data_shadow] move.d $r0,[R_PORT_G_DATA] diff --git a/arch/cris/arch-v10/kernel/kgdb.c b/arch/cris/arch-v10/kernel/kgdb.c index 22d846bfc570..ed71ade93a73 100644 --- a/arch/cris/arch-v10/kernel/kgdb.c +++ b/arch/cris/arch-v10/kernel/kgdb.c @@ -275,7 +275,7 @@ static char remcomOutBuffer[BUFMAX]; /* Error and warning messages. */ enum error_type { - SUCCESS, E01, E02, E03, E04, E05, E06, E07 + SUCCESS, E01, E02, E03, E04, E05, E06, E07, E08 }; static char *error_message[] = { @@ -286,7 +286,8 @@ static char *error_message[] = "E04 The command is not supported - [s,C,S,!,R,d,r] - internal error.", "E05 Change register content - P - the register is not implemented..", "E06 Change memory content - M - internal error.", - "E07 Change register content - P - the register is not stored on the stack" + "E07 Change register content - P - the register is not stored on the stack", + "E08 Invalid parameter" }; /********************************* Register image ****************************/ /* Use the order of registers as defined in "AXIS ETRAX CRIS Programmer's @@ -351,7 +352,7 @@ char internal_stack[INTERNAL_STACK_SIZE]; breakpoint to be handled. A static breakpoint uses the content of register BRP as it is whereas a dynamic breakpoint requires subtraction with 2 in order to execute the instruction. The first breakpoint is static. */ -static unsigned char is_dyn_brkp = 0; +static unsigned char __used is_dyn_brkp; /********************************* String library ****************************/ /* Single-step over library functions creates trap loops. */ @@ -413,18 +414,6 @@ gdb_cris_strtol (const char *s, char **endptr, int base) } /********************************** Packet I/O ******************************/ -/* Returns the integer equivalent of a hexadecimal character. */ -static int -hex (char ch) -{ - if ((ch >= 'a') && (ch <= 'f')) - return (ch - 'a' + 10); - if ((ch >= '0') && (ch <= '9')) - return (ch - '0'); - if ((ch >= 'A') && (ch <= 'F')) - return (ch - 'A' + 10); - return (-1); -} /* Convert the memory, pointed to by mem into hexadecimal representation. Put the result in buf, and return a pointer to the last character @@ -455,22 +444,6 @@ mem2hex(char *buf, unsigned char *mem, int count) return (buf); } -/* Convert the array, in hexadecimal representation, pointed to by buf into - binary representation. Put the result in mem, and return a pointer to - the character after the last byte written. */ -static unsigned char* -hex2mem (unsigned char *mem, char *buf, int count) -{ - int i; - unsigned char ch; - for (i = 0; i < count; i++) { - ch = hex (*buf++) << 4; - ch = ch + hex (*buf++); - *mem++ = ch; - } - return (mem); -} - /* Put the content of the array, in binary representation, pointed to by buf into memory pointed to by mem, and return a pointer to the character after the last byte written. @@ -524,8 +497,8 @@ getpacket (char *buffer) buffer[count] = '\0'; if (ch == '#') { - xmitcsum = hex (getDebugChar ()) << 4; - xmitcsum += hex (getDebugChar ()); + xmitcsum = hex_to_bin(getDebugChar()) << 4; + xmitcsum += hex_to_bin(getDebugChar()); if (checksum != xmitcsum) { /* Wrong checksum */ putDebugChar ('-'); @@ -599,7 +572,7 @@ putDebugString (const unsigned char *str, int length) /********************************* Register image ****************************/ /* Write a value to a specified register in the register image of the current - thread. Returns status code SUCCESS, E02 or E05. */ + thread. Returns status code SUCCESS, E02, E05 or E08. */ static int write_register (int regno, char *val) { @@ -608,8 +581,9 @@ write_register (int regno, char *val) if (regno >= R0 && regno <= PC) { /* 32-bit register with simple offset. */ - hex2mem ((unsigned char *)current_reg + regno * sizeof(unsigned int), - val, sizeof(unsigned int)); + if (hex2bin((unsigned char *)current_reg + regno * sizeof(unsigned int), + val, sizeof(unsigned int))) + status = E08; } else if (regno == P0 || regno == VR || regno == P4 || regno == P8) { /* Do not support read-only registers. */ @@ -618,13 +592,15 @@ write_register (int regno, char *val) else if (regno == CCR) { /* 16 bit register with complex offset. (P4 is read-only, P6 is not implemented, and P7 (MOF) is 32 bits in ETRAX 100LX. */ - hex2mem ((unsigned char *)&(current_reg->ccr) + (regno-CCR) * sizeof(unsigned short), - val, sizeof(unsigned short)); + if (hex2bin((unsigned char *)&(current_reg->ccr) + (regno-CCR) * sizeof(unsigned short), + val, sizeof(unsigned short))) + status = E08; } else if (regno >= MOF && regno <= USP) { /* 32 bit register with complex offset. (P8 has been taken care of.) */ - hex2mem ((unsigned char *)&(current_reg->ibr) + (regno-IBR) * sizeof(unsigned int), - val, sizeof(unsigned int)); + if (hex2bin((unsigned char *)&(current_reg->ibr) + (regno-IBR) * sizeof(unsigned int), + val, sizeof(unsigned int))) + status = E08; } else { /* Do not support nonexisting or unimplemented registers (P2, P3, and P6). */ @@ -759,9 +735,11 @@ handle_exception (int sigval) /* Write registers. GXX..XX Each byte of register data is described by two hex digits. Success: OK - Failure: void. */ - hex2mem((char *)&cris_reg, &remcomInBuffer[1], sizeof(registers)); - gdb_cris_strcpy (remcomOutBuffer, "OK"); + Failure: E08. */ + if (hex2bin((char *)&cris_reg, &remcomInBuffer[1], sizeof(registers))) + gdb_cris_strcpy (remcomOutBuffer, error_message[E08]); + else + gdb_cris_strcpy (remcomOutBuffer, "OK"); break; case 'P': @@ -771,7 +749,7 @@ handle_exception (int sigval) for each byte in the register (target byte order). P1f=11223344 means set register 31 to 44332211. Success: OK - Failure: E02, E05 */ + Failure: E02, E05, E08 */ { char *suffix; int regno = gdb_cris_strtol (&remcomInBuffer[1], &suffix, 16); @@ -791,6 +769,10 @@ handle_exception (int sigval) /* Do not support non-existing registers on the stack. */ gdb_cris_strcpy (remcomOutBuffer, error_message[E07]); break; + case E08: + /* Invalid parameter. */ + gdb_cris_strcpy (remcomOutBuffer, error_message[E08]); + break; default: /* Valid register number. */ gdb_cris_strcpy (remcomOutBuffer, "OK"); @@ -826,7 +808,7 @@ handle_exception (int sigval) AA..AA is the start address, LLLL is the number of bytes, and XX..XX is the hexadecimal data. Success: OK - Failure: void. */ + Failure: E08. */ { char *lenptr; char *dataptr; @@ -835,14 +817,15 @@ handle_exception (int sigval) int length = gdb_cris_strtol(lenptr+1, &dataptr, 16); if (*lenptr == ',' && *dataptr == ':') { if (remcomInBuffer[0] == 'M') { - hex2mem(addr, dataptr + 1, length); - } - else /* X */ { + if (hex2bin(addr, dataptr + 1, length)) + gdb_cris_strcpy (remcomOutBuffer, error_message[E08]); + else + gdb_cris_strcpy (remcomOutBuffer, "OK"); + } else /* X */ { bin2mem(addr, dataptr + 1, length); + gdb_cris_strcpy (remcomOutBuffer, "OK"); } - gdb_cris_strcpy (remcomOutBuffer, "OK"); - } - else { + } else { gdb_cris_strcpy (remcomOutBuffer, error_message[E06]); } } @@ -970,7 +953,7 @@ asm ("\n" " move $ibr,[cris_reg+0x4E] ; P9,\n" " move $irp,[cris_reg+0x52] ; P10,\n" " move $srp,[cris_reg+0x56] ; P11,\n" -" move $dtp0,[cris_reg+0x5A] ; P12, register BAR, assembler might not know BAR\n" +" move $bar,[cris_reg+0x5A] ; P12,\n" " ; P13, register DCCR already saved\n" ";; Due to the old assembler-versions BRP might not be recognized\n" " .word 0xE670 ; move brp,r0\n" @@ -1063,7 +1046,7 @@ asm ("\n" " move $ibr,[cris_reg+0x4E] ; P9,\n" " move $irp,[cris_reg+0x52] ; P10,\n" " move $srp,[cris_reg+0x56] ; P11,\n" -" move $dtp0,[cris_reg+0x5A] ; P12, register BAR, assembler might not know BAR\n" +" move $bar,[cris_reg+0x5A] ; P12,\n" " ; P13, register DCCR already saved\n" ";; Due to the old assembler-versions BRP might not be recognized\n" " .word 0xE670 ; move brp,r0\n" diff --git a/arch/cris/arch-v10/mm/init.c b/arch/cris/arch-v10/mm/init.c index e7f8066105aa..85e3f1b1f3ac 100644 --- a/arch/cris/arch-v10/mm/init.c +++ b/arch/cris/arch-v10/mm/init.c @@ -68,14 +68,10 @@ paging_init(void) *R_MMU_KSEG = ( IO_STATE(R_MMU_KSEG, seg_f, seg ) | /* bootrom */ IO_STATE(R_MMU_KSEG, seg_e, page ) | - IO_STATE(R_MMU_KSEG, seg_d, page ) | - IO_STATE(R_MMU_KSEG, seg_c, page ) | + IO_STATE(R_MMU_KSEG, seg_d, page ) | + IO_STATE(R_MMU_KSEG, seg_c, page ) | IO_STATE(R_MMU_KSEG, seg_b, seg ) | /* kernel reg area */ -#ifdef CONFIG_JULIETTE - IO_STATE(R_MMU_KSEG, seg_a, seg ) | /* ARTPEC etc. */ -#else IO_STATE(R_MMU_KSEG, seg_a, page ) | -#endif IO_STATE(R_MMU_KSEG, seg_9, seg ) | /* LED's on some boards */ IO_STATE(R_MMU_KSEG, seg_8, seg ) | /* CSE0/1, flash and I/O */ IO_STATE(R_MMU_KSEG, seg_7, page ) | /* kernel vmalloc area */ @@ -92,14 +88,10 @@ paging_init(void) IO_FIELD(R_MMU_KBASE_HI, base_d, 0x0 ) | IO_FIELD(R_MMU_KBASE_HI, base_c, 0x0 ) | IO_FIELD(R_MMU_KBASE_HI, base_b, 0xb ) | -#ifdef CONFIG_JULIETTE - IO_FIELD(R_MMU_KBASE_HI, base_a, 0xa ) | -#else IO_FIELD(R_MMU_KBASE_HI, base_a, 0x0 ) | -#endif IO_FIELD(R_MMU_KBASE_HI, base_9, 0x9 ) | IO_FIELD(R_MMU_KBASE_HI, base_8, 0x8 ) ); - + *R_MMU_KBASE_LO = ( IO_FIELD(R_MMU_KBASE_LO, base_7, 0x0 ) | IO_FIELD(R_MMU_KBASE_LO, base_6, 0x4 ) | IO_FIELD(R_MMU_KBASE_LO, base_5, 0x0 ) | diff --git a/arch/cris/arch-v32/Kconfig b/arch/cris/arch-v32/Kconfig index 21bbd93be34f..17dbe03af5f4 100644 --- a/arch/cris/arch-v32/Kconfig +++ b/arch/cris/arch-v32/Kconfig @@ -11,95 +11,6 @@ config ETRAX_DRAM_VIRTUAL_BASE default "c0000000" choice - prompt "Nbr of Ethernet LED groups" - depends on ETRAX_ARCH_V32 - default ETRAX_NBR_LED_GRP_ONE - help - Select how many Ethernet LED groups that can be used. Usually one per Ethernet - interface is a good choice. - -config ETRAX_NBR_LED_GRP_ZERO - bool "Use zero LED groups" - help - Select this if you do not want any Ethernet LEDs. - -config ETRAX_NBR_LED_GRP_ONE - bool "Use one LED group" - help - Select this if you want one Ethernet LED group. This LED group - can be used for one or more Ethernet interfaces. However, it is - recommended that each Ethernet interface use a dedicated LED group. - -config ETRAX_NBR_LED_GRP_TWO - bool "Use two LED groups" - help - Select this if you want two Ethernet LED groups. This is the - best choice if you have more than one Ethernet interface and - would like to have separate LEDs for the interfaces. - -endchoice - -config ETRAX_LED_G_NET0 - string "Ethernet LED group 0 green LED bit" - depends on ETRAX_ARCH_V32 && (ETRAX_NBR_LED_GRP_ONE || ETRAX_NBR_LED_GRP_TWO) - default "PA3" - help - Bit to use for the green LED in Ethernet LED group 0. - -config ETRAX_LED_R_NET0 - string "Ethernet LED group 0 red LED bit" - depends on ETRAX_ARCH_V32 && (ETRAX_NBR_LED_GRP_ONE || ETRAX_NBR_LED_GRP_TWO) - default "PA4" - help - Bit to use for the red LED in Ethernet LED group 0. - -config ETRAX_LED_G_NET1 - string "Ethernet group 1 green LED bit" - depends on ETRAX_ARCH_V32 && ETRAX_NBR_LED_GRP_TWO - default "" - help - Bit to use for the green LED in Ethernet LED group 1. - -config ETRAX_LED_R_NET1 - string "Ethernet group 1 red LED bit" - depends on ETRAX_ARCH_V32 && ETRAX_NBR_LED_GRP_TWO - default "" - help - Bit to use for the red LED in Ethernet LED group 1. - -config ETRAX_V32_LED2G - string "Second green LED bit" - depends on ETRAX_ARCH_V32 - default "PA5" - help - Bit to use for the first green LED (status LED). - Most Axis products use bit A5 here. - -config ETRAX_V32_LED2R - string "Second red LED bit" - depends on ETRAX_ARCH_V32 - default "PA6" - help - Bit to use for the first red LED (network LED). - Most Axis products use bit A6 here. - -config ETRAX_V32_LED3G - string "Third green LED bit" - depends on ETRAX_ARCH_V32 - default "PA7" - help - Bit to use for the first green LED (drive/power LED). - Most Axis products use bit A7 here. - -config ETRAX_V32_LED3R - string "Third red LED bit" - depends on ETRAX_ARCH_V32 - default "PA7" - help - Bit to use for the first red LED (drive/power LED). - Most Axis products use bit A7 here. - -choice prompt "Kernel GDB port" depends on ETRAX_KGDB default ETRAX_KGDB_PORT0 diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig index e6c523cc40bc..2735eb7671a5 100644 --- a/arch/cris/arch-v32/drivers/Kconfig +++ b/arch/cris/arch-v32/drivers/Kconfig @@ -149,173 +149,6 @@ config ETRAX_NANDBOOT Say Y if your boot code, kernel and root file system is in NAND flash. Say N if they are in NOR flash. -config ETRAX_I2C - bool "I2C driver" - depends on ETRAX_ARCH_V32 - help - This option enables the I2C driver used by e.g. the RTC driver. - -config ETRAX_V32_I2C_DATA_PORT - string "I2C data pin" - depends on ETRAX_I2C - help - The pin to use for I2C data. - -config ETRAX_V32_I2C_CLK_PORT - string "I2C clock pin" - depends on ETRAX_I2C - help - The pin to use for I2C clock. - -config ETRAX_GPIO - bool "GPIO support" - depends on ETRAX_ARCH_V32 - ---help--- - Enables the ETRAX general port device (major 120, minors 0-4). - You can use this driver to access the general port bits. It supports - these ioctl's: - #include <linux/etraxgpio.h> - fd = open("/dev/gpioa", O_RDWR); // or /dev/gpiob - ioctl(fd, _IO(ETRAXGPIO_IOCTYPE, IO_SETBITS), bits_to_set); - ioctl(fd, _IO(ETRAXGPIO_IOCTYPE, IO_CLRBITS), bits_to_clear); - err = ioctl(fd, _IO(ETRAXGPIO_IOCTYPE, IO_READ_INBITS), &val); - Remember that you need to setup the port directions appropriately in - the General configuration. - -config ETRAX_VIRTUAL_GPIO - bool "Virtual GPIO support" - depends on ETRAX_GPIO - help - Enables the virtual Etrax general port device (major 120, minor 6). - It uses an I/O expander for the I2C-bus. - -config ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN - int "Virtual GPIO interrupt pin on PA pin" - range 0 7 - depends on ETRAX_VIRTUAL_GPIO - help - The pin to use on PA for virtual gpio interrupt. - -config ETRAX_PA_CHANGEABLE_DIR - hex "PA user changeable dir mask" - depends on ETRAX_GPIO - default "0x00" if ETRAXFS - default "0x00000000" if !ETRAXFS - help - This is a bitmask with information of what bits in PA that a - user can change direction on using ioctl's. - Bit set = changeable. - You probably want 0 here, but it depends on your hardware. - -config ETRAX_PA_CHANGEABLE_BITS - hex "PA user changeable bits mask" - depends on ETRAX_GPIO - default "0x00" if ETRAXFS - default "0x00000000" if !ETRAXFS - help - This is a bitmask with information of what bits in PA - that a user can change the value on using ioctl's. - Bit set = changeable. - -config ETRAX_PB_CHANGEABLE_DIR - hex "PB user changeable dir mask" - depends on ETRAX_GPIO - default "0x00000" if ETRAXFS - default "0x00000000" if !ETRAXFS - help - This is a bitmask with information of what bits in PB - that a user can change direction on using ioctl's. - Bit set = changeable. - You probably want 0 here, but it depends on your hardware. - -config ETRAX_PB_CHANGEABLE_BITS - hex "PB user changeable bits mask" - depends on ETRAX_GPIO - default "0x00000" if ETRAXFS - default "0x00000000" if !ETRAXFS - help - This is a bitmask with information of what bits in PB - that a user can change the value on using ioctl's. - Bit set = changeable. - -config ETRAX_PC_CHANGEABLE_DIR - hex "PC user changeable dir mask" - depends on ETRAX_GPIO - default "0x00000" if ETRAXFS - default "0x00000000" if !ETRAXFS - help - This is a bitmask with information of what bits in PC - that a user can change direction on using ioctl's. - Bit set = changeable. - You probably want 0 here, but it depends on your hardware. - -config ETRAX_PC_CHANGEABLE_BITS - hex "PC user changeable bits mask" - depends on ETRAX_GPIO - default "0x00000" if ETRAXFS - default "0x00000000" if !ETRAXFS - help - This is a bitmask with information of what bits in PC - that a user can change the value on using ioctl's. - Bit set = changeable. - -config ETRAX_PD_CHANGEABLE_DIR - hex "PD user changeable dir mask" - depends on ETRAX_GPIO && ETRAXFS - default "0x00000" - help - This is a bitmask with information of what bits in PD - that a user can change direction on using ioctl's. - Bit set = changeable. - You probably want 0x00000 here, but it depends on your hardware. - -config ETRAX_PD_CHANGEABLE_BITS - hex "PD user changeable bits mask" - depends on ETRAX_GPIO && ETRAXFS - default "0x00000" - help - This is a bitmask (18 bits) with information of what bits in PD - that a user can change the value on using ioctl's. - Bit set = changeable. - -config ETRAX_PE_CHANGEABLE_DIR - hex "PE user changeable dir mask" - depends on ETRAX_GPIO && ETRAXFS - default "0x00000" - help - This is a bitmask (18 bits) with information of what bits in PE - that a user can change direction on using ioctl's. - Bit set = changeable. - You probably want 0x00000 here, but it depends on your hardware. - -config ETRAX_PE_CHANGEABLE_BITS - hex "PE user changeable bits mask" - depends on ETRAX_GPIO && ETRAXFS - default "0x00000" - help - This is a bitmask (18 bits) with information of what bits in PE - that a user can change the value on using ioctl's. - Bit set = changeable. - -config ETRAX_PV_CHANGEABLE_DIR - hex "PV user changeable dir mask" - depends on ETRAX_VIRTUAL_GPIO - default "0x0000" - help - This is a bitmask (16 bits) with information of what bits in PV - that a user can change direction on using ioctl's. - Bit set = changeable. - You probably want 0x0000 here, but it depends on your hardware. - -config ETRAX_PV_CHANGEABLE_BITS - hex "PV user changeable bits mask" - depends on ETRAX_VIRTUAL_GPIO - default "0x0000" - help - This is a bitmask (16 bits) with information of what bits in PV - that a user can change the value on using ioctl's. - Bit set = changeable. - config ETRAX_CARDBUS bool "Cardbus support" depends on ETRAX_ARCH_V32 diff --git a/arch/cris/arch-v32/drivers/Makefile b/arch/cris/arch-v32/drivers/Makefile index 15fbfefced2c..b5a75fdce77b 100644 --- a/arch/cris/arch-v32/drivers/Makefile +++ b/arch/cris/arch-v32/drivers/Makefile @@ -7,6 +7,5 @@ obj-$(CONFIG_ETRAX_AXISFLASHMAP) += axisflashmap.o obj-$(CONFIG_ETRAXFS) += mach-fs/ obj-$(CONFIG_CRIS_MACH_ARTPEC3) += mach-a3/ obj-$(CONFIG_ETRAX_IOP_FW_LOAD) += iop_fw_load.o -obj-$(CONFIG_ETRAX_I2C) += i2c.o obj-$(CONFIG_ETRAX_SYNCHRONOUS_SERIAL) += sync_serial.o obj-$(CONFIG_PCI) += pci/ diff --git a/arch/cris/arch-v32/drivers/axisflashmap.c b/arch/cris/arch-v32/drivers/axisflashmap.c index 5387424683cc..c6309a182f46 100644 --- a/arch/cris/arch-v32/drivers/axisflashmap.c +++ b/arch/cris/arch-v32/drivers/axisflashmap.c @@ -361,7 +361,7 @@ static int __init init_axis_flash(void) #if 0 /* Dump flash memory so we can see what is going on */ if (main_mtd) { - int sectoraddr, i; + int sectoraddr; for (sectoraddr = 0; sectoraddr < 2*65536+4096; sectoraddr += PAGESIZE) { main_mtd->read(main_mtd, sectoraddr, PAGESIZE, &len, @@ -369,21 +369,7 @@ static int __init init_axis_flash(void) printk(KERN_INFO "Sector at %d (length %d):\n", sectoraddr, len); - for (i = 0; i < PAGESIZE; i += 16) { - printk(KERN_INFO - "%02x %02x %02x %02x " - "%02x %02x %02x %02x " - "%02x %02x %02x %02x " - "%02x %02x %02x %02x\n", - page[i] & 255, page[i+1] & 255, - page[i+2] & 255, page[i+3] & 255, - page[i+4] & 255, page[i+5] & 255, - page[i+6] & 255, page[i+7] & 255, - page[i+8] & 255, page[i+9] & 255, - page[i+10] & 255, page[i+11] & 255, - page[i+12] & 255, page[i+13] & 255, - page[i+14] & 255, page[i+15] & 255); - } + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, page, PAGESIZE, false); } } #endif @@ -417,25 +403,11 @@ static int __init init_axis_flash(void) #if 0 /* Dump partition table so we can see what is going on */ printk(KERN_INFO - "axisflashmap: flash read %d bytes at 0x%08x, data: " - "%02x %02x %02x %02x %02x %02x %02x %02x\n", - len, CONFIG_ETRAX_PTABLE_SECTOR, - page[0] & 255, page[1] & 255, - page[2] & 255, page[3] & 255, - page[4] & 255, page[5] & 255, - page[6] & 255, page[7] & 255); + "axisflashmap: flash read %d bytes at 0x%08x, data: %8ph\n", + len, CONFIG_ETRAX_PTABLE_SECTOR, page); printk(KERN_INFO - "axisflashmap: partition table offset %d, data: " - "%02x %02x %02x %02x %02x %02x %02x %02x\n", - PARTITION_TABLE_OFFSET, - page[PARTITION_TABLE_OFFSET+0] & 255, - page[PARTITION_TABLE_OFFSET+1] & 255, - page[PARTITION_TABLE_OFFSET+2] & 255, - page[PARTITION_TABLE_OFFSET+3] & 255, - page[PARTITION_TABLE_OFFSET+4] & 255, - page[PARTITION_TABLE_OFFSET+5] & 255, - page[PARTITION_TABLE_OFFSET+6] & 255, - page[PARTITION_TABLE_OFFSET+7] & 255); + "axisflashmap: partition table offset %d, data: %8ph\n", + PARTITION_TABLE_OFFSET, page + PARTITION_TABLE_OFFSET); #endif } diff --git a/arch/cris/arch-v32/drivers/i2c.c b/arch/cris/arch-v32/drivers/i2c.c deleted file mode 100644 index 3b2c82ce8147..000000000000 --- a/arch/cris/arch-v32/drivers/i2c.c +++ /dev/null @@ -1,751 +0,0 @@ -/*!*************************************************************************** -*! -*! FILE NAME : i2c.c -*! -*! DESCRIPTION: implements an interface for IIC/I2C, both directly from other -*! kernel modules (i2c_writereg/readreg) and from userspace using -*! ioctl()'s -*! -*! Nov 30 1998 Torbjorn Eliasson Initial version. -*! Bjorn Wesen Elinux kernel version. -*! Jan 14 2000 Johan Adolfsson Fixed PB shadow register stuff - -*! don't use PB_I2C if DS1302 uses same bits, -*! use PB. -*| June 23 2003 Pieter Grimmerink Added 'i2c_sendnack'. i2c_readreg now -*| generates nack on last received byte, -*| instead of ack. -*| i2c_getack changed data level while clock -*| was high, causing DS75 to see a stop condition -*! -*! --------------------------------------------------------------------------- -*! -*! (C) Copyright 1999-2007 Axis Communications AB, LUND, SWEDEN -*! -*!***************************************************************************/ - -/****************** INCLUDE FILES SECTION ***********************************/ - -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/mutex.h> - -#include <asm/etraxi2c.h> - -#include <asm/io.h> -#include <asm/delay.h> - -#include "i2c.h" - -/****************** I2C DEFINITION SECTION *************************/ - -#define D(x) - -#define I2C_MAJOR 123 /* LOCAL/EXPERIMENTAL */ -static DEFINE_MUTEX(i2c_mutex); -static const char i2c_name[] = "i2c"; - -#define CLOCK_LOW_TIME 8 -#define CLOCK_HIGH_TIME 8 -#define START_CONDITION_HOLD_TIME 8 -#define STOP_CONDITION_HOLD_TIME 8 -#define ENABLE_OUTPUT 0x01 -#define ENABLE_INPUT 0x00 -#define I2C_CLOCK_HIGH 1 -#define I2C_CLOCK_LOW 0 -#define I2C_DATA_HIGH 1 -#define I2C_DATA_LOW 0 - -#define i2c_enable() -#define i2c_disable() - -/* enable or disable output-enable, to select output or input on the i2c bus */ - -#define i2c_dir_out() crisv32_io_set_dir(&cris_i2c_data, crisv32_io_dir_out) -#define i2c_dir_in() crisv32_io_set_dir(&cris_i2c_data, crisv32_io_dir_in) - -/* control the i2c clock and data signals */ - -#define i2c_clk(x) crisv32_io_set(&cris_i2c_clk, x) -#define i2c_data(x) crisv32_io_set(&cris_i2c_data, x) - -/* read a bit from the i2c interface */ - -#define i2c_getbit() crisv32_io_rd(&cris_i2c_data) - -#define i2c_delay(usecs) udelay(usecs) - -static DEFINE_SPINLOCK(i2c_lock); /* Protect directions etc */ - -/****************** VARIABLE SECTION ************************************/ - -static struct crisv32_iopin cris_i2c_clk; -static struct crisv32_iopin cris_i2c_data; - -/****************** FUNCTION DEFINITION SECTION *************************/ - - -/* generate i2c start condition */ - -void -i2c_start(void) -{ - /* - * SCL=1 SDA=1 - */ - i2c_dir_out(); - i2c_delay(CLOCK_HIGH_TIME/6); - i2c_data(I2C_DATA_HIGH); - i2c_clk(I2C_CLOCK_HIGH); - i2c_delay(CLOCK_HIGH_TIME); - /* - * SCL=1 SDA=0 - */ - i2c_data(I2C_DATA_LOW); - i2c_delay(START_CONDITION_HOLD_TIME); - /* - * SCL=0 SDA=0 - */ - i2c_clk(I2C_CLOCK_LOW); - i2c_delay(CLOCK_LOW_TIME); -} - -/* generate i2c stop condition */ - -void -i2c_stop(void) -{ - i2c_dir_out(); - - /* - * SCL=0 SDA=0 - */ - i2c_clk(I2C_CLOCK_LOW); - i2c_data(I2C_DATA_LOW); - i2c_delay(CLOCK_LOW_TIME*2); - /* - * SCL=1 SDA=0 - */ - i2c_clk(I2C_CLOCK_HIGH); - i2c_delay(CLOCK_HIGH_TIME*2); - /* - * SCL=1 SDA=1 - */ - i2c_data(I2C_DATA_HIGH); - i2c_delay(STOP_CONDITION_HOLD_TIME); - - i2c_dir_in(); -} - -/* write a byte to the i2c interface */ - -void -i2c_outbyte(unsigned char x) -{ - int i; - - i2c_dir_out(); - - for (i = 0; i < 8; i++) { - if (x & 0x80) { - i2c_data(I2C_DATA_HIGH); - } else { - i2c_data(I2C_DATA_LOW); - } - - i2c_delay(CLOCK_LOW_TIME/2); - i2c_clk(I2C_CLOCK_HIGH); - i2c_delay(CLOCK_HIGH_TIME); - i2c_clk(I2C_CLOCK_LOW); - i2c_delay(CLOCK_LOW_TIME/2); - x <<= 1; - } - i2c_data(I2C_DATA_LOW); - i2c_delay(CLOCK_LOW_TIME/2); - - /* - * enable input - */ - i2c_dir_in(); -} - -/* read a byte from the i2c interface */ - -unsigned char -i2c_inbyte(void) -{ - unsigned char aBitByte = 0; - int i; - - /* Switch off I2C to get bit */ - i2c_disable(); - i2c_dir_in(); - i2c_delay(CLOCK_HIGH_TIME/2); - - /* Get bit */ - aBitByte |= i2c_getbit(); - - /* Enable I2C */ - i2c_enable(); - i2c_delay(CLOCK_LOW_TIME/2); - - for (i = 1; i < 8; i++) { - aBitByte <<= 1; - /* Clock pulse */ - i2c_clk(I2C_CLOCK_HIGH); - i2c_delay(CLOCK_HIGH_TIME); - i2c_clk(I2C_CLOCK_LOW); - i2c_delay(CLOCK_LOW_TIME); - - /* Switch off I2C to get bit */ - i2c_disable(); - i2c_dir_in(); - i2c_delay(CLOCK_HIGH_TIME/2); - - /* Get bit */ - aBitByte |= i2c_getbit(); - - /* Enable I2C */ - i2c_enable(); - i2c_delay(CLOCK_LOW_TIME/2); - } - i2c_clk(I2C_CLOCK_HIGH); - i2c_delay(CLOCK_HIGH_TIME); - - /* - * we leave the clock low, getbyte is usually followed - * by sendack/nack, they assume the clock to be low - */ - i2c_clk(I2C_CLOCK_LOW); - return aBitByte; -} - -/*#--------------------------------------------------------------------------- -*# -*# FUNCTION NAME: i2c_getack -*# -*# DESCRIPTION : checks if ack was received from ic2 -*# -*#--------------------------------------------------------------------------*/ - -int -i2c_getack(void) -{ - int ack = 1; - /* - * enable output - */ - i2c_dir_out(); - /* - * Release data bus by setting - * data high - */ - i2c_data(I2C_DATA_HIGH); - /* - * enable input - */ - i2c_dir_in(); - i2c_delay(CLOCK_HIGH_TIME/4); - /* - * generate ACK clock pulse - */ - i2c_clk(I2C_CLOCK_HIGH); -#if 0 - /* - * Use PORT PB instead of I2C - * for input. (I2C not working) - */ - i2c_clk(1); - i2c_data(1); - /* - * switch off I2C - */ - i2c_data(1); - i2c_disable(); - i2c_dir_in(); -#endif - - /* - * now wait for ack - */ - i2c_delay(CLOCK_HIGH_TIME/2); - /* - * check for ack - */ - if (i2c_getbit()) - ack = 0; - i2c_delay(CLOCK_HIGH_TIME/2); - if (!ack) { - if (!i2c_getbit()) /* receiver pulld SDA low */ - ack = 1; - i2c_delay(CLOCK_HIGH_TIME/2); - } - - /* - * our clock is high now, make sure data is low - * before we enable our output. If we keep data high - * and enable output, we would generate a stop condition. - */ -#if 0 - i2c_data(I2C_DATA_LOW); - - /* - * end clock pulse - */ - i2c_enable(); - i2c_dir_out(); -#endif - i2c_clk(I2C_CLOCK_LOW); - i2c_delay(CLOCK_HIGH_TIME/4); - /* - * enable output - */ - i2c_dir_out(); - /* - * remove ACK clock pulse - */ - i2c_data(I2C_DATA_HIGH); - i2c_delay(CLOCK_LOW_TIME/2); - return ack; -} - -/*#--------------------------------------------------------------------------- -*# -*# FUNCTION NAME: I2C::sendAck -*# -*# DESCRIPTION : Send ACK on received data -*# -*#--------------------------------------------------------------------------*/ -void -i2c_sendack(void) -{ - /* - * enable output - */ - i2c_delay(CLOCK_LOW_TIME); - i2c_dir_out(); - /* - * set ack pulse high - */ - i2c_data(I2C_DATA_LOW); - /* - * generate clock pulse - */ - i2c_delay(CLOCK_HIGH_TIME/6); - i2c_clk(I2C_CLOCK_HIGH); - i2c_delay(CLOCK_HIGH_TIME); - i2c_clk(I2C_CLOCK_LOW); - i2c_delay(CLOCK_LOW_TIME/6); - /* - * reset data out - */ - i2c_data(I2C_DATA_HIGH); - i2c_delay(CLOCK_LOW_TIME); - - i2c_dir_in(); -} - -/*#--------------------------------------------------------------------------- -*# -*# FUNCTION NAME: i2c_sendnack -*# -*# DESCRIPTION : Sends NACK on received data -*# -*#--------------------------------------------------------------------------*/ -void -i2c_sendnack(void) -{ - /* - * enable output - */ - i2c_delay(CLOCK_LOW_TIME); - i2c_dir_out(); - /* - * set data high - */ - i2c_data(I2C_DATA_HIGH); - /* - * generate clock pulse - */ - i2c_delay(CLOCK_HIGH_TIME/6); - i2c_clk(I2C_CLOCK_HIGH); - i2c_delay(CLOCK_HIGH_TIME); - i2c_clk(I2C_CLOCK_LOW); - i2c_delay(CLOCK_LOW_TIME); - - i2c_dir_in(); -} - -/*#--------------------------------------------------------------------------- -*# -*# FUNCTION NAME: i2c_write -*# -*# DESCRIPTION : Writes a value to an I2C device -*# -*#--------------------------------------------------------------------------*/ -int -i2c_write(unsigned char theSlave, void *data, size_t nbytes) -{ - int error, cntr = 3; - unsigned char bytes_wrote = 0; - unsigned char value; - unsigned long flags; - - spin_lock_irqsave(&i2c_lock, flags); - - do { - error = 0; - - i2c_start(); - /* - * send slave address - */ - i2c_outbyte((theSlave & 0xfe)); - /* - * wait for ack - */ - if (!i2c_getack()) - error = 1; - /* - * send data - */ - for (bytes_wrote = 0; bytes_wrote < nbytes; bytes_wrote++) { - memcpy(&value, data + bytes_wrote, sizeof value); - i2c_outbyte(value); - /* - * now it's time to wait for ack - */ - if (!i2c_getack()) - error |= 4; - } - /* - * end byte stream - */ - i2c_stop(); - - } while (error && cntr--); - - i2c_delay(CLOCK_LOW_TIME); - - spin_unlock_irqrestore(&i2c_lock, flags); - - return -error; -} - -/*#--------------------------------------------------------------------------- -*# -*# FUNCTION NAME: i2c_read -*# -*# DESCRIPTION : Reads a value from an I2C device -*# -*#--------------------------------------------------------------------------*/ -int -i2c_read(unsigned char theSlave, void *data, size_t nbytes) -{ - unsigned char b = 0; - unsigned char bytes_read = 0; - int error, cntr = 3; - unsigned long flags; - - spin_lock_irqsave(&i2c_lock, flags); - - do { - error = 0; - memset(data, 0, nbytes); - /* - * generate start condition - */ - i2c_start(); - /* - * send slave address - */ - i2c_outbyte((theSlave | 0x01)); - /* - * wait for ack - */ - if (!i2c_getack()) - error = 1; - /* - * fetch data - */ - for (bytes_read = 0; bytes_read < nbytes; bytes_read++) { - b = i2c_inbyte(); - memcpy(data + bytes_read, &b, sizeof b); - - if (bytes_read < (nbytes - 1)) - i2c_sendack(); - } - /* - * last received byte needs to be nacked - * instead of acked - */ - i2c_sendnack(); - /* - * end sequence - */ - i2c_stop(); - } while (error && cntr--); - - spin_unlock_irqrestore(&i2c_lock, flags); - - return -error; -} - -/*#--------------------------------------------------------------------------- -*# -*# FUNCTION NAME: i2c_writereg -*# -*# DESCRIPTION : Writes a value to an I2C device -*# -*#--------------------------------------------------------------------------*/ -int -i2c_writereg(unsigned char theSlave, unsigned char theReg, - unsigned char theValue) -{ - int error, cntr = 3; - unsigned long flags; - - spin_lock_irqsave(&i2c_lock, flags); - - do { - error = 0; - - i2c_start(); - /* - * send slave address - */ - i2c_outbyte((theSlave & 0xfe)); - /* - * wait for ack - */ - if(!i2c_getack()) - error = 1; - /* - * now select register - */ - i2c_dir_out(); - i2c_outbyte(theReg); - /* - * now it's time to wait for ack - */ - if(!i2c_getack()) - error |= 2; - /* - * send register register data - */ - i2c_outbyte(theValue); - /* - * now it's time to wait for ack - */ - if(!i2c_getack()) - error |= 4; - /* - * end byte stream - */ - i2c_stop(); - } while(error && cntr--); - - i2c_delay(CLOCK_LOW_TIME); - - spin_unlock_irqrestore(&i2c_lock, flags); - - return -error; -} - -/*#--------------------------------------------------------------------------- -*# -*# FUNCTION NAME: i2c_readreg -*# -*# DESCRIPTION : Reads a value from the decoder registers. -*# -*#--------------------------------------------------------------------------*/ -unsigned char -i2c_readreg(unsigned char theSlave, unsigned char theReg) -{ - unsigned char b = 0; - int error, cntr = 3; - unsigned long flags; - - spin_lock_irqsave(&i2c_lock, flags); - - do { - error = 0; - /* - * generate start condition - */ - i2c_start(); - - /* - * send slave address - */ - i2c_outbyte((theSlave & 0xfe)); - /* - * wait for ack - */ - if(!i2c_getack()) - error = 1; - /* - * now select register - */ - i2c_dir_out(); - i2c_outbyte(theReg); - /* - * now it's time to wait for ack - */ - if(!i2c_getack()) - error |= 2; - /* - * repeat start condition - */ - i2c_delay(CLOCK_LOW_TIME); - i2c_start(); - /* - * send slave address - */ - i2c_outbyte(theSlave | 0x01); - /* - * wait for ack - */ - if(!i2c_getack()) - error |= 4; - /* - * fetch register - */ - b = i2c_inbyte(); - /* - * last received byte needs to be nacked - * instead of acked - */ - i2c_sendnack(); - /* - * end sequence - */ - i2c_stop(); - - } while(error && cntr--); - - spin_unlock_irqrestore(&i2c_lock, flags); - - return b; -} - -static int -i2c_open(struct inode *inode, struct file *filp) -{ - return 0; -} - -static int -i2c_release(struct inode *inode, struct file *filp) -{ - return 0; -} - -/* Main device API. ioctl's to write or read to/from i2c registers. - */ - -static long -i2c_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - int ret; - if(_IOC_TYPE(cmd) != ETRAXI2C_IOCTYPE) { - return -ENOTTY; - } - - switch (_IOC_NR(cmd)) { - case I2C_WRITEREG: - /* write to an i2c slave */ - D(printk("i2cw %d %d %d\n", - I2C_ARGSLAVE(arg), - I2C_ARGREG(arg), - I2C_ARGVALUE(arg))); - - mutex_lock(&i2c_mutex); - ret = i2c_writereg(I2C_ARGSLAVE(arg), - I2C_ARGREG(arg), - I2C_ARGVALUE(arg)); - mutex_unlock(&i2c_mutex); - return ret; - - case I2C_READREG: - { - unsigned char val; - /* read from an i2c slave */ - D(printk("i2cr %d %d ", - I2C_ARGSLAVE(arg), - I2C_ARGREG(arg))); - mutex_lock(&i2c_mutex); - val = i2c_readreg(I2C_ARGSLAVE(arg), I2C_ARGREG(arg)); - mutex_unlock(&i2c_mutex); - D(printk("= %d\n", val)); - return val; - } - default: - return -EINVAL; - - } - - return 0; -} - -static const struct file_operations i2c_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = i2c_ioctl, - .open = i2c_open, - .release = i2c_release, - .llseek = noop_llseek, -}; - -static int __init i2c_init(void) -{ - static int res; - static int first = 1; - - if (!first) - return res; - - first = 0; - - /* Setup and enable the DATA and CLK pins */ - - res = crisv32_io_get_name(&cris_i2c_data, - CONFIG_ETRAX_V32_I2C_DATA_PORT); - if (res < 0) - return res; - - res = crisv32_io_get_name(&cris_i2c_clk, CONFIG_ETRAX_V32_I2C_CLK_PORT); - crisv32_io_set_dir(&cris_i2c_clk, crisv32_io_dir_out); - - return res; -} - - -static int __init i2c_register(void) -{ - int res; - - res = i2c_init(); - if (res < 0) - return res; - - /* register char device */ - - res = register_chrdev(I2C_MAJOR, i2c_name, &i2c_fops); - if (res < 0) { - printk(KERN_ERR "i2c: couldn't get a major number.\n"); - return res; - } - - printk(KERN_INFO - "I2C driver v2.2, (c) 1999-2007 Axis Communications AB\n"); - - return 0; -} -/* this makes sure that i2c_init is called during boot */ -module_init(i2c_register); - -/****************** END OF FILE i2c.c ********************************/ diff --git a/arch/cris/arch-v32/drivers/i2c.h b/arch/cris/arch-v32/drivers/i2c.h deleted file mode 100644 index d9cc856f89fb..000000000000 --- a/arch/cris/arch-v32/drivers/i2c.h +++ /dev/null @@ -1,16 +0,0 @@ - -#include <linux/init.h> - -/* High level I2C actions */ -int i2c_write(unsigned char theSlave, void *data, size_t nbytes); -int i2c_read(unsigned char theSlave, void *data, size_t nbytes); -int i2c_writereg(unsigned char theSlave, unsigned char theReg, unsigned char theValue); -unsigned char i2c_readreg(unsigned char theSlave, unsigned char theReg); - -/* Low level I2C */ -void i2c_start(void); -void i2c_stop(void); -void i2c_outbyte(unsigned char x); -unsigned char i2c_inbyte(void); -int i2c_getack(void); -void i2c_sendack(void); diff --git a/arch/cris/arch-v32/drivers/mach-a3/Makefile b/arch/cris/arch-v32/drivers/mach-a3/Makefile index 5c6d2a2a080e..59028d0b981c 100644 --- a/arch/cris/arch-v32/drivers/mach-a3/Makefile +++ b/arch/cris/arch-v32/drivers/mach-a3/Makefile @@ -3,4 +3,3 @@ # obj-$(CONFIG_ETRAX_NANDFLASH) += nandflash.o -obj-$(CONFIG_ETRAX_GPIO) += gpio.o diff --git a/arch/cris/arch-v32/drivers/mach-a3/gpio.c b/arch/cris/arch-v32/drivers/mach-a3/gpio.c deleted file mode 100644 index c92e1da3684d..000000000000 --- a/arch/cris/arch-v32/drivers/mach-a3/gpio.c +++ /dev/null @@ -1,999 +0,0 @@ -/* - * Artec-3 general port I/O device - * - * Copyright (c) 2007 Axis Communications AB - * - * Authors: Bjorn Wesen (initial version) - * Ola Knutsson (LED handling) - * Johan Adolfsson (read/set directions, write, port G, - * port to ETRAX FS. - * Ricard Wanderlof (PWM for Artpec-3) - * - */ - -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/ioport.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/string.h> -#include <linux/poll.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/spinlock.h> -#include <linux/mutex.h> - -#include <asm/etraxgpio.h> -#include <hwregs/reg_map.h> -#include <hwregs/reg_rdwr.h> -#include <hwregs/gio_defs.h> -#include <hwregs/intr_vect_defs.h> -#include <asm/io.h> -#include <asm/irq.h> -#include <mach/pinmux.h> - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -#include "../i2c.h" - -#define VIRT_I2C_ADDR 0x40 -#endif - -/* The following gio ports on ARTPEC-3 is available: - * pa 32 bits - * pb 32 bits - * pc 16 bits - * each port has a rw_px_dout, r_px_din and rw_px_oe register. - */ - -#define GPIO_MAJOR 120 /* experimental MAJOR number */ - -#define I2C_INTERRUPT_BITS 0x300 /* i2c0_done and i2c1_done bits */ - -#define D(x) - -#if 0 -static int dp_cnt; -#define DP(x) \ - do { \ - dp_cnt++; \ - if (dp_cnt % 1000 == 0) \ - x; \ - } while (0) -#else -#define DP(x) -#endif - -static DEFINE_MUTEX(gpio_mutex); -static char gpio_name[] = "etrax gpio"; - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -static int virtual_gpio_ioctl(struct file *file, unsigned int cmd, - unsigned long arg); -#endif -static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -static ssize_t gpio_write(struct file *file, const char __user *buf, - size_t count, loff_t *off); -static int gpio_open(struct inode *inode, struct file *filp); -static int gpio_release(struct inode *inode, struct file *filp); -static unsigned int gpio_poll(struct file *filp, - struct poll_table_struct *wait); - -/* private data per open() of this driver */ - -struct gpio_private { - struct gpio_private *next; - /* The IO_CFG_WRITE_MODE_VALUE only support 8 bits: */ - unsigned char clk_mask; - unsigned char data_mask; - unsigned char write_msb; - unsigned char pad1; - /* These fields are generic */ - unsigned long highalarm, lowalarm; - wait_queue_head_t alarm_wq; - int minor; -}; - -static void gpio_set_alarm(struct gpio_private *priv); -static int gpio_leds_ioctl(unsigned int cmd, unsigned long arg); -static int gpio_pwm_ioctl(struct gpio_private *priv, unsigned int cmd, - unsigned long arg); - - -/* linked list of alarms to check for */ - -static struct gpio_private *alarmlist; - -static int wanted_interrupts; - -static DEFINE_SPINLOCK(gpio_lock); - -#define NUM_PORTS (GPIO_MINOR_LAST+1) -#define GIO_REG_RD_ADDR(reg) \ - (unsigned long *)(regi_gio + REG_RD_ADDR_gio_##reg) -#define GIO_REG_WR_ADDR(reg) \ - (unsigned long *)(regi_gio + REG_WR_ADDR_gio_##reg) -static unsigned long led_dummy; -static unsigned long port_d_dummy; /* Only input on Artpec-3 */ -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -static unsigned long port_e_dummy; /* Non existent on Artpec-3 */ -static unsigned long virtual_dummy; -static unsigned long virtual_rw_pv_oe = CONFIG_ETRAX_DEF_GIO_PV_OE; -static unsigned short cached_virtual_gpio_read; -#endif - -static unsigned long *data_out[NUM_PORTS] = { - GIO_REG_WR_ADDR(rw_pa_dout), - GIO_REG_WR_ADDR(rw_pb_dout), - &led_dummy, - GIO_REG_WR_ADDR(rw_pc_dout), - &port_d_dummy, -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - &port_e_dummy, - &virtual_dummy, -#endif -}; - -static unsigned long *data_in[NUM_PORTS] = { - GIO_REG_RD_ADDR(r_pa_din), - GIO_REG_RD_ADDR(r_pb_din), - &led_dummy, - GIO_REG_RD_ADDR(r_pc_din), - GIO_REG_RD_ADDR(r_pd_din), -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - &port_e_dummy, - &virtual_dummy, -#endif -}; - -static unsigned long changeable_dir[NUM_PORTS] = { - CONFIG_ETRAX_PA_CHANGEABLE_DIR, - CONFIG_ETRAX_PB_CHANGEABLE_DIR, - 0, - CONFIG_ETRAX_PC_CHANGEABLE_DIR, - 0, -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - 0, - CONFIG_ETRAX_PV_CHANGEABLE_DIR, -#endif -}; - -static unsigned long changeable_bits[NUM_PORTS] = { - CONFIG_ETRAX_PA_CHANGEABLE_BITS, - CONFIG_ETRAX_PB_CHANGEABLE_BITS, - 0, - CONFIG_ETRAX_PC_CHANGEABLE_BITS, - 0, -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - 0, - CONFIG_ETRAX_PV_CHANGEABLE_BITS, -#endif -}; - -static unsigned long *dir_oe[NUM_PORTS] = { - GIO_REG_WR_ADDR(rw_pa_oe), - GIO_REG_WR_ADDR(rw_pb_oe), - &led_dummy, - GIO_REG_WR_ADDR(rw_pc_oe), - &port_d_dummy, -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - &port_e_dummy, - &virtual_rw_pv_oe, -#endif -}; - -static void gpio_set_alarm(struct gpio_private *priv) -{ - int bit; - int intr_cfg; - int mask; - int pins; - unsigned long flags; - - spin_lock_irqsave(&gpio_lock, flags); - intr_cfg = REG_RD_INT(gio, regi_gio, rw_intr_cfg); - pins = REG_RD_INT(gio, regi_gio, rw_intr_pins); - mask = REG_RD_INT(gio, regi_gio, rw_intr_mask) & I2C_INTERRUPT_BITS; - - for (bit = 0; bit < 32; bit++) { - int intr = bit % 8; - int pin = bit / 8; - if (priv->minor < GPIO_MINOR_LEDS) - pin += priv->minor * 4; - else - pin += (priv->minor - 1) * 4; - - if (priv->highalarm & (1<<bit)) { - intr_cfg |= (regk_gio_hi << (intr * 3)); - mask |= 1 << intr; - wanted_interrupts = mask & 0xff; - pins |= pin << (intr * 4); - } else if (priv->lowalarm & (1<<bit)) { - intr_cfg |= (regk_gio_lo << (intr * 3)); - mask |= 1 << intr; - wanted_interrupts = mask & 0xff; - pins |= pin << (intr * 4); - } - } - - REG_WR_INT(gio, regi_gio, rw_intr_cfg, intr_cfg); - REG_WR_INT(gio, regi_gio, rw_intr_pins, pins); - REG_WR_INT(gio, regi_gio, rw_intr_mask, mask); - - spin_unlock_irqrestore(&gpio_lock, flags); -} - -static unsigned int gpio_poll(struct file *file, struct poll_table_struct *wait) -{ - unsigned int mask = 0; - struct gpio_private *priv = file->private_data; - unsigned long data; - unsigned long tmp; - - if (priv->minor >= GPIO_MINOR_PWM0 && - priv->minor <= GPIO_MINOR_LAST_PWM) - return 0; - - poll_wait(file, &priv->alarm_wq, wait); - if (priv->minor <= GPIO_MINOR_D) { - data = readl(data_in[priv->minor]); - REG_WR_INT(gio, regi_gio, rw_ack_intr, wanted_interrupts); - tmp = REG_RD_INT(gio, regi_gio, rw_intr_mask); - tmp &= I2C_INTERRUPT_BITS; - tmp |= wanted_interrupts; - REG_WR_INT(gio, regi_gio, rw_intr_mask, tmp); - } else - return 0; - - if ((data & priv->highalarm) || (~data & priv->lowalarm)) - mask = POLLIN|POLLRDNORM; - - DP(printk(KERN_DEBUG "gpio_poll ready: mask 0x%08X\n", mask)); - return mask; -} - -static irqreturn_t gpio_interrupt(int irq, void *dev_id) -{ - reg_gio_rw_intr_mask intr_mask; - reg_gio_r_masked_intr masked_intr; - reg_gio_rw_ack_intr ack_intr; - unsigned long flags; - unsigned long tmp; - unsigned long tmp2; -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - unsigned char enable_gpiov_ack = 0; -#endif - - /* Find what PA interrupts are active */ - masked_intr = REG_RD(gio, regi_gio, r_masked_intr); - tmp = REG_TYPE_CONV(unsigned long, reg_gio_r_masked_intr, masked_intr); - - /* Find those that we have enabled */ - spin_lock_irqsave(&gpio_lock, flags); - tmp &= wanted_interrupts; - spin_unlock_irqrestore(&gpio_lock, flags); - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - /* Something changed on virtual GPIO. Interrupt is acked by - * reading the device. - */ - if (tmp & (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN)) { - i2c_read(VIRT_I2C_ADDR, (void *)&cached_virtual_gpio_read, - sizeof(cached_virtual_gpio_read)); - enable_gpiov_ack = 1; - } -#endif - - /* Ack them */ - ack_intr = REG_TYPE_CONV(reg_gio_rw_ack_intr, unsigned long, tmp); - REG_WR(gio, regi_gio, rw_ack_intr, ack_intr); - - /* Disable those interrupts.. */ - intr_mask = REG_RD(gio, regi_gio, rw_intr_mask); - tmp2 = REG_TYPE_CONV(unsigned long, reg_gio_rw_intr_mask, intr_mask); - tmp2 &= ~tmp; -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - /* Do not disable interrupt on virtual GPIO. Changes on virtual - * pins are only noticed by an interrupt. - */ - if (enable_gpiov_ack) - tmp2 |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN); -#endif - intr_mask = REG_TYPE_CONV(reg_gio_rw_intr_mask, unsigned long, tmp2); - REG_WR(gio, regi_gio, rw_intr_mask, intr_mask); - - return IRQ_RETVAL(tmp); -} - -static void gpio_write_bit(unsigned long *port, unsigned char data, int bit, - unsigned char clk_mask, unsigned char data_mask) -{ - unsigned long shadow = readl(port) & ~clk_mask; - writel(shadow, port); - if (data & 1 << bit) - shadow |= data_mask; - else - shadow &= ~data_mask; - writel(shadow, port); - /* For FPGA: min 5.0ns (DCC) before CCLK high */ - shadow |= clk_mask; - writel(shadow, port); -} - -static void gpio_write_byte(struct gpio_private *priv, unsigned long *port, - unsigned char data) -{ - int i; - - if (priv->write_msb) - for (i = 7; i >= 0; i--) - gpio_write_bit(port, data, i, priv->clk_mask, - priv->data_mask); - else - for (i = 0; i <= 7; i++) - gpio_write_bit(port, data, i, priv->clk_mask, - priv->data_mask); -} - - -static ssize_t gpio_write(struct file *file, const char __user *buf, - size_t count, loff_t *off) -{ - struct gpio_private *priv = file->private_data; - unsigned long flags; - ssize_t retval = count; - /* Only bits 0-7 may be used for write operations but allow all - devices except leds... */ -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - if (priv->minor == GPIO_MINOR_V) - return -EFAULT; -#endif - if (priv->minor == GPIO_MINOR_LEDS) - return -EFAULT; - - if (priv->minor >= GPIO_MINOR_PWM0 && - priv->minor <= GPIO_MINOR_LAST_PWM) - return -EFAULT; - - if (!access_ok(VERIFY_READ, buf, count)) - return -EFAULT; - - /* It must have been configured using the IO_CFG_WRITE_MODE */ - /* Perhaps a better error code? */ - if (priv->clk_mask == 0 || priv->data_mask == 0) - return -EPERM; - - D(printk(KERN_DEBUG "gpio_write: %lu to data 0x%02X clk 0x%02X " - "msb: %i\n", - count, priv->data_mask, priv->clk_mask, priv->write_msb)); - - spin_lock_irqsave(&gpio_lock, flags); - - while (count--) - gpio_write_byte(priv, data_out[priv->minor], *buf++); - - spin_unlock_irqrestore(&gpio_lock, flags); - return retval; -} - -static int gpio_open(struct inode *inode, struct file *filp) -{ - struct gpio_private *priv; - int p = iminor(inode); - - if (p > GPIO_MINOR_LAST_PWM || - (p > GPIO_MINOR_LAST && p < GPIO_MINOR_PWM0)) - return -EINVAL; - - priv = kmalloc(sizeof(struct gpio_private), GFP_KERNEL); - - if (!priv) - return -ENOMEM; - - mutex_lock(&gpio_mutex); - memset(priv, 0, sizeof(*priv)); - - priv->minor = p; - filp->private_data = priv; - - /* initialize the io/alarm struct, not for PWM ports though */ - if (p <= GPIO_MINOR_LAST) { - - priv->clk_mask = 0; - priv->data_mask = 0; - priv->highalarm = 0; - priv->lowalarm = 0; - - init_waitqueue_head(&priv->alarm_wq); - - /* link it into our alarmlist */ - spin_lock_irq(&gpio_lock); - priv->next = alarmlist; - alarmlist = priv; - spin_unlock_irq(&gpio_lock); - } - - mutex_unlock(&gpio_mutex); - return 0; -} - -static int gpio_release(struct inode *inode, struct file *filp) -{ - struct gpio_private *p; - struct gpio_private *todel; - /* local copies while updating them: */ - unsigned long a_high, a_low; - - /* prepare to free private structure */ - todel = filp->private_data; - - /* unlink from alarmlist - only for non-PWM ports though */ - if (todel->minor <= GPIO_MINOR_LAST) { - spin_lock_irq(&gpio_lock); - p = alarmlist; - - if (p == todel) - alarmlist = todel->next; - else { - while (p->next != todel) - p = p->next; - p->next = todel->next; - } - - /* Check if there are still any alarms set */ - p = alarmlist; - a_high = 0; - a_low = 0; - while (p) { - if (p->minor == GPIO_MINOR_A) { -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - p->lowalarm |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN); -#endif - a_high |= p->highalarm; - a_low |= p->lowalarm; - } - - p = p->next; - } - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - /* Variable 'a_low' needs to be set here again - * to ensure that interrupt for virtual GPIO is handled. - */ - a_low |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN); -#endif - - spin_unlock_irq(&gpio_lock); - } - kfree(todel); - - return 0; -} - -/* Main device API. ioctl's to read/set/clear bits, as well as to - * set alarms to wait for using a subsequent select(). - */ - -inline unsigned long setget_input(struct gpio_private *priv, unsigned long arg) -{ - /* Set direction 0=unchanged 1=input, - * return mask with 1=input - */ - unsigned long flags; - unsigned long dir_shadow; - - spin_lock_irqsave(&gpio_lock, flags); - - dir_shadow = readl(dir_oe[priv->minor]) & - ~(arg & changeable_dir[priv->minor]); - writel(dir_shadow, dir_oe[priv->minor]); - - spin_unlock_irqrestore(&gpio_lock, flags); - - if (priv->minor == GPIO_MINOR_C) - dir_shadow ^= 0xFFFF; /* Only 16 bits */ -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - else if (priv->minor == GPIO_MINOR_V) - dir_shadow ^= 0xFFFF; /* Only 16 bits */ -#endif - else - dir_shadow ^= 0xFFFFFFFF; /* PA, PB and PD 32 bits */ - - return dir_shadow; - -} /* setget_input */ - -static inline unsigned long setget_output(struct gpio_private *priv, - unsigned long arg) -{ - unsigned long flags; - unsigned long dir_shadow; - - spin_lock_irqsave(&gpio_lock, flags); - - dir_shadow = readl(dir_oe[priv->minor]) | - (arg & changeable_dir[priv->minor]); - writel(dir_shadow, dir_oe[priv->minor]); - - spin_unlock_irqrestore(&gpio_lock, flags); - return dir_shadow; -} /* setget_output */ - -static long gpio_ioctl_unlocked(struct file *file, - unsigned int cmd, unsigned long arg) -{ - unsigned long flags; - unsigned long val; - unsigned long shadow; - struct gpio_private *priv = file->private_data; - - if (_IOC_TYPE(cmd) != ETRAXGPIO_IOCTYPE) - return -ENOTTY; - - /* Check for special ioctl handlers first */ - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - if (priv->minor == GPIO_MINOR_V) - return virtual_gpio_ioctl(file, cmd, arg); -#endif - - if (priv->minor == GPIO_MINOR_LEDS) - return gpio_leds_ioctl(cmd, arg); - - if (priv->minor >= GPIO_MINOR_PWM0 && - priv->minor <= GPIO_MINOR_LAST_PWM) - return gpio_pwm_ioctl(priv, cmd, arg); - - switch (_IOC_NR(cmd)) { - case IO_READBITS: /* Use IO_READ_INBITS and IO_READ_OUTBITS instead */ - /* Read the port. */ - return readl(data_in[priv->minor]); - case IO_SETBITS: - spin_lock_irqsave(&gpio_lock, flags); - /* Set changeable bits with a 1 in arg. */ - shadow = readl(data_out[priv->minor]) | - (arg & changeable_bits[priv->minor]); - writel(shadow, data_out[priv->minor]); - spin_unlock_irqrestore(&gpio_lock, flags); - break; - case IO_CLRBITS: - spin_lock_irqsave(&gpio_lock, flags); - /* Clear changeable bits with a 1 in arg. */ - shadow = readl(data_out[priv->minor]) & - ~(arg & changeable_bits[priv->minor]); - writel(shadow, data_out[priv->minor]); - spin_unlock_irqrestore(&gpio_lock, flags); - break; - case IO_HIGHALARM: - /* Set alarm when bits with 1 in arg go high. */ - priv->highalarm |= arg; - gpio_set_alarm(priv); - break; - case IO_LOWALARM: - /* Set alarm when bits with 1 in arg go low. */ - priv->lowalarm |= arg; - gpio_set_alarm(priv); - break; - case IO_CLRALARM: - /* Clear alarm for bits with 1 in arg. */ - priv->highalarm &= ~arg; - priv->lowalarm &= ~arg; - gpio_set_alarm(priv); - break; - case IO_READDIR: /* Use IO_SETGET_INPUT/OUTPUT instead! */ - /* Read direction 0=input 1=output */ - return readl(dir_oe[priv->minor]); - - case IO_SETINPUT: /* Use IO_SETGET_INPUT instead! */ - /* Set direction 0=unchanged 1=input, - * return mask with 1=input - */ - return setget_input(priv, arg); - - case IO_SETOUTPUT: /* Use IO_SETGET_OUTPUT instead! */ - /* Set direction 0=unchanged 1=output, - * return mask with 1=output - */ - return setget_output(priv, arg); - - case IO_CFG_WRITE_MODE: - { - int res = -EPERM; - unsigned long dir_shadow, clk_mask, data_mask, write_msb; - - clk_mask = arg & 0xFF; - data_mask = (arg >> 8) & 0xFF; - write_msb = (arg >> 16) & 0x01; - - /* Check if we're allowed to change the bits and - * the direction is correct - */ - spin_lock_irqsave(&gpio_lock, flags); - dir_shadow = readl(dir_oe[priv->minor]); - if ((clk_mask & changeable_bits[priv->minor]) && - (data_mask & changeable_bits[priv->minor]) && - (clk_mask & dir_shadow) && - (data_mask & dir_shadow)) { - priv->clk_mask = clk_mask; - priv->data_mask = data_mask; - priv->write_msb = write_msb; - res = 0; - } - spin_unlock_irqrestore(&gpio_lock, flags); - - return res; - } - case IO_READ_INBITS: - /* *arg is result of reading the input pins */ - val = readl(data_in[priv->minor]); - if (copy_to_user((void __user *)arg, &val, sizeof(val))) - return -EFAULT; - return 0; - case IO_READ_OUTBITS: - /* *arg is result of reading the output shadow */ - val = *data_out[priv->minor]; - if (copy_to_user((void __user *)arg, &val, sizeof(val))) - return -EFAULT; - break; - case IO_SETGET_INPUT: - /* bits set in *arg is set to input, - * *arg updated with current input pins. - */ - if (copy_from_user(&val, (void __user *)arg, sizeof(val))) - return -EFAULT; - val = setget_input(priv, val); - if (copy_to_user((void __user *)arg, &val, sizeof(val))) - return -EFAULT; - break; - case IO_SETGET_OUTPUT: - /* bits set in *arg is set to output, - * *arg updated with current output pins. - */ - if (copy_from_user(&val, (void __user *)arg, sizeof(val))) - return -EFAULT; - val = setget_output(priv, val); - if (copy_to_user((void __user *)arg, &val, sizeof(val))) - return -EFAULT; - break; - default: - return -EINVAL; - } /* switch */ - - return 0; -} - -static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - long ret; - - mutex_lock(&gpio_mutex); - ret = gpio_ioctl_unlocked(file, cmd, arg); - mutex_unlock(&gpio_mutex); - - return ret; -} - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -static int virtual_gpio_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - unsigned long flags; - unsigned short val; - unsigned short shadow; - struct gpio_private *priv = file->private_data; - - switch (_IOC_NR(cmd)) { - case IO_SETBITS: - spin_lock_irqsave(&gpio_lock, flags); - /* Set changeable bits with a 1 in arg. */ - i2c_read(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow)); - shadow |= ~readl(dir_oe[priv->minor]) | - (arg & changeable_bits[priv->minor]); - i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow)); - spin_unlock_irqrestore(&gpio_lock, flags); - break; - case IO_CLRBITS: - spin_lock_irqsave(&gpio_lock, flags); - /* Clear changeable bits with a 1 in arg. */ - i2c_read(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow)); - shadow |= ~readl(dir_oe[priv->minor]) & - ~(arg & changeable_bits[priv->minor]); - i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow)); - spin_unlock_irqrestore(&gpio_lock, flags); - break; - case IO_HIGHALARM: - /* Set alarm when bits with 1 in arg go high. */ - priv->highalarm |= arg; - break; - case IO_LOWALARM: - /* Set alarm when bits with 1 in arg go low. */ - priv->lowalarm |= arg; - break; - case IO_CLRALARM: - /* Clear alarm for bits with 1 in arg. */ - priv->highalarm &= ~arg; - priv->lowalarm &= ~arg; - break; - case IO_CFG_WRITE_MODE: - { - unsigned long dir_shadow; - dir_shadow = readl(dir_oe[priv->minor]); - - priv->clk_mask = arg & 0xFF; - priv->data_mask = (arg >> 8) & 0xFF; - priv->write_msb = (arg >> 16) & 0x01; - /* Check if we're allowed to change the bits and - * the direction is correct - */ - if (!((priv->clk_mask & changeable_bits[priv->minor]) && - (priv->data_mask & changeable_bits[priv->minor]) && - (priv->clk_mask & dir_shadow) && - (priv->data_mask & dir_shadow))) { - priv->clk_mask = 0; - priv->data_mask = 0; - return -EPERM; - } - break; - } - case IO_READ_INBITS: - /* *arg is result of reading the input pins */ - val = cached_virtual_gpio_read & ~readl(dir_oe[priv->minor]); - if (copy_to_user((void __user *)arg, &val, sizeof(val))) - return -EFAULT; - return 0; - - case IO_READ_OUTBITS: - /* *arg is result of reading the output shadow */ - i2c_read(VIRT_I2C_ADDR, (void *)&val, sizeof(val)); - val &= readl(dir_oe[priv->minor]); - if (copy_to_user((void __user *)arg, &val, sizeof(val))) - return -EFAULT; - break; - case IO_SETGET_INPUT: - { - /* bits set in *arg is set to input, - * *arg updated with current input pins. - */ - unsigned short input_mask = ~readl(dir_oe[priv->minor]); - if (copy_from_user(&val, (void __user *)arg, sizeof(val))) - return -EFAULT; - val = setget_input(priv, val); - if (copy_to_user((void __user *)arg, &val, sizeof(val))) - return -EFAULT; - if ((input_mask & val) != input_mask) { - /* Input pins changed. All ports desired as input - * should be set to logic 1. - */ - unsigned short change = input_mask ^ val; - i2c_read(VIRT_I2C_ADDR, (void *)&shadow, - sizeof(shadow)); - shadow &= ~change; - shadow |= val; - i2c_write(VIRT_I2C_ADDR, (void *)&shadow, - sizeof(shadow)); - } - break; - } - case IO_SETGET_OUTPUT: - /* bits set in *arg is set to output, - * *arg updated with current output pins. - */ - if (copy_from_user(&val, (void __user *)arg, sizeof(val))) - return -EFAULT; - val = setget_output(priv, val); - if (copy_to_user((void __user *)arg, &val, sizeof(val))) - return -EFAULT; - break; - default: - return -EINVAL; - } /* switch */ - return 0; -} -#endif /* CONFIG_ETRAX_VIRTUAL_GPIO */ - -static int gpio_leds_ioctl(unsigned int cmd, unsigned long arg) -{ - unsigned char green; - unsigned char red; - - switch (_IOC_NR(cmd)) { - case IO_LEDACTIVE_SET: - green = ((unsigned char) arg) & 1; - red = (((unsigned char) arg) >> 1) & 1; - CRIS_LED_ACTIVE_SET_G(green); - CRIS_LED_ACTIVE_SET_R(red); - break; - - default: - return -EINVAL; - } /* switch */ - - return 0; -} - -static int gpio_pwm_set_mode(unsigned long arg, int pwm_port) -{ - int pinmux_pwm = pinmux_pwm0 + pwm_port; - int mode; - reg_gio_rw_pwm0_ctrl rw_pwm_ctrl = { - .ccd_val = 0, - .ccd_override = regk_gio_no, - .mode = regk_gio_no - }; - int allocstatus; - - if (get_user(mode, &((struct io_pwm_set_mode *) arg)->mode)) - return -EFAULT; - rw_pwm_ctrl.mode = mode; - if (mode != PWM_OFF) - allocstatus = crisv32_pinmux_alloc_fixed(pinmux_pwm); - else - allocstatus = crisv32_pinmux_dealloc_fixed(pinmux_pwm); - if (allocstatus) - return allocstatus; - REG_WRITE(reg_gio_rw_pwm0_ctrl, REG_ADDR(gio, regi_gio, rw_pwm0_ctrl) + - 12 * pwm_port, rw_pwm_ctrl); - return 0; -} - -static int gpio_pwm_set_period(unsigned long arg, int pwm_port) -{ - struct io_pwm_set_period periods; - reg_gio_rw_pwm0_var rw_pwm_widths; - - if (copy_from_user(&periods, (void __user *)arg, sizeof(periods))) - return -EFAULT; - if (periods.lo > 8191 || periods.hi > 8191) - return -EINVAL; - rw_pwm_widths.lo = periods.lo; - rw_pwm_widths.hi = periods.hi; - REG_WRITE(reg_gio_rw_pwm0_var, REG_ADDR(gio, regi_gio, rw_pwm0_var) + - 12 * pwm_port, rw_pwm_widths); - return 0; -} - -static int gpio_pwm_set_duty(unsigned long arg, int pwm_port) -{ - unsigned int duty; - reg_gio_rw_pwm0_data rw_pwm_duty; - - if (get_user(duty, &((struct io_pwm_set_duty *) arg)->duty)) - return -EFAULT; - if (duty > 255) - return -EINVAL; - rw_pwm_duty.data = duty; - REG_WRITE(reg_gio_rw_pwm0_data, REG_ADDR(gio, regi_gio, rw_pwm0_data) + - 12 * pwm_port, rw_pwm_duty); - return 0; -} - -static int gpio_pwm_ioctl(struct gpio_private *priv, unsigned int cmd, - unsigned long arg) -{ - int pwm_port = priv->minor - GPIO_MINOR_PWM0; - - switch (_IOC_NR(cmd)) { - case IO_PWM_SET_MODE: - return gpio_pwm_set_mode(arg, pwm_port); - case IO_PWM_SET_PERIOD: - return gpio_pwm_set_period(arg, pwm_port); - case IO_PWM_SET_DUTY: - return gpio_pwm_set_duty(arg, pwm_port); - default: - return -EINVAL; - } - return 0; -} - -static const struct file_operations gpio_fops = { - .owner = THIS_MODULE, - .poll = gpio_poll, - .unlocked_ioctl = gpio_ioctl, - .write = gpio_write, - .open = gpio_open, - .release = gpio_release, - .llseek = noop_llseek, -}; - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -static void __init virtual_gpio_init(void) -{ - reg_gio_rw_intr_cfg intr_cfg; - reg_gio_rw_intr_mask intr_mask; - unsigned short shadow; - - shadow = ~virtual_rw_pv_oe; /* Input ports should be set to logic 1 */ - shadow |= CONFIG_ETRAX_DEF_GIO_PV_OUT; - i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow)); - - /* Set interrupt mask and on what state the interrupt shall trigger. - * For virtual gpio the interrupt shall trigger on logic '0'. - */ - intr_cfg = REG_RD(gio, regi_gio, rw_intr_cfg); - intr_mask = REG_RD(gio, regi_gio, rw_intr_mask); - - switch (CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN) { - case 0: - intr_cfg.pa0 = regk_gio_lo; - intr_mask.pa0 = regk_gio_yes; - break; - case 1: - intr_cfg.pa1 = regk_gio_lo; - intr_mask.pa1 = regk_gio_yes; - break; - case 2: - intr_cfg.pa2 = regk_gio_lo; - intr_mask.pa2 = regk_gio_yes; - break; - case 3: - intr_cfg.pa3 = regk_gio_lo; - intr_mask.pa3 = regk_gio_yes; - break; - case 4: - intr_cfg.pa4 = regk_gio_lo; - intr_mask.pa4 = regk_gio_yes; - break; - case 5: - intr_cfg.pa5 = regk_gio_lo; - intr_mask.pa5 = regk_gio_yes; - break; - case 6: - intr_cfg.pa6 = regk_gio_lo; - intr_mask.pa6 = regk_gio_yes; - break; - case 7: - intr_cfg.pa7 = regk_gio_lo; - intr_mask.pa7 = regk_gio_yes; - break; - } - - REG_WR(gio, regi_gio, rw_intr_cfg, intr_cfg); - REG_WR(gio, regi_gio, rw_intr_mask, intr_mask); -} -#endif - -/* main driver initialization routine, called from mem.c */ - -static int __init gpio_init(void) -{ - int res, res2; - - printk(KERN_INFO "ETRAX FS GPIO driver v2.7, (c) 2003-2008 " - "Axis Communications AB\n"); - - /* do the formalities */ - - res = register_chrdev(GPIO_MAJOR, gpio_name, &gpio_fops); - if (res < 0) { - printk(KERN_ERR "gpio: couldn't get a major number.\n"); - return res; - } - - /* Clear all leds */ - CRIS_LED_NETWORK_GRP0_SET(0); - CRIS_LED_NETWORK_GRP1_SET(0); - CRIS_LED_ACTIVE_SET(0); - CRIS_LED_DISK_READ(0); - CRIS_LED_DISK_WRITE(0); - - res2 = request_irq(GIO_INTR_VECT, gpio_interrupt, - IRQF_SHARED, "gpio", &alarmlist); - if (res2) { - printk(KERN_ERR "err: irq for gpio\n"); - return res2; - } - - /* No IRQs by default. */ - REG_WR_INT(gio, regi_gio, rw_intr_pins, 0); - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - virtual_gpio_init(); -#endif - - return res; -} - -/* this makes sure that gpio_init is called during kernel boot */ - -module_init(gpio_init); diff --git a/arch/cris/arch-v32/drivers/mach-fs/Makefile b/arch/cris/arch-v32/drivers/mach-fs/Makefile index 5c6d2a2a080e..59028d0b981c 100644 --- a/arch/cris/arch-v32/drivers/mach-fs/Makefile +++ b/arch/cris/arch-v32/drivers/mach-fs/Makefile @@ -3,4 +3,3 @@ # obj-$(CONFIG_ETRAX_NANDFLASH) += nandflash.o -obj-$(CONFIG_ETRAX_GPIO) += gpio.o diff --git a/arch/cris/arch-v32/drivers/mach-fs/gpio.c b/arch/cris/arch-v32/drivers/mach-fs/gpio.c deleted file mode 100644 index 72968fbf814b..000000000000 --- a/arch/cris/arch-v32/drivers/mach-fs/gpio.c +++ /dev/null @@ -1,978 +0,0 @@ -/* - * ETRAX CRISv32 general port I/O device - * - * Copyright (c) 1999-2006 Axis Communications AB - * - * Authors: Bjorn Wesen (initial version) - * Ola Knutsson (LED handling) - * Johan Adolfsson (read/set directions, write, port G, - * port to ETRAX FS. - * - */ - -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/ioport.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/string.h> -#include <linux/poll.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/spinlock.h> -#include <linux/mutex.h> - -#include <asm/etraxgpio.h> -#include <hwregs/reg_map.h> -#include <hwregs/reg_rdwr.h> -#include <hwregs/gio_defs.h> -#include <hwregs/intr_vect_defs.h> -#include <asm/io.h> -#include <asm/irq.h> - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -#include "../i2c.h" - -#define VIRT_I2C_ADDR 0x40 -#endif - -/* The following gio ports on ETRAX FS is available: - * pa 8 bits, supports interrupts off, hi, low, set, posedge, negedge anyedge - * pb 18 bits - * pc 18 bits - * pd 18 bits - * pe 18 bits - * each port has a rw_px_dout, r_px_din and rw_px_oe register. - */ - -#define GPIO_MAJOR 120 /* experimental MAJOR number */ - -#define D(x) - -#if 0 -static int dp_cnt; -#define DP(x) \ - do { \ - dp_cnt++; \ - if (dp_cnt % 1000 == 0) \ - x; \ - } while (0) -#else -#define DP(x) -#endif - -static DEFINE_MUTEX(gpio_mutex); -static char gpio_name[] = "etrax gpio"; - -#if 0 -static wait_queue_head_t *gpio_wq; -#endif - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -static int virtual_gpio_ioctl(struct file *file, unsigned int cmd, - unsigned long arg); -#endif -static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -static ssize_t gpio_write(struct file *file, const char *buf, size_t count, - loff_t *off); -static int gpio_open(struct inode *inode, struct file *filp); -static int gpio_release(struct inode *inode, struct file *filp); -static unsigned int gpio_poll(struct file *filp, - struct poll_table_struct *wait); - -/* private data per open() of this driver */ - -struct gpio_private { - struct gpio_private *next; - /* The IO_CFG_WRITE_MODE_VALUE only support 8 bits: */ - unsigned char clk_mask; - unsigned char data_mask; - unsigned char write_msb; - unsigned char pad1; - /* These fields are generic */ - unsigned long highalarm, lowalarm; - wait_queue_head_t alarm_wq; - int minor; -}; - -/* linked list of alarms to check for */ - -static struct gpio_private *alarmlist; - -static int gpio_some_alarms; /* Set if someone uses alarm */ -static unsigned long gpio_pa_high_alarms; -static unsigned long gpio_pa_low_alarms; - -static DEFINE_SPINLOCK(alarm_lock); - -#define NUM_PORTS (GPIO_MINOR_LAST+1) -#define GIO_REG_RD_ADDR(reg) \ - (volatile unsigned long *)(regi_gio + REG_RD_ADDR_gio_##reg) -#define GIO_REG_WR_ADDR(reg) \ - (volatile unsigned long *)(regi_gio + REG_RD_ADDR_gio_##reg) -unsigned long led_dummy; -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -static unsigned long virtual_dummy; -static unsigned long virtual_rw_pv_oe = CONFIG_ETRAX_DEF_GIO_PV_OE; -static unsigned short cached_virtual_gpio_read; -#endif - -static volatile unsigned long *data_out[NUM_PORTS] = { - GIO_REG_WR_ADDR(rw_pa_dout), - GIO_REG_WR_ADDR(rw_pb_dout), - &led_dummy, - GIO_REG_WR_ADDR(rw_pc_dout), - GIO_REG_WR_ADDR(rw_pd_dout), - GIO_REG_WR_ADDR(rw_pe_dout), -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - &virtual_dummy, -#endif -}; - -static volatile unsigned long *data_in[NUM_PORTS] = { - GIO_REG_RD_ADDR(r_pa_din), - GIO_REG_RD_ADDR(r_pb_din), - &led_dummy, - GIO_REG_RD_ADDR(r_pc_din), - GIO_REG_RD_ADDR(r_pd_din), - GIO_REG_RD_ADDR(r_pe_din), -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - &virtual_dummy, -#endif -}; - -static unsigned long changeable_dir[NUM_PORTS] = { - CONFIG_ETRAX_PA_CHANGEABLE_DIR, - CONFIG_ETRAX_PB_CHANGEABLE_DIR, - 0, - CONFIG_ETRAX_PC_CHANGEABLE_DIR, - CONFIG_ETRAX_PD_CHANGEABLE_DIR, - CONFIG_ETRAX_PE_CHANGEABLE_DIR, -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - CONFIG_ETRAX_PV_CHANGEABLE_DIR, -#endif -}; - -static unsigned long changeable_bits[NUM_PORTS] = { - CONFIG_ETRAX_PA_CHANGEABLE_BITS, - CONFIG_ETRAX_PB_CHANGEABLE_BITS, - 0, - CONFIG_ETRAX_PC_CHANGEABLE_BITS, - CONFIG_ETRAX_PD_CHANGEABLE_BITS, - CONFIG_ETRAX_PE_CHANGEABLE_BITS, -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - CONFIG_ETRAX_PV_CHANGEABLE_BITS, -#endif -}; - -static volatile unsigned long *dir_oe[NUM_PORTS] = { - GIO_REG_WR_ADDR(rw_pa_oe), - GIO_REG_WR_ADDR(rw_pb_oe), - &led_dummy, - GIO_REG_WR_ADDR(rw_pc_oe), - GIO_REG_WR_ADDR(rw_pd_oe), - GIO_REG_WR_ADDR(rw_pe_oe), -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - &virtual_rw_pv_oe, -#endif -}; - - - -static unsigned int gpio_poll(struct file *file, struct poll_table_struct *wait) -{ - unsigned int mask = 0; - struct gpio_private *priv = file->private_data; - unsigned long data; - poll_wait(file, &priv->alarm_wq, wait); - if (priv->minor == GPIO_MINOR_A) { - reg_gio_rw_intr_cfg intr_cfg; - unsigned long tmp; - unsigned long flags; - - local_irq_save(flags); - data = REG_TYPE_CONV(unsigned long, reg_gio_r_pa_din, - REG_RD(gio, regi_gio, r_pa_din)); - /* PA has support for interrupt - * lets activate high for those low and with highalarm set - */ - intr_cfg = REG_RD(gio, regi_gio, rw_intr_cfg); - - tmp = ~data & priv->highalarm & 0xFF; - if (tmp & (1 << 0)) - intr_cfg.pa0 = regk_gio_hi; - if (tmp & (1 << 1)) - intr_cfg.pa1 = regk_gio_hi; - if (tmp & (1 << 2)) - intr_cfg.pa2 = regk_gio_hi; - if (tmp & (1 << 3)) - intr_cfg.pa3 = regk_gio_hi; - if (tmp & (1 << 4)) - intr_cfg.pa4 = regk_gio_hi; - if (tmp & (1 << 5)) - intr_cfg.pa5 = regk_gio_hi; - if (tmp & (1 << 6)) - intr_cfg.pa6 = regk_gio_hi; - if (tmp & (1 << 7)) - intr_cfg.pa7 = regk_gio_hi; - /* - * lets activate low for those high and with lowalarm set - */ - tmp = data & priv->lowalarm & 0xFF; - if (tmp & (1 << 0)) - intr_cfg.pa0 = regk_gio_lo; - if (tmp & (1 << 1)) - intr_cfg.pa1 = regk_gio_lo; - if (tmp & (1 << 2)) - intr_cfg.pa2 = regk_gio_lo; - if (tmp & (1 << 3)) - intr_cfg.pa3 = regk_gio_lo; - if (tmp & (1 << 4)) - intr_cfg.pa4 = regk_gio_lo; - if (tmp & (1 << 5)) - intr_cfg.pa5 = regk_gio_lo; - if (tmp & (1 << 6)) - intr_cfg.pa6 = regk_gio_lo; - if (tmp & (1 << 7)) - intr_cfg.pa7 = regk_gio_lo; - - REG_WR(gio, regi_gio, rw_intr_cfg, intr_cfg); - local_irq_restore(flags); - } else if (priv->minor <= GPIO_MINOR_E) - data = *data_in[priv->minor]; - else - return 0; - - if ((data & priv->highalarm) || (~data & priv->lowalarm)) - mask = POLLIN|POLLRDNORM; - - DP(printk(KERN_DEBUG "gpio_poll ready: mask 0x%08X\n", mask)); - return mask; -} - -int etrax_gpio_wake_up_check(void) -{ - struct gpio_private *priv; - unsigned long data = 0; - unsigned long flags; - int ret = 0; - spin_lock_irqsave(&alarm_lock, flags); - priv = alarmlist; - while (priv) { -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - if (priv->minor == GPIO_MINOR_V) - data = (unsigned long)cached_virtual_gpio_read; - else { - data = *data_in[priv->minor]; - if (priv->minor == GPIO_MINOR_A) - priv->lowalarm |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN); - } -#else - data = *data_in[priv->minor]; -#endif - if ((data & priv->highalarm) || - (~data & priv->lowalarm)) { - DP(printk(KERN_DEBUG - "etrax_gpio_wake_up_check %i\n", priv->minor)); - wake_up_interruptible(&priv->alarm_wq); - ret = 1; - } - priv = priv->next; - } - spin_unlock_irqrestore(&alarm_lock, flags); - return ret; -} - -static irqreturn_t -gpio_poll_timer_interrupt(int irq, void *dev_id) -{ - if (gpio_some_alarms) - return IRQ_RETVAL(etrax_gpio_wake_up_check()); - return IRQ_NONE; -} - -static irqreturn_t -gpio_pa_interrupt(int irq, void *dev_id) -{ - reg_gio_rw_intr_mask intr_mask; - reg_gio_r_masked_intr masked_intr; - reg_gio_rw_ack_intr ack_intr; - unsigned long tmp; - unsigned long tmp2; -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - unsigned char enable_gpiov_ack = 0; -#endif - - /* Find what PA interrupts are active */ - masked_intr = REG_RD(gio, regi_gio, r_masked_intr); - tmp = REG_TYPE_CONV(unsigned long, reg_gio_r_masked_intr, masked_intr); - - /* Find those that we have enabled */ - spin_lock(&alarm_lock); - tmp &= (gpio_pa_high_alarms | gpio_pa_low_alarms); - spin_unlock(&alarm_lock); - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - /* Something changed on virtual GPIO. Interrupt is acked by - * reading the device. - */ - if (tmp & (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN)) { - i2c_read(VIRT_I2C_ADDR, (void *)&cached_virtual_gpio_read, - sizeof(cached_virtual_gpio_read)); - enable_gpiov_ack = 1; - } -#endif - - /* Ack them */ - ack_intr = REG_TYPE_CONV(reg_gio_rw_ack_intr, unsigned long, tmp); - REG_WR(gio, regi_gio, rw_ack_intr, ack_intr); - - /* Disable those interrupts.. */ - intr_mask = REG_RD(gio, regi_gio, rw_intr_mask); - tmp2 = REG_TYPE_CONV(unsigned long, reg_gio_rw_intr_mask, intr_mask); - tmp2 &= ~tmp; -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - /* Do not disable interrupt on virtual GPIO. Changes on virtual - * pins are only noticed by an interrupt. - */ - if (enable_gpiov_ack) - tmp2 |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN); -#endif - intr_mask = REG_TYPE_CONV(reg_gio_rw_intr_mask, unsigned long, tmp2); - REG_WR(gio, regi_gio, rw_intr_mask, intr_mask); - - if (gpio_some_alarms) - return IRQ_RETVAL(etrax_gpio_wake_up_check()); - return IRQ_NONE; -} - - -static ssize_t gpio_write(struct file *file, const char *buf, size_t count, - loff_t *off) -{ - struct gpio_private *priv = file->private_data; - unsigned char data, clk_mask, data_mask, write_msb; - unsigned long flags; - unsigned long shadow; - volatile unsigned long *port; - ssize_t retval = count; - /* Only bits 0-7 may be used for write operations but allow all - devices except leds... */ -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - if (priv->minor == GPIO_MINOR_V) - return -EFAULT; -#endif - if (priv->minor == GPIO_MINOR_LEDS) - return -EFAULT; - - if (!access_ok(VERIFY_READ, buf, count)) - return -EFAULT; - clk_mask = priv->clk_mask; - data_mask = priv->data_mask; - /* It must have been configured using the IO_CFG_WRITE_MODE */ - /* Perhaps a better error code? */ - if (clk_mask == 0 || data_mask == 0) - return -EPERM; - write_msb = priv->write_msb; - D(printk(KERN_DEBUG "gpio_write: %lu to data 0x%02X clk 0x%02X " - "msb: %i\n", count, data_mask, clk_mask, write_msb)); - port = data_out[priv->minor]; - - while (count--) { - int i; - data = *buf++; - if (priv->write_msb) { - for (i = 7; i >= 0; i--) { - local_irq_save(flags); - shadow = *port; - *port = shadow &= ~clk_mask; - if (data & 1<<i) - *port = shadow |= data_mask; - else - *port = shadow &= ~data_mask; - /* For FPGA: min 5.0ns (DCC) before CCLK high */ - *port = shadow |= clk_mask; - local_irq_restore(flags); - } - } else { - for (i = 0; i <= 7; i++) { - local_irq_save(flags); - shadow = *port; - *port = shadow &= ~clk_mask; - if (data & 1<<i) - *port = shadow |= data_mask; - else - *port = shadow &= ~data_mask; - /* For FPGA: min 5.0ns (DCC) before CCLK high */ - *port = shadow |= clk_mask; - local_irq_restore(flags); - } - } - } - return retval; -} - - - -static int -gpio_open(struct inode *inode, struct file *filp) -{ - struct gpio_private *priv; - int p = iminor(inode); - - if (p > GPIO_MINOR_LAST) - return -EINVAL; - - priv = kzalloc(sizeof(struct gpio_private), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - mutex_lock(&gpio_mutex); - - priv->minor = p; - - /* initialize the io/alarm struct */ - - priv->clk_mask = 0; - priv->data_mask = 0; - priv->highalarm = 0; - priv->lowalarm = 0; - init_waitqueue_head(&priv->alarm_wq); - - filp->private_data = (void *)priv; - - /* link it into our alarmlist */ - spin_lock_irq(&alarm_lock); - priv->next = alarmlist; - alarmlist = priv; - spin_unlock_irq(&alarm_lock); - - mutex_unlock(&gpio_mutex); - return 0; -} - -static int -gpio_release(struct inode *inode, struct file *filp) -{ - struct gpio_private *p; - struct gpio_private *todel; - /* local copies while updating them: */ - unsigned long a_high, a_low; - unsigned long some_alarms; - - /* unlink from alarmlist and free the private structure */ - - spin_lock_irq(&alarm_lock); - p = alarmlist; - todel = filp->private_data; - - if (p == todel) { - alarmlist = todel->next; - } else { - while (p->next != todel) - p = p->next; - p->next = todel->next; - } - - kfree(todel); - /* Check if there are still any alarms set */ - p = alarmlist; - some_alarms = 0; - a_high = 0; - a_low = 0; - while (p) { - if (p->minor == GPIO_MINOR_A) { -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - p->lowalarm |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN); -#endif - a_high |= p->highalarm; - a_low |= p->lowalarm; - } - - if (p->highalarm | p->lowalarm) - some_alarms = 1; - p = p->next; - } - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - /* Variables 'some_alarms' and 'a_low' needs to be set here again - * to ensure that interrupt for virtual GPIO is handled. - */ - some_alarms = 1; - a_low |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN); -#endif - - gpio_some_alarms = some_alarms; - gpio_pa_high_alarms = a_high; - gpio_pa_low_alarms = a_low; - spin_unlock_irq(&alarm_lock); - - return 0; -} - -/* Main device API. ioctl's to read/set/clear bits, as well as to - * set alarms to wait for using a subsequent select(). - */ - -inline unsigned long setget_input(struct gpio_private *priv, unsigned long arg) -{ - /* Set direction 0=unchanged 1=input, - * return mask with 1=input - */ - unsigned long flags; - unsigned long dir_shadow; - - local_irq_save(flags); - dir_shadow = *dir_oe[priv->minor]; - dir_shadow &= ~(arg & changeable_dir[priv->minor]); - *dir_oe[priv->minor] = dir_shadow; - local_irq_restore(flags); - - if (priv->minor == GPIO_MINOR_A) - dir_shadow ^= 0xFF; /* Only 8 bits */ -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - else if (priv->minor == GPIO_MINOR_V) - dir_shadow ^= 0xFFFF; /* Only 16 bits */ -#endif - else - dir_shadow ^= 0x3FFFF; /* Only 18 bits */ - return dir_shadow; - -} /* setget_input */ - -inline unsigned long setget_output(struct gpio_private *priv, unsigned long arg) -{ - unsigned long flags; - unsigned long dir_shadow; - - local_irq_save(flags); - dir_shadow = *dir_oe[priv->minor]; - dir_shadow |= (arg & changeable_dir[priv->minor]); - *dir_oe[priv->minor] = dir_shadow; - local_irq_restore(flags); - return dir_shadow; -} /* setget_output */ - -static int gpio_leds_ioctl(unsigned int cmd, unsigned long arg); - -static int -gpio_ioctl_unlocked(struct file *file, unsigned int cmd, unsigned long arg) -{ - unsigned long flags; - unsigned long val; - unsigned long shadow; - struct gpio_private *priv = file->private_data; - if (_IOC_TYPE(cmd) != ETRAXGPIO_IOCTYPE) - return -EINVAL; - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - if (priv->minor == GPIO_MINOR_V) - return virtual_gpio_ioctl(file, cmd, arg); -#endif - - switch (_IOC_NR(cmd)) { - case IO_READBITS: /* Use IO_READ_INBITS and IO_READ_OUTBITS instead */ - /* Read the port. */ - return *data_in[priv->minor]; - break; - case IO_SETBITS: - local_irq_save(flags); - /* Set changeable bits with a 1 in arg. */ - shadow = *data_out[priv->minor]; - shadow |= (arg & changeable_bits[priv->minor]); - *data_out[priv->minor] = shadow; - local_irq_restore(flags); - break; - case IO_CLRBITS: - local_irq_save(flags); - /* Clear changeable bits with a 1 in arg. */ - shadow = *data_out[priv->minor]; - shadow &= ~(arg & changeable_bits[priv->minor]); - *data_out[priv->minor] = shadow; - local_irq_restore(flags); - break; - case IO_HIGHALARM: - /* Set alarm when bits with 1 in arg go high. */ - priv->highalarm |= arg; - spin_lock_irqsave(&alarm_lock, flags); - gpio_some_alarms = 1; - if (priv->minor == GPIO_MINOR_A) - gpio_pa_high_alarms |= arg; - spin_unlock_irqrestore(&alarm_lock, flags); - break; - case IO_LOWALARM: - /* Set alarm when bits with 1 in arg go low. */ - priv->lowalarm |= arg; - spin_lock_irqsave(&alarm_lock, flags); - gpio_some_alarms = 1; - if (priv->minor == GPIO_MINOR_A) - gpio_pa_low_alarms |= arg; - spin_unlock_irqrestore(&alarm_lock, flags); - break; - case IO_CLRALARM: - /* Clear alarm for bits with 1 in arg. */ - priv->highalarm &= ~arg; - priv->lowalarm &= ~arg; - spin_lock_irqsave(&alarm_lock, flags); - if (priv->minor == GPIO_MINOR_A) { - if (gpio_pa_high_alarms & arg || - gpio_pa_low_alarms & arg) - /* Must update the gpio_pa_*alarms masks */ - ; - } - spin_unlock_irqrestore(&alarm_lock, flags); - break; - case IO_READDIR: /* Use IO_SETGET_INPUT/OUTPUT instead! */ - /* Read direction 0=input 1=output */ - return *dir_oe[priv->minor]; - case IO_SETINPUT: /* Use IO_SETGET_INPUT instead! */ - /* Set direction 0=unchanged 1=input, - * return mask with 1=input - */ - return setget_input(priv, arg); - break; - case IO_SETOUTPUT: /* Use IO_SETGET_OUTPUT instead! */ - /* Set direction 0=unchanged 1=output, - * return mask with 1=output - */ - return setget_output(priv, arg); - - case IO_CFG_WRITE_MODE: - { - unsigned long dir_shadow; - dir_shadow = *dir_oe[priv->minor]; - - priv->clk_mask = arg & 0xFF; - priv->data_mask = (arg >> 8) & 0xFF; - priv->write_msb = (arg >> 16) & 0x01; - /* Check if we're allowed to change the bits and - * the direction is correct - */ - if (!((priv->clk_mask & changeable_bits[priv->minor]) && - (priv->data_mask & changeable_bits[priv->minor]) && - (priv->clk_mask & dir_shadow) && - (priv->data_mask & dir_shadow))) { - priv->clk_mask = 0; - priv->data_mask = 0; - return -EPERM; - } - break; - } - case IO_READ_INBITS: - /* *arg is result of reading the input pins */ - val = *data_in[priv->minor]; - if (copy_to_user((unsigned long *)arg, &val, sizeof(val))) - return -EFAULT; - return 0; - break; - case IO_READ_OUTBITS: - /* *arg is result of reading the output shadow */ - val = *data_out[priv->minor]; - if (copy_to_user((unsigned long *)arg, &val, sizeof(val))) - return -EFAULT; - break; - case IO_SETGET_INPUT: - /* bits set in *arg is set to input, - * *arg updated with current input pins. - */ - if (copy_from_user(&val, (unsigned long *)arg, sizeof(val))) - return -EFAULT; - val = setget_input(priv, val); - if (copy_to_user((unsigned long *)arg, &val, sizeof(val))) - return -EFAULT; - break; - case IO_SETGET_OUTPUT: - /* bits set in *arg is set to output, - * *arg updated with current output pins. - */ - if (copy_from_user(&val, (unsigned long *)arg, sizeof(val))) - return -EFAULT; - val = setget_output(priv, val); - if (copy_to_user((unsigned long *)arg, &val, sizeof(val))) - return -EFAULT; - break; - default: - if (priv->minor == GPIO_MINOR_LEDS) - return gpio_leds_ioctl(cmd, arg); - else - return -EINVAL; - } /* switch */ - - return 0; -} - -static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - long ret; - - mutex_lock(&gpio_mutex); - ret = gpio_ioctl_unlocked(file, cmd, arg); - mutex_unlock(&gpio_mutex); - - return ret; -} - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -static int -virtual_gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - unsigned long flags; - unsigned short val; - unsigned short shadow; - struct gpio_private *priv = file->private_data; - - switch (_IOC_NR(cmd)) { - case IO_SETBITS: - local_irq_save(flags); - /* Set changeable bits with a 1 in arg. */ - i2c_read(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow)); - shadow |= ~*dir_oe[priv->minor]; - shadow |= (arg & changeable_bits[priv->minor]); - i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow)); - local_irq_restore(flags); - break; - case IO_CLRBITS: - local_irq_save(flags); - /* Clear changeable bits with a 1 in arg. */ - i2c_read(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow)); - shadow |= ~*dir_oe[priv->minor]; - shadow &= ~(arg & changeable_bits[priv->minor]); - i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow)); - local_irq_restore(flags); - break; - case IO_HIGHALARM: - /* Set alarm when bits with 1 in arg go high. */ - priv->highalarm |= arg; - spin_lock(&alarm_lock); - gpio_some_alarms = 1; - spin_unlock(&alarm_lock); - break; - case IO_LOWALARM: - /* Set alarm when bits with 1 in arg go low. */ - priv->lowalarm |= arg; - spin_lock(&alarm_lock); - gpio_some_alarms = 1; - spin_unlock(&alarm_lock); - break; - case IO_CLRALARM: - /* Clear alarm for bits with 1 in arg. */ - priv->highalarm &= ~arg; - priv->lowalarm &= ~arg; - spin_lock(&alarm_lock); - spin_unlock(&alarm_lock); - break; - case IO_CFG_WRITE_MODE: - { - unsigned long dir_shadow; - dir_shadow = *dir_oe[priv->minor]; - - priv->clk_mask = arg & 0xFF; - priv->data_mask = (arg >> 8) & 0xFF; - priv->write_msb = (arg >> 16) & 0x01; - /* Check if we're allowed to change the bits and - * the direction is correct - */ - if (!((priv->clk_mask & changeable_bits[priv->minor]) && - (priv->data_mask & changeable_bits[priv->minor]) && - (priv->clk_mask & dir_shadow) && - (priv->data_mask & dir_shadow))) { - priv->clk_mask = 0; - priv->data_mask = 0; - return -EPERM; - } - break; - } - case IO_READ_INBITS: - /* *arg is result of reading the input pins */ - val = cached_virtual_gpio_read; - val &= ~*dir_oe[priv->minor]; - if (copy_to_user((unsigned long *)arg, &val, sizeof(val))) - return -EFAULT; - return 0; - break; - case IO_READ_OUTBITS: - /* *arg is result of reading the output shadow */ - i2c_read(VIRT_I2C_ADDR, (void *)&val, sizeof(val)); - val &= *dir_oe[priv->minor]; - if (copy_to_user((unsigned long *)arg, &val, sizeof(val))) - return -EFAULT; - break; - case IO_SETGET_INPUT: - { - /* bits set in *arg is set to input, - * *arg updated with current input pins. - */ - unsigned short input_mask = ~*dir_oe[priv->minor]; - if (copy_from_user(&val, (unsigned long *)arg, sizeof(val))) - return -EFAULT; - val = setget_input(priv, val); - if (copy_to_user((unsigned long *)arg, &val, sizeof(val))) - return -EFAULT; - if ((input_mask & val) != input_mask) { - /* Input pins changed. All ports desired as input - * should be set to logic 1. - */ - unsigned short change = input_mask ^ val; - i2c_read(VIRT_I2C_ADDR, (void *)&shadow, - sizeof(shadow)); - shadow &= ~change; - shadow |= val; - i2c_write(VIRT_I2C_ADDR, (void *)&shadow, - sizeof(shadow)); - } - break; - } - case IO_SETGET_OUTPUT: - /* bits set in *arg is set to output, - * *arg updated with current output pins. - */ - if (copy_from_user(&val, (unsigned long *)arg, sizeof(val))) - return -EFAULT; - val = setget_output(priv, val); - if (copy_to_user((unsigned long *)arg, &val, sizeof(val))) - return -EFAULT; - break; - default: - return -EINVAL; - } /* switch */ - return 0; -} -#endif /* CONFIG_ETRAX_VIRTUAL_GPIO */ - -static int -gpio_leds_ioctl(unsigned int cmd, unsigned long arg) -{ - unsigned char green; - unsigned char red; - - switch (_IOC_NR(cmd)) { - case IO_LEDACTIVE_SET: - green = ((unsigned char) arg) & 1; - red = (((unsigned char) arg) >> 1) & 1; - CRIS_LED_ACTIVE_SET_G(green); - CRIS_LED_ACTIVE_SET_R(red); - break; - - default: - return -EINVAL; - } /* switch */ - - return 0; -} - -static const struct file_operations gpio_fops = { - .owner = THIS_MODULE, - .poll = gpio_poll, - .unlocked_ioctl = gpio_ioctl, - .write = gpio_write, - .open = gpio_open, - .release = gpio_release, - .llseek = noop_llseek, -}; - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -static void -virtual_gpio_init(void) -{ - reg_gio_rw_intr_cfg intr_cfg; - reg_gio_rw_intr_mask intr_mask; - unsigned short shadow; - - shadow = ~virtual_rw_pv_oe; /* Input ports should be set to logic 1 */ - shadow |= CONFIG_ETRAX_DEF_GIO_PV_OUT; - i2c_write(VIRT_I2C_ADDR, (void *)&shadow, sizeof(shadow)); - - /* Set interrupt mask and on what state the interrupt shall trigger. - * For virtual gpio the interrupt shall trigger on logic '0'. - */ - intr_cfg = REG_RD(gio, regi_gio, rw_intr_cfg); - intr_mask = REG_RD(gio, regi_gio, rw_intr_mask); - - switch (CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN) { - case 0: - intr_cfg.pa0 = regk_gio_lo; - intr_mask.pa0 = regk_gio_yes; - break; - case 1: - intr_cfg.pa1 = regk_gio_lo; - intr_mask.pa1 = regk_gio_yes; - break; - case 2: - intr_cfg.pa2 = regk_gio_lo; - intr_mask.pa2 = regk_gio_yes; - break; - case 3: - intr_cfg.pa3 = regk_gio_lo; - intr_mask.pa3 = regk_gio_yes; - break; - case 4: - intr_cfg.pa4 = regk_gio_lo; - intr_mask.pa4 = regk_gio_yes; - break; - case 5: - intr_cfg.pa5 = regk_gio_lo; - intr_mask.pa5 = regk_gio_yes; - break; - case 6: - intr_cfg.pa6 = regk_gio_lo; - intr_mask.pa6 = regk_gio_yes; - break; - case 7: - intr_cfg.pa7 = regk_gio_lo; - intr_mask.pa7 = regk_gio_yes; - break; - } - - REG_WR(gio, regi_gio, rw_intr_cfg, intr_cfg); - REG_WR(gio, regi_gio, rw_intr_mask, intr_mask); - - gpio_pa_low_alarms |= (1 << CONFIG_ETRAX_VIRTUAL_GPIO_INTERRUPT_PA_PIN); - gpio_some_alarms = 1; -} -#endif - -/* main driver initialization routine, called from mem.c */ - -static __init int -gpio_init(void) -{ - int res; - - /* do the formalities */ - - res = register_chrdev(GPIO_MAJOR, gpio_name, &gpio_fops); - if (res < 0) { - printk(KERN_ERR "gpio: couldn't get a major number.\n"); - return res; - } - - /* Clear all leds */ - CRIS_LED_NETWORK_GRP0_SET(0); - CRIS_LED_NETWORK_GRP1_SET(0); - CRIS_LED_ACTIVE_SET(0); - CRIS_LED_DISK_READ(0); - CRIS_LED_DISK_WRITE(0); - - printk(KERN_INFO "ETRAX FS GPIO driver v2.5, (c) 2003-2007 " - "Axis Communications AB\n"); - /* We call etrax_gpio_wake_up_check() from timer interrupt */ - if (request_irq(TIMER0_INTR_VECT, gpio_poll_timer_interrupt, - IRQF_SHARED, "gpio poll", &alarmlist)) - printk(KERN_ERR "timer0 irq for gpio\n"); - - if (request_irq(GIO_INTR_VECT, gpio_pa_interrupt, - IRQF_SHARED, "gpio PA", &alarmlist)) - printk(KERN_ERR "PA irq for gpio\n"); - -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - virtual_gpio_init(); -#endif - - return res; -} - -/* this makes sure that gpio_init is called during kernel boot */ - -module_init(gpio_init); diff --git a/arch/cris/arch-v32/kernel/crisksyms.c b/arch/cris/arch-v32/kernel/crisksyms.c index bde8d1a10cad..b0566350a840 100644 --- a/arch/cris/arch-v32/kernel/crisksyms.c +++ b/arch/cris/arch-v32/kernel/crisksyms.c @@ -3,7 +3,6 @@ #include <arch/dma.h> #include <arch/intmem.h> #include <mach/pinmux.h> -#include <arch/io.h> /* Functions for allocating DMA channels */ EXPORT_SYMBOL(crisv32_request_dma); @@ -20,8 +19,6 @@ EXPORT_SYMBOL(crisv32_pinmux_alloc); EXPORT_SYMBOL(crisv32_pinmux_alloc_fixed); EXPORT_SYMBOL(crisv32_pinmux_dealloc); EXPORT_SYMBOL(crisv32_pinmux_dealloc_fixed); -EXPORT_SYMBOL(crisv32_io_get_name); -EXPORT_SYMBOL(crisv32_io_get); /* Functions masking/unmasking interrupts */ EXPORT_SYMBOL(crisv32_mask_irq); diff --git a/arch/cris/arch-v32/kernel/debugport.c b/arch/cris/arch-v32/kernel/debugport.c index 02e33ebe51ec..d2f3f9c37102 100644 --- a/arch/cris/arch-v32/kernel/debugport.c +++ b/arch/cris/arch-v32/kernel/debugport.c @@ -77,8 +77,6 @@ static struct dbg_port *port = &ports[2]; #elif defined(CONFIG_ETRAX_DEBUG_PORT3) &ports[3]; -#elif defined(CONFIG_ETRAX_DEBUG_PORT4) - &ports[4]; #else NULL; #endif diff --git a/arch/cris/arch-v32/kernel/head.S b/arch/cris/arch-v32/kernel/head.S index 74a66e0e3777..ea6366800df7 100644 --- a/arch/cris/arch-v32/kernel/head.S +++ b/arch/cris/arch-v32/kernel/head.S @@ -292,11 +292,7 @@ _no_romfs_in_flash: ;; For cramfs, partition starts with magic and length. ;; For jffs2, a jhead is prepended which contains with magic and length. ;; The jhead is not part of the jffs2 partition however. -#ifndef CONFIG_ETRAXFS_SIM move.d __bss_start, $r0 -#else - move.d __end, $r0 -#endif move.d [$r0], $r1 cmp.d CRAMFS_MAGIC, $r1 ; cramfs magic? beq 2f ; yes, jump diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c index 6a881e0e92b4..6de8db67cb09 100644 --- a/arch/cris/arch-v32/kernel/irq.c +++ b/arch/cris/arch-v32/kernel/irq.c @@ -37,7 +37,7 @@ #define IGNOREMASK (1 << (SER0_INTR_VECT - FIRST_IRQ)) #elif defined(CONFIG_ETRAX_KGDB_PORT1) #define IGNOREMASK (1 << (SER1_INTR_VECT - FIRST_IRQ)) -#elif defined(CONFIG_ETRAX_KGB_PORT2) +#elif defined(CONFIG_ETRAX_KGDB_PORT2) #define IGNOREMASK (1 << (SER2_INTR_VECT - FIRST_IRQ)) #elif defined(CONFIG_ETRAX_KGDB_PORT3) #define IGNOREMASK (1 << (SER3_INTR_VECT - FIRST_IRQ)) @@ -464,14 +464,14 @@ init_IRQ(void) etrax_irv->v[i] = weird_irq; np = of_find_compatible_node(NULL, NULL, "axis,crisv32-intc"); - domain = irq_domain_add_legacy(np, NR_IRQS - FIRST_IRQ, + domain = irq_domain_add_legacy(np, NBR_INTR_VECT - FIRST_IRQ, FIRST_IRQ, FIRST_IRQ, &crisv32_irq_ops, NULL); BUG_ON(!domain); irq_set_default_host(domain); of_node_put(np); - for (i = FIRST_IRQ, j = 0; j < NR_IRQS; i++, j++) { + for (i = FIRST_IRQ, j = 0; j < NBR_INTR_VECT; i++, j++) { set_exception_vector(i, interrupt[j]); } diff --git a/arch/cris/arch-v32/kernel/kgdb.c b/arch/cris/arch-v32/kernel/kgdb.c index b06813aeb120..e0fdea706eca 100644 --- a/arch/cris/arch-v32/kernel/kgdb.c +++ b/arch/cris/arch-v32/kernel/kgdb.c @@ -384,19 +384,11 @@ int getDebugChar(void); /* Serial port, writes one character. ETRAX 100 specific. from debugport.c */ void putDebugChar(int val); -/* Returns the integer equivalent of a hexadecimal character. */ -static int hex(char ch); - /* Convert the memory, pointed to by mem into hexadecimal representation. Put the result in buf, and return a pointer to the last character in buf (null). */ static char *mem2hex(char *buf, unsigned char *mem, int count); -/* Convert the array, in hexadecimal representation, pointed to by buf into - binary representation. Put the result in mem, and return a pointer to - the character after the last byte written. */ -static unsigned char *hex2mem(unsigned char *mem, char *buf, int count); - /* Put the content of the array, in binary representation, pointed to by buf into memory pointed to by mem, and return a pointer to the character after the last byte written. */ @@ -449,7 +441,7 @@ static char output_buffer[BUFMAX]; /* Error and warning messages. */ enum error_type { - SUCCESS, E01, E02, E03, E04, E05, E06, + SUCCESS, E01, E02, E03, E04, E05, E06, E07, E08 }; static char *error_message[] = @@ -461,6 +453,8 @@ static char *error_message[] = "E04 The command is not supported - [s,C,S,!,R,d,r] - internal error.", "E05 Change register content - P - the register is not implemented..", "E06 Change memory content - M - internal error.", + "E07 Change register content - P - the register is not stored on the stack", + "E08 Invalid parameter" }; /********************************** Breakpoint *******************************/ @@ -539,7 +533,7 @@ gdb_cris_strtol(const char *s, char **endptr, int base) /********************************* Register image ****************************/ /* Write a value to a specified register in the register image of the current - thread. Returns status code SUCCESS, E02 or E05. */ + thread. Returns status code SUCCESS, E02, E05 or E08. */ static int write_register(int regno, char *val) { @@ -547,8 +541,9 @@ write_register(int regno, char *val) if (regno >= R0 && regno <= ACR) { /* Consecutive 32-bit registers. */ - hex2mem((unsigned char *)®.r0 + (regno - R0) * sizeof(unsigned int), - val, sizeof(unsigned int)); + if (hex2bin((unsigned char *)®.r0 + (regno - R0) * sizeof(unsigned int), + val, sizeof(unsigned int))) + status = E08; } else if (regno == BZ || regno == VR || regno == WZ || regno == DZ) { /* Read-only registers. */ @@ -557,16 +552,19 @@ write_register(int regno, char *val) } else if (regno == PID) { /* 32-bit register. (Even though we already checked SRS and WZ, we cannot combine this with the EXS - SPC write since SRS and WZ have different size.) */ - hex2mem((unsigned char *)®.pid, val, sizeof(unsigned int)); + if (hex2bin((unsigned char *)®.pid, val, sizeof(unsigned int))) + status = E08; } else if (regno == SRS) { /* 8-bit register. */ - hex2mem((unsigned char *)®.srs, val, sizeof(unsigned char)); + if (hex2bin((unsigned char *)®.srs, val, sizeof(unsigned char))) + status = E08; } else if (regno >= EXS && regno <= SPC) { /* Consecutive 32-bit registers. */ - hex2mem((unsigned char *)®.exs + (regno - EXS) * sizeof(unsigned int), - val, sizeof(unsigned int)); + if (hex2bin((unsigned char *)®.exs + (regno - EXS) * sizeof(unsigned int), + val, sizeof(unsigned int))) + status = E08; } else if (regno == PC) { /* Pseudo-register. Treat as read-only. */ @@ -574,7 +572,9 @@ write_register(int regno, char *val) } else if (regno >= S0 && regno <= S15) { /* 32-bit registers. */ - hex2mem((unsigned char *)&sreg.s0_0 + (reg.srs * 16 * sizeof(unsigned int)) + (regno - S0) * sizeof(unsigned int), val, sizeof(unsigned int)); + if (hex2bin((unsigned char *)&sreg.s0_0 + (reg.srs * 16 * sizeof(unsigned int)) + (regno - S0) * sizeof(unsigned int), + val, sizeof(unsigned int))) + status = E08; } else { /* Non-existing register. */ status = E05; @@ -630,19 +630,6 @@ read_register(char regno, unsigned int *valptr) } /********************************** Packet I/O ******************************/ -/* Returns the integer equivalent of a hexadecimal character. */ -static int -hex(char ch) -{ - if ((ch >= 'a') && (ch <= 'f')) - return (ch - 'a' + 10); - if ((ch >= '0') && (ch <= '9')) - return (ch - '0'); - if ((ch >= 'A') && (ch <= 'F')) - return (ch - 'A' + 10); - return -1; -} - /* Convert the memory, pointed to by mem into hexadecimal representation. Put the result in buf, and return a pointer to the last character in buf (null). */ @@ -689,22 +676,6 @@ mem2hex_nbo(char *buf, unsigned char *mem, int count) return buf; } -/* Convert the array, in hexadecimal representation, pointed to by buf into - binary representation. Put the result in mem, and return a pointer to - the character after the last byte written. */ -static unsigned char* -hex2mem(unsigned char *mem, char *buf, int count) -{ - int i; - unsigned char ch; - for (i = 0; i < count; i++) { - ch = hex (*buf++) << 4; - ch = ch + hex (*buf++); - *mem++ = ch; - } - return mem; -} - /* Put the content of the array, in binary representation, pointed to by buf into memory pointed to by mem, and return a pointer to the character after the last byte written. @@ -763,8 +734,8 @@ getpacket(char *buffer) buffer[count] = 0; if (ch == '#') { - xmitcsum = hex(getDebugChar()) << 4; - xmitcsum += hex(getDebugChar()); + xmitcsum = hex_to_bin(getDebugChar()) << 4; + xmitcsum += hex_to_bin(getDebugChar()); if (checksum != xmitcsum) { /* Wrong checksum */ putDebugChar('-'); @@ -1304,14 +1275,17 @@ handle_exception(int sigval) /* Write registers. GXX..XX Each byte of register data is described by two hex digits. Success: OK - Failure: void. */ + Failure: E08. */ /* General and special registers. */ - hex2mem((char *)®, &input_buffer[1], sizeof(registers)); + if (hex2bin((char *)®, &input_buffer[1], sizeof(registers))) + gdb_cris_strcpy(output_buffer, error_message[E08]); /* Support registers. */ - hex2mem((char *)&sreg + (reg.srs * 16 * sizeof(unsigned int)), + else if (hex2bin((char *)&sreg + (reg.srs * 16 * sizeof(unsigned int)), &input_buffer[1] + sizeof(registers), - 16 * sizeof(unsigned int)); - gdb_cris_strcpy(output_buffer, "OK"); + 16 * sizeof(unsigned int))) + gdb_cris_strcpy(output_buffer, error_message[E08]); + else + gdb_cris_strcpy(output_buffer, "OK"); break; case 'P': @@ -1338,6 +1312,10 @@ handle_exception(int sigval) /* Do not support non-existing registers. */ gdb_cris_strcpy(output_buffer, error_message[E05]); break; + case E08: + /* Invalid parameter. */ + gdb_cris_strcpy(output_buffer, error_message[E08]); + break; default: /* Valid register number. */ gdb_cris_strcpy(output_buffer, "OK"); @@ -1380,7 +1358,7 @@ handle_exception(int sigval) AA..AA is the start address, LLLL is the number of bytes, and XX..XX is the hexadecimal data. Success: OK - Failure: void. */ + Failure: E08. */ { char *lenptr; char *dataptr; @@ -1389,13 +1367,15 @@ handle_exception(int sigval) int len = gdb_cris_strtol(lenptr+1, &dataptr, 16); if (*lenptr == ',' && *dataptr == ':') { if (input_buffer[0] == 'M') { - hex2mem(addr, dataptr + 1, len); + if (hex2bin(addr, dataptr + 1, len)) + gdb_cris_strcpy(output_buffer, error_message[E08]); + else + gdb_cris_strcpy(output_buffer, "OK"); } else /* X */ { bin2mem(addr, dataptr + 1, len); + gdb_cris_strcpy(output_buffer, "OK"); } - gdb_cris_strcpy(output_buffer, "OK"); - } - else { + } else { gdb_cris_strcpy(output_buffer, error_message[E06]); } } diff --git a/arch/cris/arch-v32/kernel/setup.c b/arch/cris/arch-v32/kernel/setup.c index cd1865d68b2e..fe50287aa928 100644 --- a/arch/cris/arch-v32/kernel/setup.c +++ b/arch/cris/arch-v32/kernel/setup.c @@ -129,10 +129,6 @@ static struct i2c_board_info __initdata i2c_info[] = { #ifdef CONFIG_RTC_DRV_PCF8563 {I2C_BOARD_INFO("pcf8563", 0x51)}, #endif -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - {I2C_BOARD_INFO("vgpio", 0x20)}, - {I2C_BOARD_INFO("vgpio", 0x21)}, -#endif {I2C_BOARD_INFO("pca9536", 0x41)}, {I2C_BOARD_INFO("fnp300", 0x40)}, {I2C_BOARD_INFO("fnp300", 0x42)}, @@ -146,10 +142,6 @@ static struct i2c_board_info __initdata i2c_info2[] = { {I2C_BOARD_INFO("tmp100", 0x4C)}, {I2C_BOARD_INFO("tmp100", 0x4D)}, {I2C_BOARD_INFO("tmp100", 0x4E)}, -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO - {I2C_BOARD_INFO("vgpio", 0x20)}, - {I2C_BOARD_INFO("vgpio", 0x21)}, -#endif {I2C_BOARD_INFO("pca9536", 0x41)}, {I2C_BOARD_INFO("fnp300", 0x40)}, {I2C_BOARD_INFO("fnp300", 0x42)}, diff --git a/arch/cris/arch-v32/mach-a3/Makefile b/arch/cris/arch-v32/mach-a3/Makefile index 18a227196a41..0cc6eebacbed 100644 --- a/arch/cris/arch-v32/mach-a3/Makefile +++ b/arch/cris/arch-v32/mach-a3/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -obj-y := dma.o pinmux.o io.o arbiter.o +obj-y := dma.o pinmux.o arbiter.o clean: diff --git a/arch/cris/arch-v32/mach-a3/io.c b/arch/cris/arch-v32/mach-a3/io.c deleted file mode 100644 index 090ceb99ef0b..000000000000 --- a/arch/cris/arch-v32/mach-a3/io.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Helper functions for I/O pins. - * - * Copyright (c) 2005-2007 Axis Communications AB. - */ - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/io.h> -#include <mach/pinmux.h> -#include <hwregs/gio_defs.h> - -struct crisv32_ioport crisv32_ioports[] = { - { - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pa_oe), - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pa_dout), - (unsigned long *)REG_ADDR(gio, regi_gio, r_pa_din), - 32 - }, - { - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pb_oe), - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pb_dout), - (unsigned long *)REG_ADDR(gio, regi_gio, r_pb_din), - 32 - }, - { - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pc_oe), - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pc_dout), - (unsigned long *)REG_ADDR(gio, regi_gio, r_pc_din), - 16 - }, -}; - -#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports) - -struct crisv32_iopin crisv32_led_net0_green; -struct crisv32_iopin crisv32_led_net0_red; -struct crisv32_iopin crisv32_led2_green; -struct crisv32_iopin crisv32_led2_red; -struct crisv32_iopin crisv32_led3_green; -struct crisv32_iopin crisv32_led3_red; - -/* Dummy port used when green LED and red LED is on the same bit */ -static unsigned long io_dummy; -static struct crisv32_ioport dummy_port = { - &io_dummy, - &io_dummy, - &io_dummy, - 32 -}; -static struct crisv32_iopin dummy_led = { - &dummy_port, - 0 -}; - -static int __init crisv32_io_init(void) -{ - int ret = 0; - - u32 i; - - /* Locks *should* be dynamically initialized. */ - for (i = 0; i < ARRAY_SIZE(crisv32_ioports); i++) - spin_lock_init(&crisv32_ioports[i].lock); - spin_lock_init(&dummy_port.lock); - - /* Initialize LEDs */ -#if (defined(CONFIG_ETRAX_NBR_LED_GRP_ONE) || defined(CONFIG_ETRAX_NBR_LED_GRP_TWO)) - ret += crisv32_io_get_name(&crisv32_led_net0_green, - CONFIG_ETRAX_LED_G_NET0); - crisv32_io_set_dir(&crisv32_led_net0_green, crisv32_io_dir_out); - if (strcmp(CONFIG_ETRAX_LED_G_NET0, CONFIG_ETRAX_LED_R_NET0)) { - ret += crisv32_io_get_name(&crisv32_led_net0_red, - CONFIG_ETRAX_LED_R_NET0); - crisv32_io_set_dir(&crisv32_led_net0_red, crisv32_io_dir_out); - } else - crisv32_led_net0_red = dummy_led; -#endif - - ret += crisv32_io_get_name(&crisv32_led2_green, CONFIG_ETRAX_V32_LED2G); - ret += crisv32_io_get_name(&crisv32_led2_red, CONFIG_ETRAX_V32_LED2R); - ret += crisv32_io_get_name(&crisv32_led3_green, CONFIG_ETRAX_V32_LED3G); - ret += crisv32_io_get_name(&crisv32_led3_red, CONFIG_ETRAX_V32_LED3R); - - crisv32_io_set_dir(&crisv32_led2_green, crisv32_io_dir_out); - crisv32_io_set_dir(&crisv32_led2_red, crisv32_io_dir_out); - crisv32_io_set_dir(&crisv32_led3_green, crisv32_io_dir_out); - crisv32_io_set_dir(&crisv32_led3_red, crisv32_io_dir_out); - - return ret; -} - -__initcall(crisv32_io_init); - -int crisv32_io_get(struct crisv32_iopin *iopin, - unsigned int port, unsigned int pin) -{ - if (port > NBR_OF_PORTS) - return -EINVAL; - if (port > crisv32_ioports[port].pin_count) - return -EINVAL; - - iopin->bit = 1 << pin; - iopin->port = &crisv32_ioports[port]; - - if (crisv32_pinmux_alloc(port, pin, pin, pinmux_gpio)) - return -EIO; - - return 0; -} - -int crisv32_io_get_name(struct crisv32_iopin *iopin, const char *name) -{ - int port; - int pin; - - if (toupper(*name) == 'P') - name++; - - if (toupper(*name) < 'A' || toupper(*name) > 'E') - return -EINVAL; - - port = toupper(*name) - 'A'; - name++; - pin = simple_strtoul(name, NULL, 10); - - if (pin < 0 || pin > crisv32_ioports[port].pin_count) - return -EINVAL; - - iopin->bit = 1 << pin; - iopin->port = &crisv32_ioports[port]; - - if (crisv32_pinmux_alloc(port, pin, pin, pinmux_gpio)) - return -EIO; - - return 0; -} - -#ifdef CONFIG_PCI -/* PCI I/O access stuff */ -struct cris_io_operations *cris_iops = NULL; -EXPORT_SYMBOL(cris_iops); -#endif - diff --git a/arch/cris/arch-v32/mach-fs/Kconfig b/arch/cris/arch-v32/mach-fs/Kconfig index 774de82abef6..7d1ab972bc0f 100644 --- a/arch/cris/arch-v32/mach-fs/Kconfig +++ b/arch/cris/arch-v32/mach-fs/Kconfig @@ -192,25 +192,6 @@ config ETRAX_DEF_GIO_PE_OUT Configures the initial data for the general port E bits. Most products should use 00000 here. -config ETRAX_DEF_GIO_PV_OE - hex "GIO_PV_OE" - depends on ETRAX_VIRTUAL_GPIO - default "0000" - help - Configures the direction of virtual general port V bits. 1 is out, - 0 is in. This is often totally different depending on the product - used. These bits are used for all kinds of stuff. If you don't know - what to use, it is always safe to put all as inputs, although - floating inputs isn't good. - -config ETRAX_DEF_GIO_PV_OUT - hex "GIO_PV_OUT" - depends on ETRAX_VIRTUAL_GPIO - default "0000" - help - Configures the initial data for the virtual general port V bits. - Most products should use 0000 here. - endmenu endif diff --git a/arch/cris/arch-v32/mach-fs/Makefile b/arch/cris/arch-v32/mach-fs/Makefile index 18a227196a41..0cc6eebacbed 100644 --- a/arch/cris/arch-v32/mach-fs/Makefile +++ b/arch/cris/arch-v32/mach-fs/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -obj-y := dma.o pinmux.o io.o arbiter.o +obj-y := dma.o pinmux.o arbiter.o clean: diff --git a/arch/cris/arch-v32/mach-fs/io.c b/arch/cris/arch-v32/mach-fs/io.c deleted file mode 100644 index a6958661fa8e..000000000000 --- a/arch/cris/arch-v32/mach-fs/io.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Helper functions for I/O pins. - * - * Copyright (c) 2004-2007 Axis Communications AB. - */ - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <asm/io.h> -#include <mach/pinmux.h> -#include <hwregs/gio_defs.h> - -#ifndef DEBUG -#define DEBUG(x) -#endif - -struct crisv32_ioport crisv32_ioports[] = { - { - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pa_oe), - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pa_dout), - (unsigned long *)REG_ADDR(gio, regi_gio, r_pa_din), - 8 - }, - { - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pb_oe), - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pb_dout), - (unsigned long *)REG_ADDR(gio, regi_gio, r_pb_din), - 18 - }, - { - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pc_oe), - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pc_dout), - (unsigned long *)REG_ADDR(gio, regi_gio, r_pc_din), - 18 - }, - { - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pd_oe), - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pd_dout), - (unsigned long *)REG_ADDR(gio, regi_gio, r_pd_din), - 18 - }, - { - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pe_oe), - (unsigned long *)REG_ADDR(gio, regi_gio, rw_pe_dout), - (unsigned long *)REG_ADDR(gio, regi_gio, r_pe_din), - 18 - } -}; - -#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports) - -struct crisv32_iopin crisv32_led_net0_green; -struct crisv32_iopin crisv32_led_net0_red; -struct crisv32_iopin crisv32_led_net1_green; -struct crisv32_iopin crisv32_led_net1_red; -struct crisv32_iopin crisv32_led2_green; -struct crisv32_iopin crisv32_led2_red; -struct crisv32_iopin crisv32_led3_green; -struct crisv32_iopin crisv32_led3_red; - -/* Dummy port used when green LED and red LED is on the same bit */ -static unsigned long io_dummy; -static struct crisv32_ioport dummy_port = { - &io_dummy, - &io_dummy, - &io_dummy, - 18 -}; -static struct crisv32_iopin dummy_led = { - &dummy_port, - 0 -}; - -static int __init crisv32_io_init(void) -{ - int ret = 0; - - u32 i; - - /* Locks *should* be dynamically initialized. */ - for (i = 0; i < ARRAY_SIZE(crisv32_ioports); i++) - spin_lock_init(&crisv32_ioports[i].lock); - spin_lock_init(&dummy_port.lock); - - /* Initialize LEDs */ -#if (defined(CONFIG_ETRAX_NBR_LED_GRP_ONE) || defined(CONFIG_ETRAX_NBR_LED_GRP_TWO)) - ret += - crisv32_io_get_name(&crisv32_led_net0_green, - CONFIG_ETRAX_LED_G_NET0); - crisv32_io_set_dir(&crisv32_led_net0_green, crisv32_io_dir_out); - if (strcmp(CONFIG_ETRAX_LED_G_NET0, CONFIG_ETRAX_LED_R_NET0)) { - ret += - crisv32_io_get_name(&crisv32_led_net0_red, - CONFIG_ETRAX_LED_R_NET0); - crisv32_io_set_dir(&crisv32_led_net0_red, crisv32_io_dir_out); - } else - crisv32_led_net0_red = dummy_led; -#endif - -#ifdef CONFIG_ETRAX_NBR_LED_GRP_TWO - ret += - crisv32_io_get_name(&crisv32_led_net1_green, - CONFIG_ETRAX_LED_G_NET1); - crisv32_io_set_dir(&crisv32_led_net1_green, crisv32_io_dir_out); - if (strcmp(CONFIG_ETRAX_LED_G_NET1, CONFIG_ETRAX_LED_R_NET1)) { - crisv32_io_get_name(&crisv32_led_net1_red, - CONFIG_ETRAX_LED_R_NET1); - crisv32_io_set_dir(&crisv32_led_net1_red, crisv32_io_dir_out); - } else - crisv32_led_net1_red = dummy_led; -#endif - - ret += crisv32_io_get_name(&crisv32_led2_green, CONFIG_ETRAX_V32_LED2G); - ret += crisv32_io_get_name(&crisv32_led2_red, CONFIG_ETRAX_V32_LED2R); - ret += crisv32_io_get_name(&crisv32_led3_green, CONFIG_ETRAX_V32_LED3G); - ret += crisv32_io_get_name(&crisv32_led3_red, CONFIG_ETRAX_V32_LED3R); - - crisv32_io_set_dir(&crisv32_led2_green, crisv32_io_dir_out); - crisv32_io_set_dir(&crisv32_led2_red, crisv32_io_dir_out); - crisv32_io_set_dir(&crisv32_led3_green, crisv32_io_dir_out); - crisv32_io_set_dir(&crisv32_led3_red, crisv32_io_dir_out); - - return ret; -} - -__initcall(crisv32_io_init); - -int crisv32_io_get(struct crisv32_iopin *iopin, - unsigned int port, unsigned int pin) -{ - if (port > NBR_OF_PORTS) - return -EINVAL; - if (port > crisv32_ioports[port].pin_count) - return -EINVAL; - - iopin->bit = 1 << pin; - iopin->port = &crisv32_ioports[port]; - - /* Only allocate pinmux gpiopins if port != PORT_A (port 0) */ - /* NOTE! crisv32_pinmux_alloc thinks PORT_B is port 0 */ - if (port != 0 && crisv32_pinmux_alloc(port - 1, pin, pin, pinmux_gpio)) - return -EIO; - DEBUG(printk(KERN_DEBUG "crisv32_io_get: Allocated pin %d on port %d\n", - pin, port)); - - return 0; -} - -int crisv32_io_get_name(struct crisv32_iopin *iopin, const char *name) -{ - int port; - int pin; - - if (toupper(*name) == 'P') - name++; - - if (toupper(*name) < 'A' || toupper(*name) > 'E') - return -EINVAL; - - port = toupper(*name) - 'A'; - name++; - pin = simple_strtoul(name, NULL, 10); - - if (pin < 0 || pin > crisv32_ioports[port].pin_count) - return -EINVAL; - - iopin->bit = 1 << pin; - iopin->port = &crisv32_ioports[port]; - - /* Only allocate pinmux gpiopins if port != PORT_A (port 0) */ - /* NOTE! crisv32_pinmux_alloc thinks PORT_B is port 0 */ - if (port != 0 && crisv32_pinmux_alloc(port - 1, pin, pin, pinmux_gpio)) - return -EIO; - - DEBUG(printk(KERN_DEBUG - "crisv32_io_get_name: Allocated pin %d on port %d\n", - pin, port)); - - return 0; -} - -#ifdef CONFIG_PCI -/* PCI I/O access stuff */ -struct cris_io_operations *cris_iops = NULL; -EXPORT_SYMBOL(cris_iops); -#endif diff --git a/arch/cris/boot/dts/artpec3.dtsi b/arch/cris/boot/dts/artpec3.dtsi new file mode 100644 index 000000000000..be15be67b653 --- /dev/null +++ b/arch/cris/boot/dts/artpec3.dtsi @@ -0,0 +1,46 @@ +/ { + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&intc>; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + model = "axis,crisv32"; + reg = <0>; + }; + }; + + soc { + compatible = "simple-bus"; + model = "artpec3"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + intc: interrupt-controller { + compatible = "axis,crisv32-intc"; + reg = <0xb002a000 0x1000>; + interrupt-controller; + #interrupt-cells = <1>; + }; + + gio: gpio@b0020000 { + compatible = "axis,artpec3-gio"; + reg = <0xb0020000 0x1000>; + interrupts = <61>; + gpio-controller; + #gpio-cells = <3>; + }; + + serial@b003e000 { + compatible = "axis,etraxfs-uart"; + reg = <0xb003e000 0x1000>; + interrupts = <64>; + status = "disabled"; + }; + }; +}; diff --git a/arch/cris/boot/dts/dev88.dts b/arch/cris/boot/dts/dev88.dts index 4fa5a3f9d0ec..b9a230d10874 100644 --- a/arch/cris/boot/dts/dev88.dts +++ b/arch/cris/boot/dts/dev88.dts @@ -1,5 +1,7 @@ /dts-v1/; +#include <dt-bindings/gpio/gpio.h> + /include/ "etraxfs.dtsi" / { @@ -15,4 +17,51 @@ status = "okay"; }; }; + + spi { + compatible = "spi-gpio"; + #address-cells = <1>; + #size-cells = <0>; + + gpio-sck = <&gio 1 0 0xd>; + gpio-miso = <&gio 4 0 0xd>; + gpio-mosi = <&gio 0 0 0xd>; + cs-gpios = <&gio 3 0 0xd>; + num-chipselects = <1>; + + temp-sensor@0 { + compatible = "ti,lm70"; + reg = <0>; + + spi-max-frequency = <100000>; + }; + }; + + i2c { + compatible = "i2c-gpio"; + gpios = <&gio 5 0 0xd>, <&gio 6 0 0xd>; + i2c-gpio,delay-us = <2>; + #address-cells = <1>; + #size-cells = <0>; + + rtc@51 { + compatible = "nxp,pcf8563"; + reg = <0x51>; + }; + }; + + leds { + compatible = "gpio-leds"; + + network { + label = "network"; + gpios = <&gio 2 GPIO_ACTIVE_LOW 0xa>; + }; + + status { + label = "status"; + gpios = <&gio 3 GPIO_ACTIVE_LOW 0xa>; + linux,default-trigger = "heartbeat"; + }; + }; }; diff --git a/arch/cris/boot/dts/etraxfs.dtsi b/arch/cris/boot/dts/etraxfs.dtsi index 909bcedc3565..bf1b8582d4d8 100644 --- a/arch/cris/boot/dts/etraxfs.dtsi +++ b/arch/cris/boot/dts/etraxfs.dtsi @@ -28,6 +28,14 @@ #interrupt-cells = <1>; }; + gio: gpio@b001a000 { + compatible = "axis,etraxfs-gio"; + reg = <0xb001a000 0x1000>; + interrupts = <50>; + gpio-controller; + #gpio-cells = <3>; + }; + serial@b00260000 { compatible = "axis,etraxfs-uart"; reg = <0xb0026000 0x1000>; diff --git a/arch/cris/boot/dts/include/dt-bindings b/arch/cris/boot/dts/include/dt-bindings new file mode 120000 index 000000000000..08c00e4972fa --- /dev/null +++ b/arch/cris/boot/dts/include/dt-bindings @@ -0,0 +1 @@ +../../../../../include/dt-bindings
\ No newline at end of file diff --git a/arch/cris/boot/dts/p1343.dts b/arch/cris/boot/dts/p1343.dts new file mode 100644 index 000000000000..fab7bdbd0f15 --- /dev/null +++ b/arch/cris/boot/dts/p1343.dts @@ -0,0 +1,76 @@ +/dts-v1/; + +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/input/input.h> + +/include/ "artpec3.dtsi" + +/ { + model = "Axis P1343 Network Camera"; + compatible = "axis,p1343"; + + aliases { + serial0 = &uart0; + }; + + soc { + uart0: serial@b003e000 { + status = "okay"; + }; + }; + + i2c { + compatible = "i2c-gpio"; + gpios = <&gio 3 0 0xa>, <&gio 2 0 0xa>; + i2c-gpio,delay-us = <2>; + #address-cells = <1>; + #size-cells = <0>; + + rtc@51 { + compatible = "nxp,pcf8563"; + reg = <0x51>; + }; + }; + + leds { + compatible = "gpio-leds"; + + status_green { + label = "status:green"; + gpios = <&gio 0 GPIO_ACTIVE_LOW 0xc>; + linux,default-trigger = "heartbeat"; + }; + + status_red { + label = "status:red"; + gpios = <&gio 1 GPIO_ACTIVE_LOW 0xc>; + }; + + network_green { + label = "network:green"; + gpios = <&gio 2 GPIO_ACTIVE_LOW 0xc>; + }; + + network_red { + label = "network:red"; + gpios = <&gio 3 GPIO_ACTIVE_LOW 0xc>; + }; + + power_red { + label = "power:red"; + gpios = <&gio 4 GPIO_ACTIVE_LOW 0xc>; + }; + }; + + gpio_keys { + compatible = "gpio-keys"; + #address-cells = <1>; + #size-cells = <0>; + + activity-button@0 { + label = "Activity Button"; + linux,code = <KEY_FN>; + gpios = <&gio 13 GPIO_ACTIVE_LOW 0xd>; + }; + }; +}; diff --git a/arch/cris/boot/rescue/head_v10.S b/arch/cris/boot/rescue/head_v10.S index af55df0994b3..1c05492f3eb2 100644 --- a/arch/cris/boot/rescue/head_v10.S +++ b/arch/cris/boot/rescue/head_v10.S @@ -281,9 +281,6 @@ wait_ser: #ifdef CONFIG_ETRAX_PB_LEDS move.b $r2, [R_PORT_PB_DATA] #endif -#ifdef CONFIG_ETRAX_90000000_LEDS - move.b $r2, [0x90000000] -#endif #endif ;; check if we got something on the serial port diff --git a/arch/cris/include/arch-v32/arch/io.h b/arch/cris/include/arch-v32/arch/io.h deleted file mode 100644 index adc5484351bf..000000000000 --- a/arch/cris/include/arch-v32/arch/io.h +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef _ASM_ARCH_CRIS_IO_H -#define _ASM_ARCH_CRIS_IO_H - -#include <linux/spinlock.h> -#include <hwregs/reg_map.h> -#include <hwregs/reg_rdwr.h> -#include <hwregs/gio_defs.h> - -enum crisv32_io_dir -{ - crisv32_io_dir_in = 0, - crisv32_io_dir_out = 1 -}; - -struct crisv32_ioport -{ - volatile unsigned long *oe; - volatile unsigned long *data; - volatile unsigned long *data_in; - unsigned int pin_count; - spinlock_t lock; -}; - -struct crisv32_iopin -{ - struct crisv32_ioport* port; - int bit; -}; - -extern struct crisv32_ioport crisv32_ioports[]; - -extern struct crisv32_iopin crisv32_led1_green; -extern struct crisv32_iopin crisv32_led1_red; -extern struct crisv32_iopin crisv32_led2_green; -extern struct crisv32_iopin crisv32_led2_red; -extern struct crisv32_iopin crisv32_led3_green; -extern struct crisv32_iopin crisv32_led3_red; - -extern struct crisv32_iopin crisv32_led_net0_green; -extern struct crisv32_iopin crisv32_led_net0_red; -extern struct crisv32_iopin crisv32_led_net1_green; -extern struct crisv32_iopin crisv32_led_net1_red; - -static inline void crisv32_io_set(struct crisv32_iopin *iopin, int val) -{ - unsigned long flags; - spin_lock_irqsave(&iopin->port->lock, flags); - - if (iopin->port->data) { - if (val) - *iopin->port->data |= iopin->bit; - else - *iopin->port->data &= ~iopin->bit; - } - - spin_unlock_irqrestore(&iopin->port->lock, flags); -} - -static inline void crisv32_io_set_dir(struct crisv32_iopin* iopin, - enum crisv32_io_dir dir) -{ - unsigned long flags; - spin_lock_irqsave(&iopin->port->lock, flags); - - if (iopin->port->oe) { - if (dir == crisv32_io_dir_in) - *iopin->port->oe &= ~iopin->bit; - else - *iopin->port->oe |= iopin->bit; - } - - spin_unlock_irqrestore(&iopin->port->lock, flags); -} - -static inline int crisv32_io_rd(struct crisv32_iopin* iopin) -{ - return ((*iopin->port->data_in & iopin->bit) ? 1 : 0); -} - -int crisv32_io_get(struct crisv32_iopin* iopin, - unsigned int port, unsigned int pin); -int crisv32_io_get_name(struct crisv32_iopin* iopin, - const char *name); - -#define CRIS_LED_OFF 0x00 -#define CRIS_LED_GREEN 0x01 -#define CRIS_LED_RED 0x02 -#define CRIS_LED_ORANGE (CRIS_LED_GREEN | CRIS_LED_RED) - -#if (defined(CONFIG_ETRAX_NBR_LED_GRP_ONE) || defined(CONFIG_ETRAX_NBR_LED_GRP_TWO)) -#define CRIS_LED_NETWORK_GRP0_SET(x) \ - do { \ - CRIS_LED_NETWORK_GRP0_SET_G((x) & CRIS_LED_GREEN); \ - CRIS_LED_NETWORK_GRP0_SET_R((x) & CRIS_LED_RED); \ - } while (0) -#else -#define CRIS_LED_NETWORK_GRP0_SET(x) while (0) {} -#endif - -#define CRIS_LED_NETWORK_GRP0_SET_G(x) \ - crisv32_io_set(&crisv32_led_net0_green, !(x)); - -#define CRIS_LED_NETWORK_GRP0_SET_R(x) \ - crisv32_io_set(&crisv32_led_net0_red, !(x)); - -#if defined(CONFIG_ETRAX_NBR_LED_GRP_TWO) -#define CRIS_LED_NETWORK_GRP1_SET(x) \ - do { \ - CRIS_LED_NETWORK_GRP1_SET_G((x) & CRIS_LED_GREEN); \ - CRIS_LED_NETWORK_GRP1_SET_R((x) & CRIS_LED_RED); \ - } while (0) -#else -#define CRIS_LED_NETWORK_GRP1_SET(x) while (0) {} -#endif - -#define CRIS_LED_NETWORK_GRP1_SET_G(x) \ - crisv32_io_set(&crisv32_led_net1_green, !(x)); - -#define CRIS_LED_NETWORK_GRP1_SET_R(x) \ - crisv32_io_set(&crisv32_led_net1_red, !(x)); - -#define CRIS_LED_ACTIVE_SET(x) \ - do { \ - CRIS_LED_ACTIVE_SET_G((x) & CRIS_LED_GREEN); \ - CRIS_LED_ACTIVE_SET_R((x) & CRIS_LED_RED); \ - } while (0) - -#define CRIS_LED_ACTIVE_SET_G(x) \ - crisv32_io_set(&crisv32_led2_green, !(x)); -#define CRIS_LED_ACTIVE_SET_R(x) \ - crisv32_io_set(&crisv32_led2_red, !(x)); -#define CRIS_LED_DISK_WRITE(x) \ - do{\ - crisv32_io_set(&crisv32_led3_green, !(x)); \ - crisv32_io_set(&crisv32_led3_red, !(x)); \ - }while(0) -#define CRIS_LED_DISK_READ(x) \ - crisv32_io_set(&crisv32_led3_green, !(x)); - -#endif diff --git a/arch/cris/include/arch-v32/arch/irq.h b/arch/cris/include/arch-v32/arch/irq.h index 0c1b4d3a34e7..8270a1bbfdb6 100644 --- a/arch/cris/include/arch-v32/arch/irq.h +++ b/arch/cris/include/arch-v32/arch/irq.h @@ -4,7 +4,7 @@ #include <hwregs/intr_vect.h> /* Number of non-cpu interrupts. */ -#define NR_IRQS NBR_INTR_VECT /* Exceptions + IRQs */ +#define NR_IRQS (NBR_INTR_VECT + 256) /* Exceptions + IRQs */ #define FIRST_IRQ 0x31 /* Exception number for first IRQ */ #define NR_REAL_IRQS (NBR_INTR_VECT - FIRST_IRQ) /* IRQs */ #if NR_REAL_IRQS > 32 diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index b7f68192d15b..1778805f6380 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -43,4 +43,5 @@ generic-y += topology.h generic-y += trace_clock.h generic-y += types.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/cris/include/asm/eshlibld.h b/arch/cris/include/asm/eshlibld.h index 10ce36cf79a9..70aa448256b0 100644 --- a/arch/cris/include/asm/eshlibld.h +++ b/arch/cris/include/asm/eshlibld.h @@ -45,8 +45,7 @@ assumed that we want to share code when debugging (exposes more trouble). */ #ifndef SHARE_LIB_CORE -# if (defined(__KERNEL__) || !defined(RELOC_DEBUG)) \ - && !defined(CONFIG_SHARE_SHLIB_CORE) +# if (defined(__KERNEL__) || !defined(RELOC_DEBUG)) # define SHARE_LIB_CORE 0 # else # define SHARE_LIB_CORE 1 diff --git a/arch/cris/include/asm/io.h b/arch/cris/include/asm/io.h index 752a3f45df60..cce8664d5dd6 100644 --- a/arch/cris/include/asm/io.h +++ b/arch/cris/include/asm/io.h @@ -2,7 +2,9 @@ #define _ASM_CRIS_IO_H #include <asm/page.h> /* for __va, __pa */ +#ifdef CONFIG_ETRAX_ARCH_V10 #include <arch/io.h> +#endif #include <asm-generic/iomap.h> #include <linux/kernel.h> diff --git a/arch/cris/include/uapi/asm/etraxgpio.h b/arch/cris/include/uapi/asm/etraxgpio.h index 461c089db765..c6e7d57c8b24 100644 --- a/arch/cris/include/uapi/asm/etraxgpio.h +++ b/arch/cris/include/uapi/asm/etraxgpio.h @@ -11,26 +11,6 @@ * g1-g7 and g25-g31 is both input and outputs but on different pins * Also note that some bits change pins depending on what interfaces * are enabled. - * - * For ETRAX FS (CONFIG_ETRAXFS): - * /dev/gpioa minor 0, 8 bit GPIO, each bit can change direction - * /dev/gpiob minor 1, 18 bit GPIO, each bit can change direction - * /dev/gpioc minor 3, 18 bit GPIO, each bit can change direction - * /dev/gpiod minor 4, 18 bit GPIO, each bit can change direction - * /dev/gpioe minor 5, 18 bit GPIO, each bit can change direction - * /dev/leds minor 2, Access to leds depending on kernelconfig - * - * For ARTPEC-3 (CONFIG_CRIS_MACH_ARTPEC3): - * /dev/gpioa minor 0, 32 bit GPIO, each bit can change direction - * /dev/gpiob minor 1, 32 bit GPIO, each bit can change direction - * /dev/gpioc minor 3, 16 bit GPIO, each bit can change direction - * /dev/gpiod minor 4, 32 bit GPIO, input only - * /dev/leds minor 2, Access to leds depending on kernelconfig - * /dev/pwm0 minor 16, PWM channel 0 on PA30 - * /dev/pwm1 minor 17, PWM channel 1 on PA31 - * /dev/pwm2 minor 18, PWM channel 2 on PB26 - * /dev/ppwm minor 19, PPWM channel - * */ #ifndef _ASM_ETRAXGPIO_H #define _ASM_ETRAXGPIO_H @@ -40,52 +20,12 @@ #define ETRAXGPIO_IOCTYPE 43 /* etraxgpio _IOC_TYPE, bits 8 to 15 in ioctl cmd */ -#ifdef CONFIG_ETRAX_ARCH_V10 #define GPIO_MINOR_A 0 #define GPIO_MINOR_B 1 #define GPIO_MINOR_LEDS 2 #define GPIO_MINOR_G 3 #define GPIO_MINOR_LAST 3 #define GPIO_MINOR_LAST_REAL GPIO_MINOR_LAST -#endif - -#ifdef CONFIG_ETRAXFS -#define GPIO_MINOR_A 0 -#define GPIO_MINOR_B 1 -#define GPIO_MINOR_LEDS 2 -#define GPIO_MINOR_C 3 -#define GPIO_MINOR_D 4 -#define GPIO_MINOR_E 5 -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -#define GPIO_MINOR_V 6 -#define GPIO_MINOR_LAST 6 -#else -#define GPIO_MINOR_LAST 5 -#endif -#define GPIO_MINOR_LAST_REAL GPIO_MINOR_LAST -#endif - -#ifdef CONFIG_CRIS_MACH_ARTPEC3 -#define GPIO_MINOR_A 0 -#define GPIO_MINOR_B 1 -#define GPIO_MINOR_LEDS 2 -#define GPIO_MINOR_C 3 -#define GPIO_MINOR_D 4 -#ifdef CONFIG_ETRAX_VIRTUAL_GPIO -#define GPIO_MINOR_V 6 -#define GPIO_MINOR_LAST 6 -#else -#define GPIO_MINOR_LAST 4 -#endif -#define GPIO_MINOR_FIRST_PWM 16 -#define GPIO_MINOR_PWM0 (GPIO_MINOR_FIRST_PWM+0) -#define GPIO_MINOR_PWM1 (GPIO_MINOR_FIRST_PWM+1) -#define GPIO_MINOR_PWM2 (GPIO_MINOR_FIRST_PWM+2) -#define GPIO_MINOR_PPWM (GPIO_MINOR_FIRST_PWM+3) -#define GPIO_MINOR_LAST_PWM GPIO_MINOR_PPWM -#define GPIO_MINOR_LAST_REAL GPIO_MINOR_LAST_PWM -#endif - /* supported ioctl _IOC_NR's */ @@ -139,101 +79,4 @@ #define IO_SETGET_OUTPUT 0x13 /* bits set in *arg is set to output, */ /* *arg updated with current output pins. */ -/* The following ioctl's are applicable to the PWM channels only */ - -#define IO_PWM_SET_MODE 0x20 - -enum io_pwm_mode { - PWM_OFF = 0, /* disabled, deallocated */ - PWM_STANDARD = 1, /* 390 kHz, duty cycle 0..255/256 */ - PWM_FAST = 2, /* variable freq, w/ 10ns active pulse len */ - PWM_VARFREQ = 3, /* individually configurable high/low periods */ - PWM_SOFT = 4 /* software generated */ -}; - -struct io_pwm_set_mode { - enum io_pwm_mode mode; -}; - -/* Only for mode PWM_VARFREQ. Period lo/high set in increments of 10ns - * from 10ns (value = 0) to 81920ns (value = 8191) - * (Resulting frequencies range from 50 MHz (10ns + 10ns) down to - * 6.1 kHz (81920ns + 81920ns) at 50% duty cycle, to 12.2 kHz at min/max duty - * cycle (81920 + 10ns or 10ns + 81920ns, respectively).) - */ -#define IO_PWM_SET_PERIOD 0x21 - -struct io_pwm_set_period { - unsigned int lo; /* 0..8191 */ - unsigned int hi; /* 0..8191 */ -}; - -/* Only for modes PWM_STANDARD and PWM_FAST. - * For PWM_STANDARD, set duty cycle of 390 kHz PWM output signal, from - * 0 (value = 0) to 255/256 (value = 255). - * For PWM_FAST, set duty cycle of PWM output signal from - * 0% (value = 0) to 100% (value = 255). Output signal in this mode - * is a 10ns pulse surrounded by a high or low level depending on duty - * cycle (except for 0% and 100% which result in a constant output). - * Resulting output frequency varies from 50 MHz at 50% duty cycle, - * down to 390 kHz at min/max duty cycle. - */ -#define IO_PWM_SET_DUTY 0x22 - -struct io_pwm_set_duty { - int duty; /* 0..255 */ -}; - -/* Returns information about the latest PWM pulse. - * lo: Length of the latest low period, in units of 10ns. - * hi: Length of the latest high period, in units of 10ns. - * cnt: Time since last detected edge, in units of 10ns. - * - * The input source to PWM is decied by IO_PWM_SET_INPUT_SRC. - * - * NOTE: All PWM devices is connected to the same input source. - */ -#define IO_PWM_GET_PERIOD 0x23 - -struct io_pwm_get_period { - unsigned int lo; - unsigned int hi; - unsigned int cnt; -}; - -/* Sets the input source for the PWM input. For the src value see the - * register description for gio:rw_pwm_in_cfg. - * - * NOTE: All PWM devices is connected to the same input source. - */ -#define IO_PWM_SET_INPUT_SRC 0x24 -struct io_pwm_set_input_src { - unsigned int src; /* 0..7 */ -}; - -/* Sets the duty cycles in steps of 1/256, 0 = 0%, 255 = 100% duty cycle */ -#define IO_PPWM_SET_DUTY 0x25 - -struct io_ppwm_set_duty { - int duty; /* 0..255 */ -}; - -/* Configuraton struct for the IO_PWMCLK_SET_CONFIG ioctl to configure - * PWM capable gpio pins: - */ -#define IO_PWMCLK_SETGET_CONFIG 0x26 -struct gpio_pwmclk_conf { - unsigned int gpiopin; /* The pin number based on the opened device */ - unsigned int baseclk; /* The base clock to use, or sw will select one close*/ - unsigned int low; /* The number of low periods of the baseclk */ - unsigned int high; /* The number of high periods of the baseclk */ -}; - -/* Examples: - * To get a symmetric 12 MHz clock without knowing anything about the hardware: - * baseclk = 12000000, low = 0, high = 0 - * To just get info of current setting: - * baseclk = 0, low = 0, high = 0, the values will be updated by driver. - */ - #endif diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c index e704f81f85cc..31b4bd288cad 100644 --- a/arch/cris/kernel/crisksyms.c +++ b/arch/cris/kernel/crisksyms.c @@ -18,7 +18,6 @@ #include <asm/pgtable.h> #include <asm/fasttimer.h> -extern unsigned long get_cmos_time(void); extern void __Udiv(void); extern void __Umod(void); extern void __Div(void); @@ -30,7 +29,6 @@ extern void __negdi2(void); extern void iounmap(volatile void * __iomem); /* Platform dependent support */ -EXPORT_SYMBOL(get_cmos_time); EXPORT_SYMBOL(loops_per_usec); /* Math functions */ diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c index 7780d379522f..2dda6da71521 100644 --- a/arch/cris/kernel/time.c +++ b/arch/cris/kernel/time.c @@ -39,31 +39,6 @@ extern unsigned long loops_per_jiffy; /* init/main.c */ unsigned long loops_per_usec; -int set_rtc_mmss(unsigned long nowtime) -{ - D(printk(KERN_DEBUG "set_rtc_mmss(%lu)\n", nowtime)); - return 0; -} - -/* grab the time from the RTC chip */ -unsigned long get_cmos_time(void) -{ - return 0; -} - - -int update_persistent_clock(struct timespec now) -{ - return set_rtc_mmss(now.tv_sec); -} - -void read_persistent_clock(struct timespec *ts) -{ - ts->tv_sec = 0; - ts->tv_nsec = 0; -} - - extern void cris_profile_sample(struct pt_regs* regs); void diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 8e47b832cc76..1fa084cf1a43 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -7,3 +7,4 @@ generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 0da689def4cc..64f02d451aa8 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -32,8 +32,8 @@ */ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic_set(v, i) (((v)->counter) = (i)) +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) static inline int atomic_inc_return(atomic_t *v) { diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 70e6ae1e7006..373cb23301e3 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -73,4 +73,5 @@ generic-y += uaccess.h generic-y += ucontext.h generic-y += unaligned.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 702ee539f87d..4435a445ae7e 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -11,8 +11,8 @@ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic_set(v, i) (((v)->counter) = i) +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) #include <linux/kernel.h> diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index daee37bd0999..db8ddabc6bd2 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -58,4 +58,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 811d61f6422d..55696c4100d4 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -48,7 +48,7 @@ static inline void atomic_set(atomic_t *v, int new) * * Assumes all word reads on our architecture are atomic. */ -#define atomic_read(v) ((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) /** * atomic_xchg - atomic diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 9de3ba12f6b9..502a91d8dbbd 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += trace_clock.h generic-y += vtime.h +generic-y += word-at-a-time.h diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index be4beeb77d57..8dfb5f6f6c35 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -21,11 +21,11 @@ #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic64_read(v) ACCESS_ONCE((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic64_read(v) READ_ONCE((v)->counter) -#define atomic_set(v,i) (((v)->counter) = (i)) -#define atomic64_set(v,i) (((v)->counter) = (i)) +#define atomic_set(v,i) WRITE_ONCE(((v)->counter), (i)) +#define atomic64_set(v,i) WRITE_ONCE(((v)->counter), (i)) #define ATOMIC_OP(op, c_op) \ static __inline__ int \ diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 36d2c1e3928b..07039d168f37 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -64,11 +64,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus, #define pci_legacy_read platform_pci_legacy_read #define pci_legacy_write platform_pci_legacy_write -struct iospace_resource { - struct list_head list; - struct resource res; -}; - struct pci_controller { struct acpi_device *companion; void *iommu; diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 99c96a5e6016..db73390568c8 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -11,7 +11,7 @@ -#define NR_syscalls 321 /* length of syscall table */ +#define NR_syscalls 322 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 98e94e19a5a0..9038726e7d26 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -334,5 +334,6 @@ #define __NR_execveat 1342 #define __NR_userfaultfd 1343 #define __NR_membarrier 1344 +#define __NR_kcmp 1345 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 37cc7a65cd3e..dcd97f84d065 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1770,5 +1770,6 @@ sys_call_table: data8 sys_execveat data8 sys_userfaultfd data8 sys_membarrier + data8 sys_kcmp // 1345 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 7cc3be9fa7c6..8f6ac2f8ae4c 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -115,33 +115,13 @@ struct pci_ops pci_root_ops = { .write = pci_write, }; -/* Called by ACPI when it finds a new root bus. */ - -static struct pci_controller *alloc_pci_controller(int seg) -{ - struct pci_controller *controller; - - controller = kzalloc(sizeof(*controller), GFP_KERNEL); - if (!controller) - return NULL; - - controller->segment = seg; - return controller; -} - struct pci_root_info { - struct acpi_device *bridge; - struct pci_controller *controller; - struct list_head resources; - struct resource *res; - resource_size_t *res_offset; - unsigned int res_num; + struct acpi_pci_root_info common; + struct pci_controller controller; struct list_head io_resources; - char *name; }; -static unsigned int -new_space (u64 phys_base, int sparse) +static unsigned int new_space(u64 phys_base, int sparse) { u64 mmio_base; int i; @@ -168,39 +148,36 @@ new_space (u64 phys_base, int sparse) return i; } -static u64 add_io_space(struct pci_root_info *info, - struct acpi_resource_address64 *addr) +static int add_io_space(struct device *dev, struct pci_root_info *info, + struct resource_entry *entry) { - struct iospace_resource *iospace; - struct resource *resource; + struct resource_entry *iospace; + struct resource *resource, *res = entry->res; char *name; unsigned long base, min, max, base_port; unsigned int sparse = 0, space_nr, len; - len = strlen(info->name) + 32; - iospace = kzalloc(sizeof(*iospace) + len, GFP_KERNEL); + len = strlen(info->common.name) + 32; + iospace = resource_list_create_entry(NULL, len); if (!iospace) { - dev_err(&info->bridge->dev, - "PCI: No memory for %s I/O port space\n", - info->name); - goto out; + dev_err(dev, "PCI: No memory for %s I/O port space\n", + info->common.name); + return -ENOMEM; } - name = (char *)(iospace + 1); - - min = addr->address.minimum; - max = min + addr->address.address_length - 1; - if (addr->info.io.translation_type == ACPI_SPARSE_TRANSLATION) + if (res->flags & IORESOURCE_IO_SPARSE) sparse = 1; - - space_nr = new_space(addr->address.translation_offset, sparse); + space_nr = new_space(entry->offset, sparse); if (space_nr == ~0) goto free_resource; + name = (char *)(iospace + 1); + min = res->start - entry->offset; + max = res->end - entry->offset; base = __pa(io_space[space_nr].mmio_base); base_port = IO_SPACE_BASE(space_nr); - snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->name, - base_port + min, base_port + max); + snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->common.name, + base_port + min, base_port + max); /* * The SDM guarantees the legacy 0-64K space is sparse, but if the @@ -210,270 +187,125 @@ static u64 add_io_space(struct pci_root_info *info, if (space_nr == 0) sparse = 1; - resource = &iospace->res; + resource = iospace->res; resource->name = name; resource->flags = IORESOURCE_MEM; resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min); resource->end = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max); if (insert_resource(&iomem_resource, resource)) { - dev_err(&info->bridge->dev, - "can't allocate host bridge io space resource %pR\n", - resource); + dev_err(dev, + "can't allocate host bridge io space resource %pR\n", + resource); goto free_resource; } - list_add_tail(&iospace->list, &info->io_resources); - return base_port; + entry->offset = base_port; + res->start = min + base_port; + res->end = max + base_port; + resource_list_add_tail(iospace, &info->io_resources); -free_resource: - kfree(iospace); -out: - return ~0; -} - -static acpi_status resource_to_window(struct acpi_resource *resource, - struct acpi_resource_address64 *addr) -{ - acpi_status status; + return 0; - /* - * We're only interested in _CRS descriptors that are - * - address space descriptors for memory or I/O space - * - non-zero size - */ - status = acpi_resource_to_address64(resource, addr); - if (ACPI_SUCCESS(status) && - (addr->resource_type == ACPI_MEMORY_RANGE || - addr->resource_type == ACPI_IO_RANGE) && - addr->address.address_length) - return AE_OK; - - return AE_ERROR; +free_resource: + resource_list_free_entry(iospace); + return -ENOSPC; } -static acpi_status count_window(struct acpi_resource *resource, void *data) +/* + * An IO port or MMIO resource assigned to a PCI host bridge may be + * consumed by the host bridge itself or available to its child + * bus/devices. The ACPI specification defines a bit (Producer/Consumer) + * to tell whether the resource is consumed by the host bridge itself, + * but firmware hasn't used that bit consistently, so we can't rely on it. + * + * On x86 and IA64 platforms, all IO port and MMIO resources are assumed + * to be available to child bus/devices except one special case: + * IO port [0xCF8-0xCFF] is consumed by the host bridge itself + * to access PCI configuration space. + * + * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. + */ +static bool resource_is_pcicfg_ioport(struct resource *res) { - unsigned int *windows = (unsigned int *) data; - struct acpi_resource_address64 addr; - acpi_status status; - - status = resource_to_window(resource, &addr); - if (ACPI_SUCCESS(status)) - (*windows)++; - - return AE_OK; + return (res->flags & IORESOURCE_IO) && + res->start == 0xCF8 && res->end == 0xCFF; } -static acpi_status add_window(struct acpi_resource *res, void *data) +static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci) { - struct pci_root_info *info = data; - struct resource *resource; - struct acpi_resource_address64 addr; - acpi_status status; - unsigned long flags, offset = 0; - struct resource *root; - - /* Return AE_OK for non-window resources to keep scanning for more */ - status = resource_to_window(res, &addr); - if (!ACPI_SUCCESS(status)) - return AE_OK; - - if (addr.resource_type == ACPI_MEMORY_RANGE) { - flags = IORESOURCE_MEM; - root = &iomem_resource; - offset = addr.address.translation_offset; - } else if (addr.resource_type == ACPI_IO_RANGE) { - flags = IORESOURCE_IO; - root = &ioport_resource; - offset = add_io_space(info, &addr); - if (offset == ~0) - return AE_OK; - } else - return AE_OK; - - resource = &info->res[info->res_num]; - resource->name = info->name; - resource->flags = flags; - resource->start = addr.address.minimum + offset; - resource->end = resource->start + addr.address.address_length - 1; - info->res_offset[info->res_num] = offset; - - if (insert_resource(root, resource)) { - dev_err(&info->bridge->dev, - "can't allocate host bridge window %pR\n", - resource); - } else { - if (offset) - dev_info(&info->bridge->dev, "host bridge window %pR " - "(PCI address [%#llx-%#llx])\n", - resource, - resource->start - offset, - resource->end - offset); - else - dev_info(&info->bridge->dev, - "host bridge window %pR\n", resource); + struct device *dev = &ci->bridge->dev; + struct pci_root_info *info; + struct resource *res; + struct resource_entry *entry, *tmp; + int status; + + status = acpi_pci_probe_root_resources(ci); + if (status > 0) { + info = container_of(ci, struct pci_root_info, common); + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { + res = entry->res; + if (res->flags & IORESOURCE_MEM) { + /* + * HP's firmware has a hack to work around a + * Windows bug. Ignore these tiny memory ranges. + */ + if (resource_size(res) <= 16) { + resource_list_del(entry); + insert_resource(&iomem_resource, + entry->res); + resource_list_add_tail(entry, + &info->io_resources); + } + } else if (res->flags & IORESOURCE_IO) { + if (resource_is_pcicfg_ioport(entry->res)) + resource_list_destroy_entry(entry); + else if (add_io_space(dev, info, entry)) + resource_list_destroy_entry(entry); + } + } } - /* HP's firmware has a hack to work around a Windows bug. - * Ignore these tiny memory ranges */ - if (!((resource->flags & IORESOURCE_MEM) && - (resource->end - resource->start < 16))) - pci_add_resource_offset(&info->resources, resource, - info->res_offset[info->res_num]); - - info->res_num++; - return AE_OK; -} -static void free_pci_root_info_res(struct pci_root_info *info) -{ - struct iospace_resource *iospace, *tmp; - - list_for_each_entry_safe(iospace, tmp, &info->io_resources, list) - kfree(iospace); - - kfree(info->name); - kfree(info->res); - info->res = NULL; - kfree(info->res_offset); - info->res_offset = NULL; - info->res_num = 0; - kfree(info->controller); - info->controller = NULL; + return status; } -static void __release_pci_root_info(struct pci_root_info *info) +static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci) { - int i; - struct resource *res; - struct iospace_resource *iospace; + struct pci_root_info *info; + struct resource_entry *entry, *tmp; - list_for_each_entry(iospace, &info->io_resources, list) - release_resource(&iospace->res); - - for (i = 0; i < info->res_num; i++) { - res = &info->res[i]; - - if (!res->parent) - continue; - - if (!(res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) - continue; - - release_resource(res); + info = container_of(ci, struct pci_root_info, common); + resource_list_for_each_entry_safe(entry, tmp, &info->io_resources) { + release_resource(entry->res); + resource_list_destroy_entry(entry); } - - free_pci_root_info_res(info); kfree(info); } -static void release_pci_root_info(struct pci_host_bridge *bridge) -{ - struct pci_root_info *info = bridge->release_data; - - __release_pci_root_info(info); -} - -static int -probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, - int busnum, int domain) -{ - char *name; - - name = kmalloc(16, GFP_KERNEL); - if (!name) - return -ENOMEM; - - sprintf(name, "PCI Bus %04x:%02x", domain, busnum); - info->bridge = device; - info->name = name; - - acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window, - &info->res_num); - if (info->res_num) { - info->res = - kzalloc_node(sizeof(*info->res) * info->res_num, - GFP_KERNEL, info->controller->node); - if (!info->res) { - kfree(name); - return -ENOMEM; - } - - info->res_offset = - kzalloc_node(sizeof(*info->res_offset) * info->res_num, - GFP_KERNEL, info->controller->node); - if (!info->res_offset) { - kfree(name); - kfree(info->res); - info->res = NULL; - return -ENOMEM; - } - - info->res_num = 0; - acpi_walk_resources(device->handle, METHOD_NAME__CRS, - add_window, info); - } else - kfree(name); - - return 0; -} +static struct acpi_pci_root_ops pci_acpi_root_ops = { + .pci_ops = &pci_root_ops, + .release_info = pci_acpi_root_release_info, + .prepare_resources = pci_acpi_root_prepare_resources, +}; struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) { struct acpi_device *device = root->device; - int domain = root->segment; - int bus = root->secondary.start; - struct pci_controller *controller; - struct pci_root_info *info = NULL; - int busnum = root->secondary.start; - struct pci_bus *pbus; - int ret; - - controller = alloc_pci_controller(domain); - if (!controller) - return NULL; - - controller->companion = device; - controller->node = acpi_get_node(device->handle); + struct pci_root_info *info; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { dev_err(&device->dev, - "pci_bus %04x:%02x: ignored (out of memory)\n", - domain, busnum); - kfree(controller); + "pci_bus %04x:%02x: ignored (out of memory)\n", + root->segment, (int)root->secondary.start); return NULL; } - info->controller = controller; + info->controller.segment = root->segment; + info->controller.companion = device; + info->controller.node = acpi_get_node(device->handle); INIT_LIST_HEAD(&info->io_resources); - INIT_LIST_HEAD(&info->resources); - - ret = probe_pci_root_info(info, device, busnum, domain); - if (ret) { - kfree(info->controller); - kfree(info); - return NULL; - } - /* insert busn resource at first */ - pci_add_resource(&info->resources, &root->secondary); - /* - * See arch/x86/pci/acpi.c. - * The desired pci bus might already be scanned in a quirk. We - * should handle the case here, but it appears that IA64 hasn't - * such quirk. So we just ignore the case now. - */ - pbus = pci_create_root_bus(NULL, bus, &pci_root_ops, controller, - &info->resources); - if (!pbus) { - pci_free_resource_list(&info->resources); - __release_pci_root_info(info); - return NULL; - } - - pci_set_host_bridge_release(to_pci_host_bridge(pbus->bridge), - release_pci_root_info, info); - pci_scan_child_bus(pbus); - return pbus; + return acpi_pci_root_create(root, &pci_acpi_root_ops, + &info->common, &info->controller); } int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index e0eb704ca1fa..fd104bd221ce 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -9,3 +9,4 @@ generic-y += module.h generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index 025e2a170493..ea35160d632b 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -28,7 +28,7 @@ * * Atomically reads the value of @v. */ -#define atomic_read(v) ACCESS_ONCE((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) /** * atomic_set - set atomic variable @@ -37,7 +37,7 @@ * * Atomically sets the value of @v to @i. */ -#define atomic_set(v,i) (((v)->counter) = (i)) +#define atomic_set(v,i) WRITE_ONCE(((v)->counter), (i)) #ifdef CONFIG_CHIP_M32700_TS1 #define __ATOMIC_CLOBBER , "r4" diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 0b6b40d37b95..5b4ec541ba7c 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -57,7 +58,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -67,10 +67,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -179,6 +181,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -206,6 +209,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -271,6 +275,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -370,6 +375,7 @@ CONFIG_ZORRO8390=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -537,6 +543,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index eeb3a8991fc4..6e5198e2c124 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -55,7 +56,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -65,10 +65,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -177,6 +179,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -204,6 +207,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -269,6 +273,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -344,6 +349,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -495,6 +501,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 3a7006654ce9..f75600b0ca23 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -55,7 +56,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -65,10 +65,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -177,6 +179,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -204,6 +207,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -269,6 +273,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -355,6 +360,7 @@ CONFIG_NE2000=y # CONFIG_NET_VENDOR_SEEQ is not set CONFIG_SMC91X=y # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -517,6 +523,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 0586b323a673..a42d91c389a6 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -53,7 +54,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -63,10 +63,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -175,6 +177,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -202,6 +205,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -267,6 +271,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -343,6 +348,7 @@ CONFIG_BVME6000_NET=y # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -488,6 +494,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index ad1dbce07aa4..77f4a11083e9 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -55,7 +56,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -65,10 +65,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -177,6 +179,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -204,6 +207,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -269,6 +273,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -345,6 +350,7 @@ CONFIG_HPLANCE=y # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -497,6 +503,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index b44acacaecf4..5a329f77329b 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -54,7 +55,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -64,10 +64,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -176,6 +178,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -203,6 +206,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -271,6 +275,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -364,6 +369,7 @@ CONFIG_MAC8390=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -519,6 +525,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 8afca3753db1..83c80d2030ec 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -64,7 +65,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -74,10 +74,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -186,6 +188,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -213,6 +216,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -281,6 +285,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -410,6 +415,7 @@ CONFIG_ZORRO8390=y # CONFIG_NET_VENDOR_SEEQ is not set CONFIG_SMC91X=y # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PLIP=m @@ -599,6 +605,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index ef00875994d9..6cb42c3bf5a2 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -52,7 +53,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -62,10 +62,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -174,6 +176,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -201,6 +204,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -266,6 +270,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -343,6 +348,7 @@ CONFIG_MVME147_NET=y # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -488,6 +494,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 387c2bd90ff1..c7508c30330c 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -53,7 +54,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -63,10 +63,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -175,6 +177,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -202,6 +205,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -267,6 +271,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -343,6 +348,7 @@ CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -488,6 +494,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 35355c1bc714..64b71664a303 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -53,7 +54,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -63,10 +63,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -175,6 +177,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -202,6 +205,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -267,6 +271,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -354,6 +359,7 @@ CONFIG_NE2000=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PLIP=m @@ -510,6 +516,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 8442d267b877..9a4cab78a2ea 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -50,7 +51,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -60,10 +60,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -172,6 +174,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -199,6 +202,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -264,6 +268,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -341,6 +346,7 @@ CONFIG_SUN3_82586=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -489,6 +495,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 0e1b542e1555..1a2eaac13dbd 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -50,7 +51,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -60,10 +60,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -172,6 +174,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -199,6 +202,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -264,6 +268,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -341,6 +346,7 @@ CONFIG_SUN3LANCE=y # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -489,6 +495,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 039fac120cc0..4858178260f9 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -17,8 +17,8 @@ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic_set(v, i) (((v)->counter) = i) +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) /* * The ColdFire parts cannot do some immediate to memory operations, diff --git a/arch/m68k/include/asm/linkage.h b/arch/m68k/include/asm/linkage.h index 5a822bb790f7..066e74f666ae 100644 --- a/arch/m68k/include/asm/linkage.h +++ b/arch/m68k/include/asm/linkage.h @@ -4,4 +4,34 @@ #define __ALIGN .align 4 #define __ALIGN_STR ".align 4" +/* + * Make sure the compiler doesn't do anything stupid with the + * arguments on the stack - they are owned by the *caller*, not + * the callee. This just fools gcc into not spilling into them, + * and keeps it from doing tailcall recursion and/or using the + * stack slots for temporaries, since they are live and "used" + * all the way to the end of the function. + */ +#define asmlinkage_protect(n, ret, args...) \ + __asmlinkage_protect##n(ret, ##args) +#define __asmlinkage_protect_n(ret, args...) \ + __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) +#define __asmlinkage_protect0(ret) \ + __asmlinkage_protect_n(ret) +#define __asmlinkage_protect1(ret, arg1) \ + __asmlinkage_protect_n(ret, "m" (arg1)) +#define __asmlinkage_protect2(ret, arg1, arg2) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) +#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) +#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4)) +#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5)) +#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5), "m" (arg6)) + #endif diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 244e0dbe45db..0793a7f17417 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -4,7 +4,7 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 356 +#define NR_syscalls 375 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h index 61fb6cb9d2ae..5e6fae6c275f 100644 --- a/arch/m68k/include/uapi/asm/unistd.h +++ b/arch/m68k/include/uapi/asm/unistd.h @@ -361,5 +361,24 @@ #define __NR_memfd_create 353 #define __NR_bpf 354 #define __NR_execveat 355 +#define __NR_socket 356 +#define __NR_socketpair 357 +#define __NR_bind 358 +#define __NR_connect 359 +#define __NR_listen 360 +#define __NR_accept4 361 +#define __NR_getsockopt 362 +#define __NR_setsockopt 363 +#define __NR_getsockname 364 +#define __NR_getpeername 365 +#define __NR_sendto 366 +#define __NR_sendmsg 367 +#define __NR_recvfrom 368 +#define __NR_recvmsg 369 +#define __NR_shutdown 370 +#define __NR_recvmmsg 371 +#define __NR_sendmmsg 372 +#define __NR_userfaultfd 373 +#define __NR_membarrier 374 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index a0ec4303f2c8..5dd0e80042f5 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -376,4 +376,22 @@ ENTRY(sys_call_table) .long sys_memfd_create .long sys_bpf .long sys_execveat /* 355 */ - + .long sys_socket + .long sys_socketpair + .long sys_bind + .long sys_connect + .long sys_listen /* 360 */ + .long sys_accept4 + .long sys_getsockopt + .long sys_setsockopt + .long sys_getsockname + .long sys_getpeername /* 365 */ + .long sys_sendto + .long sys_sendmsg + .long sys_recvfrom + .long sys_recvmsg + .long sys_shutdown /* 370 */ + .long sys_recvmmsg + .long sys_sendmmsg + .long sys_userfaultfd + .long sys_membarrier diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index cd38f29955c8..2290c0cae48b 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -29,6 +29,7 @@ int psc_present; volatile __u8 *psc; +EXPORT_SYMBOL_GPL(psc); /* * Debugging dump, used in various places to see what's going on. diff --git a/arch/m68k/sun3/idprom.c b/arch/m68k/sun3/idprom.c index c86ac37d1983..cfe9aa422343 100644 --- a/arch/m68k/sun3/idprom.c +++ b/arch/m68k/sun3/idprom.c @@ -125,8 +125,5 @@ void __init idprom_init(void) display_system_type(idprom->id_machtype); - printk("Ethernet address: %x:%x:%x:%x:%x:%x\n", - idprom->id_ethaddr[0], idprom->id_ethaddr[1], - idprom->id_ethaddr[2], idprom->id_ethaddr[3], - idprom->id_ethaddr[4], idprom->id_ethaddr[5]); + printk("Ethernet address: %pM\n", idprom->id_ethaddr); } diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index df31353fd200..29acb89daaaa 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -54,4 +54,5 @@ generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index 21c4c268b86c..a62581815624 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -3,7 +3,7 @@ #define ATOMIC_INIT(i) { (i) } -#define atomic_set(v, i) ((v)->counter = (i)) +#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) #include <linux/compiler.h> diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index f8efe380fe8b..0295d9b8d5bf 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h @@ -10,7 +10,7 @@ static inline int atomic_read(const atomic_t *v) { - return (v)->counter; + return READ_ONCE((v)->counter); } /* diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 2f222f355c4b..b0ae88c9fed9 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -10,3 +10,4 @@ generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += syscalls.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index ab5b488e1fde..89a2a9394927 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -194,7 +194,7 @@ void __init time_init(void) { of_clk_init(NULL); setup_cpuinfo_clk(); - clocksource_of_init(); + clocksource_probe(); } #ifdef CONFIG_DEBUG_FS diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c index 15ecb4831e12..eeb3953ed8ac 100644 --- a/arch/mips/ath79/irq.c +++ b/arch/mips/ath79/irq.c @@ -293,8 +293,26 @@ static int __init ath79_misc_intc_of_init( return 0; } -IRQCHIP_DECLARE(ath79_misc_intc, "qca,ar7100-misc-intc", - ath79_misc_intc_of_init); + +static int __init ar7100_misc_intc_of_init( + struct device_node *node, struct device_node *parent) +{ + ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; + return ath79_misc_intc_of_init(node, parent); +} + +IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc", + ar7100_misc_intc_of_init); + +static int __init ar7240_misc_intc_of_init( + struct device_node *node, struct device_node *parent) +{ + ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; + return ath79_misc_intc_of_init(node, parent); +} + +IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc", + ar7240_misc_intc_of_init); static int __init ar79_cpu_intc_of_init( struct device_node *node, struct device_node *parent) diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig index 51ed599cc894..e970fd9cf769 100644 --- a/arch/mips/bcm47xx/Kconfig +++ b/arch/mips/bcm47xx/Kconfig @@ -4,6 +4,7 @@ config BCM47XX_SSB bool "SSB Support for Broadcom BCM47XX" select SYS_HAS_CPU_BMIPS32_3300 select SSB + select SSB_HOST_SOC select SSB_DRIVER_MIPS select SSB_DRIVER_EXTIF select SSB_EMBEDDED diff --git a/arch/mips/bcm63xx/dev-spi.c b/arch/mips/bcm63xx/dev-spi.c index ad448e41e3bd..232385441e46 100644 --- a/arch/mips/bcm63xx/dev-spi.c +++ b/arch/mips/bcm63xx/dev-spi.c @@ -18,29 +18,6 @@ #include <bcm63xx_dev_spi.h> #include <bcm63xx_regs.h> -/* - * register offsets - */ -static const unsigned long bcm6348_regs_spi[] = { - __GEN_SPI_REGS_TABLE(6348) -}; - -static const unsigned long bcm6358_regs_spi[] = { - __GEN_SPI_REGS_TABLE(6358) -}; - -const unsigned long *bcm63xx_regs_spi; -EXPORT_SYMBOL(bcm63xx_regs_spi); - -static __init void bcm63xx_spi_regs_init(void) -{ - if (BCMCPU_IS_6338() || BCMCPU_IS_6348()) - bcm63xx_regs_spi = bcm6348_regs_spi; - if (BCMCPU_IS_3368() || BCMCPU_IS_6358() || - BCMCPU_IS_6362() || BCMCPU_IS_6368()) - bcm63xx_regs_spi = bcm6358_regs_spi; -} - static struct resource spi_resources[] = { { .start = -1, /* filled at runtime */ @@ -53,19 +30,10 @@ static struct resource spi_resources[] = { }, }; -static struct bcm63xx_spi_pdata spi_pdata = { - .bus_num = 0, - .num_chipselect = 8, -}; - static struct platform_device bcm63xx_spi_device = { - .name = "bcm63xx-spi", .id = -1, .num_resources = ARRAY_SIZE(spi_resources), .resource = spi_resources, - .dev = { - .platform_data = &spi_pdata, - }, }; int __init bcm63xx_spi_register(void) @@ -78,21 +46,15 @@ int __init bcm63xx_spi_register(void) spi_resources[1].start = bcm63xx_get_irq_number(IRQ_SPI); if (BCMCPU_IS_6338() || BCMCPU_IS_6348()) { + bcm63xx_spi_device.name = "bcm6348-spi", spi_resources[0].end += BCM_6348_RSET_SPI_SIZE - 1; - spi_pdata.fifo_size = SPI_6348_MSG_DATA_SIZE; - spi_pdata.msg_type_shift = SPI_6348_MSG_TYPE_SHIFT; - spi_pdata.msg_ctl_width = SPI_6348_MSG_CTL_WIDTH; } if (BCMCPU_IS_3368() || BCMCPU_IS_6358() || BCMCPU_IS_6362() || BCMCPU_IS_6368()) { + bcm63xx_spi_device.name = "bcm6358-spi", spi_resources[0].end += BCM_6358_RSET_SPI_SIZE - 1; - spi_pdata.fifo_size = SPI_6358_MSG_DATA_SIZE; - spi_pdata.msg_type_shift = SPI_6358_MSG_TYPE_SHIFT; - spi_pdata.msg_ctl_width = SPI_6358_MSG_CTL_WIDTH; } - bcm63xx_spi_regs_init(); - return platform_device_register(&bcm63xx_spi_device); } diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 0352bc8d56b3..4f9eb0576884 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1094,7 +1094,7 @@ static int octeon_irq_gpio_xlat(struct irq_domain *d, unsigned int pin; unsigned int trigger; - if (d->of_node != node) + if (irq_domain_get_of_node(d) != node) return -EINVAL; if (intsize < 2) @@ -2163,7 +2163,7 @@ static int octeon_irq_cib_map(struct irq_domain *d, if (hw >= host_data->max_bits) { pr_err("ERROR: %s mapping %u is to big!\n", - d->of_node->name, (unsigned)hw); + irq_domain_get_of_node(d)->name, (unsigned)hw); return -EINVAL; } diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 89a628455bc2..bd634259eab9 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -933,7 +933,7 @@ void __init plat_mem_setup(void) while ((boot_mem_map.nr_map < BOOT_MEM_MAP_MAX) && (total < MAX_MEMORY)) { memory = cvmx_bootmem_phy_alloc(mem_alloc_size, - __pa_symbol(&__init_end), -1, + __pa_symbol(&_end), -1, 0x100000, CVMX_BOOTMEM_FLAG_NO_LOCKING); if (memory >= 0) { diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig index 642b50946943..8b7429127a1d 100644 --- a/arch/mips/configs/pistachio_defconfig +++ b/arch/mips/configs/pistachio_defconfig @@ -257,7 +257,6 @@ CONFIG_MMC=y CONFIG_MMC_BLOCK_MINORS=16 CONFIG_MMC_TEST=m CONFIG_MMC_DW=y -CONFIG_MMC_DW_IDMAC=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_RTC_CLASS=y diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 40ec4ca3f946..c7fe4d01e79c 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -17,4 +17,5 @@ generic-y += segment.h generic-y += serial.h generic-y += trace_clock.h generic-y += user.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 4c42fd9af777..f82d3af07931 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -30,7 +30,7 @@ * * Atomically reads the value of @v. */ -#define atomic_read(v) ACCESS_ONCE((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) /* * atomic_set - set atomic variable @@ -39,7 +39,7 @@ * * Atomically sets the value of @v to @i. */ -#define atomic_set(v, i) ((v)->counter = (i)) +#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) #define ATOMIC_OP(op, c_op, asm_op) \ static __inline__ void atomic_##op(int i, atomic_t * v) \ @@ -315,14 +315,14 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) * @v: pointer of type atomic64_t * */ -#define atomic64_read(v) ACCESS_ONCE((v)->counter) +#define atomic64_read(v) READ_ONCE((v)->counter) /* * atomic64_set - set atomic variable * @v: pointer of type atomic64_t * @i: required value */ -#define atomic64_set(v, i) ((v)->counter = (i)) +#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) #define ATOMIC64_OP(op, c_op, asm_op) \ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 9801ac982655..fe67f12ac239 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -20,6 +20,9 @@ #ifndef cpu_has_tlb #define cpu_has_tlb (cpu_data[0].options & MIPS_CPU_TLB) #endif +#ifndef cpu_has_ftlb +#define cpu_has_ftlb (cpu_data[0].options & MIPS_CPU_FTLB) +#endif #ifndef cpu_has_tlbinv #define cpu_has_tlbinv (cpu_data[0].options & MIPS_CPU_TLBINV) #endif diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index cd89e9855775..82ad15f11049 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -385,6 +385,7 @@ enum cpu_type_enum { #define MIPS_CPU_CDMM 0x4000000000ull /* CPU has Common Device Memory Map */ #define MIPS_CPU_BP_GHIST 0x8000000000ull /* R12K+ Branch Prediction Global History */ #define MIPS_CPU_SP 0x10000000000ull /* Small (1KB) page support */ +#define MIPS_CPU_FTLB 0x20000000000ull /* CPU has Fixed-page-size TLB */ /* * CPU ASE encodings diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 9e777cd42b67..d10fd80dbb7e 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -256,6 +256,7 @@ static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long si */ #define ioremap_nocache(offset, size) \ __ioremap_mode((offset), (size), _CACHE_UNCACHED) +#define ioremap_uc ioremap_nocache /* * ioremap_cachable - map bus memory into CPU space diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 5a1a882e0a75..6ded8d347af9 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -847,5 +847,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h index b02891f9caaf..21d9607c80d7 100644 --- a/arch/mips/include/asm/maar.h +++ b/arch/mips/include/asm/maar.h @@ -66,6 +66,15 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower, } /** + * maar_init() - initialise MAARs + * + * Performs initialisation of MAARs for the current CPU, making use of the + * platforms implementation of platform_maar_init where necessary and + * duplicating the setup it provides on secondary CPUs. + */ +extern void maar_init(void); + +/** * struct maar_config - MAAR configuration data * @lower: The lowest address that the MAAR pair will affect. Must be * aligned to a 2^16 byte boundary. diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h index 25737655d141..dd299548860d 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h @@ -7,48 +7,4 @@ int __init bcm63xx_spi_register(void); -struct bcm63xx_spi_pdata { - unsigned int fifo_size; - unsigned int msg_type_shift; - unsigned int msg_ctl_width; - int bus_num; - int num_chipselect; -}; - -enum bcm63xx_regs_spi { - SPI_CMD, - SPI_INT_STATUS, - SPI_INT_MASK_ST, - SPI_INT_MASK, - SPI_ST, - SPI_CLK_CFG, - SPI_FILL_BYTE, - SPI_MSG_TAIL, - SPI_RX_TAIL, - SPI_MSG_CTL, - SPI_MSG_DATA, - SPI_RX_DATA, -}; - -#define __GEN_SPI_REGS_TABLE(__cpu) \ - [SPI_CMD] = SPI_## __cpu ##_CMD, \ - [SPI_INT_STATUS] = SPI_## __cpu ##_INT_STATUS, \ - [SPI_INT_MASK_ST] = SPI_## __cpu ##_INT_MASK_ST, \ - [SPI_INT_MASK] = SPI_## __cpu ##_INT_MASK, \ - [SPI_ST] = SPI_## __cpu ##_ST, \ - [SPI_CLK_CFG] = SPI_## __cpu ##_CLK_CFG, \ - [SPI_FILL_BYTE] = SPI_## __cpu ##_FILL_BYTE, \ - [SPI_MSG_TAIL] = SPI_## __cpu ##_MSG_TAIL, \ - [SPI_RX_TAIL] = SPI_## __cpu ##_RX_TAIL, \ - [SPI_MSG_CTL] = SPI_## __cpu ##_MSG_CTL, \ - [SPI_MSG_DATA] = SPI_## __cpu ##_MSG_DATA, \ - [SPI_RX_DATA] = SPI_## __cpu ##_RX_DATA, - -static inline unsigned long bcm63xx_spireg(enum bcm63xx_regs_spi reg) -{ - extern const unsigned long *bcm63xx_regs_spi; - - return bcm63xx_regs_spi[reg]; -} - #endif /* BCM63XX_DEV_SPI_H */ diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index d75b75e78ebb..1f1927ab4269 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -194,6 +194,7 @@ BUILD_CM_RW(reg3_mask, MIPS_CM_GCB_OFS + 0xc8) BUILD_CM_R_(gic_status, MIPS_CM_GCB_OFS + 0xd0) BUILD_CM_R_(cpc_status, MIPS_CM_GCB_OFS + 0xf0) BUILD_CM_RW(l2_config, MIPS_CM_GCB_OFS + 0x130) +BUILD_CM_RW(sys_config2, MIPS_CM_GCB_OFS + 0x150) /* Core Local & Core Other register accessor functions */ BUILD_CM_Cx_RW(reset_release, 0x00) @@ -316,6 +317,10 @@ BUILD_CM_Cx_R_(tcid_8_priority, 0x80) #define CM_GCR_L2_CONFIG_ASSOC_SHF 0 #define CM_GCR_L2_CONFIG_ASSOC_MSK (_ULCAST_(0xff) << 0) +/* GCR_SYS_CONFIG2 register fields */ +#define CM_GCR_SYS_CONFIG2_MAXVPW_SHF 0 +#define CM_GCR_SYS_CONFIG2_MAXVPW_MSK (_ULCAST_(0xf) << 0) + /* GCR_Cx_COHERENCE register fields */ #define CM_GCR_Cx_COHERENCE_COHDOMAINEN_SHF 0 #define CM_GCR_Cx_COHERENCE_COHDOMAINEN_MSK (_ULCAST_(0xff) << 0) @@ -405,4 +410,38 @@ static inline int mips_cm_revision(void) return read_gcr_rev(); } +/** + * mips_cm_max_vp_width() - return the width in bits of VP indices + * + * Return: the width, in bits, of VP indices in fields that combine core & VP + * indices. + */ +static inline unsigned int mips_cm_max_vp_width(void) +{ + extern int smp_num_siblings; + + if (mips_cm_revision() >= CM_REV_CM3) + return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK; + + return smp_num_siblings; +} + +/** + * mips_cm_vp_id() - calculate the hardware VP ID for a CPU + * @cpu: the CPU whose VP ID to calculate + * + * Hardware such as the GIC uses identifiers for VPs which may not match the + * CPU numbers used by Linux. This function calculates the hardware VP + * identifier corresponding to a given CPU. + * + * Return: the VP ID for the CPU. + */ +static inline unsigned int mips_cm_vp_id(unsigned int cpu) +{ + unsigned int core = cpu_data[cpu].core; + unsigned int vp = cpu_vpe_id(&cpu_data[cpu]); + + return (core * mips_cm_max_vp_width()) + vp; +} + #endif /* __MIPS_ASM_MIPS_CM_H__ */ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index d3cd8eac81e3..c64781cf649f 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -487,6 +487,8 @@ /* Bits specific to the MIPS32/64 PRA. */ #define MIPS_CONF_MT (_ULCAST_(7) << 7) +#define MIPS_CONF_MT_TLB (_ULCAST_(1) << 7) +#define MIPS_CONF_MT_FTLB (_ULCAST_(4) << 7) #define MIPS_CONF_AR (_ULCAST_(7) << 10) #define MIPS_CONF_AT (_ULCAST_(3) << 13) #define MIPS_CONF_M (_ULCAST_(1) << 31) diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index cfcb876cae6b..97c03f468924 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -61,6 +61,12 @@ */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +/* + * Flags for mlock + */ +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ #define MADV_NORMAL 0 /* no further special treatment */ #define MADV_RANDOM 1 /* expect random page references */ diff --git a/arch/mips/include/uapi/asm/swab.h b/arch/mips/include/uapi/asm/swab.h index c4ddc4f0d2dc..23cd9b118c9e 100644 --- a/arch/mips/include/uapi/asm/swab.h +++ b/arch/mips/include/uapi/asm/swab.h @@ -13,16 +13,15 @@ #define __SWAB_64_THRU_32__ -#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 2)) || \ - defined(_MIPS_ARCH_LOONGSON3A) +#if !defined(__mips16) && \ + ((defined(__mips_isa_rev) && (__mips_isa_rev >= 2)) || \ + defined(_MIPS_ARCH_LOONGSON3A)) -static inline __attribute__((nomips16)) __attribute_const__ - __u16 __arch_swab16(__u16 x) +static inline __attribute_const__ __u16 __arch_swab16(__u16 x) { __asm__( " .set push \n" " .set arch=mips32r2 \n" - " .set nomips16 \n" " wsbh %0, %1 \n" " .set pop \n" : "=r" (x) @@ -32,13 +31,11 @@ static inline __attribute__((nomips16)) __attribute_const__ } #define __arch_swab16 __arch_swab16 -static inline __attribute__((nomips16)) __attribute_const__ - __u32 __arch_swab32(__u32 x) +static inline __attribute_const__ __u32 __arch_swab32(__u32 x) { __asm__( " .set push \n" " .set arch=mips32r2 \n" - " .set nomips16 \n" " wsbh %0, %1 \n" " rotr %0, %0, 16 \n" " .set pop \n" @@ -54,13 +51,11 @@ static inline __attribute__((nomips16)) __attribute_const__ * 64-bit kernel on r2 CPUs. */ #ifdef __mips64 -static inline __attribute__((nomips16)) __attribute_const__ - __u64 __arch_swab64(__u64 x) +static inline __attribute_const__ __u64 __arch_swab64(__u64 x) { __asm__( " .set push \n" " .set arch=mips64r2 \n" - " .set nomips16 \n" " dsbh %0, %1 \n" " dshd %0, %0 \n" " .set pop \n" @@ -71,5 +66,5 @@ static inline __attribute__((nomips16)) __attribute_const__ } #define __arch_swab64 __arch_swab64 #endif /* __mips64 */ -#endif /* MIPS R2 or newer or Loongson 3A */ +#endif /* (not __mips16) and (MIPS R2 or newer or Loongson 3A) */ #endif /* _ASM_SWAB_H */ diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index c03088f9f514..cfabadb135d9 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -377,16 +377,18 @@ #define __NR_memfd_create (__NR_Linux + 354) #define __NR_bpf (__NR_Linux + 355) #define __NR_execveat (__NR_Linux + 356) +#define __NR_userfaultfd (__NR_Linux + 357) +#define __NR_membarrier (__NR_Linux + 358) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 356 +#define __NR_Linux_syscalls 358 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 356 +#define __NR_O32_Linux_syscalls 358 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -711,16 +713,18 @@ #define __NR_memfd_create (__NR_Linux + 314) #define __NR_bpf (__NR_Linux + 315) #define __NR_execveat (__NR_Linux + 316) +#define __NR_userfaultfd (__NR_Linux + 317) +#define __NR_membarrier (__NR_Linux + 318) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 316 +#define __NR_Linux_syscalls 318 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 316 +#define __NR_64_Linux_syscalls 318 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1049,15 +1053,17 @@ #define __NR_memfd_create (__NR_Linux + 318) #define __NR_bpf (__NR_Linux + 319) #define __NR_execveat (__NR_Linux + 320) +#define __NR_userfaultfd (__NR_Linux + 321) +#define __NR_membarrier (__NR_Linux + 322) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 320 +#define __NR_Linux_syscalls 322 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 320 +#define __NR_N32_Linux_syscalls 322 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c index 4e62bf85d0b0..459cb017306c 100644 --- a/arch/mips/jz4740/board-qi_lb60.c +++ b/arch/mips/jz4740/board-qi_lb60.c @@ -26,6 +26,7 @@ #include <linux/power/jz4740-battery.h> #include <linux/power/gpio-charger.h> +#include <asm/mach-jz4740/gpio.h> #include <asm/mach-jz4740/jz4740_fb.h> #include <asm/mach-jz4740/jz4740_mmc.h> #include <asm/mach-jz4740/jz4740_nand.h> diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c index a74e181058b0..8c6d76c9b2d6 100644 --- a/arch/mips/jz4740/gpio.c +++ b/arch/mips/jz4740/gpio.c @@ -28,6 +28,7 @@ #include <linux/seq_file.h> #include <asm/mach-jz4740/base.h> +#include <asm/mach-jz4740/gpio.h> #define JZ4740_GPIO_BASE_A (32*0) #define JZ4740_GPIO_BASE_B (32*1) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 9f71c06aebf6..209ded16806b 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -39,6 +39,7 @@ mfc0 \dest, CP0_CONFIG, 3 andi \dest, \dest, MIPS_CONF3_MT beqz \dest, \nomt + nop .endm .section .text.cps-vec @@ -223,10 +224,9 @@ LEAF(excep_ejtag) END(excep_ejtag) LEAF(mips_cps_core_init) -#ifdef CONFIG_MIPS_MT +#ifdef CONFIG_MIPS_MT_SMP /* Check that the core implements the MT ASE */ has_mt t0, 3f - nop .set push .set mips64r2 @@ -310,8 +310,9 @@ LEAF(mips_cps_boot_vpes) PTR_ADDU t0, t0, t1 /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ + li t9, 0 +#ifdef CONFIG_MIPS_MT_SMP has_mt ta2, 1f - li t9, 0 /* Find the number of VPEs present in the core */ mfc0 t1, CP0_MVPCONF0 @@ -330,6 +331,7 @@ LEAF(mips_cps_boot_vpes) /* Retrieve the VPE ID from EBase.CPUNum */ mfc0 t9, $15, 1 and t9, t9, t1 +#endif 1: /* Calculate a pointer to this VPEs struct vpe_boot_config */ li t1, VPEBOOTCFG_SIZE @@ -337,7 +339,7 @@ LEAF(mips_cps_boot_vpes) PTR_L ta3, COREBOOTCFG_VPECONFIG(t0) PTR_ADDU v0, v0, ta3 -#ifdef CONFIG_MIPS_MT +#ifdef CONFIG_MIPS_MT_SMP /* If the core doesn't support MT then return */ bnez ta2, 1f @@ -451,7 +453,7 @@ LEAF(mips_cps_boot_vpes) 2: .set pop -#endif /* CONFIG_MIPS_MT */ +#endif /* CONFIG_MIPS_MT_SMP */ /* Return */ jr ra diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 571a8e6ea5bd..09a51d091941 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -410,16 +410,18 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) static inline unsigned int decode_config0(struct cpuinfo_mips *c) { unsigned int config0; - int isa; + int isa, mt; config0 = read_c0_config(); /* * Look for Standard TLB or Dual VTLB and FTLB */ - if ((((config0 & MIPS_CONF_MT) >> 7) == 1) || - (((config0 & MIPS_CONF_MT) >> 7) == 4)) + mt = config0 & MIPS_CONF_MT; + if (mt == MIPS_CONF_MT_TLB) c->options |= MIPS_CPU_TLB; + else if (mt == MIPS_CONF_MT_FTLB) + c->options |= MIPS_CPU_TLB | MIPS_CPU_FTLB; isa = (config0 & MIPS_CONF_AT) >> 13; switch (isa) { @@ -559,15 +561,18 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) if (cpu_has_tlb) { if (((config4 & MIPS_CONF4_IE) >> 29) == 2) c->options |= MIPS_CPU_TLBINV; + /* - * This is a bit ugly. R6 has dropped that field from - * config4 and the only valid configuration is VTLB+FTLB so - * set a good value for mmuextdef for that case. + * R6 has dropped the MMUExtDef field from config4. + * On R6 the fields always describe the FTLB, and only if it is + * present according to Config.MT. */ - if (cpu_has_mips_r6) + if (!cpu_has_mips_r6) + mmuextdef = config4 & MIPS_CONF4_MMUEXTDEF; + else if (cpu_has_ftlb) mmuextdef = MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT; else - mmuextdef = config4 & MIPS_CONF4_MMUEXTDEF; + mmuextdef = 0; switch (mmuextdef) { case MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT: diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S index 423ae83af1fb..3375745b9198 100644 --- a/arch/mips/kernel/octeon_switch.S +++ b/arch/mips/kernel/octeon_switch.S @@ -18,7 +18,7 @@ .set pop /* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti, int usedfpu) + * struct thread_info *next_ti) */ .align 7 LEAF(resume) @@ -28,30 +28,6 @@ cpu_save_nonscratch a0 LONG_S ra, THREAD_REG31(a0) - /* - * check if we need to save FPU registers - */ - .set push - .set noreorder - beqz a3, 1f - PTR_L t3, TASK_THREAD_INFO(a0) - .set pop - - /* - * clear saved user stack CU1 bit - */ - LONG_L t0, ST_OFF(t3) - li t1, ~ST0_CU1 - and t0, t0, t1 - LONG_S t0, ST_OFF(t3) - - .set push - .set arch=mips64r2 - fpu_save_double a0 t0 t1 # c0_status passed in t0 - # clobbers t1 - .set pop -1: - #if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0 /* Check if we need to store CVMSEG state */ dmfc0 t0, $11,7 /* CvmMemCtl */ diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S index 5087a4b72e6b..ac27ef7d4d0e 100644 --- a/arch/mips/kernel/r2300_switch.S +++ b/arch/mips/kernel/r2300_switch.S @@ -31,18 +31,8 @@ #define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS) /* - * FPU context is saved iff the process has used it's FPU in the current - * time slice as indicated by TIF_USEDFPU. In any case, the CU1 bit for user - * space STATUS register should be 0, so that a process *always* starts its - * userland with FPU disabled after each context switch. - * - * FPU will be enabled as soon as the process accesses FPU again, through - * do_cpu() trap. - */ - -/* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti, int usedfpu) + * struct thread_info *next_ti) */ LEAF(resume) mfc0 t1, CP0_STATUS @@ -50,22 +40,6 @@ LEAF(resume) cpu_save_nonscratch a0 sw ra, THREAD_REG31(a0) - beqz a3, 1f - - PTR_L t3, TASK_THREAD_INFO(a0) - - /* - * clear saved user stack CU1 bit - */ - lw t0, ST_OFF(t3) - li t1, ~ST0_CU1 - and t0, t0, t1 - sw t0, ST_OFF(t3) - - fpu_save_single a0, t0 # clobbers t0 - -1: - #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) PTR_LA t8, __stack_chk_guard LONG_L t9, TASK_STACK_CANARY(a1) diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 4cc13508d967..65a74e4f0f45 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -36,16 +36,8 @@ NESTED(handle_sys, PT_SIZE, sp) lw t1, PT_EPC(sp) # skip syscall on return subu v0, v0, __NR_O32_Linux # check syscall number - sltiu t0, v0, __NR_O32_Linux_syscalls + 1 addiu t1, 4 # skip to next instruction sw t1, PT_EPC(sp) - beqz t0, illegal_syscall - - sll t0, v0, 2 - la t1, sys_call_table - addu t1, t0 - lw t2, (t1) # syscall routine - beqz t2, illegal_syscall sw a3, PT_R26(sp) # save a3 for syscall restarting @@ -96,6 +88,16 @@ loads_done: li t1, _TIF_WORK_SYSCALL_ENTRY and t0, t1 bnez t0, syscall_trace_entry # -> yes +syscall_common: + sltiu t0, v0, __NR_O32_Linux_syscalls + 1 + beqz t0, illegal_syscall + + sll t0, v0, 2 + la t1, sys_call_table + addu t1, t0 + lw t2, (t1) # syscall routine + + beqz t2, illegal_syscall jalr t2 # Do The Real Thing (TM) @@ -116,7 +118,7 @@ o32_syscall_exit: syscall_trace_entry: SAVE_STATIC - move s0, t2 + move s0, v0 move a0, sp /* @@ -129,27 +131,18 @@ syscall_trace_entry: 1: jal syscall_trace_enter - bltz v0, 2f # seccomp failed? Skip syscall + bltz v0, 1f # seccomp failed? Skip syscall + + move v0, s0 # restore syscall - move t0, s0 RESTORE_STATIC lw a0, PT_R4(sp) # Restore argument registers lw a1, PT_R5(sp) lw a2, PT_R6(sp) lw a3, PT_R7(sp) - jalr t0 - - li t0, -EMAXERRNO - 1 # error? - sltu t0, t0, v0 - sw t0, PT_R7(sp) # set error flag - beqz t0, 1f - - lw t1, PT_R2(sp) # syscall number - negu v0 # error - sw t1, PT_R0(sp) # save it for syscall restarting -1: sw v0, PT_R2(sp) # result + j syscall_common -2: j syscall_exit +1: j syscall_exit /* ------------------------------------------------------------------------ */ @@ -599,3 +592,5 @@ EXPORT(sys_call_table) PTR sys_memfd_create PTR sys_bpf /* 4355 */ PTR sys_execveat + PTR sys_userfaultfd + PTR sys_membarrier diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index a6f6b762c47a..e732981cf99f 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -39,18 +39,11 @@ NESTED(handle_sys64, PT_SIZE, sp) .set at #endif - dsubu t0, v0, __NR_64_Linux # check syscall number - sltiu t0, t0, __NR_64_Linux_syscalls + 1 #if !defined(CONFIG_MIPS32_O32) && !defined(CONFIG_MIPS32_N32) ld t1, PT_EPC(sp) # skip syscall on return daddiu t1, 4 # skip to next instruction sd t1, PT_EPC(sp) #endif - beqz t0, illegal_syscall - - dsll t0, v0, 3 # offset into table - ld t2, (sys_call_table - (__NR_64_Linux * 8))(t0) - # syscall routine sd a3, PT_R26(sp) # save a3 for syscall restarting @@ -59,6 +52,17 @@ NESTED(handle_sys64, PT_SIZE, sp) and t0, t1, t0 bnez t0, syscall_trace_entry +syscall_common: + dsubu t2, v0, __NR_64_Linux + sltiu t0, t2, __NR_64_Linux_syscalls + 1 + beqz t0, illegal_syscall + + dsll t0, t2, 3 # offset into table + dla t2, sys_call_table + daddu t0, t2, t0 + ld t2, (t0) # syscall routine + beqz t2, illegal_syscall + jalr t2 # Do The Real Thing (TM) li t0, -EMAXERRNO - 1 # error? @@ -78,14 +82,14 @@ n64_syscall_exit: syscall_trace_entry: SAVE_STATIC - move s0, t2 + move s0, v0 move a0, sp move a1, v0 jal syscall_trace_enter - bltz v0, 2f # seccomp failed? Skip syscall + bltz v0, 1f # seccomp failed? Skip syscall - move t0, s0 + move v0, s0 RESTORE_STATIC ld a0, PT_R4(sp) # Restore argument registers ld a1, PT_R5(sp) @@ -93,19 +97,9 @@ syscall_trace_entry: ld a3, PT_R7(sp) ld a4, PT_R8(sp) ld a5, PT_R9(sp) - jalr t0 - - li t0, -EMAXERRNO - 1 # error? - sltu t0, t0, v0 - sd t0, PT_R7(sp) # set error flag - beqz t0, 1f - - ld t1, PT_R2(sp) # syscall number - dnegu v0 # error - sd t1, PT_R0(sp) # save it for syscall restarting -1: sd v0, PT_R2(sp) # result + j syscall_common -2: j syscall_exit +1: j syscall_exit illegal_syscall: /* This also isn't a 64-bit syscall, throw an error. */ @@ -436,4 +430,6 @@ EXPORT(sys_call_table) PTR sys_memfd_create PTR sys_bpf /* 5315 */ PTR sys_execveat + PTR sys_userfaultfd + PTR sys_membarrier .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 4b2010654c46..c79484397584 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -52,6 +52,7 @@ NESTED(handle_sysn32, PT_SIZE, sp) and t0, t1, t0 bnez t0, n32_syscall_trace_entry +syscall_common: jalr t2 # Do The Real Thing (TM) li t0, -EMAXERRNO - 1 # error? @@ -75,9 +76,9 @@ n32_syscall_trace_entry: move a1, v0 jal syscall_trace_enter - bltz v0, 2f # seccomp failed? Skip syscall + bltz v0, 1f # seccomp failed? Skip syscall - move t0, s0 + move t2, s0 RESTORE_STATIC ld a0, PT_R4(sp) # Restore argument registers ld a1, PT_R5(sp) @@ -85,19 +86,9 @@ n32_syscall_trace_entry: ld a3, PT_R7(sp) ld a4, PT_R8(sp) ld a5, PT_R9(sp) - jalr t0 + j syscall_common - li t0, -EMAXERRNO - 1 # error? - sltu t0, t0, v0 - sd t0, PT_R7(sp) # set error flag - beqz t0, 1f - - ld t1, PT_R2(sp) # syscall number - dnegu v0 # error - sd t1, PT_R0(sp) # save it for syscall restarting -1: sd v0, PT_R2(sp) # result - -2: j syscall_exit +1: j syscall_exit not_n32_scall: /* This is not an n32 compatibility syscall, pass it on to @@ -429,4 +420,6 @@ EXPORT(sysn32_call_table) PTR sys_memfd_create PTR sys_bpf PTR compat_sys_execveat /* 6320 */ + PTR sys_userfaultfd + PTR sys_membarrier .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index f543ff4feef9..6369cfd390c6 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -87,6 +87,7 @@ loads_done: and t0, t1, t0 bnez t0, trace_a_syscall +syscall_common: jalr t2 # Do The Real Thing (TM) li t0, -EMAXERRNO - 1 # error? @@ -130,9 +131,9 @@ trace_a_syscall: 1: jal syscall_trace_enter - bltz v0, 2f # seccomp failed? Skip syscall + bltz v0, 1f # seccomp failed? Skip syscall - move t0, s0 + move t2, s0 RESTORE_STATIC ld a0, PT_R4(sp) # Restore argument registers ld a1, PT_R5(sp) @@ -142,19 +143,9 @@ trace_a_syscall: ld a5, PT_R9(sp) ld a6, PT_R10(sp) ld a7, PT_R11(sp) # For indirect syscalls - jalr t0 + j syscall_common - li t0, -EMAXERRNO - 1 # error? - sltu t0, t0, v0 - sd t0, PT_R7(sp) # set error flag - beqz t0, 1f - - ld t1, PT_R2(sp) # syscall number - dnegu v0 # error - sd t1, PT_R0(sp) # save it for syscall restarting -1: sd v0, PT_R2(sp) # result - -2: j syscall_exit +1: j syscall_exit /* ------------------------------------------------------------------------ */ @@ -584,4 +575,6 @@ EXPORT(sys32_call_table) PTR sys_memfd_create PTR sys_bpf /* 4355 */ PTR compat_sys_execveat + PTR sys_userfaultfd + PTR sys_membarrier .size sys32_call_table,.-sys32_call_table diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 35b8316002f8..479515109e5b 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -338,7 +338,7 @@ static void __init bootmem_init(void) if (end <= reserved_end) continue; #ifdef CONFIG_BLK_DEV_INITRD - /* mapstart should be after initrd_end */ + /* Skip zones before initrd and initrd itself */ if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end))) continue; #endif @@ -371,6 +371,14 @@ static void __init bootmem_init(void) max_low_pfn = PFN_DOWN(HIGHMEM_START); } +#ifdef CONFIG_BLK_DEV_INITRD + /* + * mapstart should be after initrd_end + */ + if (initrd_end) + mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end))); +#endif + /* * Initialize the boot-time allocator with low memory only. */ diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index a31896c33716..bd4385a8e6e8 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -42,6 +42,7 @@ #include <asm/mmu_context.h> #include <asm/time.h> #include <asm/setup.h> +#include <asm/maar.h> cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ @@ -157,6 +158,7 @@ asmlinkage void start_secondary(void) mips_clockevent_init(); mp_ops->init_secondary(); cpu_report(); + maar_init(); /* * XXX parity protection should be folded in here when it's converted diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c index f6c44dd332e2..d6d07ad56180 100644 --- a/arch/mips/loongson64/common/env.c +++ b/arch/mips/loongson64/common/env.c @@ -64,6 +64,9 @@ void __init prom_init_env(void) } if (memsize == 0) memsize = 256; + + loongson_sysconf.nr_uarts = 1; + pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); #else struct boot_params *boot_p; diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index a914dc1cb6d1..d8117be729a2 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -100,7 +100,7 @@ static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) else #endif #if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) - if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + if (dev->coherent_dma_mask < DMA_BIT_MASK(sizeof(phys_addr_t) * 8)) dma_flag = __GFP_DMA; else #endif diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 66d0f49c5bec..8770e619185e 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -44,6 +44,7 @@ #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/fixmap.h> +#include <asm/maar.h> /* * We have up to 8 empty zeroed pages so we can map one of the right colour @@ -252,6 +253,119 @@ void __init fixrange_init(unsigned long start, unsigned long end, #endif } +unsigned __weak platform_maar_init(unsigned num_pairs) +{ + struct maar_config cfg[BOOT_MEM_MAP_MAX]; + unsigned i, num_configured, num_cfg = 0; + phys_addr_t skip; + + for (i = 0; i < boot_mem_map.nr_map; i++) { + switch (boot_mem_map.map[i].type) { + case BOOT_MEM_RAM: + case BOOT_MEM_INIT_RAM: + break; + default: + continue; + } + + skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff); + + cfg[num_cfg].lower = boot_mem_map.map[i].addr; + cfg[num_cfg].lower += skip; + + cfg[num_cfg].upper = cfg[num_cfg].lower; + cfg[num_cfg].upper += boot_mem_map.map[i].size - 1; + cfg[num_cfg].upper -= skip; + + cfg[num_cfg].attrs = MIPS_MAAR_S; + num_cfg++; + } + + num_configured = maar_config(cfg, num_cfg, num_pairs); + if (num_configured < num_cfg) + pr_warn("Not enough MAAR pairs (%u) for all bootmem regions (%u)\n", + num_pairs, num_cfg); + + return num_configured; +} + +void maar_init(void) +{ + unsigned num_maars, used, i; + phys_addr_t lower, upper, attr; + static struct { + struct maar_config cfgs[3]; + unsigned used; + } recorded = { { { 0 } }, 0 }; + + if (!cpu_has_maar) + return; + + /* Detect the number of MAARs */ + write_c0_maari(~0); + back_to_back_c0_hazard(); + num_maars = read_c0_maari() + 1; + + /* MAARs should be in pairs */ + WARN_ON(num_maars % 2); + + /* Set MAARs using values we recorded already */ + if (recorded.used) { + used = maar_config(recorded.cfgs, recorded.used, num_maars / 2); + BUG_ON(used != recorded.used); + } else { + /* Configure the required MAARs */ + used = platform_maar_init(num_maars / 2); + } + + /* Disable any further MAARs */ + for (i = (used * 2); i < num_maars; i++) { + write_c0_maari(i); + back_to_back_c0_hazard(); + write_c0_maar(0); + back_to_back_c0_hazard(); + } + + if (recorded.used) + return; + + pr_info("MAAR configuration:\n"); + for (i = 0; i < num_maars; i += 2) { + write_c0_maari(i); + back_to_back_c0_hazard(); + upper = read_c0_maar(); + + write_c0_maari(i + 1); + back_to_back_c0_hazard(); + lower = read_c0_maar(); + + attr = lower & upper; + lower = (lower & MIPS_MAAR_ADDR) << 4; + upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff; + + pr_info(" [%d]: ", i / 2); + if (!(attr & MIPS_MAAR_V)) { + pr_cont("disabled\n"); + continue; + } + + pr_cont("%pa-%pa", &lower, &upper); + + if (attr & MIPS_MAAR_S) + pr_cont(" speculate"); + + pr_cont("\n"); + + /* Record the setup for use on secondary CPUs */ + if (used <= ARRAY_SIZE(recorded.cfgs)) { + recorded.cfgs[recorded.used].lower = lower; + recorded.cfgs[recorded.used].upper = upper; + recorded.cfgs[recorded.used].attrs = attr; + recorded.used++; + } + } +} + #ifndef CONFIG_NEED_MULTIPLE_NODES int page_is_ram(unsigned long pagenr) { @@ -334,69 +448,6 @@ static inline void mem_init_free_highmem(void) #endif } -unsigned __weak platform_maar_init(unsigned num_pairs) -{ - struct maar_config cfg[BOOT_MEM_MAP_MAX]; - unsigned i, num_configured, num_cfg = 0; - phys_addr_t skip; - - for (i = 0; i < boot_mem_map.nr_map; i++) { - switch (boot_mem_map.map[i].type) { - case BOOT_MEM_RAM: - case BOOT_MEM_INIT_RAM: - break; - default: - continue; - } - - skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff); - - cfg[num_cfg].lower = boot_mem_map.map[i].addr; - cfg[num_cfg].lower += skip; - - cfg[num_cfg].upper = cfg[num_cfg].lower; - cfg[num_cfg].upper += boot_mem_map.map[i].size - 1; - cfg[num_cfg].upper -= skip; - - cfg[num_cfg].attrs = MIPS_MAAR_S; - num_cfg++; - } - - num_configured = maar_config(cfg, num_cfg, num_pairs); - if (num_configured < num_cfg) - pr_warn("Not enough MAAR pairs (%u) for all bootmem regions (%u)\n", - num_pairs, num_cfg); - - return num_configured; -} - -static void maar_init(void) -{ - unsigned num_maars, used, i; - - if (!cpu_has_maar) - return; - - /* Detect the number of MAARs */ - write_c0_maari(~0); - back_to_back_c0_hazard(); - num_maars = read_c0_maari() + 1; - - /* MAARs should be in pairs */ - WARN_ON(num_maars % 2); - - /* Configure the required MAARs */ - used = platform_maar_init(num_maars / 2); - - /* Disable any further MAARs */ - for (i = (used * 2); i < num_maars; i++) { - write_c0_maari(i); - back_to_back_c0_hazard(); - write_c0_maar(0); - back_to_back_c0_hazard(); - } -} - void __init mem_init(void) { #ifdef CONFIG_HIGHMEM diff --git a/arch/mips/mti-sead3/Makefile b/arch/mips/mti-sead3/Makefile index 2e52cbd20ceb..7a584e0bf933 100644 --- a/arch/mips/mti-sead3/Makefile +++ b/arch/mips/mti-sead3/Makefile @@ -12,6 +12,4 @@ obj-y := sead3-lcd.o sead3-display.o sead3-init.o \ sead3-int.o sead3-platform.o sead3-reset.o \ sead3-setup.o sead3-time.o -obj-y += leds-sead3.o - obj-$(CONFIG_EARLY_PRINTK) += sead3-console.o diff --git a/arch/mips/mti-sead3/leds-sead3.c b/arch/mips/mti-sead3/leds-sead3.c deleted file mode 100644 index c938ceeb8848..000000000000 --- a/arch/mips/mti-sead3/leds-sead3.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. - * Copyright (C) 2015 Imagination Technologies, Inc. - */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/platform_device.h> -#include <linux/leds.h> -#include <linux/err.h> -#include <linux/io.h> - -#include <asm/mips-boards/sead3-addr.h> - -static void sead3_pled_set(struct led_classdev *led_cdev, - enum led_brightness value) -{ - writel(value, (void __iomem *)SEAD3_CPLD_P_LED); -} - -static void sead3_fled_set(struct led_classdev *led_cdev, - enum led_brightness value) -{ - writel(value, (void __iomem *)SEAD3_CPLD_F_LED); -} - -static struct led_classdev sead3_pled = { - .name = "sead3::pled", - .brightness_set = sead3_pled_set, - .flags = LED_CORE_SUSPENDRESUME, -}; - -static struct led_classdev sead3_fled = { - .name = "sead3::fled", - .brightness_set = sead3_fled_set, - .flags = LED_CORE_SUSPENDRESUME, -}; - -static int sead3_led_probe(struct platform_device *pdev) -{ - int ret; - - ret = led_classdev_register(&pdev->dev, &sead3_pled); - if (ret < 0) - return ret; - - ret = led_classdev_register(&pdev->dev, &sead3_fled); - if (ret < 0) - led_classdev_unregister(&sead3_pled); - - return ret; -} - -static int sead3_led_remove(struct platform_device *pdev) -{ - led_classdev_unregister(&sead3_pled); - led_classdev_unregister(&sead3_fled); - return 0; -} - -static struct platform_driver sead3_led_driver = { - .probe = sead3_led_probe, - .remove = sead3_led_remove, - .driver = { - .name = "sead3-led", - }, -}; - -module_platform_driver(sead3_led_driver); - -MODULE_AUTHOR("Kristian Kielhofner <kris@krisk.org>"); -MODULE_DESCRIPTION("SEAD3 LED driver"); -MODULE_LICENSE("GPL"); diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 0c4a133f6216..77cb27309db2 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1251,7 +1251,7 @@ void bpf_jit_compile(struct bpf_prog *fp) bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); fp->bpf_func = (void *)ctx.target; - fp->jited = true; + fp->jited = 1; out: kfree(ctx.offsets); diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S index e92726099be0..5d2e0c8d29c0 100644 --- a/arch/mips/net/bpf_jit_asm.S +++ b/arch/mips/net/bpf_jit_asm.S @@ -57,15 +57,28 @@ LEAF(sk_load_word) is_offset_negative(word) - .globl sk_load_word_positive -sk_load_word_positive: +FEXPORT(sk_load_word_positive) is_offset_in_header(4, word) /* Offset within header boundaries */ PTR_ADDU t1, $r_skb_data, offset + .set reorder lw $r_A, 0(t1) + .set noreorder #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_A rotr $r_A, t0, 16 +# else + sll t0, $r_A, 24 + srl t1, $r_A, 24 + srl t2, $r_A, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_A, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + or $r_A, t0, t1 +# endif #endif jr $r_ra move $r_ret, zero @@ -73,15 +86,24 @@ sk_load_word_positive: LEAF(sk_load_half) is_offset_negative(half) - .globl sk_load_half_positive -sk_load_half_positive: +FEXPORT(sk_load_half_positive) is_offset_in_header(2, half) /* Offset within header boundaries */ PTR_ADDU t1, $r_skb_data, offset + .set reorder lh $r_A, 0(t1) + .set noreorder #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_A seh $r_A, t0 +# else + sll t0, $r_A, 24 + andi t1, $r_A, 0xff00 + sra t0, t0, 16 + srl t1, t1, 8 + or $r_A, t0, t1 +# endif #endif jr $r_ra move $r_ret, zero @@ -89,8 +111,7 @@ sk_load_half_positive: LEAF(sk_load_byte) is_offset_negative(byte) - .globl sk_load_byte_positive -sk_load_byte_positive: +FEXPORT(sk_load_byte_positive) is_offset_in_header(1, byte) /* Offset within header boundaries */ PTR_ADDU t1, $r_skb_data, offset @@ -148,23 +169,47 @@ sk_load_byte_positive: NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) bpf_slow_path_common(4) #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_s0 jr $r_ra rotr $r_A, t0, 16 -#endif +# else + sll t0, $r_s0, 24 + srl t1, $r_s0, 24 + srl t2, $r_s0, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_s0, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else jr $r_ra - move $r_A, $r_s0 + move $r_A, $r_s0 +#endif END(bpf_slow_path_word) NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) bpf_slow_path_common(2) #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) jr $r_ra wsbh $r_A, $r_s0 -#endif +# else + sll t0, $r_s0, 8 + andi t1, $r_s0, 0xff00 + andi t0, t0, 0xff00 + srl t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else jr $r_ra move $r_A, $r_s0 +#endif END(bpf_slow_path_half) diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 8a377346f0ca..1022201b2beb 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -39,7 +39,7 @@ void __init plat_time_init(void) struct clk *clk; of_clk_init(NULL); - clocksource_of_init(); + clocksource_probe(); np = of_get_cpu_node(0, NULL); if (!np) { diff --git a/arch/mips/ralink/clk.c b/arch/mips/ralink/clk.c index feb5a9bf98b4..25c4a61779f1 100644 --- a/arch/mips/ralink/clk.c +++ b/arch/mips/ralink/clk.c @@ -75,5 +75,5 @@ void __init plat_time_init(void) pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000); mips_hpt_frequency = clk_get_rate(clk) / 2; clk_put(clk); - clocksource_of_init(); + clocksource_probe(); } diff --git a/arch/mips/txx9/generic/spi_eeprom.c b/arch/mips/txx9/generic/spi_eeprom.c index 3dbad99d5611..d833dd2c9b55 100644 --- a/arch/mips/txx9/generic/spi_eeprom.c +++ b/arch/mips/txx9/generic/spi_eeprom.c @@ -80,7 +80,6 @@ static int __init early_seeprom_probe(struct spi_device *spi) static struct spi_driver early_seeprom_driver __initdata = { .driver = { .name = "at25", - .owner = THIS_MODULE, }, .probe = early_seeprom_probe, }; diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index 6edb9ee6128e..1c8dd0f5cd5d 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -9,3 +9,4 @@ generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index 375e59140c9c..ce318d5ab23b 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -34,7 +34,7 @@ * * Atomically reads the value of @v. Note that the guaranteed */ -#define atomic_read(v) (ACCESS_ONCE((v)->counter)) +#define atomic_read(v) READ_ONCE((v)->counter) /** * atomic_set - set atomic variable @@ -43,7 +43,7 @@ * * Atomically sets the value of @v to @i. Note that the guaranteed */ -#define atomic_set(v, i) (((v)->counter) = (i)) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) #define ATOMIC_OP(op) \ static inline void atomic_##op(int i, atomic_t *v) \ diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 914864eb5a25..d63330e88379 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -61,4 +61,5 @@ generic-y += types.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c index bbc3f9157f9c..e835dda2bfe2 100644 --- a/arch/nios2/kernel/time.c +++ b/arch/nios2/kernel/time.c @@ -324,7 +324,7 @@ void __init time_init(void) if (count < 2) panic("%d timer is found, it needs 2 timers in system\n", count); - clocksource_of_init(); + clocksource_probe(); } CLOCKSOURCE_OF_DECLARE(nios2_timer, ALTR_TIMER_COMPATIBLE, nios2_time_init); diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 2536965d00ea..1d109990a022 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -67,7 +67,7 @@ static __inline__ void atomic_set(atomic_t *v, int i) static __inline__ int atomic_read(const atomic_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } /* exported interface */ diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 47f11c707b65..3d0e17bcc8e9 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -7,20 +7,12 @@ /* - * PA 2.0 processors have 64-byte cachelines; PA 1.1 processors have - * 32-byte cachelines. The default configuration is not for SMP anyway, - * so if you're building for SMP, you should select the appropriate - * processor type. There is a potential livelock danger when running - * a machine with this value set too small, but it's more probable you'll - * just ruin performance. + * PA 2.0 processors have 64 and 128-byte L2 cachelines; PA 1.1 processors + * have 32-byte cachelines. The L1 length appears to be 16 bytes but this + * is not clearly documented. */ -#ifdef CONFIG_PA20 -#define L1_CACHE_BYTES 64 -#define L1_CACHE_SHIFT 6 -#else -#define L1_CACHE_BYTES 32 -#define L1_CACHE_SHIFT 5 -#endif +#define L1_CACHE_BYTES 16 +#define L1_CACHE_SHIFT 4 #ifndef __ASSEMBLY__ diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 294d251ca7b2..ecc3ae1ca28e 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -31,6 +31,9 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ #define MADV_NORMAL 0 /* no further special treatment */ #define MADV_RANDOM 1 /* expect random page references */ diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index 2e639d7604f6..33170384d3ac 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -358,8 +358,10 @@ #define __NR_memfd_create (__NR_Linux + 340) #define __NR_bpf (__NR_Linux + 341) #define __NR_execveat (__NR_Linux + 342) +#define __NR_membarrier (__NR_Linux + 343) +#define __NR_userfaultfd (__NR_Linux + 344) -#define __NR_Linux_syscalls (__NR_execveat + 1) +#define __NR_Linux_syscalls (__NR_userfaultfd + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 8eefb12d1d33..78c3ef8c348d 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -438,6 +438,8 @@ ENTRY_SAME(memfd_create) /* 340 */ ENTRY_SAME(bpf) ENTRY_COMP(execveat) + ENTRY_SAME(membarrier) + ENTRY_SAME(userfaultfd) .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) diff --git a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi index 426bf4103b9e..5f51b7bfc064 100644 --- a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi @@ -224,10 +224,12 @@ /include/ "pq3-etsec2-0.dtsi" enet0: enet0_grp2: ethernet@b0000 { + fsl,wake-on-filer; }; /include/ "pq3-etsec2-1.dtsi" enet1: enet1_grp2: ethernet@b1000 { + fsl,wake-on-filer; }; global-utilities@e0000 { diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 6bc0ee4b1070..2c041b535a64 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -111,7 +111,7 @@ CONFIG_SCSI_QLA_FC=m CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_LPFC=m CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=m +CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_ALUA=m CONFIG_ATA=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 7991f37e5fe2..36871a4bfa54 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -114,7 +114,7 @@ CONFIG_SCSI_QLA_FC=m CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_LPFC=m CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=m +CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_ALUA=m CONFIG_ATA=y diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 0dc42c5082b7..5f8229e24fe6 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -3,7 +3,6 @@ #ifdef __KERNEL__ -#include <asm/reg.h> /* bytes per L1 cache line */ #if defined(CONFIG_8xx) || defined(CONFIG_403GCX) @@ -40,12 +39,6 @@ struct ppc64_caches { }; extern struct ppc64_caches ppc64_caches; - -static inline void logmpp(u64 x) -{ - asm volatile(PPC_LOGMPP(R1) : : "r" (x)); -} - #endif /* __powerpc64__ && ! __ASSEMBLY__ */ #if defined(__ASSEMBLY__) diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h index 6330a61b875a..4852e849128b 100644 --- a/arch/powerpc/include/asm/disassemble.h +++ b/arch/powerpc/include/asm/disassemble.h @@ -42,6 +42,11 @@ static inline unsigned int get_dcrn(u32 inst) return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); } +static inline unsigned int get_tmrn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + static inline unsigned int get_rt(u32 inst) { return (inst >> 21) & 0x1f; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 827a38d7a9db..cfa758c6b4f6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -297,8 +297,6 @@ struct kvmppc_vcore { u32 arch_compat; ulong pcr; ulong dpdes; /* doorbell state (POWER8) */ - void *mpp_buffer; /* Micro Partition Prefetch buffer */ - bool mpp_buffer_is_valid; ulong conferring_threads; }; @@ -718,5 +716,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index cab6753f1be5..3f191f573d4f 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -61,8 +61,13 @@ struct machdep_calls { unsigned long addr, unsigned char *hpte_slot_array, int psize, int ssize, int local); - /* special for kexec, to be called in real mode, linear mapping is - * destroyed as well */ + /* + * Special for kexec. + * To be called in real mode with interrupts disabled. No locks are + * taken as such, concurrent access on pre POWER5 hardware could result + * in a deadlock. + * The linear mapping is destroyed as well. + */ void (*hpte_clear_all)(void); void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size, diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 790f5d1d9a46..7ab04fc59e24 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -141,7 +141,6 @@ #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 -#define PPC_INST_LOGMPP 0x7c0007e4 #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a #define PPC_INST_LWARX 0x7c000028 @@ -285,20 +284,6 @@ #define __PPC_EH(eh) 0 #endif -/* POWER8 Micro Partition Prefetch (MPP) parameters */ -/* Address mask is common for LOGMPP instruction and MPPR SPR */ -#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000ULL - -/* Bits 60 and 61 of MPP SPR should be set to one of the following */ -/* Aborting the fetch is indeed setting 00 in the table size bits */ -#define PPC_MPPR_FETCH_ABORT (0x0ULL << 60) -#define PPC_MPPR_FETCH_WHOLE_TABLE (0x2ULL << 60) - -/* Bits 54 and 55 of register for LOGMPP instruction should be set to: */ -#define PPC_LOGMPP_LOG_L2 (0x02ULL << 54) -#define PPC_LOGMPP_LOG_L2L3 (0x01ULL << 54) -#define PPC_LOGMPP_LOG_ABORT (0x03ULL << 54) - /* Deal with instructions that older assemblers aren't aware of */ #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ __PPC_RA(a) | __PPC_RB(b)) @@ -307,8 +292,6 @@ #define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) -#define PPC_LOGMPP(b) stringify_in_c(.long PPC_INST_LOGMPP | \ - __PPC_RB(b)) #define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index aa1cc5f015ee..a908ada8e0a5 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -226,7 +226,6 @@ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 #define SPRN_DAWR 0xB4 -#define SPRN_MPPR 0xB8 /* Micro Partition Prefetch Register */ #define SPRN_RPR 0xBA /* Relative Priority Register */ #define SPRN_CIABR 0xBB #define CIABR_PRIV 0x3 diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 16547efa2d5a..2fef74b474f0 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -742,6 +742,12 @@ #define MMUBE1_VBE4 0x00000002 #define MMUBE1_VBE5 0x00000001 +#define TMRN_TMCFG0 16 /* Thread Management Configuration Register 0 */ +#define TMRN_TMCFG0_NPRIBITS 0x003f0000 /* Bits of thread priority */ +#define TMRN_TMCFG0_NPRIBITS_SHIFT 16 +#define TMRN_TMCFG0_NATHRD 0x00003f00 /* Number of active threads */ +#define TMRN_TMCFG0_NATHRD_SHIFT 8 +#define TMRN_TMCFG0_NTHRD 0x0000003f /* Number of threads */ #define TMRN_IMSR0 0x120 /* Initial MSR Register 0 (e6500) */ #define TMRN_IMSR1 0x121 /* Initial MSR Register 1 (e6500) */ #define TMRN_INIA0 0x140 /* Next Instruction Address Register 0 */ diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 5b3a903adae6..e4396a7d0f7c 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -40,6 +40,11 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct return (val + c->high_bits) & ~rhs; } +static inline unsigned long zero_bytemask(unsigned long mask) +{ + return ~1ul << __fls(mask); +} + #else #ifdef CONFIG_64BIT diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index 6ea26df0a73c..03c06ba7464f 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -22,6 +22,7 @@ #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ +#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 59503ed98e5f..3f1472a78f39 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -303,7 +303,7 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) dev->coherent_dma_mask = mask; return 0; } -EXPORT_SYMBOL_GPL(dma_set_coherent_mask); +EXPORT_SYMBOL(dma_set_coherent_mask); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 15099c41622e..92dea8df6b26 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1425,27 +1425,45 @@ static void __init prom_instantiate_sml(void) { phandle ibmvtpm_node; ihandle ibmvtpm_inst; - u32 entry = 0, size = 0; + u32 entry = 0, size = 0, succ = 0; u64 base; + __be32 val; prom_debug("prom_instantiate_sml: start...\n"); - ibmvtpm_node = call_prom("finddevice", 1, 1, ADDR("/ibm,vtpm")); + ibmvtpm_node = call_prom("finddevice", 1, 1, ADDR("/vdevice/vtpm")); prom_debug("ibmvtpm_node: %x\n", ibmvtpm_node); if (!PHANDLE_VALID(ibmvtpm_node)) return; - ibmvtpm_inst = call_prom("open", 1, 1, ADDR("/ibm,vtpm")); + ibmvtpm_inst = call_prom("open", 1, 1, ADDR("/vdevice/vtpm")); if (!IHANDLE_VALID(ibmvtpm_inst)) { prom_printf("opening vtpm package failed (%x)\n", ibmvtpm_inst); return; } - if (call_prom_ret("call-method", 2, 2, &size, - ADDR("sml-get-handover-size"), - ibmvtpm_inst) != 0 || size == 0) { - prom_printf("SML get handover size failed\n"); - return; + if (prom_getprop(ibmvtpm_node, "ibm,sml-efi-reformat-supported", + &val, sizeof(val)) != PROM_ERROR) { + if (call_prom_ret("call-method", 2, 2, &succ, + ADDR("reformat-sml-to-efi-alignment"), + ibmvtpm_inst) != 0 || succ == 0) { + prom_printf("Reformat SML to EFI alignment failed\n"); + return; + } + + if (call_prom_ret("call-method", 2, 2, &size, + ADDR("sml-get-allocated-size"), + ibmvtpm_inst) != 0 || size == 0) { + prom_printf("SML get allocated size failed\n"); + return; + } + } else { + if (call_prom_ret("call-method", 2, 2, &size, + ADDR("sml-get-handover-size"), + ibmvtpm_inst) != 0 || size == 0) { + prom_printf("SML get handover size failed\n"); + return; + } } base = alloc_down(size, PAGE_SIZE, 0); @@ -1454,6 +1472,8 @@ static void __init prom_instantiate_sml(void) prom_printf("instantiating sml at 0x%x...", base); + memset((void *)base, 0, size); + if (call_prom_ret("call-method", 4, 2, &entry, ADDR("sml-handover"), ibmvtpm_inst, size, base) != 0 || entry == 0) { @@ -1464,9 +1484,9 @@ static void __init prom_instantiate_sml(void) reserve_mem(base, size); - prom_setprop(ibmvtpm_node, "/ibm,vtpm", "linux,sml-base", + prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-base", &base, sizeof(base)); - prom_setprop(ibmvtpm_node, "/ibm,vtpm", "linux,sml-size", + prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-size", &size, sizeof(size)); prom_debug("sml base = 0x%x\n", base); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 84bf934cf748..5a753fae8265 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1043,6 +1043,9 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!rtas.entry) + return -EINVAL; + if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0) return -EFAULT; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 3fc2ba784a71..fb37290a57b4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -70,7 +70,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) } /* Lastly try successively smaller sizes from the page allocator */ - while (!hpt && order > PPC_MIN_HPT_ORDER) { + /* Only do this if userspace didn't specify a size via ioctl */ + while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) { hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| __GFP_NOWARN, order - PAGE_SHIFT); if (!hpt) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 228049786888..9c26c5a96ea2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -36,7 +36,6 @@ #include <asm/reg.h> #include <asm/cputable.h> -#include <asm/cache.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/uaccess.h> @@ -75,12 +74,6 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); -#if defined(CONFIG_PPC_64K_PAGES) -#define MPP_BUFFER_ORDER 0 -#elif defined(CONFIG_PPC_4K_PAGES) -#define MPP_BUFFER_ORDER 3 -#endif - static int dynamic_mt_modes = 6; module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)"); @@ -1455,13 +1448,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) vcore->kvm = kvm; INIT_LIST_HEAD(&vcore->preempt_list); - vcore->mpp_buffer_is_valid = false; - - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - vcore->mpp_buffer = (void *)__get_free_pages( - GFP_KERNEL|__GFP_ZERO, - MPP_BUFFER_ORDER); - return vcore; } @@ -1894,33 +1880,6 @@ static int on_primary_thread(void) return 1; } -static void kvmppc_start_saving_l2_cache(struct kvmppc_vcore *vc) -{ - phys_addr_t phy_addr, mpp_addr; - - phy_addr = (phys_addr_t)virt_to_phys(vc->mpp_buffer); - mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; - - mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_ABORT); - logmpp(mpp_addr | PPC_LOGMPP_LOG_L2); - - vc->mpp_buffer_is_valid = true; -} - -static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc) -{ - phys_addr_t phy_addr, mpp_addr; - - phy_addr = virt_to_phys(vc->mpp_buffer); - mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; - - /* We must abort any in-progress save operations to ensure - * the table is valid so that prefetch engine knows when to - * stop prefetching. */ - logmpp(mpp_addr | PPC_LOGMPP_LOG_ABORT); - mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE); -} - /* * A list of virtual cores for each physical CPU. * These are vcores that could run but their runner VCPU tasks are @@ -2471,14 +2430,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) srcu_idx = srcu_read_lock(&vc->kvm->srcu); - if (vc->mpp_buffer_is_valid) - kvmppc_start_restoring_l2_cache(vc); - __kvmppc_vcore_entry(); - if (vc->mpp_buffer) - kvmppc_start_saving_l2_cache(vc); - srcu_read_unlock(&vc->kvm->srcu, srcu_idx); spin_lock(&vc->lock); @@ -3073,14 +3026,8 @@ static void kvmppc_free_vcores(struct kvm *kvm) { long int i; - for (i = 0; i < KVM_MAX_VCORES; ++i) { - if (kvm->arch.vcores[i] && kvm->arch.vcores[i]->mpp_buffer) { - struct kvmppc_vcore *vc = kvm->arch.vcores[i]; - free_pages((unsigned long)vc->mpp_buffer, - MPP_BUFFER_ORDER); - } + for (i = 0; i < KVM_MAX_VCORES; ++i) kfree(kvm->arch.vcores[i]); - } kvm->arch.online_vcores = 0; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 0bce4fffcb2e..91700518bbf3 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -472,6 +472,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, note_hpte_modification(kvm, rev); unlock_hpte(hpte, 0); + if (v & HPTE_V_ABSENT) + v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID; hpret[0] = v; hpret[1] = r; return H_SUCCESS; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b98889e9851d..b1dab8d1d885 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -150,6 +150,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL beq 11f + cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL + beq 15f /* Invoke the H_DOORBELL handler */ cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI beq cr2, 14f /* HMI check */ @@ -174,6 +176,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_HSRR1, r7 b hmi_exception_after_realmode +15: mtspr SPRN_HSRR0, r8 + mtspr SPRN_HSRR1, r7 + ba 0xe80 + kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ @@ -2377,7 +2383,6 @@ machine_check_realmode: mr r3, r9 /* get vcpu pointer */ bl kvmppc_realmode_machine_check nop - cmpdi r3, 0 /* Did we handle MCE ? */ ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK /* @@ -2390,13 +2395,18 @@ machine_check_realmode: * The old code used to return to host for unhandled errors which * was causing guest to hang with soft lockups inside guest and * makes it difficult to recover guest instance. + * + * if we receive machine check with MSR(RI=0) then deliver it to + * guest as machine check causing guest to crash. */ - ld r10, VCPU_PC(r9) ld r11, VCPU_MSR(r9) + andi. r10, r11, MSR_RI /* check for unrecoverable exception */ + beq 1f /* Deliver a machine check to guest */ + ld r10, VCPU_PC(r9) + cmpdi r3, 0 /* Did we handle MCE ? */ bne 2f /* Continue guest execution. */ /* If not, deliver a machine check. SRR0/1 are already set */ - li r10, BOOK3S_INTERRUPT_MACHINE_CHECK - ld r11, VCPU_MSR(r9) +1: li r10, BOOK3S_INTERRUPT_MACHINE_CHECK bl kvmppc_msr_interrupt 2: b fast_interrupt_c_return @@ -2436,14 +2446,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* hypervisor doorbell */ 3: li r12, BOOK3S_INTERRUPT_H_DOORBELL + + /* + * Clear the doorbell as we will invoke the handler + * explicitly in the guest exit path. + */ + lis r6, (PPC_DBELL_SERVER << (63-36))@h + PPC_MSGCLR(6) /* see if it's a host IPI */ li r3, 1 lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 bnelr - /* if not, clear it and return -1 */ - lis r6, (PPC_DBELL_SERVER << (63-36))@h - PPC_MSGCLR(6) + /* if not, return -1 */ li r3, -1 blr diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index b29ce752c7d6..32fdab57d604 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -237,7 +237,8 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, struct kvm_book3e_206_tlb_entry *gtlbe) { struct vcpu_id_table *idt = vcpu_e500->idt; - unsigned int pr, tid, ts, pid; + unsigned int pr, tid, ts; + int pid; u32 val, eaddr; unsigned long flags; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index ce7291c79f6c..990db69a1d0b 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -15,6 +15,7 @@ #include <asm/kvm_ppc.h> #include <asm/disassemble.h> #include <asm/dbell.h> +#include <asm/reg_booke.h> #include "booke.h" #include "e500.h" @@ -22,6 +23,7 @@ #define XOP_DCBTLS 166 #define XOP_MSGSND 206 #define XOP_MSGCLR 238 +#define XOP_MFTMR 366 #define XOP_TLBIVAX 786 #define XOP_TLBSX 914 #define XOP_TLBRE 946 @@ -113,6 +115,19 @@ static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst, + int rt) +{ + /* Expose one thread per vcpu */ + if (get_tmrn(inst) == TMRN_TMCFG0) { + kvmppc_set_gpr(vcpu, rt, + 1 | (1 << TMRN_TMCFG0_NATHRD_SHIFT)); + return EMULATE_DONE; + } + + return EMULATE_FAIL; +} + int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -165,6 +180,10 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); break; + case XOP_MFTMR: + emulated = kvmppc_e500_emul_mftmr(vcpu, inst, rt); + break; + case XOP_EHPRIV: emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, advance); diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 805fee9beefa..34c43fff4adb 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -406,7 +406,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { unsigned long gfn_start, gfn_end; - tsize_pages = 1 << (tsize - 2); + tsize_pages = 1UL << (tsize - 2); gfn_start = gfn & ~(tsize_pages - 1); gfn_end = gfn_start + tsize_pages; @@ -447,7 +447,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, } if (likely(!pfnmap)) { - tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); + tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT); pfn = gfn_to_pfn_memslot(slot, gfn); if (is_error_noslot_pfn(pfn)) { if (printk_ratelimit()) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2e51289610e4..6fd2405c7f4a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -559,6 +559,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = num_online_cpus(); break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 13befa35d8a8..c8822af10a58 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -582,13 +582,21 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, * be when they isi), and we are the only one left. We rely on our kernel * mapping being 0xC0's and the hardware ignoring those two real bits. * + * This must be called with interrupts disabled. + * + * Taking the native_tlbie_lock is unsafe here due to the possibility of + * lockdep being on. On pre POWER5 hardware, not taking the lock could + * cause deadlock. POWER5 and newer not taking the lock is fine. This only + * gets called during boot before secondary CPUs have come up and during + * crashdump and all bets are off anyway. + * * TODO: add batching support when enabled. remember, no dynamic memory here, * athough there is the control page available... */ static void native_hpte_clear(void) { unsigned long vpn = 0; - unsigned long slot, slots, flags; + unsigned long slot, slots; struct hash_pte *hptep = htab_address; unsigned long hpte_v; unsigned long pteg_count; @@ -596,13 +604,6 @@ static void native_hpte_clear(void) pteg_count = htab_hash_mask + 1; - local_irq_save(flags); - - /* we take the tlbie lock and hold it. Some hardware will - * deadlock if we try to tlbie from two processors at once. - */ - raw_spin_lock(&native_tlbie_lock); - slots = pteg_count * HPTES_PER_GROUP; for (slot = 0; slot < slots; slot++, hptep++) { @@ -614,8 +615,8 @@ static void native_hpte_clear(void) hpte_v = be64_to_cpu(hptep->v); /* - * Call __tlbie() here rather than tlbie() since we - * already hold the native_tlbie_lock. + * Call __tlbie() here rather than tlbie() since we can't take the + * native_tlbie_lock. */ if (hpte_v & HPTE_V_VALID) { hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn); @@ -625,8 +626,6 @@ static void native_hpte_clear(void) } asm volatile("eieio; tlbsync; ptesync":::"memory"); - raw_spin_unlock(&native_tlbie_lock); - local_irq_restore(flags); } /* diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b85d44271c3b..669a15e7fa76 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -80,7 +80,7 @@ static void __init setup_node_to_cpumask_map(void) setup_nr_node_ids(); /* allocate the map */ - for (node = 0; node < nr_node_ids; node++) + for_each_node(node) alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); /* cpumask_of_node() will now work */ diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 17cea18a09d3..04782164ee67 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -679,7 +679,7 @@ void bpf_jit_compile(struct bpf_prog *fp) ((u64 *)image)[1] = local_paca->kernel_toc; #endif fp->bpf_func = (void *)image; - fp->jited = true; + fp->jited = 1; } out: kfree(addrs); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b0382f3f1095..d1e65ce545b3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -48,7 +48,7 @@ struct cpu_hw_events { unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; - unsigned int group_flag; + unsigned int txn_flags; int n_txn_start; /* BHRB bits */ @@ -1441,7 +1441,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ - if (cpuhw->group_flag & PERF_EVENT_TXN) + if (cpuhw->txn_flags & PERF_PMU_TXN_ADD) goto nocheck; if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) @@ -1586,13 +1586,22 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void power_pmu_start_txn(struct pmu *pmu) +static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); - cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -1604,8 +1613,15 @@ static void power_pmu_start_txn(struct pmu *pmu) static void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + unsigned int txn_flags; + + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; - cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -1621,7 +1637,15 @@ static int power_pmu_commit_txn(struct pmu *pmu) if (!ppmu) return -EAGAIN; + cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuhw->txn_flags = 0; + return 0; + } + n = cpuhw->n_events; if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) return -EAGAIN; @@ -1632,7 +1656,7 @@ static int power_pmu_commit_txn(struct pmu *pmu) for (i = cpuhw->n_txn_start; i < n; ++i) cpuhw->event[i]->hw.config = cpuhw->events[i]; - cpuhw->group_flag &= ~PERF_EVENT_TXN; + cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 527c8b98e97e..9f9dfda9ed2c 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -142,6 +142,15 @@ static struct attribute_group event_long_desc_group = { static struct kmem_cache *hv_page_cache; +DEFINE_PER_CPU(int, hv_24x7_txn_flags); +DEFINE_PER_CPU(int, hv_24x7_txn_err); + +struct hv_24x7_hw { + struct perf_event *events[255]; +}; + +DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw); + /* * request_buffer and result_buffer are not required to be 4k aligned, * but are not allowed to cross any 4k boundary. Aligning them to 4k is @@ -1231,9 +1240,48 @@ static void update_event_count(struct perf_event *event, u64 now) static void h_24x7_event_read(struct perf_event *event) { u64 now; + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_hw *h24x7hw; + int txn_flags; + + txn_flags = __this_cpu_read(hv_24x7_txn_flags); + + /* + * If in a READ transaction, add this counter to the list of + * counters to read during the next HCALL (i.e commit_txn()). + * If not in a READ transaction, go ahead and make the HCALL + * to read this counter by itself. + */ + + if (txn_flags & PERF_PMU_TXN_READ) { + int i; + int ret; - now = h_24x7_get_value(event); - update_event_count(event, now); + if (__this_cpu_read(hv_24x7_txn_err)) + return; + + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + + ret = add_event_to_24x7_request(event, request_buffer); + if (ret) { + __this_cpu_write(hv_24x7_txn_err, ret); + } else { + /* + * Assoicate the event with the HCALL request index, + * so ->commit_txn() can quickly find/update count. + */ + i = request_buffer->num_requests - 1; + + h24x7hw = &get_cpu_var(hv_24x7_hw); + h24x7hw->events[i] = event; + put_cpu_var(h24x7hw); + } + + put_cpu_var(hv_24x7_reqb); + } else { + now = h_24x7_get_value(event); + update_event_count(event, now); + } } static void h_24x7_event_start(struct perf_event *event, int flags) @@ -1255,6 +1303,117 @@ static int h_24x7_event_add(struct perf_event *event, int flags) return 0; } +/* + * 24x7 counters only support READ transactions. They are + * always counting and dont need/support ADD transactions. + * Cache the flags, but otherwise ignore transactions that + * are not PERF_PMU_TXN_READ. + */ +static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags) +{ + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_data_result_buffer *result_buffer; + + /* We should not be called if we are already in a txn */ + WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags)); + + __this_cpu_write(hv_24x7_txn_flags, flags); + if (flags & ~PERF_PMU_TXN_READ) + return; + + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + result_buffer = (void *)get_cpu_var(hv_24x7_resb); + + init_24x7_request(request_buffer, result_buffer); + + put_cpu_var(hv_24x7_resb); + put_cpu_var(hv_24x7_reqb); +} + +/* + * Clean up transaction state. + * + * NOTE: Ignore state of request and result buffers for now. + * We will initialize them during the next read/txn. + */ +static void reset_txn(void) +{ + __this_cpu_write(hv_24x7_txn_flags, 0); + __this_cpu_write(hv_24x7_txn_err, 0); +} + +/* + * 24x7 counters only support READ transactions. They are always counting + * and dont need/support ADD transactions. Clear ->txn_flags but otherwise + * ignore transactions that are not of type PERF_PMU_TXN_READ. + * + * For READ transactions, submit all pending 24x7 requests (i.e requests + * that were queued by h_24x7_event_read()), to the hypervisor and update + * the event counts. + */ +static int h_24x7_event_commit_txn(struct pmu *pmu) +{ + struct hv_24x7_request_buffer *request_buffer; + struct hv_24x7_data_result_buffer *result_buffer; + struct hv_24x7_result *resb; + struct perf_event *event; + u64 count; + int i, ret, txn_flags; + struct hv_24x7_hw *h24x7hw; + + txn_flags = __this_cpu_read(hv_24x7_txn_flags); + WARN_ON_ONCE(!txn_flags); + + ret = 0; + if (txn_flags & ~PERF_PMU_TXN_READ) + goto out; + + ret = __this_cpu_read(hv_24x7_txn_err); + if (ret) + goto out; + + request_buffer = (void *)get_cpu_var(hv_24x7_reqb); + result_buffer = (void *)get_cpu_var(hv_24x7_resb); + + ret = make_24x7_request(request_buffer, result_buffer); + if (ret) { + log_24x7_hcall(request_buffer, result_buffer, ret); + goto put_reqb; + } + + h24x7hw = &get_cpu_var(hv_24x7_hw); + + /* Update event counts from hcall */ + for (i = 0; i < request_buffer->num_requests; i++) { + resb = &result_buffer->results[i]; + count = be64_to_cpu(resb->elements[0].element_data[0]); + event = h24x7hw->events[i]; + h24x7hw->events[i] = NULL; + update_event_count(event, count); + } + + put_cpu_var(hv_24x7_hw); + +put_reqb: + put_cpu_var(hv_24x7_resb); + put_cpu_var(hv_24x7_reqb); +out: + reset_txn(); + return ret; +} + +/* + * 24x7 counters only support READ transactions. They are always counting + * and dont need/support ADD transactions. However, regardless of type + * of transaction, all we need to do is cleanup, so we don't have to check + * the type of transaction. + */ +static void h_24x7_event_cancel_txn(struct pmu *pmu) +{ + WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags)); + reset_txn(); +} + static struct pmu h_24x7_pmu = { .task_ctx_nr = perf_invalid_context, @@ -1266,6 +1425,9 @@ static struct pmu h_24x7_pmu = { .start = h_24x7_event_start, .stop = h_24x7_event_stop, .read = h_24x7_event_read, + .start_txn = h_24x7_event_start_txn, + .commit_txn = h_24x7_event_commit_txn, + .cancel_txn = h_24x7_event_cancel_txn, }; static int hv_24x7_init(void) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 396351db601b..7d5e295255b7 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -676,6 +676,9 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type) if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) return -1; + if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL) + return -1; + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) { pmu_bhrb_filter |= POWER8_MMCRA_IFM1; return pmu_bhrb_filter; diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index e0e68a1c0d3c..aed7714495c1 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -327,7 +327,7 @@ static void axon_msi_shutdown(struct platform_device *device) u32 tmp; pr_devel("axon_msi: disabling %s\n", - msic->irq_domain->of_node->full_name); + irq_domain_get_of_node(msic->irq_domain)->full_name); tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG); tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; msic_dcr_write(msic, MSIC_CTRL_REG, tmp); diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 9d27de62dc62..54ee5743cb72 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -231,20 +231,23 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) const u32 *imap, *tmp; int imaplen, intsize, unit; struct device_node *iic; + struct device_node *of_node; + + of_node = irq_domain_get_of_node(pic->host); /* First, we check whether we have a real "interrupts" in the device * tree in case the device-tree is ever fixed */ - virq = irq_of_parse_and_map(pic->host->of_node, 0); + virq = irq_of_parse_and_map(of_node, 0); if (virq) return virq; /* Now do the horrible hacks */ - tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL); + tmp = of_get_property(of_node, "#interrupt-cells", NULL); if (tmp == NULL) return NO_IRQ; intsize = *tmp; - imap = of_get_property(pic->host->of_node, "interrupt-map", &imaplen); + imap = of_get_property(of_node, "interrupt-map", &imaplen); if (imap == NULL || imaplen < (intsize + 1)) return NO_IRQ; iic = of_find_node_by_phandle(imap[intsize]); diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index b304a9fe55cc..d9af76342d99 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -144,9 +144,11 @@ int mpic_pasemi_msi_init(struct mpic *mpic) { int rc; struct pci_controller *phb; + struct device_node *of_node; - if (!mpic->irqhost->of_node || - !of_device_is_compatible(mpic->irqhost->of_node, + of_node = irq_domain_get_of_node(mpic->irqhost); + if (!of_node || + !of_device_is_compatible(of_node, "pasemi,pwrficient-openpic")) return -ENODEV; diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index 2c91ee7800b9..6ccfb6c1c707 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -137,7 +137,7 @@ static void opal_handle_irq_work(struct irq_work *work) static int opal_event_match(struct irq_domain *h, struct device_node *node, enum irq_domain_bus_token bus_token) { - return h->of_node == node; + return irq_domain_get_of_node(h) == node; } static int opal_event_xlate(struct irq_domain *h, struct device_node *np, diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 230f3a7cdea4..4296d55e88f3 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -487,9 +487,12 @@ int opal_machine_check(struct pt_regs *regs) * PRD component would have already got notified about this * error through other channels. * - * In any case, let us just fall through. We anyway heading - * down to panic path. + * If hardware marked this as an unrecoverable MCE, we are + * going to panic anyway. Even if it didn't, it's not safe to + * continue at this point, so we should explicitly panic. */ + + panic("PowerNV Unrecovered Machine Check"); return 0; } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 8f70ba681a78..ca264833ee64 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -171,7 +171,26 @@ static void pnv_smp_cpu_kill_self(void) * so clear LPCR:PECE1. We keep PECE2 enabled. */ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); + + /* + * Hard-disable interrupts, and then clear irq_happened flags + * that we can safely ignore while off-line, since they + * are for things for which we do no processing when off-line + * (or in the case of HMI, all the processing we need to do + * is done in lower-level real-mode code). + */ + hard_irq_disable(); + local_paca->irq_happened &= ~(PACA_IRQ_DEC | PACA_IRQ_HMI); + while (!generic_check_cpu_restart(cpu)) { + /* + * Clear IPI flag, since we don't handle IPIs while + * offline, except for those when changing micro-threading + * mode, which are handled explicitly below, and those + * for coming online, which are handled via + * generic_check_cpu_restart() calls. + */ + kvmppc_set_host_ipi(cpu, 0); ppc64_runlatch_off(); @@ -196,20 +215,20 @@ static void pnv_smp_cpu_kill_self(void) * having finished executing in a KVM guest, then srr1 * contains 0. */ - if ((srr1 & wmask) == SRR1_WAKEEE) { + if (((srr1 & wmask) == SRR1_WAKEEE) || + (local_paca->irq_happened & PACA_IRQ_EE)) { icp_native_flush_interrupt(); - local_paca->irq_happened &= PACA_IRQ_HARD_DIS; - smp_mb(); } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); - kvmppc_set_host_ipi(cpu, 0); } + local_paca->irq_happened &= ~(PACA_IRQ_EE | PACA_IRQ_DBELL); + smp_mb(); if (cpu_core_split_required()) continue; - if (!generic_check_cpu_restart(cpu)) + if (srr1 && !generic_check_cpu_restart(cpu)) DBG("CPU%d Unexpected exit while offline !\n", cpu); } mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1); diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index 09787139834d..3db53e8aff92 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -194,11 +194,6 @@ static const struct os_area_db_id os_area_db_id_rtc_diff = { .key = OS_AREA_DB_KEY_RTC_DIFF }; -static const struct os_area_db_id os_area_db_id_video_mode = { - .owner = OS_AREA_DB_OWNER_LINUX, - .key = OS_AREA_DB_KEY_VIDEO_MODE -}; - #define SECONDS_FROM_1970_TO_2000 946684800LL /** diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c index eca0b00794fa..bffcc7a486a1 100644 --- a/arch/powerpc/sysdev/ehv_pic.c +++ b/arch/powerpc/sysdev/ehv_pic.c @@ -181,7 +181,8 @@ static int ehv_pic_host_match(struct irq_domain *h, struct device_node *node, enum irq_domain_bus_token bus_token) { /* Exact match, unless ehv_pic node is NULL */ - return h->of_node == NULL || h->of_node == node; + struct device_node *of_node = irq_domain_get_of_node(h); + return of_node == NULL || of_node == node; } static int ehv_pic_host_map(struct irq_domain *h, unsigned int virq, diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 48a576aa47b9..3a2be3676f43 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -110,7 +110,7 @@ static int fsl_msi_init_allocator(struct fsl_msi *msi_data) int rc, hwirq; rc = msi_bitmap_alloc(&msi_data->bitmap, NR_MSI_IRQS_MAX, - msi_data->irqhost->of_node); + irq_domain_get_of_node(msi_data->irqhost)); if (rc) return rc; diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 1c65ef92768d..610f472f91d1 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1037,7 +1037,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) ret = get_user(regs->nip, &inst); pagefault_enable(); } else { - ret = probe_kernel_address(regs->nip, inst); + ret = probe_kernel_address((void *)regs->nip, inst); } if (!ret && mcheck_handle_load(regs, inst)) { diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index e1a9c2c2d5d3..6f99ed3967fd 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -165,7 +165,8 @@ static struct resource pic_edgectrl_iores = { static int i8259_host_match(struct irq_domain *h, struct device_node *node, enum irq_domain_bus_token bus_token) { - return h->of_node == NULL || h->of_node == node; + struct device_node *of_node = irq_domain_get_of_node(h); + return of_node == NULL || of_node == node; } static int i8259_host_map(struct irq_domain *h, unsigned int virq, diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index b1297ab1599b..f76ee39cb337 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -675,7 +675,8 @@ static int ipic_host_match(struct irq_domain *h, struct device_node *node, enum irq_domain_bus_token bus_token) { /* Exact match, unless ipic node is NULL */ - return h->of_node == NULL || h->of_node == node; + struct device_node *of_node = irq_domain_get_of_node(h); + return of_node == NULL || of_node == node; } static int ipic_host_map(struct irq_domain *h, unsigned int virq, diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 123e43612f0a..2a0452e364ba 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -993,7 +993,8 @@ static int mpic_host_match(struct irq_domain *h, struct device_node *node, enum irq_domain_bus_token bus_token) { /* Exact match, unless mpic node is NULL */ - return h->of_node == NULL || h->of_node == node; + struct device_node *of_node = irq_domain_get_of_node(h); + return of_node == NULL || of_node == node; } static int mpic_host_map(struct irq_domain *h, unsigned int virq, diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c index 7dc39f35a4cc..1d48a5385905 100644 --- a/arch/powerpc/sysdev/mpic_msi.c +++ b/arch/powerpc/sysdev/mpic_msi.c @@ -84,7 +84,7 @@ int mpic_msi_init_allocator(struct mpic *mpic) int rc; rc = msi_bitmap_alloc(&mpic->msi_bitmap, mpic->num_sources, - mpic->irqhost->of_node); + irq_domain_get_of_node(mpic->irqhost)); if (rc) return rc; diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index fbcc1f855a7f..ef36f16f9f6f 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -248,7 +248,8 @@ static int qe_ic_host_match(struct irq_domain *h, struct device_node *node, enum irq_domain_bus_token bus_token) { /* Exact match, unless qe_ic node is NULL */ - return h->of_node == NULL || h->of_node == node; + struct device_node *of_node = irq_domain_get_of_node(h); + return of_node == NULL || of_node == node; } static int qe_ic_host_map(struct irq_domain *h, unsigned int virq, diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1d57000b1b24..3a55f493c7da 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -101,6 +101,7 @@ config S390 select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANTS_PROT_NUMA_PROT_NONE select ARCH_WANT_IPC_PARSE_VERSION @@ -118,6 +119,7 @@ config S390 select HAVE_ARCH_EARLY_PFN_TO_NID select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES @@ -582,6 +584,7 @@ menuconfig PCI bool "PCI support" select HAVE_DMA_ATTRS select PCI_MSI + select IOMMU_SUPPORT help Enable PCI support. diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index d4788111c161..fac6ac9790fa 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -10,7 +10,7 @@ targets += misc.o piggy.o sizes.h head.o KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks +KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 0c98f1508542..ed7da281df66 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -381,7 +381,7 @@ CONFIG_ISCSI_TCP=m CONFIG_SCSI_DEBUG=m CONFIG_ZFCP=y CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=m +CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_HP_SW=m CONFIG_SCSI_DH_EMC=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 82083e1fbdc4..9858b14cde1e 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -377,7 +377,7 @@ CONFIG_ISCSI_TCP=m CONFIG_SCSI_DEBUG=m CONFIG_ZFCP=y CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=m +CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_HP_SW=m CONFIG_SCSI_DH_EMC=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index c05c9e0821e3..7f14f80717d4 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -377,7 +377,7 @@ CONFIG_ISCSI_TCP=m CONFIG_SCSI_DEBUG=m CONFIG_ZFCP=y CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=m +CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_HP_SW=m CONFIG_SCSI_DH_EMC=m diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index f4e9dc71675f..10f200790079 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -19,7 +19,7 @@ #include <crypto/sha.h> /* must be big enough for the largest SHA variant */ -#define SHA_MAX_STATE_SIZE 16 +#define SHA_MAX_STATE_SIZE (SHA512_DIGEST_SIZE / 4) #define SHA_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE struct s390_sha_ctx { diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 5eeffeefae06..045035796ca7 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -15,6 +15,7 @@ #include <linux/string.h> #include <linux/vmalloc.h> #include <linux/mm.h> +#include <asm/diag.h> #include <asm/ebcdic.h> #include "hypfs.h" @@ -336,7 +337,7 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) /* Diagnose 204 functions */ -static int diag204(unsigned long subcode, unsigned long size, void *addr) +static inline int __diag204(unsigned long subcode, unsigned long size, void *addr) { register unsigned long _subcode asm("0") = subcode; register unsigned long _size asm("1") = size; @@ -351,6 +352,12 @@ static int diag204(unsigned long subcode, unsigned long size, void *addr) return _size; } +static int diag204(unsigned long subcode, unsigned long size, void *addr) +{ + diag_stat_inc(DIAG_STAT_X204); + return __diag204(subcode, size, addr); +} + /* * For the old diag subcode 4 with simple data format we have to use real * memory. If we use subcode 6 or 7 with extended data format, we can (and @@ -505,6 +512,7 @@ static int diag224(void *ptr) { int rc = -EOPNOTSUPP; + diag_stat_inc(DIAG_STAT_X224); asm volatile( " diag %1,%2,0x224\n" "0: lhi %0,0x0\n" diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c index 24c747a0fcc3..0f1927cbba31 100644 --- a/arch/s390/hypfs/hypfs_diag0c.c +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -8,6 +8,7 @@ #include <linux/slab.h> #include <linux/cpu.h> +#include <asm/diag.h> #include <asm/hypfs.h> #include "hypfs.h" @@ -18,6 +19,7 @@ */ static void diag0c(struct hypfs_diag0c_entry *entry) { + diag_stat_inc(DIAG_STAT_X00C); asm volatile ( " sam31\n" " diag %0,%0,0x0c\n" diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c index dd42a26d049d..c9e5c72f78bd 100644 --- a/arch/s390/hypfs/hypfs_sprp.c +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/uaccess.h> #include <asm/compat.h> +#include <asm/diag.h> #include <asm/sclp.h> #include "hypfs.h" @@ -22,7 +23,7 @@ #define DIAG304_CMD_MAX 2 -static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd) +static inline unsigned long __hypfs_sprp_diag304(void *data, unsigned long cmd) { register unsigned long _data asm("2") = (unsigned long) data; register unsigned long _rc asm("3"); @@ -34,6 +35,12 @@ static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd) return _rc; } +static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd) +{ + diag_stat_inc(DIAG_STAT_X304); + return __hypfs_sprp_diag304(data, cmd); +} + static void hypfs_sprp_free(const void *data) { free_page((unsigned long) data); diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index afbe07907c10..44feac38ccfc 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -9,6 +9,7 @@ #include <linux/errno.h> #include <linux/string.h> #include <linux/vmalloc.h> +#include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/timex.h> #include "hypfs.h" @@ -66,6 +67,7 @@ static int diag2fc(int size, char* query, void *addr) memset(parm_list.aci_grp, 0x40, NAME_LEN); rc = -1; + diag_stat_inc(DIAG_STAT_X2FC); asm volatile( " diag %0,%1,0x2fc\n" "0:\n" diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 5ad26dd94d77..9043d2e1e2ae 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -6,3 +6,4 @@ generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h index 16887c5fd989..a6263d4e8e56 100644 --- a/arch/s390/include/asm/appldata.h +++ b/arch/s390/include/asm/appldata.h @@ -7,6 +7,7 @@ #ifndef _ASM_S390_APPLDATA_H #define _ASM_S390_APPLDATA_H +#include <asm/diag.h> #include <asm/io.h> #define APPLDATA_START_INTERVAL_REC 0x80 @@ -53,6 +54,7 @@ static inline int appldata_asm(struct appldata_product_id *id, parm_list.buffer_length = length; parm_list.product_id_addr = (unsigned long) id; parm_list.buffer_addr = virt_to_phys(buffer); + diag_stat_inc(DIAG_STAT_X0DC); asm volatile( " diag %1,%0,0xdc" : "=d" (ry) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 117fa5c921c1..911064aa59b2 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -36,7 +36,6 @@ \ typecheck(atomic_t *, ptr); \ asm volatile( \ - __barrier \ op_string " %0,%2,%1\n" \ __barrier \ : "=d" (old_val), "+Q" ((ptr)->counter) \ @@ -180,7 +179,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) \ typecheck(atomic64_t *, ptr); \ asm volatile( \ - __barrier \ op_string " %0,%2,%1\n" \ __barrier \ : "=d" (old_val), "+Q" ((ptr)->counter) \ diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index d48fe0162331..d68e11e0df5e 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -22,10 +22,10 @@ #define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0) -#define rmb() mb() -#define wmb() mb() -#define dma_rmb() rmb() -#define dma_wmb() wmb() +#define rmb() barrier() +#define wmb() barrier() +#define dma_rmb() mb() +#define dma_wmb() mb() #define smp_mb() mb() #define smp_rmb() rmb() #define smp_wmb() wmb() diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 9b68e98a724f..8043f10da6b5 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -11,30 +11,25 @@ * big-endian system because, unlike little endian, the number of each * bit depends on the word size. * - * The bitop functions are defined to work on unsigned longs, so for an - * s390x system the bits end up numbered: + * The bitop functions are defined to work on unsigned longs, so the bits + * end up numbered: * |63..............0|127............64|191...........128|255...........192| - * and on s390: - * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224| * * There are a few little-endian macros used mostly for filesystem - * bitmaps, these work on similar bit arrays layouts, but - * byte-oriented: + * bitmaps, these work on similar bit array layouts, but byte-oriented: * |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56| * - * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit - * number field needs to be reversed compared to the big-endian bit - * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b). + * The main difference is that bit 3-5 in the bit number field needs to be + * reversed compared to the big-endian bit fields. This can be achieved by + * XOR with 0x38. * - * We also have special functions which work with an MSB0 encoding: - * on an s390x system the bits are numbered: + * We also have special functions which work with an MSB0 encoding. + * The bits are numbered: * |0..............63|64............127|128...........191|192...........255| - * and on s390: - * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| * - * The main difference is that bit 0-63 (64b) or 0-31 (32b) in the bit - * number field needs to be reversed compared to the LSB0 encoded bit - * fields. This can be achieved by XOR with 0x3f (64b) or 0x1f (32b). + * The main difference is that bit 0-63 in the bit number field needs to be + * reversed compared to the LSB0 encoded bit fields. This can be achieved by + * XOR with 0x3f. * */ @@ -64,7 +59,6 @@ \ typecheck(unsigned long *, (__addr)); \ asm volatile( \ - __barrier \ __op_string " %0,%2,%1\n" \ __barrier \ : "=d" (__old), "+Q" (*(__addr)) \ @@ -276,12 +270,32 @@ static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr) return (*addr >> (nr & 7)) & 1; } +static inline int test_and_set_bit_lock(unsigned long nr, + volatile unsigned long *ptr) +{ + if (test_bit(nr, ptr)) + return 1; + return test_and_set_bit(nr, ptr); +} + +static inline void clear_bit_unlock(unsigned long nr, + volatile unsigned long *ptr) +{ + smp_mb__before_atomic(); + clear_bit(nr, ptr); +} + +static inline void __clear_bit_unlock(unsigned long nr, + volatile unsigned long *ptr) +{ + smp_mb(); + __clear_bit(nr, ptr); +} + /* * Functions which use MSB0 bit numbering. - * On an s390x system the bits are numbered: + * The bits are numbered: * |0..............63|64............127|128...........191|192...........255| - * and on s390: - * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| */ unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size); unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size, @@ -446,7 +460,6 @@ static inline int fls(int word) #include <asm-generic/bitops/ffz.h> #include <asm-generic/bitops/find.h> #include <asm-generic/bitops/hweight.h> -#include <asm-generic/bitops/lock.h> #include <asm-generic/bitops/sched.h> #include <asm-generic/bitops/le.h> #include <asm-generic/bitops/ext2-atomic-setbit.h> diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 096339207764..0c5d8ee657f0 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -5,6 +5,7 @@ #define _ASM_S390_CIO_H_ #include <linux/spinlock.h> +#include <linux/bitops.h> #include <asm/types.h> #define LPM_ANYPATH 0xff @@ -296,6 +297,15 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, return 0; } +/** + * pathmask_to_pos() - find the position of the left-most bit in a pathmask + * @mask: pathmask with at least one bit set + */ +static inline u8 pathmask_to_pos(u8 mask) +{ + return 8 - ffs(mask); +} + void channel_subsystem_reinit(void); extern void css_schedule_reprobe(void); diff --git a/arch/s390/include/asm/cmb.h b/arch/s390/include/asm/cmb.h index 806eac12e3bd..ed2630c23f90 100644 --- a/arch/s390/include/asm/cmb.h +++ b/arch/s390/include/asm/cmb.h @@ -6,6 +6,7 @@ struct ccw_device; extern int enable_cmf(struct ccw_device *cdev); extern int disable_cmf(struct ccw_device *cdev); +extern int __disable_cmf(struct ccw_device *cdev); extern u64 cmf_read(struct ccw_device *cdev, int index); extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data); diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 411464f4c97a..24ea6948e32b 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -32,7 +32,7 @@ __old; \ }) -#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ +#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \ ({ \ register __typeof__(*(p1)) __old1 asm("2") = (o1); \ register __typeof__(*(p2)) __old2 asm("3") = (o2); \ @@ -40,7 +40,7 @@ register __typeof__(*(p2)) __new2 asm("5") = (n2); \ int cc; \ asm volatile( \ - insn " %[old],%[new],%[ptr]\n" \ + " cdsg %[old],%[new],%[ptr]\n" \ " ipm %[cc]\n" \ " srl %[cc],28" \ : [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2) \ @@ -50,30 +50,6 @@ !cc; \ }) -#define __cmpxchg_double_4(p1, p2, o1, o2, n1, n2) \ - __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cds") - -#define __cmpxchg_double_8(p1, p2, o1, o2, n1, n2) \ - __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cdsg") - -extern void __cmpxchg_double_called_with_bad_pointer(void); - -#define __cmpxchg_double(p1, p2, o1, o2, n1, n2) \ -({ \ - int __ret; \ - switch (sizeof(*(p1))) { \ - case 4: \ - __ret = __cmpxchg_double_4(p1, p2, o1, o2, n1, n2); \ - break; \ - case 8: \ - __ret = __cmpxchg_double_8(p1, p2, o1, o2, n1, n2); \ - break; \ - default: \ - __cmpxchg_double_called_with_bad_pointer(); \ - } \ - __ret; \ -}) - #define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ ({ \ __typeof__(p1) __p1 = (p1); \ @@ -81,7 +57,7 @@ extern void __cmpxchg_double_called_with_bad_pointer(void); BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long)); \ BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long)); \ VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\ - __cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2); \ + __cmpxchg_double(__p1, __p2, o1, o2, n1, n2); \ }) #define system_has_cmpxchg_double() 1 diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 5243a8679a1d..9dd04b9e9782 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -22,15 +22,10 @@ #define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */ #define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */ #define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */ -#define CPU_MF_INT_RI_HALTED (1 << 5) /* run-time instr. halted */ -#define CPU_MF_INT_RI_BUF_FULL (1 << 4) /* run-time instr. program - buffer full */ - #define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA) #define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \ CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \ CPU_MF_INT_SF_LSDA) -#define CPU_MF_INT_RI_MASK (CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL) /* CPU measurement facility support */ static inline int cpum_cf_avail(void) diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index 17a373576868..d7697ab802f6 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -46,8 +46,6 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit) __ctl_load(reg, cr, cr); } -void __ctl_set_vx(void); - void smp_ctl_set_bit(int cr, int bit); void smp_ctl_clear_bit(int cr, int bit); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 7e91c58072e2..5fac921c1c42 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -8,6 +8,34 @@ #ifndef _ASM_S390_DIAG_H #define _ASM_S390_DIAG_H +#include <linux/percpu.h> + +enum diag_stat_enum { + DIAG_STAT_X008, + DIAG_STAT_X00C, + DIAG_STAT_X010, + DIAG_STAT_X014, + DIAG_STAT_X044, + DIAG_STAT_X064, + DIAG_STAT_X09C, + DIAG_STAT_X0DC, + DIAG_STAT_X204, + DIAG_STAT_X210, + DIAG_STAT_X224, + DIAG_STAT_X250, + DIAG_STAT_X258, + DIAG_STAT_X288, + DIAG_STAT_X2C4, + DIAG_STAT_X2FC, + DIAG_STAT_X304, + DIAG_STAT_X308, + DIAG_STAT_X500, + NR_DIAG_STAT +}; + +void diag_stat_inc(enum diag_stat_enum nr); +void diag_stat_inc_norecursion(enum diag_stat_enum nr); + /* * Diagnose 10: Release page range */ @@ -18,6 +46,7 @@ static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn) start_addr = start_pfn << PAGE_SHIFT; end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT; + diag_stat_inc(DIAG_STAT_X010); asm volatile( "0: diag %0,%1,0x10\n" "1:\n" diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h index f7e5c36688c3..105f90e63a0e 100644 --- a/arch/s390/include/asm/etr.h +++ b/arch/s390/include/asm/etr.h @@ -211,8 +211,9 @@ static inline int etr_ptff(void *ptff_block, unsigned int func) #define ETR_PTFF_SGS 0x43 /* set gross steering rate */ /* Functions needed by the machine check handler */ -void etr_switch_to_local(void); -void etr_sync_check(void); +int etr_switch_to_local(void); +int etr_sync_check(void); +void etr_queue_work(void); /* notifier for syncs */ extern struct atomic_notifier_head s390_epoch_delta_notifier; @@ -253,7 +254,8 @@ struct stp_sstpi { } __attribute__ ((packed)); /* Functions needed by the machine check handler */ -void stp_sync_check(void); -void stp_island_check(void); +int stp_sync_check(void); +int stp_island_check(void); +void stp_queue_work(void); #endif /* __S390_ETR_H */ diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h new file mode 100644 index 000000000000..5e04f3cbd320 --- /dev/null +++ b/arch/s390/include/asm/fpu/api.h @@ -0,0 +1,30 @@ +/* + * In-kernel FPU support functions + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#ifndef _ASM_S390_FPU_API_H +#define _ASM_S390_FPU_API_H + +void save_fpu_regs(void); + +static inline int test_fp_ctl(u32 fpc) +{ + u32 orig_fpc; + int rc; + + asm volatile( + " efpc %1\n" + " sfpc %2\n" + "0: sfpc %1\n" + " la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc), "=d" (orig_fpc) + : "d" (fpc), "0" (-EINVAL)); + return rc; +} + +#endif /* _ASM_S390_FPU_API_H */ diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu/internal.h index 55dc2c0fb40a..2559b16da525 100644 --- a/arch/s390/include/asm/fpu-internal.h +++ b/arch/s390/include/asm/fpu/internal.h @@ -1,5 +1,5 @@ /* - * General floating pointer and vector register helpers + * FPU state and register content conversion primitives * * Copyright IBM Corp. 2015 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> @@ -8,50 +8,9 @@ #ifndef _ASM_S390_FPU_INTERNAL_H #define _ASM_S390_FPU_INTERNAL_H -#define FPU_USE_VX 1 /* Vector extension is active */ - -#ifndef __ASSEMBLY__ - -#include <linux/errno.h> #include <linux/string.h> -#include <asm/linkage.h> #include <asm/ctl_reg.h> -#include <asm/sigcontext.h> - -struct fpu { - __u32 fpc; /* Floating-point control */ - __u32 flags; - union { - void *regs; - freg_t *fprs; /* Floating-point register save area */ - __vector128 *vxrs; /* Vector register save area */ - }; -}; - -void save_fpu_regs(void); - -#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX)) -#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX)) - -/* VX array structure for address operand constraints in inline assemblies */ -struct vx_array { __vector128 _[__NUM_VXRS]; }; - -static inline int test_fp_ctl(u32 fpc) -{ - u32 orig_fpc; - int rc; - - asm volatile( - " efpc %1\n" - " sfpc %2\n" - "0: sfpc %1\n" - " la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc), "=d" (orig_fpc) - : "d" (fpc), "0" (-EINVAL)); - return rc; -} +#include <asm/fpu/types.h> static inline void save_vx_regs_safe(__vector128 *vxrs) { @@ -89,7 +48,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) { fpregs->pad = 0; - if (is_vx_fpu(fpu)) + if (MACHINE_HAS_VX) convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); else memcpy((freg_t *)&fpregs->fprs, fpu->fprs, @@ -98,13 +57,11 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) { - if (is_vx_fpu(fpu)) + if (MACHINE_HAS_VX) convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); else memcpy(fpu->fprs, (freg_t *)&fpregs->fprs, sizeof(fpregs->fprs)); } -#endif - #endif /* _ASM_S390_FPU_INTERNAL_H */ diff --git a/arch/s390/include/asm/fpu/types.h b/arch/s390/include/asm/fpu/types.h new file mode 100644 index 000000000000..14a8b0c14f87 --- /dev/null +++ b/arch/s390/include/asm/fpu/types.h @@ -0,0 +1,25 @@ +/* + * FPU data structures + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ + +#ifndef _ASM_S390_FPU_TYPES_H +#define _ASM_S390_FPU_TYPES_H + +#include <asm/sigcontext.h> + +struct fpu { + __u32 fpc; /* Floating-point control */ + union { + void *regs; + freg_t *fprs; /* Floating-point register save area */ + __vector128 *vxrs; /* Vector register save area */ + }; +}; + +/* VX array structure for address operand constraints in inline assemblies */ +struct vx_array { __vector128 _[__NUM_VXRS]; }; + +#endif /* _ASM_S390_FPU_TYPES_H */ diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index 113cd963dbbe..51ff96d9f287 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -24,4 +24,6 @@ struct s390_idle_data { extern struct device_attribute dev_attr_idle_count; extern struct device_attribute dev_attr_idle_time_us; +void psw_idle(struct s390_idle_data *, unsigned long); + #endif /* _S390_IDLE_H */ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index ff95d15a2384..f97b055de76a 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -47,7 +47,6 @@ enum interruption_class { IRQEXT_IUC, IRQEXT_CMS, IRQEXT_CMC, - IRQEXT_CMR, IRQEXT_FTP, IRQIO_CIO, IRQIO_QAI, @@ -96,6 +95,19 @@ enum irq_subclass { IRQ_SUBCLASS_SERVICE_SIGNAL = 9, }; +#define CR0_IRQ_SUBCLASS_MASK \ + ((1UL << (63 - 30)) /* Warning Track */ | \ + (1UL << (63 - 48)) /* Malfunction Alert */ | \ + (1UL << (63 - 49)) /* Emergency Signal */ | \ + (1UL << (63 - 50)) /* External Call */ | \ + (1UL << (63 - 52)) /* Clock Comparator */ | \ + (1UL << (63 - 53)) /* CPU Timer */ | \ + (1UL << (63 - 54)) /* Service Signal */ | \ + (1UL << (63 - 57)) /* Interrupt Key */ | \ + (1UL << (63 - 58)) /* Measurement Alert */ | \ + (1UL << (63 - 59)) /* Timing Alert */ | \ + (1UL << (63 - 62))) /* IUCV */ + void irq_subclass_register(enum irq_subclass subclass); void irq_subclass_unregister(enum irq_subclass subclass); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8ced426091e1..efaac2c3bb77 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -22,7 +22,7 @@ #include <linux/kvm.h> #include <asm/debug.h> #include <asm/cpu.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/api.h> #include <asm/isc.h> #define KVM_MAX_VCPUS 64 @@ -644,5 +644,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index e0f842308a68..41393052ac57 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -27,10 +27,9 @@ #define __S390_KVM_PARA_H #include <uapi/asm/kvm_para.h> +#include <asm/diag.h> - - -static inline long kvm_hypercall0(unsigned long nr) +static inline long __kvm_hypercall0(unsigned long nr) { register unsigned long __nr asm("1") = nr; register long __rc asm("2"); @@ -40,7 +39,13 @@ static inline long kvm_hypercall0(unsigned long nr) return __rc; } -static inline long kvm_hypercall1(unsigned long nr, unsigned long p1) +static inline long kvm_hypercall0(unsigned long nr) +{ + diag_stat_inc(DIAG_STAT_X500); + return __kvm_hypercall0(nr); +} + +static inline long __kvm_hypercall1(unsigned long nr, unsigned long p1) { register unsigned long __nr asm("1") = nr; register unsigned long __p1 asm("2") = p1; @@ -51,7 +56,13 @@ static inline long kvm_hypercall1(unsigned long nr, unsigned long p1) return __rc; } -static inline long kvm_hypercall2(unsigned long nr, unsigned long p1, +static inline long kvm_hypercall1(unsigned long nr, unsigned long p1) +{ + diag_stat_inc(DIAG_STAT_X500); + return __kvm_hypercall1(nr, p1); +} + +static inline long __kvm_hypercall2(unsigned long nr, unsigned long p1, unsigned long p2) { register unsigned long __nr asm("1") = nr; @@ -65,7 +76,14 @@ static inline long kvm_hypercall2(unsigned long nr, unsigned long p1, return __rc; } -static inline long kvm_hypercall3(unsigned long nr, unsigned long p1, +static inline long kvm_hypercall2(unsigned long nr, unsigned long p1, + unsigned long p2) +{ + diag_stat_inc(DIAG_STAT_X500); + return __kvm_hypercall2(nr, p1, p2); +} + +static inline long __kvm_hypercall3(unsigned long nr, unsigned long p1, unsigned long p2, unsigned long p3) { register unsigned long __nr asm("1") = nr; @@ -80,8 +98,14 @@ static inline long kvm_hypercall3(unsigned long nr, unsigned long p1, return __rc; } +static inline long kvm_hypercall3(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + diag_stat_inc(DIAG_STAT_X500); + return __kvm_hypercall3(nr, p1, p2, p3); +} -static inline long kvm_hypercall4(unsigned long nr, unsigned long p1, +static inline long __kvm_hypercall4(unsigned long nr, unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4) { @@ -98,7 +122,15 @@ static inline long kvm_hypercall4(unsigned long nr, unsigned long p1, return __rc; } -static inline long kvm_hypercall5(unsigned long nr, unsigned long p1, +static inline long kvm_hypercall4(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + diag_stat_inc(DIAG_STAT_X500); + return __kvm_hypercall4(nr, p1, p2, p3, p4); +} + +static inline long __kvm_hypercall5(unsigned long nr, unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4, unsigned long p5) { @@ -116,7 +148,15 @@ static inline long kvm_hypercall5(unsigned long nr, unsigned long p1, return __rc; } -static inline long kvm_hypercall6(unsigned long nr, unsigned long p1, +static inline long kvm_hypercall5(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5) +{ + diag_stat_inc(DIAG_STAT_X500); + return __kvm_hypercall5(nr, p1, p2, p3, p4, p5); +} + +static inline long __kvm_hypercall6(unsigned long nr, unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4, unsigned long p5, unsigned long p6) @@ -137,6 +177,15 @@ static inline long kvm_hypercall6(unsigned long nr, unsigned long p1, return __rc; } +static inline long kvm_hypercall6(unsigned long nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, + unsigned long p6) +{ + diag_stat_inc(DIAG_STAT_X500); + return __kvm_hypercall6(nr, p1, p2, p3, p4, p5, p6); +} + /* kvm on s390 is always paravirtualization enabled */ static inline int kvm_para_available(void) { diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 663f23e37460..afe1cfebf1a4 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -67,7 +67,7 @@ struct _lowcore { __u8 pad_0x00c4[0x00c8-0x00c4]; /* 0x00c4 */ __u32 stfl_fac_list; /* 0x00c8 */ __u8 pad_0x00cc[0x00e8-0x00cc]; /* 0x00cc */ - __u32 mcck_interruption_code[2]; /* 0x00e8 */ + __u64 mcck_interruption_code; /* 0x00e8 */ __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */ __u32 external_damage_code; /* 0x00f4 */ __u64 failing_storage_address; /* 0x00f8 */ @@ -132,7 +132,14 @@ struct _lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0358 */ __u64 user_asce; /* 0x0360 */ - __u64 current_pid; /* 0x0368 */ + + /* + * The lpp and current_pid fields form a + * 64-bit value that is set as program + * parameter with the LPP instruction. + */ + __u32 lpp; /* 0x0368 */ + __u32 current_pid; /* 0x036c */ /* SMP info area */ __u32 cpu_nr; /* 0x0370 */ diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 3027a5a72b74..b75fd910386a 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -11,51 +11,62 @@ #ifndef _ASM_S390_NMI_H #define _ASM_S390_NMI_H +#include <linux/const.h> #include <linux/types.h> -struct mci { - __u32 sd : 1; /* 00 system damage */ - __u32 pd : 1; /* 01 instruction-processing damage */ - __u32 sr : 1; /* 02 system recovery */ - __u32 : 1; /* 03 */ - __u32 cd : 1; /* 04 timing-facility damage */ - __u32 ed : 1; /* 05 external damage */ - __u32 : 1; /* 06 */ - __u32 dg : 1; /* 07 degradation */ - __u32 w : 1; /* 08 warning pending */ - __u32 cp : 1; /* 09 channel-report pending */ - __u32 sp : 1; /* 10 service-processor damage */ - __u32 ck : 1; /* 11 channel-subsystem damage */ - __u32 : 2; /* 12-13 */ - __u32 b : 1; /* 14 backed up */ - __u32 : 1; /* 15 */ - __u32 se : 1; /* 16 storage error uncorrected */ - __u32 sc : 1; /* 17 storage error corrected */ - __u32 ke : 1; /* 18 storage-key error uncorrected */ - __u32 ds : 1; /* 19 storage degradation */ - __u32 wp : 1; /* 20 psw mwp validity */ - __u32 ms : 1; /* 21 psw mask and key validity */ - __u32 pm : 1; /* 22 psw program mask and cc validity */ - __u32 ia : 1; /* 23 psw instruction address validity */ - __u32 fa : 1; /* 24 failing storage address validity */ - __u32 vr : 1; /* 25 vector register validity */ - __u32 ec : 1; /* 26 external damage code validity */ - __u32 fp : 1; /* 27 floating point register validity */ - __u32 gr : 1; /* 28 general register validity */ - __u32 cr : 1; /* 29 control register validity */ - __u32 : 1; /* 30 */ - __u32 st : 1; /* 31 storage logical validity */ - __u32 ie : 1; /* 32 indirect storage error */ - __u32 ar : 1; /* 33 access register validity */ - __u32 da : 1; /* 34 delayed access exception */ - __u32 : 7; /* 35-41 */ - __u32 pr : 1; /* 42 tod programmable register validity */ - __u32 fc : 1; /* 43 fp control register validity */ - __u32 ap : 1; /* 44 ancillary report */ - __u32 : 1; /* 45 */ - __u32 ct : 1; /* 46 cpu timer validity */ - __u32 cc : 1; /* 47 clock comparator validity */ - __u32 : 16; /* 47-63 */ +#define MCCK_CODE_SYSTEM_DAMAGE _BITUL(63) +#define MCCK_CODE_CPU_TIMER_VALID _BITUL(63 - 46) +#define MCCK_CODE_PSW_MWP_VALID _BITUL(63 - 20) +#define MCCK_CODE_PSW_IA_VALID _BITUL(63 - 23) + +#ifndef __ASSEMBLY__ + +union mci { + unsigned long val; + struct { + u64 sd : 1; /* 00 system damage */ + u64 pd : 1; /* 01 instruction-processing damage */ + u64 sr : 1; /* 02 system recovery */ + u64 : 1; /* 03 */ + u64 cd : 1; /* 04 timing-facility damage */ + u64 ed : 1; /* 05 external damage */ + u64 : 1; /* 06 */ + u64 dg : 1; /* 07 degradation */ + u64 w : 1; /* 08 warning pending */ + u64 cp : 1; /* 09 channel-report pending */ + u64 sp : 1; /* 10 service-processor damage */ + u64 ck : 1; /* 11 channel-subsystem damage */ + u64 : 2; /* 12-13 */ + u64 b : 1; /* 14 backed up */ + u64 : 1; /* 15 */ + u64 se : 1; /* 16 storage error uncorrected */ + u64 sc : 1; /* 17 storage error corrected */ + u64 ke : 1; /* 18 storage-key error uncorrected */ + u64 ds : 1; /* 19 storage degradation */ + u64 wp : 1; /* 20 psw mwp validity */ + u64 ms : 1; /* 21 psw mask and key validity */ + u64 pm : 1; /* 22 psw program mask and cc validity */ + u64 ia : 1; /* 23 psw instruction address validity */ + u64 fa : 1; /* 24 failing storage address validity */ + u64 vr : 1; /* 25 vector register validity */ + u64 ec : 1; /* 26 external damage code validity */ + u64 fp : 1; /* 27 floating point register validity */ + u64 gr : 1; /* 28 general register validity */ + u64 cr : 1; /* 29 control register validity */ + u64 : 1; /* 30 */ + u64 st : 1; /* 31 storage logical validity */ + u64 ie : 1; /* 32 indirect storage error */ + u64 ar : 1; /* 33 access register validity */ + u64 da : 1; /* 34 delayed access exception */ + u64 : 7; /* 35-41 */ + u64 pr : 1; /* 42 tod programmable register validity */ + u64 fc : 1; /* 43 fp control register validity */ + u64 ap : 1; /* 44 ancillary report */ + u64 : 1; /* 45 */ + u64 ct : 1; /* 46 cpu timer validity */ + u64 cc : 1; /* 47 clock comparator validity */ + u64 : 16; /* 47-63 */ + }; }; struct pt_regs; @@ -63,4 +74,5 @@ struct pt_regs; extern void s390_handle_mcck(void); extern void s390_do_machine_check(struct pt_regs *regs); +#endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_NMI_H */ diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h index 2a0efc63b9e5..dc19ee0c92aa 100644 --- a/arch/s390/include/asm/numa.h +++ b/arch/s390/include/asm/numa.h @@ -19,7 +19,7 @@ int numa_pfn_to_nid(unsigned long pfn); int __node_distance(int a, int b); void numa_update_cpu_topology(void); -extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; +extern cpumask_t node_to_cpumask_map[MAX_NUMNODES]; extern int numa_debug_enabled; #else diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 34d960353a08..c873e682b67f 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -62,6 +62,8 @@ struct zpci_bar_struct { u8 size; /* order 2 exponent */ }; +struct s390_domain; + /* Private data per function */ struct zpci_dev { struct pci_dev *pdev; @@ -118,6 +120,8 @@ struct zpci_dev { struct dentry *debugfs_dev; struct dentry *debugfs_perf; + + struct s390_domain *s390_domain; /* s390 IOMMU domain data */ }; static inline bool zdev_enabled(struct zpci_dev *zdev) diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 30b4c179c38c..7a7abf1a5537 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -192,5 +192,8 @@ static inline unsigned long *get_st_pto(unsigned long entry) /* Prototypes */ int zpci_dma_init_device(struct zpci_dev *); void zpci_dma_exit_device(struct zpci_dev *); - +void dma_free_seg_table(unsigned long); +unsigned long *dma_alloc_cpu_table(void); +void dma_cleanup_tables(unsigned long *); +void dma_update_cpu_trans(unsigned long *, void *, dma_addr_t, int); #endif diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index bdb2f51124ed..024f85f947ae 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -193,9 +193,15 @@ static inline int is_module_addr(void *addr) #define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */ #define __HAVE_ARCH_PTE_SPECIAL +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _PAGE_SOFT_DIRTY 0x002 /* SW pte soft dirty bit */ +#else +#define _PAGE_SOFT_DIRTY 0x000 +#endif + /* Set of bits not changed in pte_modify */ #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \ - _PAGE_YOUNG) + _PAGE_YOUNG | _PAGE_SOFT_DIRTY) /* * handle_pte_fault uses pte_present and pte_none to find out the pte type @@ -285,6 +291,12 @@ static inline int is_module_addr(void *addr) #define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ #define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ +#ifdef CONFIG_MEM_SOFT_DIRTY +#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */ +#else +#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */ +#endif + /* * Segment table entry encoding (R = read-only, I = invalid, y = young bit): * dy..R...I...wr @@ -589,6 +601,43 @@ static inline int pmd_protnone(pmd_t pmd) } #endif +static inline int pte_soft_dirty(pte_t pte) +{ + return pte_val(pte) & _PAGE_SOFT_DIRTY; +} +#define pte_swp_soft_dirty pte_soft_dirty + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + pte_val(pte) |= _PAGE_SOFT_DIRTY; + return pte; +} +#define pte_swp_mksoft_dirty pte_mksoft_dirty + +static inline pte_t pte_clear_soft_dirty(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_SOFT_DIRTY; + return pte; +} +#define pte_swp_clear_soft_dirty pte_clear_soft_dirty + +static inline int pmd_soft_dirty(pmd_t pmd) +{ + return pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY; +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + pmd_val(pmd) |= _SEGMENT_ENTRY_SOFT_DIRTY; + return pmd; +} + +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) +{ + pmd_val(pmd) &= ~_SEGMENT_ENTRY_SOFT_DIRTY; + return pmd; +} + static inline pgste_t pgste_get_lock(pte_t *ptep) { unsigned long new = 0; @@ -889,7 +938,7 @@ static inline pte_t pte_mkclean(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_DIRTY; + pte_val(pte) |= _PAGE_DIRTY | _PAGE_SOFT_DIRTY; if (pte_val(pte) & _PAGE_WRITE) pte_val(pte) &= ~_PAGE_PROTECT; return pte; @@ -1218,8 +1267,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, pte_t entry, int dirty) { pgste_t pgste; + pte_t oldpte; - if (pte_same(*ptep, entry)) + oldpte = *ptep; + if (pte_same(oldpte, entry)) return 0; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); @@ -1229,7 +1280,8 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, ptep_flush_direct(vma->vm_mm, address, ptep); if (mm_has_pgste(vma->vm_mm)) { - pgste_set_key(ptep, pgste, entry, vma->vm_mm); + if (pte_val(oldpte) & _PAGE_INVALID) + pgste_set_key(ptep, pgste, entry, vma->vm_mm); pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); } else @@ -1340,7 +1392,8 @@ static inline pmd_t pmd_mkclean(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { if (pmd_large(pmd)) { - pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY; + pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | + _SEGMENT_ENTRY_SOFT_DIRTY; if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; } @@ -1371,7 +1424,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) if (pmd_large(pmd)) { pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG | - _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT; + _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT | + _SEGMENT_ENTRY_SOFT_DIRTY; pmd_val(pmd) |= massage_pgprot_pmd(newprot); if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)) pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 085fb0d3c54e..b16c3d0a1b9f 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -11,15 +11,19 @@ #ifndef __ASM_S390_PROCESSOR_H #define __ASM_S390_PROCESSOR_H +#include <linux/const.h> + #define CIF_MCCK_PENDING 0 /* machine check handling is pending */ #define CIF_ASCE 1 /* user asce needs fixup / uaccess */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ -#define CIF_FPU 3 /* restore vector registers */ +#define CIF_FPU 3 /* restore FPU registers */ +#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */ -#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING) -#define _CIF_ASCE (1<<CIF_ASCE) -#define _CIF_NOHZ_DELAY (1<<CIF_NOHZ_DELAY) -#define _CIF_FPU (1<<CIF_FPU) +#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING) +#define _CIF_ASCE _BITUL(CIF_ASCE) +#define _CIF_NOHZ_DELAY _BITUL(CIF_NOHZ_DELAY) +#define _CIF_FPU _BITUL(CIF_FPU) +#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ) #ifndef __ASSEMBLY__ @@ -30,21 +34,22 @@ #include <asm/ptrace.h> #include <asm/setup.h> #include <asm/runtime_instr.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/types.h> +#include <asm/fpu/internal.h> static inline void set_cpu_flag(int flag) { - S390_lowcore.cpu_flags |= (1U << flag); + S390_lowcore.cpu_flags |= (1UL << flag); } static inline void clear_cpu_flag(int flag) { - S390_lowcore.cpu_flags &= ~(1U << flag); + S390_lowcore.cpu_flags &= ~(1UL << flag); } static inline int test_cpu_flag(int flag) { - return !!(S390_lowcore.cpu_flags & (1U << flag)); + return !!(S390_lowcore.cpu_flags & (1UL << flag)); } #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) @@ -102,7 +107,6 @@ struct thread_struct { struct list_head list; /* cpu runtime instrumentation */ struct runtime_instr_cb *ri_cb; - int ri_signum; unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ }; @@ -139,8 +143,10 @@ struct stack_frame { #define ARCH_MIN_TASKALIGN 8 +extern __vector128 init_task_fpu_regs[__NUM_VXRS]; #define INIT_THREAD { \ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ + .fpu.regs = (void *)&init_task_fpu_regs, \ } /* @@ -217,7 +223,7 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ -static inline void __load_psw_mask (unsigned long mask) +static inline void __load_psw_mask(unsigned long mask) { unsigned long addr; psw_t psw; @@ -243,6 +249,16 @@ static inline unsigned long __extract_psw(void) return (((unsigned long) reg1) << 32) | ((unsigned long) reg2); } +static inline void local_mcck_enable(void) +{ + __load_psw_mask(__extract_psw() | PSW_MASK_MCHECK); +} + +static inline void local_mcck_disable(void) +{ + __load_psw_mask(__extract_psw() & ~PSW_MASK_MCHECK); +} + /* * Rewind PSW instruction address by specified number of bytes. */ @@ -266,65 +282,14 @@ void enabled_wait(void); */ static inline void __noreturn disabled_wait(unsigned long code) { - unsigned long ctl_buf; - psw_t dw_psw; - - dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; - dw_psw.addr = code; - /* - * Store status and then load disabled wait psw, - * the processor is dead afterwards - */ - asm volatile( - " stctg 0,0,0(%2)\n" - " ni 4(%2),0xef\n" /* switch off protection */ - " lctlg 0,0,0(%2)\n" - " lghi 1,0x1000\n" - " stpt 0x328(1)\n" /* store timer */ - " stckc 0x330(1)\n" /* store clock comparator */ - " stpx 0x318(1)\n" /* store prefix register */ - " stam 0,15,0x340(1)\n"/* store access registers */ - " stfpc 0x31c(1)\n" /* store fpu control */ - " std 0,0x200(1)\n" /* store f0 */ - " std 1,0x208(1)\n" /* store f1 */ - " std 2,0x210(1)\n" /* store f2 */ - " std 3,0x218(1)\n" /* store f3 */ - " std 4,0x220(1)\n" /* store f4 */ - " std 5,0x228(1)\n" /* store f5 */ - " std 6,0x230(1)\n" /* store f6 */ - " std 7,0x238(1)\n" /* store f7 */ - " std 8,0x240(1)\n" /* store f8 */ - " std 9,0x248(1)\n" /* store f9 */ - " std 10,0x250(1)\n" /* store f10 */ - " std 11,0x258(1)\n" /* store f11 */ - " std 12,0x260(1)\n" /* store f12 */ - " std 13,0x268(1)\n" /* store f13 */ - " std 14,0x270(1)\n" /* store f14 */ - " std 15,0x278(1)\n" /* store f15 */ - " stmg 0,15,0x280(1)\n"/* store general registers */ - " stctg 0,15,0x380(1)\n"/* store control registers */ - " oi 0x384(1),0x10\n"/* fake protection bit */ - " lpswe 0(%1)" - : "=m" (ctl_buf) - : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1"); - while (1); -} + psw_t psw; -/* - * Use to set psw mask except for the first byte which - * won't be changed by this function. - */ -static inline void -__set_psw_mask(unsigned long mask) -{ - __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); + psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; + psw.addr = code; + __load_psw(psw); + while (1); } -#define local_mcck_enable() \ - __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK) -#define local_mcck_disable() \ - __set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT) - /* * Basic Machine Check/Program Check Handler. */ diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 6feda2599282..37cbc50947f2 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -6,13 +6,14 @@ #ifndef _S390_PTRACE_H #define _S390_PTRACE_H +#include <linux/const.h> #include <uapi/asm/ptrace.h> #define PIF_SYSCALL 0 /* inside a system call */ #define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ -#define _PIF_SYSCALL (1<<PIF_SYSCALL) -#define _PIF_PER_TRAP (1<<PIF_PER_TRAP) +#define _PIF_SYSCALL _BITUL(PIF_SYSCALL) +#define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP) #ifndef __ASSEMBLY__ @@ -128,17 +129,17 @@ struct per_struct_kernel { static inline void set_pt_regs_flag(struct pt_regs *regs, int flag) { - regs->flags |= (1U << flag); + regs->flags |= (1UL << flag); } static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag) { - regs->flags &= ~(1U << flag); + regs->flags &= ~(1UL << flag); } static inline int test_pt_regs_flag(struct pt_regs *regs, int flag) { - return !!(regs->flags & (1U << flag)); + return !!(regs->flags & (1UL << flag)); } /* diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index b8ffc1bd0a9f..23537661da0e 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -5,11 +5,38 @@ #ifndef _ASM_S390_SETUP_H #define _ASM_S390_SETUP_H +#include <linux/const.h> #include <uapi/asm/setup.h> #define PARMAREA 0x10400 +/* + * Machine features detected in head.S + */ + +#define MACHINE_FLAG_VM _BITUL(0) +#define MACHINE_FLAG_IEEE _BITUL(1) +#define MACHINE_FLAG_CSP _BITUL(2) +#define MACHINE_FLAG_MVPG _BITUL(3) +#define MACHINE_FLAG_DIAG44 _BITUL(4) +#define MACHINE_FLAG_IDTE _BITUL(5) +#define MACHINE_FLAG_DIAG9C _BITUL(6) +#define MACHINE_FLAG_KVM _BITUL(8) +#define MACHINE_FLAG_ESOP _BITUL(9) +#define MACHINE_FLAG_EDAT1 _BITUL(10) +#define MACHINE_FLAG_EDAT2 _BITUL(11) +#define MACHINE_FLAG_LPAR _BITUL(12) +#define MACHINE_FLAG_LPP _BITUL(13) +#define MACHINE_FLAG_TOPOLOGY _BITUL(14) +#define MACHINE_FLAG_TE _BITUL(15) +#define MACHINE_FLAG_TLB_LC _BITUL(17) +#define MACHINE_FLAG_VX _BITUL(18) +#define MACHINE_FLAG_CAD _BITUL(19) + +#define LPP_MAGIC _BITUL(31) +#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) + #ifndef __ASSEMBLY__ #include <asm/lowcore.h> @@ -28,29 +55,6 @@ extern unsigned long max_physmem_end; extern void detect_memory_memblock(void); -/* - * Machine features detected in head.S - */ - -#define MACHINE_FLAG_VM (1UL << 0) -#define MACHINE_FLAG_IEEE (1UL << 1) -#define MACHINE_FLAG_CSP (1UL << 2) -#define MACHINE_FLAG_MVPG (1UL << 3) -#define MACHINE_FLAG_DIAG44 (1UL << 4) -#define MACHINE_FLAG_IDTE (1UL << 5) -#define MACHINE_FLAG_DIAG9C (1UL << 6) -#define MACHINE_FLAG_KVM (1UL << 8) -#define MACHINE_FLAG_ESOP (1UL << 9) -#define MACHINE_FLAG_EDAT1 (1UL << 10) -#define MACHINE_FLAG_EDAT2 (1UL << 11) -#define MACHINE_FLAG_LPAR (1UL << 12) -#define MACHINE_FLAG_LPP (1UL << 13) -#define MACHINE_FLAG_TOPOLOGY (1UL << 14) -#define MACHINE_FLAG_TE (1UL << 15) -#define MACHINE_FLAG_TLB_LC (1UL << 17) -#define MACHINE_FLAG_VX (1UL << 18) -#define MACHINE_FLAG_CAD (1UL << 19) - #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) #define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 0e37cd041241..63ebf37d3143 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -87,7 +87,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) { typecheck(unsigned int, lp->lock); asm volatile( - __ASM_BARRIER "st %1,%0\n" : "+Q" (lp->lock) : "d" (0) @@ -169,7 +168,6 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw) \ typecheck(unsigned int *, ptr); \ asm volatile( \ - "bcr 14,0\n" \ op_string " %0,%2,%1\n" \ : "=d" (old_val), "+Q" (*ptr) \ : "d" (op_val) \ @@ -243,7 +241,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) rw->owner = 0; asm volatile( - __ASM_BARRIER "st %1,%0\n" : "+Q" (rw->lock) : "d" (0) diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index dcadfde32265..12d45f0cfdd9 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -8,7 +8,7 @@ #define __ASM_SWITCH_TO_H #include <linux/thread_info.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/api.h> #include <asm/ptrace.h> extern struct task_struct *__switch_to(void *, void *); diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 4c27ec764c36..692b9247c019 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -7,6 +7,8 @@ #ifndef _ASM_THREAD_INFO_H #define _ASM_THREAD_INFO_H +#include <linux/const.h> + /* * Size of kernel stack for each process */ @@ -83,16 +85,16 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_BLOCK_STEP 20 /* This task is block stepped */ #define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */ -#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) -#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) -#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) -#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) -#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) -#define _TIF_SECCOMP (1<<TIF_SECCOMP) -#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) -#define _TIF_UPROBE (1<<TIF_UPROBE) -#define _TIF_31BIT (1<<TIF_31BIT) -#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP) +#define _TIF_NOTIFY_RESUME _BITUL(TIF_NOTIFY_RESUME) +#define _TIF_SIGPENDING _BITUL(TIF_SIGPENDING) +#define _TIF_NEED_RESCHED _BITUL(TIF_NEED_RESCHED) +#define _TIF_SYSCALL_TRACE _BITUL(TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_AUDIT _BITUL(TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP _BITUL(TIF_SECCOMP) +#define _TIF_SYSCALL_TRACEPOINT _BITUL(TIF_SYSCALL_TRACEPOINT) +#define _TIF_UPROBE _BITUL(TIF_UPROBE) +#define _TIF_31BIT _BITUL(TIF_31BIT) +#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) #define is_32bit_task() (test_thread_flag(TIF_31BIT)) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 27ebde643933..94fc55fc72ce 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -68,7 +68,7 @@ static inline int cpu_to_node(int cpu) #define cpumask_of_node cpumask_of_node static inline const struct cpumask *cpumask_of_node(int node) { - return node_to_cpumask_map[node]; + return &node_to_cpumask_map[node]; } /* diff --git a/arch/s390/include/asm/trace/diag.h b/arch/s390/include/asm/trace/diag.h new file mode 100644 index 000000000000..776f307960cc --- /dev/null +++ b/arch/s390/include/asm/trace/diag.h @@ -0,0 +1,43 @@ +/* + * Tracepoint header for s390 diagnose calls + * + * Copyright IBM Corp. 2015 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM s390 + +#if !defined(_TRACE_S390_DIAG_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_S390_DIAG_H + +#include <linux/tracepoint.h> + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH asm/trace +#define TRACE_INCLUDE_FILE diag + +TRACE_EVENT(diagnose, + TP_PROTO(unsigned short nr), + TP_ARGS(nr), + TP_STRUCT__entry( + __field(unsigned short, nr) + ), + TP_fast_assign( + __entry->nr = nr; + ), + TP_printk("nr=0x%x", __entry->nr) +); + +#ifdef CONFIG_TRACEPOINTS +void trace_diagnose_norecursion(int diag_nr); +#else +static inline void trace_diagnose_norecursion(int diag_nr) { } +#endif + +#endif /* _TRACE_S390_DIAG_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index b756c6348ac6..dc167a23b920 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -66,6 +66,8 @@ obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o +obj-$(CONFIG_TRACEPOINTS) += trace.o + # vdso obj-y += vdso64/ obj-$(CONFIG_COMPAT) += vdso32/ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 48c9af7a7683..9cd248f637c7 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -23,59 +23,64 @@ int main(void) { - DEFINE(__TASK_thread_info, offsetof(struct task_struct, stack)); - DEFINE(__TASK_thread, offsetof(struct task_struct, thread)); - DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); + /* task struct offsets */ + OFFSET(__TASK_thread_info, task_struct, stack); + OFFSET(__TASK_thread, task_struct, thread); + OFFSET(__TASK_pid, task_struct, pid); BLANK(); - DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp)); - DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc)); - DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags)); - DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs)); - DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause)); - DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address)); - DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid)); - DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb)); + /* thread struct offsets */ + OFFSET(__THREAD_ksp, thread_struct, ksp); + OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc); + OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs); + OFFSET(__THREAD_per_cause, thread_struct, per_event.cause); + OFFSET(__THREAD_per_address, thread_struct, per_event.address); + OFFSET(__THREAD_per_paid, thread_struct, per_event.paid); + OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb); BLANK(); - DEFINE(__TI_task, offsetof(struct thread_info, task)); - DEFINE(__TI_flags, offsetof(struct thread_info, flags)); - DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table)); - DEFINE(__TI_cpu, offsetof(struct thread_info, cpu)); - DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); - DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer)); - DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer)); - DEFINE(__TI_last_break, offsetof(struct thread_info, last_break)); + /* thread info offsets */ + OFFSET(__TI_task, thread_info, task); + OFFSET(__TI_flags, thread_info, flags); + OFFSET(__TI_sysc_table, thread_info, sys_call_table); + OFFSET(__TI_cpu, thread_info, cpu); + OFFSET(__TI_precount, thread_info, preempt_count); + OFFSET(__TI_user_timer, thread_info, user_timer); + OFFSET(__TI_system_timer, thread_info, system_timer); + OFFSET(__TI_last_break, thread_info, last_break); BLANK(); - DEFINE(__PT_ARGS, offsetof(struct pt_regs, args)); - DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); - DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); - DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); - DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code)); - DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm)); - DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long)); - DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags)); + /* pt_regs offsets */ + OFFSET(__PT_ARGS, pt_regs, args); + OFFSET(__PT_PSW, pt_regs, psw); + OFFSET(__PT_GPRS, pt_regs, gprs); + OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); + OFFSET(__PT_INT_CODE, pt_regs, int_code); + OFFSET(__PT_INT_PARM, pt_regs, int_parm); + OFFSET(__PT_INT_PARM_LONG, pt_regs, int_parm_long); + OFFSET(__PT_FLAGS, pt_regs, flags); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); - DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); - DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs)); - DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1)); + /* stack_frame offsets */ + OFFSET(__SF_BACKCHAIN, stack_frame, back_chain); + OFFSET(__SF_GPRS, stack_frame, gprs); + OFFSET(__SF_EMPTY, stack_frame, empty1); BLANK(); /* timeval/timezone offsets for use by vdso */ - DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count)); - DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); - DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); - DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); - DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); - DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); - DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); - DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); - DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec)); - DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec)); - DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); - DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult)); - DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift)); - DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base)); - DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time)); + OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count); + OFFSET(__VDSO_XTIME_STAMP, vdso_data, xtime_tod_stamp); + OFFSET(__VDSO_XTIME_SEC, vdso_data, xtime_clock_sec); + OFFSET(__VDSO_XTIME_NSEC, vdso_data, xtime_clock_nsec); + OFFSET(__VDSO_XTIME_CRS_SEC, vdso_data, xtime_coarse_sec); + OFFSET(__VDSO_XTIME_CRS_NSEC, vdso_data, xtime_coarse_nsec); + OFFSET(__VDSO_WTOM_SEC, vdso_data, wtom_clock_sec); + OFFSET(__VDSO_WTOM_NSEC, vdso_data, wtom_clock_nsec); + OFFSET(__VDSO_WTOM_CRS_SEC, vdso_data, wtom_coarse_sec); + OFFSET(__VDSO_WTOM_CRS_NSEC, vdso_data, wtom_coarse_nsec); + OFFSET(__VDSO_TIMEZONE, vdso_data, tz_minuteswest); + OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available); + OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult); + OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); + OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); + OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); + BLANK(); /* constants used by the vdso */ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); @@ -86,101 +91,105 @@ int main(void) DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ - DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter)); - DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit)); - DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter)); - DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit)); - /* lowcore offsets */ - DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params)); - DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr)); - DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code)); - DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc)); - DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code)); - DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc)); - DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code)); - DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code)); - DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num)); - DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code)); - DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid)); - DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address)); - DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id)); - DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id)); - DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id)); - DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id)); - DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code)); - DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id)); - DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr)); - DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm)); - DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word)); - DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list)); - DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code)); - DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code)); - DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw)); - DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw)); - DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw)); - DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw)); - DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw)); - DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw)); - DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw)); - DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw)); - DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw)); - DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw)); - DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw)); - DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw)); + OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); + OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); + OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter); + OFFSET(__TIMER_IDLE_EXIT, s390_idle_data, timer_idle_exit); BLANK(); - DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync)); - DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async)); - DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart)); - DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags)); - DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw)); - DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw)); - DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer)); - DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer)); - DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer)); - DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer)); - DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer)); - DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer)); - DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer)); - DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer)); - DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock)); - DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task)); - DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid)); - DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info)); - DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); - DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); - DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); - DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack)); - DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn)); - DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data)); - DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source)); - DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce)); - DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); - DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); - DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); - DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); - DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); + /* hardware defined lowcore locations 0x000 - 0x1ff */ + OFFSET(__LC_EXT_PARAMS, _lowcore, ext_params); + OFFSET(__LC_EXT_CPU_ADDR, _lowcore, ext_cpu_addr); + OFFSET(__LC_EXT_INT_CODE, _lowcore, ext_int_code); + OFFSET(__LC_SVC_ILC, _lowcore, svc_ilc); + OFFSET(__LC_SVC_INT_CODE, _lowcore, svc_code); + OFFSET(__LC_PGM_ILC, _lowcore, pgm_ilc); + OFFSET(__LC_PGM_INT_CODE, _lowcore, pgm_code); + OFFSET(__LC_DATA_EXC_CODE, _lowcore, data_exc_code); + OFFSET(__LC_MON_CLASS_NR, _lowcore, mon_class_num); + OFFSET(__LC_PER_CODE, _lowcore, per_code); + OFFSET(__LC_PER_ATMID, _lowcore, per_atmid); + OFFSET(__LC_PER_ADDRESS, _lowcore, per_address); + OFFSET(__LC_EXC_ACCESS_ID, _lowcore, exc_access_id); + OFFSET(__LC_PER_ACCESS_ID, _lowcore, per_access_id); + OFFSET(__LC_OP_ACCESS_ID, _lowcore, op_access_id); + OFFSET(__LC_AR_MODE_ID, _lowcore, ar_mode_id); + OFFSET(__LC_TRANS_EXC_CODE, _lowcore, trans_exc_code); + OFFSET(__LC_MON_CODE, _lowcore, monitor_code); + OFFSET(__LC_SUBCHANNEL_ID, _lowcore, subchannel_id); + OFFSET(__LC_SUBCHANNEL_NR, _lowcore, subchannel_nr); + OFFSET(__LC_IO_INT_PARM, _lowcore, io_int_parm); + OFFSET(__LC_IO_INT_WORD, _lowcore, io_int_word); + OFFSET(__LC_STFL_FAC_LIST, _lowcore, stfl_fac_list); + OFFSET(__LC_MCCK_CODE, _lowcore, mcck_interruption_code); + OFFSET(__LC_MCCK_FAIL_STOR_ADDR, _lowcore, failing_storage_address); + OFFSET(__LC_LAST_BREAK, _lowcore, breaking_event_addr); + OFFSET(__LC_RST_OLD_PSW, _lowcore, restart_old_psw); + OFFSET(__LC_EXT_OLD_PSW, _lowcore, external_old_psw); + OFFSET(__LC_SVC_OLD_PSW, _lowcore, svc_old_psw); + OFFSET(__LC_PGM_OLD_PSW, _lowcore, program_old_psw); + OFFSET(__LC_MCK_OLD_PSW, _lowcore, mcck_old_psw); + OFFSET(__LC_IO_OLD_PSW, _lowcore, io_old_psw); + OFFSET(__LC_RST_NEW_PSW, _lowcore, restart_psw); + OFFSET(__LC_EXT_NEW_PSW, _lowcore, external_new_psw); + OFFSET(__LC_SVC_NEW_PSW, _lowcore, svc_new_psw); + OFFSET(__LC_PGM_NEW_PSW, _lowcore, program_new_psw); + OFFSET(__LC_MCK_NEW_PSW, _lowcore, mcck_new_psw); + OFFSET(__LC_IO_NEW_PSW, _lowcore, io_new_psw); + /* software defined lowcore locations 0x200 - 0xdff*/ + OFFSET(__LC_SAVE_AREA_SYNC, _lowcore, save_area_sync); + OFFSET(__LC_SAVE_AREA_ASYNC, _lowcore, save_area_async); + OFFSET(__LC_SAVE_AREA_RESTART, _lowcore, save_area_restart); + OFFSET(__LC_CPU_FLAGS, _lowcore, cpu_flags); + OFFSET(__LC_RETURN_PSW, _lowcore, return_psw); + OFFSET(__LC_RETURN_MCCK_PSW, _lowcore, return_mcck_psw); + OFFSET(__LC_SYNC_ENTER_TIMER, _lowcore, sync_enter_timer); + OFFSET(__LC_ASYNC_ENTER_TIMER, _lowcore, async_enter_timer); + OFFSET(__LC_MCCK_ENTER_TIMER, _lowcore, mcck_enter_timer); + OFFSET(__LC_EXIT_TIMER, _lowcore, exit_timer); + OFFSET(__LC_USER_TIMER, _lowcore, user_timer); + OFFSET(__LC_SYSTEM_TIMER, _lowcore, system_timer); + OFFSET(__LC_STEAL_TIMER, _lowcore, steal_timer); + OFFSET(__LC_LAST_UPDATE_TIMER, _lowcore, last_update_timer); + OFFSET(__LC_LAST_UPDATE_CLOCK, _lowcore, last_update_clock); + OFFSET(__LC_INT_CLOCK, _lowcore, int_clock); + OFFSET(__LC_MCCK_CLOCK, _lowcore, mcck_clock); + OFFSET(__LC_CURRENT, _lowcore, current_task); + OFFSET(__LC_THREAD_INFO, _lowcore, thread_info); + OFFSET(__LC_KERNEL_STACK, _lowcore, kernel_stack); + OFFSET(__LC_ASYNC_STACK, _lowcore, async_stack); + OFFSET(__LC_PANIC_STACK, _lowcore, panic_stack); + OFFSET(__LC_RESTART_STACK, _lowcore, restart_stack); + OFFSET(__LC_RESTART_FN, _lowcore, restart_fn); + OFFSET(__LC_RESTART_DATA, _lowcore, restart_data); + OFFSET(__LC_RESTART_SOURCE, _lowcore, restart_source); + OFFSET(__LC_USER_ASCE, _lowcore, user_asce); + OFFSET(__LC_LPP, _lowcore, lpp); + OFFSET(__LC_CURRENT_PID, _lowcore, current_pid); + OFFSET(__LC_PERCPU_OFFSET, _lowcore, percpu_offset); + OFFSET(__LC_VDSO_PER_CPU, _lowcore, vdso_per_cpu_data); + OFFSET(__LC_MACHINE_FLAGS, _lowcore, machine_flags); + OFFSET(__LC_GMAP, _lowcore, gmap); + OFFSET(__LC_PASTE, _lowcore, paste); + /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ + OFFSET(__LC_DUMP_REIPL, _lowcore, ipib); + /* hardware defined lowcore locations 0x1000 - 0x18ff */ + OFFSET(__LC_VX_SAVE_AREA_ADDR, _lowcore, vector_save_area_addr); + OFFSET(__LC_EXT_PARAMS2, _lowcore, ext_params2); + OFFSET(SAVE_AREA_BASE, _lowcore, floating_pt_save_area); + OFFSET(__LC_FPREGS_SAVE_AREA, _lowcore, floating_pt_save_area); + OFFSET(__LC_GPREGS_SAVE_AREA, _lowcore, gpregs_save_area); + OFFSET(__LC_PSW_SAVE_AREA, _lowcore, psw_save_area); + OFFSET(__LC_PREFIX_SAVE_AREA, _lowcore, prefixreg_save_area); + OFFSET(__LC_FP_CREG_SAVE_AREA, _lowcore, fpt_creg_save_area); + OFFSET(__LC_CPU_TIMER_SAVE_AREA, _lowcore, cpu_timer_save_area); + OFFSET(__LC_CLOCK_COMP_SAVE_AREA, _lowcore, clock_comp_save_area); + OFFSET(__LC_AREGS_SAVE_AREA, _lowcore, access_regs_save_area); + OFFSET(__LC_CREGS_SAVE_AREA, _lowcore, cregs_save_area); + OFFSET(__LC_PGM_TDB, _lowcore, pgm_tdb); BLANK(); - DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); - DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area)); - DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area)); - DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area)); - DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area)); - DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); - DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); - DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); - DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code)); - DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address)); - DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr)); - DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); - DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); - DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); - DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area)); - DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr)); - DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); - DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); - DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); - DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); - DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); - DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); + /* gmap/sie offsets */ + OFFSET(__GMAP_ASCE, gmap, asce); + OFFSET(__SIE_PROG0C, kvm_s390_sie_block, prog0c); + OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20); return 0; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index e0f9d270b30f..66c94417c0ba 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -249,7 +249,7 @@ static int save_sigregs_ext32(struct pt_regs *regs, return -EFAULT; /* Save vector registers to signal stack */ - if (is_vx_task(current)) { + if (MACHINE_HAS_VX) { for (i = 0; i < __NUM_VXRS_LOW; i++) vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1); if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, @@ -277,7 +277,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs, *(__u32 *)®s->gprs[i] = gprs_high[i]; /* Restore vector registers from signal stack */ - if (is_vx_task(current)) { + if (MACHINE_HAS_VX) { if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, sizeof(sregs_ext->vxrs_low)) || __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW, @@ -470,8 +470,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, */ uc_flags = UC_GPRS_HIGH; if (MACHINE_HAS_VX) { - if (is_vx_task(current)) - uc_flags |= UC_VXRS; + uc_flags |= UC_VXRS; } else frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) + sizeof(frame->uc.uc_mcontext_ext.vxrs_high); diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 199ec92ef4fe..7f768914fb4f 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -14,6 +14,7 @@ #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> +#include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/cpcmd.h> #include <asm/io.h> @@ -70,6 +71,7 @@ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) memcpy(cpcmd_buf, cmd, cmdlen); ASCEBC(cpcmd_buf, cmdlen); + diag_stat_inc(DIAG_STAT_X008); if (response) { memset(response, 0, rlen); response_len = rlen; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 0c6c01eb3613..171e09bb8ea2 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -32,16 +32,6 @@ static struct memblock_type oldmem_type = { .regions = &oldmem_region, }; -#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \ - for (i = 0, __next_mem_range(&i, nid, MEMBLOCK_NONE, \ - &memblock.physmem, \ - &oldmem_type, p_start, \ - p_end, p_nid); \ - i != (u64)ULLONG_MAX; \ - __next_mem_range(&i, nid, MEMBLOCK_NONE, &memblock.physmem,\ - &oldmem_type, \ - p_start, p_end, p_nid)) - struct dump_save_areas dump_save_areas; /* @@ -515,7 +505,8 @@ static int get_mem_chunk_cnt(void) int cnt = 0; u64 idx; - for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL) + for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE, + MEMBLOCK_NONE, NULL, NULL, NULL) cnt++; return cnt; } @@ -528,7 +519,8 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) phys_addr_t start, end; u64 idx; - for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) { + for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE, + MEMBLOCK_NONE, &start, &end, NULL) { phdr->p_filesz = end - start; phdr->p_type = PT_LOAD; phdr->p_offset = start; diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 2f69243bf700..f98766ede4e1 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -6,12 +6,137 @@ */ #include <linux/module.h> +#include <linux/cpu.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> #include <asm/diag.h> +#include <asm/trace/diag.h> + +struct diag_stat { + unsigned int counter[NR_DIAG_STAT]; +}; + +static DEFINE_PER_CPU(struct diag_stat, diag_stat); + +struct diag_desc { + int code; + char *name; +}; + +static const struct diag_desc diag_map[NR_DIAG_STAT] = { + [DIAG_STAT_X008] = { .code = 0x008, .name = "Console Function" }, + [DIAG_STAT_X00C] = { .code = 0x00c, .name = "Pseudo Timer" }, + [DIAG_STAT_X010] = { .code = 0x010, .name = "Release Pages" }, + [DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" }, + [DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" }, + [DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" }, + [DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" }, + [DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" }, + [DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" }, + [DIAG_STAT_X210] = { .code = 0x210, .name = "Device Information" }, + [DIAG_STAT_X224] = { .code = 0x224, .name = "EBCDIC-Name Table" }, + [DIAG_STAT_X250] = { .code = 0x250, .name = "Block I/O" }, + [DIAG_STAT_X258] = { .code = 0x258, .name = "Page-Reference Services" }, + [DIAG_STAT_X288] = { .code = 0x288, .name = "Time Bomb" }, + [DIAG_STAT_X2C4] = { .code = 0x2c4, .name = "FTP Services" }, + [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" }, + [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, + [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, + [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, +}; + +static int show_diag_stat(struct seq_file *m, void *v) +{ + struct diag_stat *stat; + unsigned long n = (unsigned long) v - 1; + int cpu, prec, tmp; + + get_online_cpus(); + if (n == 0) { + seq_puts(m, " "); + + for_each_online_cpu(cpu) { + prec = 10; + for (tmp = 10; cpu >= tmp; tmp *= 10) + prec--; + seq_printf(m, "%*s%d", prec, "CPU", cpu); + } + seq_putc(m, '\n'); + } else if (n <= NR_DIAG_STAT) { + seq_printf(m, "diag %03x:", diag_map[n-1].code); + for_each_online_cpu(cpu) { + stat = &per_cpu(diag_stat, cpu); + seq_printf(m, " %10u", stat->counter[n-1]); + } + seq_printf(m, " %s\n", diag_map[n-1].name); + } + put_online_cpus(); + return 0; +} + +static void *show_diag_stat_start(struct seq_file *m, loff_t *pos) +{ + return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; +} + +static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return show_diag_stat_start(m, pos); +} + +static void show_diag_stat_stop(struct seq_file *m, void *v) +{ +} + +static const struct seq_operations show_diag_stat_sops = { + .start = show_diag_stat_start, + .next = show_diag_stat_next, + .stop = show_diag_stat_stop, + .show = show_diag_stat, +}; + +static int show_diag_stat_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &show_diag_stat_sops); +} + +static const struct file_operations show_diag_stat_fops = { + .open = show_diag_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + + +static int __init show_diag_stat_init(void) +{ + debugfs_create_file("diag_stat", 0400, NULL, NULL, + &show_diag_stat_fops); + return 0; +} + +device_initcall(show_diag_stat_init); + +void diag_stat_inc(enum diag_stat_enum nr) +{ + this_cpu_inc(diag_stat.counter[nr]); + trace_diagnose(diag_map[nr].code); +} +EXPORT_SYMBOL(diag_stat_inc); + +void diag_stat_inc_norecursion(enum diag_stat_enum nr) +{ + this_cpu_inc(diag_stat.counter[nr]); + trace_diagnose_norecursion(diag_map[nr].code); +} +EXPORT_SYMBOL(diag_stat_inc_norecursion); /* * Diagnose 14: Input spool file manipulation */ -int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) +static inline int __diag14(unsigned long rx, unsigned long ry1, + unsigned long subcode) { register unsigned long _ry1 asm("2") = ry1; register unsigned long _ry2 asm("3") = subcode; @@ -29,6 +154,12 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) return rc; } + +int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) +{ + diag_stat_inc(DIAG_STAT_X014); + return __diag14(rx, ry1, subcode); +} EXPORT_SYMBOL(diag14); /* @@ -48,6 +179,7 @@ int diag210(struct diag210 *addr) spin_lock_irqsave(&diag210_lock, flags); diag210_tmp = *addr; + diag_stat_inc(DIAG_STAT_X210); asm volatile( " lhi %0,-1\n" " sam31\n" diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 549a73a4b543..3c31609df959 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -17,6 +17,7 @@ #include <linux/pfn.h> #include <linux/uaccess.h> #include <linux/kernel.h> +#include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/ipl.h> #include <asm/lowcore.h> @@ -286,6 +287,7 @@ static __init void detect_diag9c(void) int rc; cpu_address = stap(); + diag_stat_inc(DIAG_STAT_X09C); asm volatile( " diag %2,0,0x9c\n" "0: la %0,0\n" @@ -300,6 +302,7 @@ static __init void detect_diag44(void) { int rc; + diag_stat_inc(DIAG_STAT_X044); asm volatile( " diag 0,0,0x44\n" "0: la %0,0\n" @@ -326,9 +329,19 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_TE; if (test_facility(51)) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; - if (test_facility(129)) + if (test_facility(129)) { S390_lowcore.machine_flags |= MACHINE_FLAG_VX; + __ctl_set_bit(0, 17); + } +} + +static int __init disable_vector_extension(char *str) +{ + S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; + __ctl_clear_bit(0, 17); + return 1; } +early_param("novx", disable_vector_extension); static int __init cad_setup(char *str) { diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 09b039d7983d..857b6526d298 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -20,8 +20,9 @@ #include <asm/page.h> #include <asm/sigp.h> #include <asm/irq.h> -#include <asm/fpu-internal.h> #include <asm/vx-insn.h> +#include <asm/setup.h> +#include <asm/nmi.h> __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 8 @@ -139,6 +140,28 @@ _PIF_WORK = (_PIF_PER_TRAP) #endif .endm + /* + * The TSTMSK macro generates a test-under-mask instruction by + * calculating the memory offset for the specified mask value. + * Mask value can be any constant. The macro shifts the mask + * value to calculate the memory offset for the test-under-mask + * instruction. + */ + .macro TSTMSK addr, mask, size=8, bytepos=0 + .if (\bytepos < \size) && (\mask >> 8) + .if (\mask & 0xff) + .error "Mask exceeds byte boundary" + .endif + TSTMSK \addr, "(\mask >> 8)", \size, "(\bytepos + 1)" + .exitm + .endif + .ifeq \mask + .error "Mask must not be zero" + .endif + off = \size - \bytepos - 1 + tm off+\addr, \mask + .endm + .section .kprobes.text, "ax" /* @@ -164,8 +187,11 @@ ENTRY(__switch_to) stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1) # load kernel stack of next lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 - mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next + mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP + bzr %r14 + .insn s,0xb2800000,__LC_LPP # set program parameter br %r14 .L__critical_start: @@ -180,8 +206,8 @@ ENTRY(sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers stg %r2,__SF_EMPTY(%r15) # save control block pointer stg %r3,__SF_EMPTY+8(%r15) # save guest register save area - xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason - tm __LC_CPU_FLAGS+7,_CIF_FPU # load guest fp/vx registers ? + xc __SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0 + TSTMSK __LC_CPU_FLAGS,_CIF_FPU # load guest fp/vx registers ? jno .Lsie_load_guest_gprs brasl %r14,load_fpu_regs # load guest fp/vx regs .Lsie_load_guest_gprs: @@ -195,16 +221,9 @@ ENTRY(sie64a) oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now tm __SIE_PROG20+3(%r14),3 # last exit... jnz .Lsie_skip - tm __LC_CPU_FLAGS+7,_CIF_FPU + TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed - tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP - jz .Lsie_enter - .insn s,0xb2800000,__LC_CURRENT_PID # set guest id to pid -.Lsie_enter: sie 0(%r14) - tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP - jz .Lsie_skip - .insn s,0xb2800000,__SF_EMPTY+16(%r15)# set host id .Lsie_skip: ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce @@ -221,11 +240,11 @@ sie_exit: lg %r14,__SF_EMPTY+8(%r15) # load guest register save area stmg %r0,%r13,0(%r14) # save guest gprs 0-13 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers - lg %r2,__SF_EMPTY+24(%r15) # return exit reason code + lg %r2,__SF_EMPTY+16(%r15) # return exit reason code br %r14 .Lsie_fault: lghi %r14,-EFAULT - stg %r14,__SF_EMPTY+24(%r15) # set exit reason code + stg %r14,__SF_EMPTY+16(%r15) # set exit reason code j sie_exit EX_TABLE(.Lrewind_pad,.Lsie_fault) @@ -271,7 +290,7 @@ ENTRY(system_call) stg %r2,__PT_ORIG_GPR2(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lgf %r9,0(%r8,%r10) # get system call add. - tm __TI_flags+7(%r12),_TIF_TRACE + TSTMSK __TI_flags(%r12),_TIF_TRACE jnz .Lsysc_tracesys basr %r14,%r9 # call sys_xxxx stg %r2,__PT_R2(%r11) # store return value @@ -279,11 +298,11 @@ ENTRY(system_call) .Lsysc_return: LOCKDEP_SYS_EXIT .Lsysc_tif: - tm __PT_FLAGS+7(%r11),_PIF_WORK + TSTMSK __PT_FLAGS(%r11),_PIF_WORK jnz .Lsysc_work - tm __TI_flags+7(%r12),_TIF_WORK + TSTMSK __TI_flags(%r12),_TIF_WORK jnz .Lsysc_work # check for work - tm __LC_CPU_FLAGS+7,_CIF_WORK + TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lsysc_work .Lsysc_restore: lg %r14,__LC_VDSO_PER_CPU @@ -299,23 +318,23 @@ ENTRY(system_call) # One of the work bits is on. Find out which one. # .Lsysc_work: - tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING + TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING jo .Lsysc_mcck_pending - tm __TI_flags+7(%r12),_TIF_NEED_RESCHED + TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lsysc_reschedule #ifdef CONFIG_UPROBES - tm __TI_flags+7(%r12),_TIF_UPROBE + TSTMSK __TI_flags(%r12),_TIF_UPROBE jo .Lsysc_uprobe_notify #endif - tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP + TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP jo .Lsysc_singlestep - tm __TI_flags+7(%r12),_TIF_SIGPENDING + TSTMSK __TI_flags(%r12),_TIF_SIGPENDING jo .Lsysc_sigpending - tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME + TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lsysc_notify_resume - tm __LC_CPU_FLAGS+7,_CIF_FPU + TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsysc_vxrs - tm __LC_CPU_FLAGS+7,_CIF_ASCE + TSTMSK __LC_CPU_FLAGS,_CIF_ASCE jo .Lsysc_uaccess j .Lsysc_return # beware of critical section cleanup @@ -354,7 +373,7 @@ ENTRY(system_call) .Lsysc_sigpending: lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,do_signal - tm __PT_FLAGS+7(%r11),_PIF_SYSCALL + TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL jno .Lsysc_return lmg %r2,%r7,__PT_R2(%r11) # load svc arguments lg %r10,__TI_sysc_table(%r12) # address of system call table @@ -414,7 +433,7 @@ ENTRY(system_call) basr %r14,%r9 # call sys_xxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_tracenogo: - tm __TI_flags+7(%r12),_TIF_TRACE + TSTMSK __TI_flags(%r12),_TIF_TRACE jz .Lsysc_return lgr %r2,%r11 # pass pointer to pt_regs larl %r14,.Lsysc_return @@ -544,6 +563,8 @@ ENTRY(io_int_handler) stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ + jo .Lio_restore TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) .Lio_loop: @@ -554,7 +575,7 @@ ENTRY(io_int_handler) lghi %r3,THIN_INTERRUPT .Lio_call: brasl %r14,do_IRQ - tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR + TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPAR jz .Lio_return tpi 0 jz .Lio_return @@ -564,9 +585,9 @@ ENTRY(io_int_handler) LOCKDEP_SYS_EXIT TRACE_IRQS_ON .Lio_tif: - tm __TI_flags+7(%r12),_TIF_WORK + TSTMSK __TI_flags(%r12),_TIF_WORK jnz .Lio_work # there is work to do (signals etc.) - tm __LC_CPU_FLAGS+7,_CIF_WORK + TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lio_work .Lio_restore: lg %r14,__LC_VDSO_PER_CPU @@ -594,7 +615,7 @@ ENTRY(io_int_handler) # check for preemptive scheduling icm %r0,15,__TI_precount(%r12) jnz .Lio_restore # preemption is disabled - tm __TI_flags+7(%r12),_TIF_NEED_RESCHED + TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jno .Lio_restore # switch to kernel stack lg %r1,__PT_R15(%r11) @@ -626,17 +647,17 @@ ENTRY(io_int_handler) # One of the work bits is on. Find out which one. # .Lio_work_tif: - tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING + TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING jo .Lio_mcck_pending - tm __TI_flags+7(%r12),_TIF_NEED_RESCHED + TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lio_reschedule - tm __TI_flags+7(%r12),_TIF_SIGPENDING + TSTMSK __TI_flags(%r12),_TIF_SIGPENDING jo .Lio_sigpending - tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME + TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume - tm __LC_CPU_FLAGS+7,_CIF_FPU + TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lio_vxrs - tm __LC_CPU_FLAGS+7,_CIF_ASCE + TSTMSK __LC_CPU_FLAGS,_CIF_ASCE jo .Lio_uaccess j .Lio_return # beware of critical section cleanup @@ -719,6 +740,8 @@ ENTRY(ext_int_handler) mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS mvc __PT_INT_PARM_LONG(8,%r11),0(%r1) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ + jo .Lio_restore TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs @@ -733,6 +756,14 @@ ENTRY(psw_idle) stg %r3,__SF_EMPTY(%r15) larl %r1,.Lpsw_idle_lpsw+4 stg %r1,__SF_EMPTY+8(%r15) +#ifdef CONFIG_SMP + larl %r1,smp_cpu_mtid + llgf %r1,0(%r1) + ltgr %r1,%r1 + jz .Lpsw_idle_stcctm + .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15) +.Lpsw_idle_stcctm: +#endif STCK __CLOCK_IDLE_ENTER(%r2) stpt __TIMER_IDLE_ENTER(%r2) .Lpsw_idle_lpsw: @@ -740,27 +771,22 @@ ENTRY(psw_idle) br %r14 .Lpsw_idle_end: -/* Store floating-point controls and floating-point or vector extension - * registers instead. A critical section cleanup assures that the registers - * are stored even if interrupted for some other work. The register %r2 - * designates a struct fpu to store register contents. If the specified - * structure does not contain a register save area, the register store is - * omitted (see also comments in arch_dup_task_struct()). - * - * The CIF_FPU flag is set in any case. The CIF_FPU triggers a lazy restore - * of the register contents at system call or io return. +/* + * Store floating-point controls and floating-point or vector register + * depending whether the vector facility is available. A critical section + * cleanup assures that the registers are stored even if interrupted for + * some other work. The CIF_FPU flag is set to trigger a lazy restore + * of the register contents at return from io or a system call. */ ENTRY(save_fpu_regs) lg %r2,__LC_CURRENT aghi %r2,__TASK_thread - tm __LC_CPU_FLAGS+7,_CIF_FPU + TSTMSK __LC_CPU_FLAGS,_CIF_FPU bor %r14 stfpc __THREAD_FPU_fpc(%r2) .Lsave_fpu_regs_fpc_end: lg %r3,__THREAD_FPU_regs(%r2) - ltgr %r3,%r3 - jz .Lsave_fpu_regs_done # no save area -> set CIF_FPU - tm __THREAD_FPU_flags+3(%r2),FPU_USE_VX + TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX jz .Lsave_fpu_regs_fp # no -> store FP regs .Lsave_fpu_regs_vx_low: VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) @@ -789,41 +815,30 @@ ENTRY(save_fpu_regs) br %r14 .Lsave_fpu_regs_end: -/* Load floating-point controls and floating-point or vector extension - * registers. A critical section cleanup assures that the register contents - * are loaded even if interrupted for some other work. Depending on the saved - * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared. +/* + * Load floating-point controls and floating-point or vector registers. + * A critical section cleanup assures that the register contents are + * loaded even if interrupted for some other work. * * There are special calling conventions to fit into sysc and io return work: * %r15: <kernel stack> * The function requires: - * %r4 and __SF_EMPTY+32(%r15) + * %r4 */ load_fpu_regs: lg %r4,__LC_CURRENT aghi %r4,__TASK_thread - tm __LC_CPU_FLAGS+7,_CIF_FPU + TSTMSK __LC_CPU_FLAGS,_CIF_FPU bnor %r14 lfpc __THREAD_FPU_fpc(%r4) - stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0 - tm __THREAD_FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ? + TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area - jz .Lload_fpu_regs_fp_ctl # -> no VX, load FP regs -.Lload_fpu_regs_vx_ctl: - tm __SF_EMPTY+32+5(%r15),2 # test VX control - jo .Lload_fpu_regs_vx - oi __SF_EMPTY+32+5(%r15),2 # set VX control - lctlg %c0,%c0,__SF_EMPTY+32(%r15) + jz .Lload_fpu_regs_fp # -> no VX, load FP regs .Lload_fpu_regs_vx: VLM %v0,%v15,0,%r4 .Lload_fpu_regs_vx_high: VLM %v16,%v31,256,%r4 j .Lload_fpu_regs_done -.Lload_fpu_regs_fp_ctl: - tm __SF_EMPTY+32+5(%r15),2 # test VX control - jz .Lload_fpu_regs_fp - ni __SF_EMPTY+32+5(%r15),253 # clear VX control - lctlg %c0,%c0,__SF_EMPTY+32(%r15) .Lload_fpu_regs_fp: ld 0,0(%r4) ld 1,8(%r4) @@ -846,16 +861,6 @@ load_fpu_regs: br %r14 .Lload_fpu_regs_end: -/* Test and set the vector enablement control in CR0.46 */ -ENTRY(__ctl_set_vx) - stctg %c0,%c0,__SF_EMPTY(%r15) - tm __SF_EMPTY+5(%r15),2 - bor %r14 - oi __SF_EMPTY+5(%r15),2 - lctlg %c0,%c0,__SF_EMPTY(%r15) - br %r14 -.L__ctl_set_vx_end: - .L__critical_end: /* @@ -870,11 +875,11 @@ ENTRY(mcck_int_handler) lg %r12,__LC_THREAD_INFO larl %r13,cleanup_critical lmg %r8,%r9,__LC_MCK_OLD_PSW - tm __LC_MCCK_CODE,0x80 # system damage? + TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE jo .Lmcck_panic # yes -> rest of mcck code invalid lghi %r14,__LC_CPU_TIMER_SAVE_AREA mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) - tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? + TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID jo 3f la %r14,__LC_SYNC_ENTER_TIMER clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER @@ -888,7 +893,7 @@ ENTRY(mcck_int_handler) la %r14,__LC_LAST_UPDATE_TIMER 2: spt 0(%r14) mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? +3: TSTMSK __LC_MCCK_CODE,(MCCK_CODE_PSW_MWP_VALID|MCCK_CODE_PSW_IA_VALID) jno .Lmcck_panic # no -> skip cleanup critical SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER .Lmcck_skip: @@ -908,7 +913,7 @@ ENTRY(mcck_int_handler) la %r11,STACK_FRAME_OVERHEAD(%r1) lgr %r15,%r1 ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off - tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING + TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING jno .Lmcck_return TRACE_IRQS_OFF brasl %r14,s390_handle_mcck @@ -933,7 +938,10 @@ ENTRY(mcck_int_handler) # PSW restart interrupt handler # ENTRY(restart_int_handler) - stg %r15,__LC_SAVE_AREA_RESTART + TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP + jz 0f + .insn s,0xb2800000,__LC_LPP +0: stg %r15,__LC_SAVE_AREA_RESTART lg %r15,__LC_RESTART_STACK aghi %r15,-__PT_SIZE # create pt_regs on stack xc 0(__PT_SIZE,%r15),0(%r15) @@ -1011,10 +1019,6 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs - clg %r9,BASED(.Lcleanup_table+112) # __ctl_set_vx - jl 0f - clg %r9,BASED(.Lcleanup_table+120) # .L__ctl_set_vx_end - jl .Lcleanup___ctl_set_vx 0: br %r14 .align 8 @@ -1033,8 +1037,6 @@ cleanup_critical: .quad .Lsave_fpu_regs_end .quad load_fpu_regs .quad .Lload_fpu_regs_end - .quad __ctl_set_vx - .quad .L__ctl_set_vx_end #if IS_ENABLED(CONFIG_KVM) .Lcleanup_table_sie: @@ -1043,10 +1045,7 @@ cleanup_critical: .Lcleanup_sie: lg %r9,__SF_EMPTY(%r15) # get control block pointer - tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP - jz 0f - .insn s,0xb2800000,__SF_EMPTY+16(%r15)# set host id -0: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE + ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit br %r14 @@ -1159,7 +1158,27 @@ cleanup_critical: jhe 1f mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2) -1: # account system time going idle +1: # calculate idle cycles +#ifdef CONFIG_SMP + clg %r9,BASED(.Lcleanup_idle_insn) + jl 3f + larl %r1,smp_cpu_mtid + llgf %r1,0(%r1) + ltgr %r1,%r1 + jz 3f + .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15) + larl %r3,mt_cycles + ag %r3,__LC_PERCPU_OFFSET + la %r4,__SF_EMPTY+16(%r15) +2: lg %r0,0(%r3) + slg %r0,0(%r4) + alg %r0,64(%r4) + stg %r0,0(%r3) + la %r3,8(%r3) + la %r4,8(%r4) + brct %r1,2b +#endif +3: # account system time going idle lg %r9,__LC_STEAL_TIMER alg %r9,__CLOCK_IDLE_ENTER(%r2) slg %r9,__LC_LAST_UPDATE_CLOCK @@ -1178,7 +1197,7 @@ cleanup_critical: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: - tm __LC_CPU_FLAGS+7,_CIF_FPU + TSTMSK __LC_CPU_FLAGS,_CIF_FPU bor %r14 clg %r9,BASED(.Lcleanup_save_fpu_regs_done) jhe 5f @@ -1196,9 +1215,7 @@ cleanup_critical: stfpc __THREAD_FPU_fpc(%r2) 1: # Load register save area and check if VX is active lg %r3,__THREAD_FPU_regs(%r2) - ltgr %r3,%r3 - jz 5f # no save area -> set CIF_FPU - tm __THREAD_FPU_flags+3(%r2),FPU_USE_VX + TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX jz 4f # no VX -> store FP regs 2: # Store vector registers (V0-V15) VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) @@ -1238,43 +1255,27 @@ cleanup_critical: .quad .Lsave_fpu_regs_done .Lcleanup_load_fpu_regs: - tm __LC_CPU_FLAGS+7,_CIF_FPU + TSTMSK __LC_CPU_FLAGS,_CIF_FPU bnor %r14 clg %r9,BASED(.Lcleanup_load_fpu_regs_done) jhe 1f clg %r9,BASED(.Lcleanup_load_fpu_regs_fp) jhe 2f - clg %r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl) - jhe 3f clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high) - jhe 4f + jhe 3f clg %r9,BASED(.Lcleanup_load_fpu_regs_vx) - jhe 5f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl) - jhe 6f + jhe 4f lg %r4,__LC_CURRENT aghi %r4,__TASK_thread lfpc __THREAD_FPU_fpc(%r4) - tm __THREAD_FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ? + TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area - jz 3f # -> no VX, load FP regs -6: # Set VX-enablement control - stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0 - tm __SF_EMPTY+32+5(%r15),2 # test VX control - jo 5f - oi __SF_EMPTY+32+5(%r15),2 # set VX control - lctlg %c0,%c0,__SF_EMPTY+32(%r15) -5: # Load V0 ..V15 registers + jz 2f # -> no VX, load FP regs +4: # Load V0 ..V15 registers VLM %v0,%v15,0,%r4 -4: # Load V16..V31 registers +3: # Load V16..V31 registers VLM %v16,%v31,256,%r4 j 1f -3: # Clear VX-enablement control for FP - stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0 - tm __SF_EMPTY+32+5(%r15),2 # test VX control - jz 2f - ni __SF_EMPTY+32+5(%r15),253 # clear VX control - lctlg %c0,%c0,__SF_EMPTY+32(%r15) 2: # Load floating-point registers ld 0,0(%r4) ld 1,8(%r4) @@ -1296,28 +1297,15 @@ cleanup_critical: ni __LC_CPU_FLAGS+7,255-_CIF_FPU lg %r9,48(%r11) # return from load_fpu_regs br %r14 -.Lcleanup_load_fpu_regs_vx_ctl: - .quad .Lload_fpu_regs_vx_ctl .Lcleanup_load_fpu_regs_vx: .quad .Lload_fpu_regs_vx .Lcleanup_load_fpu_regs_vx_high: .quad .Lload_fpu_regs_vx_high -.Lcleanup_load_fpu_regs_fp_ctl: - .quad .Lload_fpu_regs_fp_ctl .Lcleanup_load_fpu_regs_fp: .quad .Lload_fpu_regs_fp .Lcleanup_load_fpu_regs_done: .quad .Lload_fpu_regs_done -.Lcleanup___ctl_set_vx: - stctg %c0,%c0,__SF_EMPTY(%r15) - tm __SF_EMPTY+5(%r15),2 - bor %r14 - oi __SF_EMPTY+5(%r15),2 - lctlg %c0,%c0,__SF_EMPTY(%r15) - lg %r9,48(%r11) # return from __ctl_set_vx - br %r14 - /* * Integer constants */ diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 834df047d35f..b7019ab74070 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -16,13 +16,10 @@ void io_int_handler(void); void mcck_int_handler(void); void restart_int_handler(void); void restart_call_handler(void); -void psw_idle(struct s390_idle_data *, unsigned long); asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); -int alloc_vector_registers(struct task_struct *tsk); - void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index d7c00507568a..58b719fa8067 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -16,7 +16,12 @@ __HEAD ENTRY(startup_continue) - larl %r1,sched_clock_base_cc + tm __LC_STFL_FAC_LIST+6,0x80 # LPP available ? + jz 0f + xc __LC_LPP+1(7,0),__LC_LPP+1 # clear lpp and current_pid + mvi __LC_LPP,0x80 # and set LPP_MAGIC + .insn s,0xb2800000,__LC_LPP # load program parameter +0: larl %r1,sched_clock_base_cc mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK larl %r13,.LPG1 # get base lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 52fbef91d1d9..f6d8acd7e136 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -17,6 +17,7 @@ #include <linux/gfp.h> #include <linux/crash_dump.h> #include <linux/debug_locks.h> +#include <asm/diag.h> #include <asm/ipl.h> #include <asm/smp.h> #include <asm/setup.h> @@ -165,7 +166,7 @@ static struct ipl_parameter_block *dump_block_ccw; static struct sclp_ipl_info sclp_ipl_info; -int diag308(unsigned long subcode, void *addr) +static inline int __diag308(unsigned long subcode, void *addr) { register unsigned long _addr asm("0") = (unsigned long) addr; register unsigned long _rc asm("1") = 0; @@ -178,6 +179,12 @@ int diag308(unsigned long subcode, void *addr) : "d" (subcode) : "cc", "memory"); return _rc; } + +int diag308(unsigned long subcode, void *addr) +{ + diag_stat_inc(DIAG_STAT_X308); + return __diag308(subcode, addr); +} EXPORT_SYMBOL_GPL(diag308); /* SYSFS */ diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index e9d9addfaa44..f41d5208aaf7 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -69,7 +69,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"}, {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, - {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"}, {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 0ae6f8e74840..07302ce37648 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -21,19 +21,20 @@ #include <asm/nmi.h> #include <asm/crw.h> #include <asm/switch_to.h> -#include <asm/fpu-internal.h> #include <asm/ctl_reg.h> struct mcck_struct { - int kill_task; - int channel_report; - int warning; - unsigned long long mcck_code; + unsigned int kill_task : 1; + unsigned int channel_report : 1; + unsigned int warning : 1; + unsigned int etr_queue : 1; + unsigned int stp_queue : 1; + unsigned long mcck_code; }; static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); -static void s390_handle_damage(char *msg) +static void s390_handle_damage(void) { smp_send_stop(); disabled_wait((unsigned long) __builtin_return_address(0)); @@ -81,10 +82,14 @@ void s390_handle_mcck(void) if (xchg(&mchchk_wng_posted, 1) == 0) kill_cad_pid(SIGPWR, 1); } + if (mcck.etr_queue) + etr_queue_work(); + if (mcck.stp_queue) + stp_queue_work(); if (mcck.kill_task) { local_irq_enable(); printk(KERN_EMERG "mcck: Terminating task because of machine " - "malfunction (code 0x%016llx).\n", mcck.mcck_code); + "malfunction (code 0x%016lx).\n", mcck.mcck_code); printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", current->comm, current->pid); do_exit(SIGSEGV); @@ -96,7 +101,7 @@ EXPORT_SYMBOL_GPL(s390_handle_mcck); * returns 0 if all registers could be validated * returns 1 otherwise */ -static int notrace s390_revalidate_registers(struct mci *mci) +static int notrace s390_validate_registers(union mci mci) { int kill_task; u64 zero; @@ -105,14 +110,14 @@ static int notrace s390_revalidate_registers(struct mci *mci) kill_task = 0; zero = 0; - if (!mci->gr) { + if (!mci.gr) { /* * General purpose registers couldn't be restored and have * unknown contents. Process needs to be terminated. */ kill_task = 1; } - if (!mci->fp) { + if (!mci.fp) { /* * Floating point registers can't be restored and * therefore the process needs to be terminated. @@ -121,7 +126,7 @@ static int notrace s390_revalidate_registers(struct mci *mci) } fpt_save_area = &S390_lowcore.floating_pt_save_area; fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area; - if (!mci->fc) { + if (!mci.fc) { /* * Floating point control register can't be restored. * Task will be terminated. @@ -132,7 +137,7 @@ static int notrace s390_revalidate_registers(struct mci *mci) asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); if (!MACHINE_HAS_VX) { - /* Revalidate floating point registers */ + /* Validate floating point registers */ asm volatile( " ld 0,0(%0)\n" " ld 1,8(%0)\n" @@ -152,10 +157,10 @@ static int notrace s390_revalidate_registers(struct mci *mci) " ld 15,120(%0)\n" : : "a" (fpt_save_area)); } else { - /* Revalidate vector registers */ + /* Validate vector registers */ union ctlreg0 cr0; - if (!mci->vr) { + if (!mci.vr) { /* * Vector registers can't be restored and therefore * the process needs to be terminated. @@ -173,38 +178,38 @@ static int notrace s390_revalidate_registers(struct mci *mci) &S390_lowcore.vector_save_area) : "1"); __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } - /* Revalidate access registers */ + /* Validate access registers */ asm volatile( " lam 0,15,0(%0)" : : "a" (&S390_lowcore.access_regs_save_area)); - if (!mci->ar) { + if (!mci.ar) { /* * Access registers have unknown contents. * Terminating task. */ kill_task = 1; } - /* Revalidate control registers */ - if (!mci->cr) { + /* Validate control registers */ + if (!mci.cr) { /* * Control registers have unknown contents. * Can't recover and therefore stopping machine. */ - s390_handle_damage("invalid control registers."); + s390_handle_damage(); } else { asm volatile( " lctlg 0,15,0(%0)" : : "a" (&S390_lowcore.cregs_save_area)); } /* - * We don't even try to revalidate the TOD register, since we simply + * We don't even try to validate the TOD register, since we simply * can't write something sensible into that register. */ /* - * See if we can revalidate the TOD programmable register with its + * See if we can validate the TOD programmable register with its * old contents (should be zero) otherwise set it to zero. */ - if (!mci->pr) + if (!mci.pr) asm volatile( " sr 0,0\n" " sckpf" @@ -215,17 +220,17 @@ static int notrace s390_revalidate_registers(struct mci *mci) " sckpf" : : "a" (&S390_lowcore.tod_progreg_save_area) : "0", "cc"); - /* Revalidate clock comparator register */ + /* Validate clock comparator register */ set_clock_comparator(S390_lowcore.clock_comparator); /* Check if old PSW is valid */ - if (!mci->wp) + if (!mci.wp) /* * Can't tell if we come from user or kernel mode * -> stopping machine. */ - s390_handle_damage("old psw invalid."); + s390_handle_damage(); - if (!mci->ms || !mci->pm || !mci->ia) + if (!mci.ms || !mci.pm || !mci.ia) kill_task = 1; return kill_task; @@ -249,21 +254,21 @@ void notrace s390_do_machine_check(struct pt_regs *regs) static unsigned long long last_ipd; struct mcck_struct *mcck; unsigned long long tmp; - struct mci *mci; + union mci mci; int umode; nmi_enter(); inc_irq_stat(NMI_NMI); - mci = (struct mci *) &S390_lowcore.mcck_interruption_code; + mci.val = S390_lowcore.mcck_interruption_code; mcck = this_cpu_ptr(&cpu_mcck); umode = user_mode(regs); - if (mci->sd) { + if (mci.sd) { /* System damage -> stopping machine */ - s390_handle_damage("received system damage machine check."); + s390_handle_damage(); } - if (mci->pd) { - if (mci->b) { + if (mci.pd) { + if (mci.b) { /* Processing backup -> verify if we can survive this */ u64 z_mcic, o_mcic, t_mcic; z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29); @@ -271,12 +276,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs) 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16); - t_mcic = *(u64 *)mci; + t_mcic = mci.val; if (((t_mcic & z_mcic) != 0) || ((t_mcic & o_mcic) != o_mcic)) { - s390_handle_damage("processing backup machine " - "check with damage."); + s390_handle_damage(); } /* @@ -291,64 +295,62 @@ void notrace s390_do_machine_check(struct pt_regs *regs) ipd_count = 1; last_ipd = tmp; if (ipd_count == MAX_IPD_COUNT) - s390_handle_damage("too many ipd retries."); + s390_handle_damage(); spin_unlock(&ipd_lock); } else { /* Processing damage -> stopping machine */ - s390_handle_damage("received instruction processing " - "damage machine check."); + s390_handle_damage(); } } - if (s390_revalidate_registers(mci)) { + if (s390_validate_registers(mci)) { if (umode) { /* * Couldn't restore all register contents while in * user mode -> mark task for termination. */ mcck->kill_task = 1; - mcck->mcck_code = *(unsigned long long *) mci; + mcck->mcck_code = mci.val; set_cpu_flag(CIF_MCCK_PENDING); } else { /* * Couldn't restore all register contents while in * kernel mode -> stopping machine. */ - s390_handle_damage("unable to revalidate registers."); + s390_handle_damage(); } } - if (mci->cd) { + if (mci.cd) { /* Timing facility damage */ - s390_handle_damage("TOD clock damaged"); + s390_handle_damage(); } - if (mci->ed && mci->ec) { + if (mci.ed && mci.ec) { /* External damage */ if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC)) - etr_sync_check(); + mcck->etr_queue |= etr_sync_check(); if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH)) - etr_switch_to_local(); + mcck->etr_queue |= etr_switch_to_local(); if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) - stp_sync_check(); + mcck->stp_queue |= stp_sync_check(); if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) - stp_island_check(); + mcck->stp_queue |= stp_island_check(); + if (mcck->etr_queue || mcck->stp_queue) + set_cpu_flag(CIF_MCCK_PENDING); } - if (mci->se) + if (mci.se) /* Storage error uncorrected */ - s390_handle_damage("received storage error uncorrected " - "machine check."); - if (mci->ke) + s390_handle_damage(); + if (mci.ke) /* Storage key-error uncorrected */ - s390_handle_damage("received storage key-error uncorrected " - "machine check."); - if (mci->ds && mci->fa) + s390_handle_damage(); + if (mci.ds && mci.fa) /* Storage degradation */ - s390_handle_damage("received storage degradation machine " - "check."); - if (mci->cp) { + s390_handle_damage(); + if (mci.cp) { /* Channel report word pending */ mcck->channel_report = 1; set_cpu_flag(CIF_MCCK_PENDING); } - if (mci->w) { + if (mci.w) { /* Warning pending */ mcck->warning = 1; set_cpu_flag(CIF_MCCK_PENDING); diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index a9563409c36e..929c147e07b4 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -72,6 +72,7 @@ struct cpu_hw_events { atomic_t ctr_set[CPUMF_CTR_SET_MAX]; u64 state, tx_state; unsigned int flags; + unsigned int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .ctr_set = { @@ -82,6 +83,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { }, .state = 0, .flags = 0, + .txn_flags = 0, }; static int get_counter_set(u64 event) @@ -538,7 +540,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags) * For group events transaction, the authorization check is * done in cpumf_pmu_commit_txn(). */ - if (!(cpuhw->flags & PERF_EVENT_TXN)) + if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD)) if (validate_ctr_auth(&event->hw)) return -ENOENT; @@ -576,13 +578,22 @@ static void cpumf_pmu_del(struct perf_event *event, int flags) /* * Start group events scheduling transaction. * Set flags to perform a single test at commit time. + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void cpumf_pmu_start_txn(struct pmu *pmu) +static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); - cpuhw->flags |= PERF_EVENT_TXN; cpuhw->tx_state = cpuhw->state; } @@ -593,11 +604,18 @@ static void cpumf_pmu_start_txn(struct pmu *pmu) */ static void cpumf_pmu_cancel_txn(struct pmu *pmu) { + unsigned int txn_flags; struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + WARN_ON(cpuhw->tx_state != cpuhw->state); - cpuhw->flags &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -611,13 +629,20 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); u64 state; + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuhw->txn_flags = 0; + return 0; + } + /* check if the updated state can be scheduled */ state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); state >>= CPUMF_LCCTL_ENABLE_SHIFT; if ((state & cpuhw->info.auth_ctl) != state) return -ENOENT; - cpuhw->flags &= ~PERF_EVENT_TXN; + cpuhw->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index b973972f6ba5..3d8da1e742c2 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1019,11 +1019,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) break; } - /* The host-program-parameter (hpp) contains the pid of - * the CPU thread as set by sie64a() in entry.S. - * If non-zero assume a guest sample. + /* + * A non-zero guest program parameter indicates a guest + * sample. + * Note that some early samples might be misaccounted to + * the host. */ - if (sfr->basic.hpp) + if (sfr->basic.gpp) sde_regs->in_guest = 1; overflow = 0; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index f2dac9f0799d..688a3aad9c79 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -23,6 +23,7 @@ #include <linux/kprobes.h> #include <linux/random.h> #include <linux/module.h> +#include <linux/init_task.h> #include <asm/io.h> #include <asm/processor.h> #include <asm/vtimer.h> @@ -36,6 +37,9 @@ asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); +/* FPU save area for the init task */ +__vector128 init_task_fpu_regs[__NUM_VXRS] __init_task_data; + /* * Return saved PC of a blocked thread. used in kernel/sched. * resume in entry.S does not create a new stack frame, it @@ -87,31 +91,29 @@ void arch_release_task_struct(struct task_struct *tsk) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { + size_t fpu_regs_size; + *dst = *src; - /* Set up a new floating-point register save area */ - dst->thread.fpu.fpc = 0; - dst->thread.fpu.flags = 0; /* Always start with VX disabled */ - dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, - GFP_KERNEL|__GFP_REPEAT); - if (!dst->thread.fpu.fprs) + /* + * If the vector extension is available, it is enabled for all tasks, + * and, thus, the FPU register save area must be allocated accordingly. + */ + fpu_regs_size = MACHINE_HAS_VX ? sizeof(__vector128) * __NUM_VXRS + : sizeof(freg_t) * __NUM_FPRS; + dst->thread.fpu.regs = kzalloc(fpu_regs_size, GFP_KERNEL|__GFP_REPEAT); + if (!dst->thread.fpu.regs) return -ENOMEM; /* * Save the floating-point or vector register state of the current - * task. The state is not saved for early kernel threads, for example, - * the init_task, which do not have an allocated save area. - * The CIF_FPU flag is set in any case to lazy clear or restore a saved - * state when switching to a different task or returning to user space. + * task and set the CIF_FPU flag to lazy restore the FPU register + * state when returning to user space. */ save_fpu_regs(); dst->thread.fpu.fpc = current->thread.fpu.fpc; - if (is_vx_task(current)) - convert_vx_to_fp(dst->thread.fpu.fprs, - current->thread.fpu.vxrs); - else - memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs, - sizeof(freg_t) * __NUM_FPRS); + memcpy(dst->thread.fpu.regs, current->thread.fpu.regs, fpu_regs_size); + return 0; } @@ -169,7 +171,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, /* Don't copy runtime instrumentation info */ p->thread.ri_cb = NULL; - p->thread.ri_signum = 0; frame->childregs.psw.mask &= ~PSW_MASK_RI; /* Set a new TLS ? */ @@ -199,7 +200,7 @@ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) save_fpu_regs(); fpregs->fpc = current->thread.fpu.fpc; fpregs->pad = 0; - if (is_vx_task(current)) + if (MACHINE_HAS_VX) convert_vx_to_fp((freg_t *)&fpregs->fprs, current->thread.fpu.vxrs); else diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index e6e077ae3990..7ce00e7a709a 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -11,6 +11,7 @@ #include <linux/seq_file.h> #include <linux/delay.h> #include <linux/cpu.h> +#include <asm/diag.h> #include <asm/elf.h> #include <asm/lowcore.h> #include <asm/param.h> @@ -20,8 +21,10 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); void notrace cpu_relax(void) { - if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) + if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) { + diag_stat_inc(DIAG_STAT_X044); asm volatile("diag 0,0,0x44"); + } barrier(); } EXPORT_SYMBOL(cpu_relax); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 8b1c8e33f184..01c37b36caf9 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -239,12 +239,12 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) * or the child->thread.fpu.vxrs array */ offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; - if (is_vx_task(child)) + if (MACHINE_HAS_VX) tmp = *(addr_t *) ((addr_t) child->thread.fpu.vxrs + 2*offset); else tmp = *(addr_t *) - ((addr_t) &child->thread.fpu.fprs + offset); + ((addr_t) child->thread.fpu.fprs + offset); } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* @@ -383,12 +383,12 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) * or the child->thread.fpu.vxrs array */ offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; - if (is_vx_task(child)) + if (MACHINE_HAS_VX) *(addr_t *)((addr_t) child->thread.fpu.vxrs + 2*offset) = data; else *(addr_t *)((addr_t) - &child->thread.fpu.fprs + offset) = data; + child->thread.fpu.fprs + offset) = data; } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* @@ -617,12 +617,12 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) * or the child->thread.fpu.vxrs array */ offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; - if (is_vx_task(child)) + if (MACHINE_HAS_VX) tmp = *(__u32 *) ((addr_t) child->thread.fpu.vxrs + 2*offset); else tmp = *(__u32 *) - ((addr_t) &child->thread.fpu.fprs + offset); + ((addr_t) child->thread.fpu.fprs + offset); } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* @@ -742,12 +742,12 @@ static int __poke_user_compat(struct task_struct *child, * or the child->thread.fpu.vxrs array */ offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; - if (is_vx_task(child)) + if (MACHINE_HAS_VX) *(__u32 *)((addr_t) child->thread.fpu.vxrs + 2*offset) = tmp; else *(__u32 *)((addr_t) - &child->thread.fpu.fprs + offset) = tmp; + child->thread.fpu.fprs + offset) = tmp; } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* @@ -981,7 +981,7 @@ static int s390_fpregs_set(struct task_struct *target, if (rc) return rc; - if (is_vx_task(target)) + if (MACHINE_HAS_VX) convert_fp_to_vx(target->thread.fpu.vxrs, fprs); else memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs)); @@ -1047,13 +1047,10 @@ static int s390_vxrs_low_get(struct task_struct *target, if (!MACHINE_HAS_VX) return -ENODEV; - if (is_vx_task(target)) { - if (target == current) - save_fpu_regs(); - for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); - } else - memset(vxrs, 0, sizeof(vxrs)); + if (target == current) + save_fpu_regs(); + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); } @@ -1067,11 +1064,7 @@ static int s390_vxrs_low_set(struct task_struct *target, if (!MACHINE_HAS_VX) return -ENODEV; - if (!is_vx_task(target)) { - rc = alloc_vector_registers(target); - if (rc) - return rc; - } else if (target == current) + if (target == current) save_fpu_regs(); rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); @@ -1091,13 +1084,10 @@ static int s390_vxrs_high_get(struct task_struct *target, if (!MACHINE_HAS_VX) return -ENODEV; - if (is_vx_task(target)) { - if (target == current) - save_fpu_regs(); - memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, - sizeof(vxrs)); - } else - memset(vxrs, 0, sizeof(vxrs)); + if (target == current) + save_fpu_regs(); + memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs)); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); } @@ -1110,11 +1100,7 @@ static int s390_vxrs_high_set(struct task_struct *target, if (!MACHINE_HAS_VX) return -ENODEV; - if (!is_vx_task(target)) { - rc = alloc_vector_registers(target); - if (rc) - return rc; - } else if (target == current) + if (target == current) save_fpu_regs(); rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 26b4ae96fdd7..fffa0e5462af 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -18,11 +18,6 @@ /* empty control block to disable RI by loading it */ struct runtime_instr_cb runtime_instr_empty_cb; -static int runtime_instr_avail(void) -{ - return test_facility(64); -} - static void disable_runtime_instr(void) { struct pt_regs *regs = task_pt_regs(current); @@ -40,7 +35,6 @@ static void disable_runtime_instr(void) static void init_runtime_instr_cb(struct runtime_instr_cb *cb) { cb->buf_limit = 0xfff; - cb->int_requested = 1; cb->pstate = 1; cb->pstate_set_buf = 1; cb->pstate_sample = 1; @@ -57,46 +51,14 @@ void exit_thread_runtime_instr(void) return; disable_runtime_instr(); kfree(task->thread.ri_cb); - task->thread.ri_signum = 0; task->thread.ri_cb = NULL; } -static void runtime_instr_int_handler(struct ext_code ext_code, - unsigned int param32, unsigned long param64) -{ - struct siginfo info; - - if (!(param32 & CPU_MF_INT_RI_MASK)) - return; - - inc_irq_stat(IRQEXT_CMR); - - if (!current->thread.ri_cb) - return; - if (current->thread.ri_signum < SIGRTMIN || - current->thread.ri_signum > SIGRTMAX) { - WARN_ON_ONCE(1); - return; - } - - memset(&info, 0, sizeof(info)); - info.si_signo = current->thread.ri_signum; - info.si_code = SI_QUEUE; - if (param32 & CPU_MF_INT_RI_BUF_FULL) - info.si_int = ENOBUFS; - else if (param32 & CPU_MF_INT_RI_HALTED) - info.si_int = ECANCELED; - else - return; /* unknown reason */ - - send_sig_info(current->thread.ri_signum, &info, current); -} - -SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) +SYSCALL_DEFINE1(s390_runtime_instr, int, command) { struct runtime_instr_cb *cb; - if (!runtime_instr_avail()) + if (!test_facility(64)) return -EOPNOTSUPP; if (command == S390_RUNTIME_INSTR_STOP) { @@ -106,8 +68,7 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) return 0; } - if (command != S390_RUNTIME_INSTR_START || - (signum < SIGRTMIN || signum > SIGRTMAX)) + if (command != S390_RUNTIME_INSTR_START) return -EINVAL; if (!current->thread.ri_cb) { @@ -120,7 +81,6 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) } init_runtime_instr_cb(cb); - current->thread.ri_signum = signum; /* now load the control block to make it available */ preempt_disable(); @@ -129,21 +89,3 @@ SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) preempt_enable(); return 0; } - -static int __init runtime_instr_init(void) -{ - int rc; - - if (!runtime_instr_avail()) - return 0; - - irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); - rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, - runtime_instr_int_handler); - if (rc) - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); - else - pr_info("Runtime instrumentation facility initialized\n"); - return rc; -} -device_initcall(runtime_instr_init); diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 5090d3dad10b..e67453b73c3c 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -1,6 +1,6 @@ #include <linux/module.h> #include <linux/kvm_host.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/api.h> #include <asm/ftrace.h> #ifdef CONFIG_FUNCTION_TRACER @@ -10,7 +10,6 @@ EXPORT_SYMBOL(_mcount); EXPORT_SYMBOL(sie64a); EXPORT_SYMBOL(sie_exit); EXPORT_SYMBOL(save_fpu_regs); -EXPORT_SYMBOL(__ctl_set_vx); #endif EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9549af102d75..028cc46cb82a 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -179,7 +179,7 @@ static int save_sigregs_ext(struct pt_regs *regs, int i; /* Save vector registers to signal stack */ - if (is_vx_task(current)) { + if (MACHINE_HAS_VX) { for (i = 0; i < __NUM_VXRS_LOW; i++) vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1); if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, @@ -199,7 +199,7 @@ static int restore_sigregs_ext(struct pt_regs *regs, int i; /* Restore vector registers from signal stack */ - if (is_vx_task(current)) { + if (MACHINE_HAS_VX) { if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, sizeof(sregs_ext->vxrs_low)) || __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW, @@ -381,8 +381,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, uc_flags = 0; if (MACHINE_HAS_VX) { frame_size += sizeof(_sigregs_ext); - if (is_vx_task(current)) - uc_flags |= UC_VXRS; + uc_flags |= UC_VXRS; } frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index c6355e6f3fcc..9062df575afe 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -33,6 +33,7 @@ #include <linux/crash_dump.h> #include <linux/memblock.h> #include <asm/asm-offsets.h> +#include <asm/diag.h> #include <asm/switch_to.h> #include <asm/facility.h> #include <asm/ipl.h> @@ -261,6 +262,8 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->thread_info = (unsigned long) task_thread_info(tsk); lc->current_task = (unsigned long) tsk; + lc->lpp = LPP_MAGIC; + lc->current_pid = tsk->pid; lc->user_timer = ti->user_timer; lc->system_timer = ti->system_timer; lc->steal_timer = 0; @@ -375,11 +378,14 @@ int smp_vcpu_scheduled(int cpu) void smp_yield_cpu(int cpu) { - if (MACHINE_HAS_DIAG9C) + if (MACHINE_HAS_DIAG9C) { + diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" : : "d" (pcpu_devices[cpu].address)); - else if (MACHINE_HAS_DIAG44) + } else if (MACHINE_HAS_DIAG44) { + diag_stat_inc_norecursion(DIAG_STAT_X044); asm volatile("diag 0,0,0x44"); + } } /* diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 017c3a9bfc28..99f84ac31307 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -542,16 +542,17 @@ arch_initcall(etr_init); * Switch to local machine check. This is called when the last usable * ETR port goes inactive. After switch to local the clock is not in sync. */ -void etr_switch_to_local(void) +int etr_switch_to_local(void) { if (!etr_eacr.sl) - return; + return 0; disable_sync_clock(NULL); if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) { etr_eacr.es = etr_eacr.sl = 0; etr_setr(&etr_eacr); - queue_work(time_sync_wq, &etr_work); + return 1; } + return 0; } /* @@ -560,16 +561,22 @@ void etr_switch_to_local(void) * After a ETR sync check the clock is not in sync. The machine check * is broadcasted to all cpus at the same time. */ -void etr_sync_check(void) +int etr_sync_check(void) { if (!etr_eacr.es) - return; + return 0; disable_sync_clock(NULL); if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) { etr_eacr.es = 0; etr_setr(&etr_eacr); - queue_work(time_sync_wq, &etr_work); + return 1; } + return 0; +} + +void etr_queue_work(void) +{ + queue_work(time_sync_wq, &etr_work); } /* @@ -1504,10 +1511,10 @@ static void stp_timing_alert(struct stp_irq_parm *intparm) * After a STP sync check the clock is not in sync. The machine check * is broadcasted to all cpus at the same time. */ -void stp_sync_check(void) +int stp_sync_check(void) { disable_sync_clock(NULL); - queue_work(time_sync_wq, &stp_work); + return 1; } /* @@ -1516,12 +1523,16 @@ void stp_sync_check(void) * have matching CTN ids and have a valid stratum-1 configuration * but the configurations do not match. */ -void stp_island_check(void) +int stp_island_check(void) { disable_sync_clock(NULL); - queue_work(time_sync_wq, &stp_work); + return 1; } +void stp_queue_work(void) +{ + queue_work(time_sync_wq, &stp_work); +} static int stp_sync_clock(void *data) { diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index bf05e7fc3e70..40b8102fdadb 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -84,6 +84,7 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, struct mask_info *socket, int one_socket_per_cpu) { + struct cpu_topology_s390 *topo; unsigned int core; for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) { @@ -95,15 +96,16 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, if (lcpu < 0) continue; for (i = 0; i <= smp_cpu_mtid; i++) { - per_cpu(cpu_topology, lcpu + i).book_id = book->id; - per_cpu(cpu_topology, lcpu + i).core_id = rcore; - per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i; + topo = &per_cpu(cpu_topology, lcpu + i); + topo->book_id = book->id; + topo->core_id = rcore; + topo->thread_id = lcpu + i; cpumask_set_cpu(lcpu + i, &book->mask); cpumask_set_cpu(lcpu + i, &socket->mask); if (one_socket_per_cpu) - per_cpu(cpu_topology, lcpu + i).socket_id = rcore; + topo->socket_id = rcore; else - per_cpu(cpu_topology, lcpu + i).socket_id = socket->id; + topo->socket_id = socket->id; smp_cpu_set_polarization(lcpu + i, tl_core->pp); } if (one_socket_per_cpu) @@ -247,17 +249,19 @@ int topology_set_cpu_management(int fc) static void update_cpu_masks(void) { + struct cpu_topology_s390 *topo; int cpu; for_each_possible_cpu(cpu) { - per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu); - per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu); - per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu); + topo = &per_cpu(cpu_topology, cpu); + topo->thread_mask = cpu_thread_map(cpu); + topo->core_mask = cpu_group_map(&socket_info, cpu); + topo->book_mask = cpu_group_map(&book_info, cpu); if (!MACHINE_HAS_TOPOLOGY) { - per_cpu(cpu_topology, cpu).thread_id = cpu; - per_cpu(cpu_topology, cpu).core_id = cpu; - per_cpu(cpu_topology, cpu).socket_id = cpu; - per_cpu(cpu_topology, cpu).book_id = cpu; + topo->thread_id = cpu; + topo->core_id = cpu; + topo->socket_id = cpu; + topo->book_id = cpu; } } numa_update_cpu_topology(); diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c new file mode 100644 index 000000000000..73239bb576c4 --- /dev/null +++ b/arch/s390/kernel/trace.c @@ -0,0 +1,29 @@ +/* + * Tracepoint definitions for s390 + * + * Copyright IBM Corp. 2015 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/percpu.h> +#define CREATE_TRACE_POINTS +#include <asm/trace/diag.h> + +EXPORT_TRACEPOINT_SYMBOL(diagnose); + +static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth); + +void trace_diagnose_norecursion(int diag_nr) +{ + unsigned long flags; + unsigned int *depth; + + local_irq_save(flags); + depth = this_cpu_ptr(&diagnose_trace_depth); + if (*depth == 0) { + (*depth)++; + trace_diagnose(diag_nr); + (*depth)--; + } + local_irq_restore(flags); +} diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 9861613fb35a..1b18118bbc06 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -19,7 +19,7 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/slab.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/api.h> #include "entry.h" int show_unhandled_signals = 1; @@ -224,29 +224,6 @@ NOKPROBE_SYMBOL(illegal_op); DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); -int alloc_vector_registers(struct task_struct *tsk) -{ - __vector128 *vxrs; - freg_t *fprs; - - /* Allocate vector register save area. */ - vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS, - GFP_KERNEL|__GFP_REPEAT); - if (!vxrs) - return -ENOMEM; - preempt_disable(); - if (tsk == current) - save_fpu_regs(); - /* Copy the 16 floating point registers */ - convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs); - fprs = tsk->thread.fpu.fprs; - tsk->thread.fpu.vxrs = vxrs; - tsk->thread.fpu.flags |= FPU_USE_VX; - kfree(fprs); - preempt_enable(); - return 0; -} - void vector_exception(struct pt_regs *regs) { int si_code, vic; @@ -281,13 +258,6 @@ void vector_exception(struct pt_regs *regs) do_trap(regs, SIGFPE, si_code, "vector exception"); } -static int __init disable_vector_extension(char *str) -{ - S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; - return 1; -} -__setup("novx", disable_vector_extension); - void data_exception(struct pt_regs *regs) { __u16 __user *location; @@ -296,15 +266,6 @@ void data_exception(struct pt_regs *regs) location = get_trap_ip(regs); save_fpu_regs(); - /* Check for vector register enablement */ - if (MACHINE_HAS_VX && !is_vx_task(current) && - (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) { - alloc_vector_registers(current); - /* Vector data exception is suppressing, rewind psw. */ - regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); - clear_pt_regs_flag(regs, PIF_PER_TRAP); - return; - } if (current->thread.fpu.fpc & FPC_DXC_MASK) signal = SIGFPE; else diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 0d58269ff425..59eddb0e1a3e 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -299,7 +299,7 @@ static int __init vdso_init(void) get_page(virt_to_page(vdso_data)); - smp_wmb(); + smp_mb(); return 0; } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c8653435c70d..dafc44f519c3 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -25,7 +25,7 @@ static DEFINE_SPINLOCK(virt_timer_lock); static atomic64_t virt_timer_current; static atomic64_t virt_timer_elapsed; -static DEFINE_PER_CPU(u64, mt_cycles[32]); +DEFINE_PER_CPU(u64, mt_cycles[8]); static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 }; static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 }; static DEFINE_PER_CPU(u64, mt_scaling_jiffies); @@ -60,6 +60,34 @@ static inline int virt_timer_forward(u64 elapsed) return elapsed >= atomic64_read(&virt_timer_current); } +static void update_mt_scaling(void) +{ + u64 cycles_new[8], *cycles_old; + u64 delta, fac, mult, div; + int i; + + stcctm5(smp_cpu_mtid + 1, cycles_new); + cycles_old = this_cpu_ptr(mt_cycles); + fac = 1; + mult = div = 0; + for (i = 0; i <= smp_cpu_mtid; i++) { + delta = cycles_new[i] - cycles_old[i]; + div += delta; + mult *= i + 1; + mult += delta * fac; + fac *= i + 1; + } + div *= fac; + if (div > 0) { + /* Update scaling factor */ + __this_cpu_write(mt_scaling_mult, mult); + __this_cpu_write(mt_scaling_div, div); + memcpy(cycles_old, cycles_new, + sizeof(u64) * (smp_cpu_mtid + 1)); + } + __this_cpu_write(mt_scaling_jiffies, jiffies_64); +} + /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. @@ -69,7 +97,6 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) struct thread_info *ti = task_thread_info(tsk); u64 timer, clock, user, system, steal; u64 user_scaled, system_scaled; - int i; timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; @@ -85,34 +112,10 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; - /* Do MT utilization calculation */ + /* Update MT utilization calculation */ if (smp_cpu_mtid && - time_after64(jiffies_64, __this_cpu_read(mt_scaling_jiffies))) { - u64 cycles_new[32], *cycles_old; - u64 delta, fac, mult, div; - - cycles_old = this_cpu_ptr(mt_cycles); - if (stcctm5(smp_cpu_mtid + 1, cycles_new) < 2) { - fac = 1; - mult = div = 0; - for (i = 0; i <= smp_cpu_mtid; i++) { - delta = cycles_new[i] - cycles_old[i]; - div += delta; - mult *= i + 1; - mult += delta * fac; - fac *= i + 1; - } - div *= fac; - if (div > 0) { - /* Update scaling factor */ - __this_cpu_write(mt_scaling_mult, mult); - __this_cpu_write(mt_scaling_div, div); - memcpy(cycles_old, cycles_new, - sizeof(u64) * (smp_cpu_mtid + 1)); - } - } - __this_cpu_write(mt_scaling_jiffies, jiffies_64); - } + time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies))) + update_mt_scaling(); user = S390_lowcore.user_timer - ti->user_timer; S390_lowcore.steal_timer -= user; @@ -181,6 +184,11 @@ void vtime_account_irq_enter(struct task_struct *tsk) S390_lowcore.last_update_timer = get_vtimer(); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; + /* Update MT utilization calculation */ + if (smp_cpu_mtid && + time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies))) + update_mt_scaling(); + system = S390_lowcore.system_timer - ti->system_timer; S390_lowcore.steal_timer -= system; ti->system_timer = S390_lowcore.system_timer; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 7365e8a46032..b4a5aa110cec 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -336,28 +336,28 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } -static const intercept_handler_t intercept_funcs[] = { - [0x00 >> 2] = handle_noop, - [0x04 >> 2] = handle_instruction, - [0x08 >> 2] = handle_prog, - [0x10 >> 2] = handle_noop, - [0x14 >> 2] = handle_external_interrupt, - [0x18 >> 2] = handle_noop, - [0x1C >> 2] = kvm_s390_handle_wait, - [0x20 >> 2] = handle_validity, - [0x28 >> 2] = handle_stop, - [0x38 >> 2] = handle_partial_execution, -}; - int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { - intercept_handler_t func; - u8 code = vcpu->arch.sie_block->icptcode; - - if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs)) + switch (vcpu->arch.sie_block->icptcode) { + case 0x00: + case 0x10: + case 0x18: + return handle_noop(vcpu); + case 0x04: + return handle_instruction(vcpu); + case 0x08: + return handle_prog(vcpu); + case 0x14: + return handle_external_interrupt(vcpu); + case 0x1c: + return kvm_s390_handle_wait(vcpu); + case 0x20: + return handle_validity(vcpu); + case 0x28: + return handle_stop(vcpu); + case 0x38: + return handle_partial_execution(vcpu); + default: return -EOPNOTSUPP; - func = intercept_funcs[code >> 2]; - if (func) - return func(vcpu); - return -EOPNOTSUPP; + } } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5c2c169395c3..373e32346d68 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -51,11 +51,9 @@ static int psw_mchk_disabled(struct kvm_vcpu *vcpu) static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) { - if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || - (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) || - (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT)) - return 0; - return 1; + return psw_extint_disabled(vcpu) && + psw_ioint_disabled(vcpu) && + psw_mchk_disabled(vcpu); } static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) @@ -71,13 +69,8 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) static int ckc_irq_pending(struct kvm_vcpu *vcpu) { - preempt_disable(); - if (!(vcpu->arch.sie_block->ckc < - get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) { - preempt_enable(); + if (vcpu->arch.sie_block->ckc >= kvm_s390_get_tod_clock_fast(vcpu->kvm)) return 0; - } - preempt_enable(); return ckc_interrupts_enabled(vcpu); } @@ -109,14 +102,10 @@ static inline u8 int_word_to_isc(u32 int_word) return (int_word & 0x38000000) >> 27; } -static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu) +static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) { - return vcpu->kvm->arch.float_int.pending_irqs; -} - -static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.local_int.pending_irqs; + return vcpu->kvm->arch.float_int.pending_irqs | + vcpu->arch.local_int.pending_irqs; } static unsigned long disable_iscs(struct kvm_vcpu *vcpu, @@ -135,8 +124,7 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) { unsigned long active_mask; - active_mask = pending_local_irqs(vcpu); - active_mask |= pending_floating_irqs(vcpu); + active_mask = pending_irqs(vcpu); if (!active_mask) return 0; @@ -204,7 +192,7 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) { - if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK)) + if (!(pending_irqs(vcpu) & IRQ_PEND_IO_MASK)) return; else if (psw_ioint_disabled(vcpu)) __set_cpuflag(vcpu, CPUSTAT_IO_INT); @@ -214,7 +202,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) { - if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK)) + if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK)) return; if (psw_extint_disabled(vcpu)) __set_cpuflag(vcpu, CPUSTAT_EXT_INT); @@ -224,7 +212,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) { - if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK)) + if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK)) return; if (psw_mchk_disabled(vcpu)) vcpu->arch.sie_block->ictl |= ICTL_LPSW; @@ -815,23 +803,21 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) { - int rc; + if (deliverable_irqs(vcpu)) + return 1; - rc = !!deliverable_irqs(vcpu); - - if (!rc && kvm_cpu_has_pending_timer(vcpu)) - rc = 1; + if (kvm_cpu_has_pending_timer(vcpu)) + return 1; /* external call pending and deliverable */ - if (!rc && kvm_s390_ext_call_pending(vcpu) && + if (kvm_s390_ext_call_pending(vcpu) && !psw_extint_disabled(vcpu) && (vcpu->arch.sie_block->gcr[0] & 0x2000ul)) - rc = 1; - - if (!rc && !exclude_stop && kvm_s390_is_stop_irq_pending(vcpu)) - rc = 1; + return 1; - return rc; + if (!exclude_stop && kvm_s390_is_stop_irq_pending(vcpu)) + return 1; + return 0; } int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) @@ -846,7 +832,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) vcpu->stat.exit_wait_state++; /* fast path */ - if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu)) + if (kvm_arch_vcpu_runnable(vcpu)) return 0; if (psw_interrupts_disabled(vcpu)) { @@ -860,9 +846,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) goto no_timer; } - preempt_disable(); - now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; - preempt_enable(); + now = kvm_s390_get_tod_clock_fast(vcpu->kvm); sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); /* underflow */ @@ -901,9 +885,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) u64 now, sltime; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); - preempt_disable(); - now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; - preempt_enable(); + now = kvm_s390_get_tod_clock_fast(vcpu->kvm); sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); /* @@ -981,39 +963,30 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, irq->u.pgm.code, 0); - li->irq.pgm = irq->u.pgm; + if (irq->u.pgm.code == PGM_PER) { + li->irq.pgm.code |= PGM_PER; + /* only modify PER related information */ + li->irq.pgm.per_address = irq->u.pgm.per_address; + li->irq.pgm.per_code = irq->u.pgm.per_code; + li->irq.pgm.per_atmid = irq->u.pgm.per_atmid; + li->irq.pgm.per_access_id = irq->u.pgm.per_access_id; + } else if (!(irq->u.pgm.code & PGM_PER)) { + li->irq.pgm.code = (li->irq.pgm.code & PGM_PER) | + irq->u.pgm.code; + /* only modify non-PER information */ + li->irq.pgm.trans_exc_code = irq->u.pgm.trans_exc_code; + li->irq.pgm.mon_code = irq->u.pgm.mon_code; + li->irq.pgm.data_exc_code = irq->u.pgm.data_exc_code; + li->irq.pgm.mon_class_nr = irq->u.pgm.mon_class_nr; + li->irq.pgm.exc_access_id = irq->u.pgm.exc_access_id; + li->irq.pgm.op_access_id = irq->u.pgm.op_access_id; + } else { + li->irq.pgm = irq->u.pgm; + } set_bit(IRQ_PEND_PROG, &li->pending_irqs); return 0; } -int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) -{ - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_irq irq; - - spin_lock(&li->lock); - irq.u.pgm.code = code; - __inject_prog(vcpu, &irq); - BUG_ON(waitqueue_active(li->wq)); - spin_unlock(&li->lock); - return 0; -} - -int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, - struct kvm_s390_pgm_info *pgm_info) -{ - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_irq irq; - int rc; - - spin_lock(&li->lock); - irq.u.pgm = *pgm_info; - rc = __inject_prog(vcpu, &irq); - BUG_ON(waitqueue_active(li->wq)); - spin_unlock(&li->lock); - return rc; -} - static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -1390,12 +1363,9 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { - struct kvm_s390_float_interrupt *fi; u64 type = READ_ONCE(inti->type); int rc; - fi = &kvm->arch.float_int; - switch (type) { case KVM_S390_MCHK: rc = __inject_float_mchk(kvm, inti); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0a67c40eece9..8fe2f1c722dc 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -514,35 +514,20 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) if (gtod_high != 0) return -EINVAL; - VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high); + VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high); return 0; } static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) { - struct kvm_vcpu *cur_vcpu; - unsigned int vcpu_idx; - u64 host_tod, gtod; - int r; + u64 gtod; if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod))) return -EFAULT; - r = store_tod_clock(&host_tod); - if (r) - return r; - - mutex_lock(&kvm->lock); - preempt_disable(); - kvm->arch.epoch = gtod - host_tod; - kvm_s390_vcpu_block_all(kvm); - kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) - cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch; - kvm_s390_vcpu_unblock_all(kvm); - preempt_enable(); - mutex_unlock(&kvm->lock); - VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod); + kvm_s390_set_tod_clock(kvm, gtod); + VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod); return 0; } @@ -574,26 +559,19 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) if (copy_to_user((void __user *)attr->addr, >od_high, sizeof(gtod_high))) return -EFAULT; - VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high); + VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high); return 0; } static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) { - u64 host_tod, gtod; - int r; + u64 gtod; - r = store_tod_clock(&host_tod); - if (r) - return r; - - preempt_disable(); - gtod = host_tod + kvm->arch.epoch; - preempt_enable(); + gtod = kvm_s390_get_tod_clock_fast(kvm); if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) return -EFAULT; - VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod); + VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod); return 0; } @@ -1120,7 +1098,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); - sca_offset = (sca_offset + 16) & 0x7f0; + sca_offset += 16; + if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + sca_offset = 0; kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); @@ -1292,7 +1272,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) static inline void save_fpu_to(struct fpu *dst) { dst->fpc = current->thread.fpu.fpc; - dst->flags = current->thread.fpu.flags; dst->regs = current->thread.fpu.regs; } @@ -1303,7 +1282,6 @@ static inline void save_fpu_to(struct fpu *dst) static inline void load_fpu_from(struct fpu *from) { current->thread.fpu.fpc = from->fpc; - current->thread.fpu.flags = from->flags; current->thread.fpu.regs = from->regs; } @@ -1315,15 +1293,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (test_kvm_facility(vcpu->kvm, 129)) { current->thread.fpu.fpc = vcpu->run->s.regs.fpc; - current->thread.fpu.flags = FPU_USE_VX; /* * Use the register save area in the SIE-control block * for register restore and save in kvm_arch_vcpu_put() */ current->thread.fpu.vxrs = (__vector128 *)&vcpu->run->s.regs.vrs; - /* Always enable the vector extension for KVM */ - __ctl_set_vx(); } else load_fpu_from(&vcpu->arch.guest_fpregs); @@ -1916,6 +1891,22 @@ retry: return 0; } +void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod) +{ + struct kvm_vcpu *vcpu; + int i; + + mutex_lock(&kvm->lock); + preempt_disable(); + kvm->arch.epoch = tod - get_tod_clock(); + kvm_s390_vcpu_block_all(kvm); + kvm_for_each_vcpu(i, vcpu, kvm) + vcpu->arch.sie_block->epoch = kvm->arch.epoch; + kvm_s390_vcpu_unblock_all(kvm); + preempt_enable(); + mutex_unlock(&kvm->lock); +} + /** * kvm_arch_fault_in_page - fault-in guest page if necessary * @vcpu: The corresponding virtual cpu @@ -2326,7 +2317,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) * registers and the FPC value and store them in the * guest_fpregs structure. */ - WARN_ON(!is_vx_task(current)); /* XXX remove later */ vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc; convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs, current->thread.fpu.vxrs); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index c446aabf60d3..1e70e00d3c5e 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -175,6 +175,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) return kvm->arch.user_cpu_state_ctrl != 0; } +/* implemented in interrupt.c */ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); @@ -185,7 +186,25 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int); int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq); -int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); +static inline int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, + struct kvm_s390_pgm_info *pgm_info) +{ + struct kvm_s390_irq irq = { + .type = KVM_S390_PROGRAM_INT, + .u.pgm = *pgm_info, + }; + + return kvm_s390_inject_vcpu(vcpu, &irq); +} +static inline int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +{ + struct kvm_s390_irq irq = { + .type = KVM_S390_PROGRAM_INT, + .u.pgm.code = code, + }; + + return kvm_s390_inject_vcpu(vcpu, &irq); +} struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 isc_mask, u32 schid); int kvm_s390_reinject_io_int(struct kvm *kvm, @@ -212,6 +231,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ +void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu, @@ -231,9 +251,6 @@ extern unsigned long kvm_s390_fac_list_mask[]; /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); -/* implemented in interrupt.c */ -int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, - struct kvm_s390_pgm_info *pgm_info); static inline void kvm_s390_vcpu_block_all(struct kvm *kvm) { @@ -254,6 +271,16 @@ static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm) kvm_s390_vcpu_unblock(vcpu); } +static inline u64 kvm_s390_get_tod_clock_fast(struct kvm *kvm) +{ + u64 rc; + + preempt_disable(); + rc = get_tod_clock_fast() + kvm->arch.epoch; + preempt_enable(); + return rc; +} + /** * kvm_s390_inject_prog_cond - conditionally inject a program check * @vcpu: virtual cpu diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 4d21dc4d1a84..77191b85ea7a 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -33,11 +33,9 @@ /* Handle SCK (SET CLOCK) interception */ static int handle_set_clock(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *cpup; - s64 hostclk, val; - int i, rc; + int rc; ar_t ar; - u64 op2; + u64 op2, val; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -49,19 +47,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - if (store_tod_clock(&hostclk)) { - kvm_s390_set_psw_cc(vcpu, 3); - return 0; - } VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); - val = (val - hostclk) & ~0x3fUL; - - mutex_lock(&vcpu->kvm->lock); - preempt_disable(); - kvm_for_each_vcpu(i, cpup, vcpu->kvm) - cpup->arch.sie_block->epoch = val; - preempt_enable(); - mutex_unlock(&vcpu->kvm->lock); + kvm_s390_set_tod_clock(vcpu->kvm, val); kvm_s390_set_psw_cc(vcpu, 0); return 0; diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index 246a7eb4b680..501dcd4ca4a0 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -12,8 +12,10 @@ #include <linux/module.h> #include <linux/irqflags.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <asm/vtimer.h> #include <asm/div64.h> +#include <asm/idle.h> void __delay(unsigned long loops) { @@ -30,26 +32,22 @@ EXPORT_SYMBOL(__delay); static void __udelay_disabled(unsigned long long usecs) { - unsigned long cr0, cr6, new; - u64 clock_saved, end; + unsigned long cr0, cr0_new, psw_mask; + struct s390_idle_data idle; + u64 end; end = get_tod_clock() + (usecs << 12); - clock_saved = local_tick_disable(); __ctl_store(cr0, 0, 0); - __ctl_store(cr6, 6, 6); - new = (cr0 & 0xffff00e0) | 0x00000800; - __ctl_load(new , 0, 0); - new = 0; - __ctl_load(new, 6, 6); - lockdep_off(); - do { - set_clock_comparator(end); - enabled_wait(); - } while (get_tod_clock_fast() < end); - lockdep_on(); + cr0_new = cr0 & ~CR0_IRQ_SUBCLASS_MASK; + cr0_new |= (1UL << (63 - 52)); /* enable clock comparator irq */ + __ctl_load(cr0_new, 0, 0); + psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT; + set_clock_comparator(end); + set_cpu_flag(CIF_IGNORE_IRQ); + psw_idle(&idle, psw_mask); + clear_cpu_flag(CIF_IGNORE_IRQ); + set_clock_comparator(S390_lowcore.clock_comparator); __ctl_load(cr0, 0, 0); - __ctl_load(cr6, 6, 6); - local_tick_enable(clock_saved); } static void __udelay_enabled(unsigned long long usecs) diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c index 922003c1b90d..d90b9245ea41 100644 --- a/arch/s390/lib/find.c +++ b/arch/s390/lib/find.c @@ -1,10 +1,8 @@ /* * MSB0 numbered special bitops handling. * - * On s390x the bits are numbered: + * The bits are numbered: * |0..............63|64............127|128...........191|192...........255| - * and on s390: - * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| * * The reason for this bit numbering is the fact that the hardware sets bits * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index d6c9991f7797..427aa44b3505 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -197,7 +197,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) } old = ACCESS_ONCE(rw->lock); owner = ACCESS_ONCE(rw->owner); - smp_rmb(); + smp_mb(); if ((int) old >= 0) { prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); old = prev; @@ -231,7 +231,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) _raw_compare_and_swap(&rw->lock, old, old | 0x80000000)) prev = old; else - smp_rmb(); + smp_mb(); if ((old & 0x7fffffff) == 0 && (int) prev >= 0) break; if (MACHINE_HAS_CAD) diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 23c496957c22..18fccc303db7 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -18,6 +18,7 @@ #include <linux/bootmem.h> #include <linux/ctype.h> #include <linux/ioport.h> +#include <asm/diag.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/ebcdic.h> @@ -112,6 +113,7 @@ dcss_set_subcodes(void) ry = DCSS_FINDSEGX; strcpy(name, "dummy"); + diag_stat_inc(DIAG_STAT_X064); asm volatile( " diag %0,%1,0x64\n" "0: ipm %2\n" @@ -205,6 +207,7 @@ dcss_diag(int *func, void *parameter, ry = (unsigned long) *func; /* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */ + diag_stat_inc(DIAG_STAT_X064); if (*func > DCSS_SEGEXT) asm volatile( " diag %0,%1,0x64\n" diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index f985856a538b..ec1a30d0d11a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -30,6 +30,7 @@ #include <linux/uaccess.h> #include <linux/hugetlb.h> #include <asm/asm-offsets.h> +#include <asm/diag.h> #include <asm/pgtable.h> #include <asm/irq.h> #include <asm/mmu_context.h> @@ -589,7 +590,7 @@ int pfault_init(void) .reffcode = 0, .refdwlen = 5, .refversn = 2, - .refgaddr = __LC_CURRENT_PID, + .refgaddr = __LC_LPP, .refselmk = 1ULL << 48, .refcmpmk = 1ULL << 48, .reserved = __PF_RES_FIELD }; @@ -597,6 +598,7 @@ int pfault_init(void) if (pfault_disable) return -1; + diag_stat_inc(DIAG_STAT_X258); asm volatile( " diag %1,%0,0x258\n" "0: j 2f\n" @@ -618,6 +620,7 @@ void pfault_fini(void) if (pfault_disable) return; + diag_stat_inc(DIAG_STAT_X258); asm volatile( " diag %0,0,0x258\n" "0:\n" @@ -646,7 +649,7 @@ static void pfault_interrupt(struct ext_code ext_code, return; inc_irq_stat(IRQEXT_PFL); /* Get the token (= pid of the affected task). */ - pid = param64; + pid = param64 & LPP_PFAULT_PID_MASK; rcu_read_lock(); tsk = find_task_by_pid_ns(pid, &init_pid_ns); if (tsk) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index fb4bf2c4379e..f81096b6940d 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -40,6 +40,7 @@ static inline pmd_t __pte_to_pmd(pte_t pte) pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT); pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10; pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10; + pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; } else pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; return pmd; @@ -78,6 +79,7 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT); pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10; pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10; + pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; } else pte_val(pte) = _PAGE_INVALID; return pte; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index eeda051442c3..9a0c4c22e536 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1310,7 +1310,7 @@ void bpf_int_jit_compile(struct bpf_prog *fp) if (jit.prg_buf) { set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *) jit.prg_buf; - fp->jited = true; + fp->jited = 1; } free_addrs: kfree(jit.addrs); diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index 7de4e2f780d7..828d0695d0d4 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -368,7 +368,7 @@ static void topology_add_core(struct toptree *core) cpumask_copy(&top->thread_mask, &core->mask); cpumask_copy(&top->core_mask, &core_mc(core)->mask); cpumask_copy(&top->book_mask, &core_book(core)->mask); - cpumask_set_cpu(cpu, node_to_cpumask_map[core_node(core)->id]); + cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]); top->node_id = core_node(core)->id; } } @@ -383,7 +383,7 @@ static void toptree_to_topology(struct toptree *numa) /* Clear all node masks */ for (i = 0; i < MAX_NUMNODES; i++) - cpumask_clear(node_to_cpumask_map[i]); + cpumask_clear(&node_to_cpumask_map[i]); /* Rebuild all masks */ toptree_for_each(core, numa, CORE) @@ -436,9 +436,15 @@ static void emu_update_cpu_topology(void) */ static unsigned long emu_setup_size_adjust(unsigned long size) { + unsigned long size_new; + size = size ? : CONFIG_EMU_SIZE; - size = roundup(size, memory_block_size_bytes()); - return size; + size_new = roundup(size, memory_block_size_bytes()); + if (size_new == size) + return size; + pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n", + size >> 20, size_new >> 20); + return size_new; } /* diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 09b1d2355bd9..43f32ce60aa3 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -23,7 +23,7 @@ pg_data_t *node_data[MAX_NUMNODES]; EXPORT_SYMBOL(node_data); -cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; +cpumask_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map); const struct numa_mode numa_mode_plain = { @@ -144,7 +144,7 @@ void __init numa_setup(void) static int __init numa_init_early(void) { /* Attach all possible CPUs to node 0 for now. */ - cpumask_copy(node_to_cpumask_map[0], cpu_possible_mask); + cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); return 0; } early_initcall(numa_init_early); diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 37505b8b4093..37d10f74425a 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -24,7 +24,7 @@ static int zpci_refresh_global(struct zpci_dev *zdev) zdev->iommu_pages * PAGE_SIZE); } -static unsigned long *dma_alloc_cpu_table(void) +unsigned long *dma_alloc_cpu_table(void) { unsigned long *table, *entry; @@ -114,12 +114,12 @@ static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr return &pto[px]; } -static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr, - dma_addr_t dma_addr, int flags) +void dma_update_cpu_trans(unsigned long *dma_table, void *page_addr, + dma_addr_t dma_addr, int flags) { unsigned long *entry; - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + entry = dma_walk_cpu_trans(dma_table, dma_addr); if (!entry) { WARN_ON_ONCE(1); return; @@ -156,7 +156,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, goto no_refresh; for (i = 0; i < nr_pages; i++) { - dma_update_cpu_trans(zdev, page_addr, dma_addr, flags); + dma_update_cpu_trans(zdev->dma_table, page_addr, dma_addr, + flags); page_addr += PAGE_SIZE; dma_addr += PAGE_SIZE; } @@ -181,7 +182,7 @@ no_refresh: return rc; } -static void dma_free_seg_table(unsigned long entry) +void dma_free_seg_table(unsigned long entry) { unsigned long *sto = get_rt_sto(entry); int sx; @@ -193,21 +194,18 @@ static void dma_free_seg_table(unsigned long entry) dma_free_cpu_table(sto); } -static void dma_cleanup_tables(struct zpci_dev *zdev) +void dma_cleanup_tables(unsigned long *table) { - unsigned long *table; int rtx; - if (!zdev || !zdev->dma_table) + if (!table) return; - table = zdev->dma_table; for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) if (reg_entry_isvalid(table[rtx])) dma_free_seg_table(table[rtx]); dma_free_cpu_table(table); - zdev->dma_table = NULL; } static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, @@ -416,6 +414,13 @@ int zpci_dma_init_device(struct zpci_dev *zdev) { int rc; + /* + * At this point, if the device is part of an IOMMU domain, this would + * be a strong hint towards a bug in the IOMMU API (common) code and/or + * simultaneous access via IOMMU and DMA API. So let's issue a warning. + */ + WARN_ON(zdev->s390_domain); + spin_lock_init(&zdev->iommu_bitmap_lock); spin_lock_init(&zdev->dma_table_lock); @@ -450,8 +455,16 @@ out_clean: void zpci_dma_exit_device(struct zpci_dev *zdev) { + /* + * At this point, if the device is part of an IOMMU domain, this would + * be a strong hint towards a bug in the IOMMU API (common) code and/or + * simultaneous access via IOMMU and DMA API. So let's issue a warning. + */ + WARN_ON(zdev->s390_domain); + zpci_unregister_ioat(zdev, 0); - dma_cleanup_tables(zdev); + dma_cleanup_tables(zdev->dma_table); + zdev->dma_table = NULL; vfree(zdev->iommu_bitmap); zdev->iommu_bitmap = NULL; zdev->next_bit = 0; diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index dcc2634ccbe2..10ca15dcab11 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -16,11 +16,11 @@ static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset) { struct { - u8 cc; - u8 status; u64 req; u64 offset; - } data = {cc, status, req, offset}; + u8 cc; + u8 status; + } __packed data = {req, offset, cc, status}; zpci_err_hex(&data, sizeof(data)); } diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 92ffe397b893..a05218ff3fe4 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -13,3 +13,4 @@ generic-y += sections.h generic-y += trace_clock.h generic-y += xor.h generic-y += serial.h +generic-y += word-at-a-time.h diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index 05b9f74ce2d5..c399e1c55685 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -14,8 +14,8 @@ #define ATOMIC_INIT(i) { (i) } -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic_set(v,i) ((v)->counter = (i)) +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic_set(v,i) WRITE_ONCE((v)->counter, (i)) #if defined(CONFIG_GUSA_RB) #include <asm/atomic-grb.h> diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h index fe20d14ae051..ceb5201a30ed 100644 --- a/arch/sh/include/asm/page.h +++ b/arch/sh/include/asm/page.h @@ -59,6 +59,7 @@ pages_do_alias(unsigned long addr1, unsigned long addr2) #define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) extern void copy_page(void *to, void *from); +#define copy_user_page(to, from, vaddr, pg) __copy_user(to, from, PAGE_SIZE) struct page; struct vm_area_struct; diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 2e48eb8813ff..c90930de76ba 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -433,6 +433,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, .setkey = aes_set_key, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, @@ -452,6 +453,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, .setkey = aes_set_key, .encrypt = ctr_crypt, .decrypt = ctr_crypt, diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 6bf2479a12fb..561a84d93cf6 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -274,6 +274,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, .setkey = camellia_set_key, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index dd6a34fa6e19..61af794aa2d3 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -429,6 +429,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = DES_KEY_SIZE, .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, .setkey = des_set_key, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, @@ -485,6 +486,7 @@ static struct crypto_alg algs[] = { { .blkcipher = { .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, .setkey = des3_ede_set_key, .encrypt = cbc3_encrypt, .decrypt = cbc3_decrypt, diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index 917084ace49d..f2fbf9e16faf 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -14,11 +14,11 @@ #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic64_read(v) ACCESS_ONCE((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic64_read(v) READ_ONCE((v)->counter) -#define atomic_set(v, i) (((v)->counter) = i) -#define atomic64_set(v, i) (((v)->counter) = i) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) +#define atomic64_set(v, i) WRITE_ONCE(((v)->counter), (i)) #define ATOMIC_OP(op) \ void atomic_##op(int, atomic_t *); \ diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 01d17046225a..bec481aaca16 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -31,6 +31,9 @@ static inline int pcibus_to_node(struct pci_bus *pbus) cpu_all_mask : \ cpumask_of_node(pcibus_to_node(bus))) +int __node_distance(int, int); +#define node_distance(a, b) __node_distance(a, b) + #else /* CONFIG_NUMA */ #include <asm-generic/topology.h> diff --git a/arch/sparc/include/uapi/asm/asi.h b/arch/sparc/include/uapi/asm/asi.h index aace6f313716..7ad7203deaec 100644 --- a/arch/sparc/include/uapi/asm/asi.h +++ b/arch/sparc/include/uapi/asm/asi.h @@ -279,7 +279,7 @@ * Most-Recently-Used, primary, * implicit */ -#define ASI_ST_BLKINIT_MRU_S 0xf2 /* (NG4) init-store, twin load, +#define ASI_ST_BLKINIT_MRU_S 0xf3 /* (NG4) init-store, twin load, * Most-Recently-Used, secondary, * implicit */ diff --git a/arch/sparc/include/uapi/asm/mman.h b/arch/sparc/include/uapi/asm/mman.h index 0b14df33cffa..9765896ecb2c 100644 --- a/arch/sparc/include/uapi/asm/mman.h +++ b/arch/sparc/include/uapi/asm/mman.h @@ -17,6 +17,7 @@ #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ +#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 5320689c06e9..37686828c3d9 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -161,7 +161,7 @@ static inline iopte_t *alloc_npages(struct device *dev, entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, (unsigned long)(-1), 0); - if (unlikely(entry == DMA_ERROR_CODE)) + if (unlikely(entry == IOMMU_ERROR_CODE)) return NULL; return iommu->page_table + entry; @@ -253,7 +253,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size, npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; iommu = dev->archdata.iommu; - iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE); order = get_order(size); if (order < 10) @@ -426,7 +426,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, iommu_free_ctx(iommu, ctx); spin_unlock_irqrestore(&iommu->lock, flags); - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); } static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, @@ -492,7 +492,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, &handle, (unsigned long)(-1), 0); /* Handle failure */ - if (unlikely(entry == DMA_ERROR_CODE)) { + if (unlikely(entry == IOMMU_ERROR_CODE)) { if (printk_ratelimit()) printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" " npages %lx\n", iommu, paddr, npages); @@ -571,7 +571,7 @@ iommu_map_failed: iopte_make_dummy(iommu, base + j); iommu_tbl_range_free(&iommu->tbl, vaddr, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -648,7 +648,7 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, iopte_make_dummy(iommu, base + i); iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); sg = sg_next(sg); } diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 1ae5eb1bb045..59d503866431 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1953,7 +1953,7 @@ static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu, entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table, npages, NULL, (unsigned long)-1, 0); - if (unlikely(entry < 0)) + if (unlikely(entry == IOMMU_ERROR_CODE)) return NULL; return iommu->page_table + entry; diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index d2fe57dad433..836e8cef47e2 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -159,7 +159,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, (unsigned long)(-1), 0); - if (unlikely(entry == DMA_ERROR_CODE)) + if (unlikely(entry == IOMMU_ERROR_CODE)) goto range_alloc_fail; *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); @@ -187,7 +187,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, return ret; iommu_map_fail: - iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE); range_alloc_fail: free_pages(first_page, order); @@ -226,7 +226,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, devhandle = pbm->devhandle; entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT); dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE); order = get_order(size); if (order < 10) free_pages((unsigned long)cpu, order); @@ -256,7 +256,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, (unsigned long)(-1), 0); - if (unlikely(entry == DMA_ERROR_CODE)) + if (unlikely(entry == IOMMU_ERROR_CODE)) goto bad; bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); @@ -288,7 +288,7 @@ bad: return DMA_ERROR_CODE; iommu_map_fail: - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); return DMA_ERROR_CODE; } @@ -317,7 +317,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, bus_addr &= IO_PAGE_MASK; entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT; dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); } static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, @@ -376,7 +376,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, &handle, (unsigned long)(-1), 0); /* Handle failure */ - if (unlikely(entry == DMA_ERROR_CODE)) { + if (unlikely(entry == IOMMU_ERROR_CODE)) { if (printk_ratelimit()) printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" " npages %lx\n", iommu, paddr, npages); @@ -451,7 +451,7 @@ iommu_map_failed: npages = iommu_num_pages(s->dma_address, s->dma_length, IO_PAGE_SIZE); iommu_tbl_range_free(&iommu->tbl, vaddr, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); /* XXX demap? XXX */ s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -496,7 +496,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, entry = ((dma_handle - tbl->table_map_base) >> shift); dma_4v_iommu_demap(&devhandle, entry, npages); iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); sg = sg_next(sg); } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 689db65f8529..b0da5aedb336 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -108,7 +108,7 @@ struct cpu_hw_events { /* Enabled/disable state. */ int enabled; - unsigned int group_flag; + unsigned int txn_flags; }; static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -1379,7 +1379,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto nocheck; if (check_excludes(cpuc->event, n0, 1)) @@ -1494,12 +1494,17 @@ static int sparc_pmu_event_init(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void sparc_pmu_start_txn(struct pmu *pmu) +static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */ + + cpuhw->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); - cpuhw->group_flag |= PERF_EVENT_TXN; } /* @@ -1510,8 +1515,15 @@ static void sparc_pmu_start_txn(struct pmu *pmu) static void sparc_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); + unsigned int txn_flags; + + WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */ + + txn_flags = cpuhw->txn_flags; + cpuhw->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; - cpuhw->group_flag &= ~PERF_EVENT_TXN; perf_pmu_enable(pmu); } @@ -1528,14 +1540,20 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) if (!sparc_pmu) return -EINVAL; - cpuc = this_cpu_ptr(&cpu_hw_events); + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuc->txn_flags = 0; + return 0; + } + n = cpuc->n_events; if (check_excludes(cpuc->event, 0, n)) return -EINVAL; if (sparc_check_constraints(cpuc->event, cpuc->events, n)) return -EAGAIN; - cpuc->group_flag &= ~PERF_EVENT_TXN; + cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index 62098a89bbbf..d89e97b374cf 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -436,24 +436,26 @@ extern void sun4v_data_access_exception(struct pt_regs *regs, int handle_ldf_stq(u32 insn, struct pt_regs *regs) { unsigned long addr = compute_effective_address(regs, insn, 0); - int freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); + int freg; struct fpustate *f = FPUSTATE; int asi = decode_asi(insn, regs); - int flag = (freg < 32) ? FPRS_DL : FPRS_DU; + int flag; perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); save_and_clear_fpu(); current_thread_info()->xfsr[0] &= ~0x1c000; - if (freg & 3) { - current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */; - do_fpother(regs); - return 0; - } if (insn & 0x200000) { /* STQ */ u64 first = 0, second = 0; + freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); + flag = (freg < 32) ? FPRS_DL : FPRS_DU; + if (freg & 3) { + current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */; + do_fpother(regs); + return 0; + } if (current_thread_info()->fpsaved[0] & flag) { first = *(u64 *)&f->regs[freg]; second = *(u64 *)&f->regs[freg+2]; @@ -513,6 +515,12 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) case 0x100000: size = 4; break; default: size = 2; break; } + if (size == 1) + freg = (insn >> 25) & 0x1f; + else + freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); + flag = (freg < 32) ? FPRS_DL : FPRS_DU; + for (i = 0; i < size; i++) data[i] = 0; diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S index a063d84336d6..62c2647bd5ce 100644 --- a/arch/sparc/lib/VISsave.S +++ b/arch/sparc/lib/VISsave.S @@ -6,24 +6,23 @@ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) */ +#include <linux/linkage.h> + #include <asm/asi.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/visasm.h> #include <asm/thread_info.h> - .text - .globl VISenter, VISenterhalf - /* On entry: %o5=current FPRS value, %g7 is callers address */ /* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */ /* Nothing special need be done here to handle pre-emption, this * FPU save/restore mechanism is already preemption safe. */ - + .text .align 32 -VISenter: +ENTRY(VISenter) ldub [%g6 + TI_FPDEPTH], %g1 brnz,a,pn %g1, 1f cmp %g1, 1 @@ -79,3 +78,4 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 .align 32 80: jmpl %g7 + %g0, %g0 nop +ENDPROC(VISenter) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 4ac88b757514..3025bd57f7ab 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -93,6 +93,8 @@ static unsigned long cpu_pgsz_mask; static struct linux_prom64_registers pavail[MAX_BANKS]; static int pavail_ents; +u64 numa_latency[MAX_NUMNODES][MAX_NUMNODES]; + static int cmp_p64(const void *a, const void *b) { const struct linux_prom64_registers *x = a, *y = b; @@ -1157,6 +1159,48 @@ static struct mdesc_mlgroup * __init find_mlgroup(u64 node) return NULL; } +int __node_distance(int from, int to) +{ + if ((from >= MAX_NUMNODES) || (to >= MAX_NUMNODES)) { + pr_warn("Returning default NUMA distance value for %d->%d\n", + from, to); + return (from == to) ? LOCAL_DISTANCE : REMOTE_DISTANCE; + } + return numa_latency[from][to]; +} + +static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) +{ + int i; + + for (i = 0; i < MAX_NUMNODES; i++) { + struct node_mem_mask *n = &node_masks[i]; + + if ((grp->mask == n->mask) && (grp->match == n->val)) + break; + } + return i; +} + +static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp, + int index) +{ + u64 arc; + + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) { + int tnode; + u64 target = mdesc_arc_target(md, arc); + struct mdesc_mlgroup *m = find_mlgroup(target); + + if (!m) + continue; + tnode = find_best_numa_node_for_mlgroup(m); + if (tnode == MAX_NUMNODES) + continue; + numa_latency[index][tnode] = m->latency; + } +} + static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp, int index) { @@ -1220,9 +1264,16 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, static int __init numa_parse_mdesc(void) { struct mdesc_handle *md = mdesc_grab(); - int i, err, count; + int i, j, err, count; u64 node; + /* Some sane defaults for numa latency values */ + for (i = 0; i < MAX_NUMNODES; i++) { + for (j = 0; j < MAX_NUMNODES; j++) + numa_latency[i][j] = (i == j) ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + } + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); if (node == MDESC_NODE_NULL) { mdesc_release(md); @@ -1245,6 +1296,23 @@ static int __init numa_parse_mdesc(void) count++; } + count = 0; + mdesc_for_each_node_by_name(md, node, "group") { + find_numa_latencies_for_group(md, node, count); + count++; + } + + /* Normalize numa latency matrix according to ACPI SLIT spec. */ + for (i = 0; i < MAX_NUMNODES; i++) { + u64 self_latency = numa_latency[i][i]; + + for (j = 0; j < MAX_NUMNODES; j++) { + numa_latency[i][j] = + (numa_latency[i][j] * LOCAL_DISTANCE) / + self_latency; + } + } + add_node_ranges(); for (i = 0; i < num_node_masks; i++) { diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index f8b9f71b9a2b..22564f5f2364 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -812,7 +812,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; if (image) { bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; - fp->jited = true; + fp->jited = 1; } out: kfree(addrs); diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c index ee186e13dfe6..f102048d9c0e 100644 --- a/arch/tile/gxio/mpipe.c +++ b/arch/tile/gxio/mpipe.c @@ -19,6 +19,7 @@ #include <linux/errno.h> #include <linux/io.h> #include <linux/module.h> +#include <linux/string.h> #include <gxio/iorpc_globals.h> #include <gxio/iorpc_mpipe.h> @@ -29,32 +30,6 @@ /* HACK: Avoid pointless "shadow" warnings. */ #define link link_shadow -/** - * strscpy - Copy a C-string into a sized buffer, but only if it fits - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @size: size of destination buffer - * - * Use this routine to avoid copying too-long strings. - * The routine returns the total number of bytes copied - * (including the trailing NUL) or zero if the buffer wasn't - * big enough. To ensure that programmers pay attention - * to the return code, the destination has a single NUL - * written at the front (if size is non-zero) when the - * buffer is not big enough. - */ -static size_t strscpy(char *dest, const char *src, size_t size) -{ - size_t len = strnlen(src, size) + 1; - if (len > size) { - if (size) - dest[0] = '\0'; - return 0; - } - memcpy(dest, src, len); - return len; -} - int gxio_mpipe_init(gxio_mpipe_context_t *context, unsigned int mpipe_index) { char file[32]; @@ -540,7 +515,7 @@ int gxio_mpipe_link_instance(const char *link_name) if (!context) return GXIO_ERR_NO_DEVICE; - if (strscpy(name.name, link_name, sizeof(name.name)) == 0) + if (strscpy(name.name, link_name, sizeof(name.name)) < 0) return GXIO_ERR_NO_DEVICE; return gxio_mpipe_info_instance_aux(context, name); @@ -559,7 +534,7 @@ int gxio_mpipe_link_enumerate_mac(int idx, char *link_name, uint8_t *link_mac) rv = gxio_mpipe_info_enumerate_aux(context, idx, &name, &mac); if (rv >= 0) { - if (strscpy(link_name, name.name, sizeof(name.name)) == 0) + if (strscpy(link_name, name.name, sizeof(name.name)) < 0) return GXIO_ERR_INVAL_MEMORY_SIZE; memcpy(link_mac, mac.mac, sizeof(mac.mac)); } @@ -576,7 +551,7 @@ int gxio_mpipe_link_open(gxio_mpipe_link_t *link, _gxio_mpipe_link_name_t name; int rv; - if (strscpy(name.name, link_name, sizeof(name.name)) == 0) + if (strscpy(name.name, link_name, sizeof(name.name)) < 0) return GXIO_ERR_NO_DEVICE; rv = gxio_mpipe_link_open_aux(context, name, flags); diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 709798460763..9fc0107a9c5e 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -34,7 +34,7 @@ */ static inline int atomic_read(const atomic_t *v) { - return ACCESS_ONCE(v->counter); + return READ_ONCE(v->counter); } /** diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index 096a56d6ead4..51cabc26e387 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -24,7 +24,7 @@ /* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */ -#define atomic_set(v, i) ((v)->counter = (i)) +#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) /* * The smp_mb() operations throughout are to support the fact that @@ -82,8 +82,8 @@ static inline void atomic_xor(int i, atomic_t *v) #define ATOMIC64_INIT(i) { (i) } -#define atomic64_read(v) ((v)->counter) -#define atomic64_set(v, i) ((v)->counter = (i)) +#define atomic64_read(v) READ_ONCE((v)->counter) +#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) static inline void atomic64_add(long i, atomic64_t *v) { diff --git a/arch/tile/include/asm/word-at-a-time.h b/arch/tile/include/asm/word-at-a-time.h index 9e5ce0d7b292..b66a693c2c34 100644 --- a/arch/tile/include/asm/word-at-a-time.h +++ b/arch/tile/include/asm/word-at-a-time.h @@ -6,7 +6,7 @@ struct word_at_a_time { /* unused */ }; #define WORD_AT_A_TIME_CONSTANTS {} -/* Generate 0x01 byte values for non-zero bytes using a SIMD instruction. */ +/* Generate 0x01 byte values for zero bytes using a SIMD instruction. */ static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) { @@ -33,4 +33,10 @@ static inline long find_zero(unsigned long mask) #endif } +#ifdef __BIG_ENDIAN +#define zero_bytemask(mask) (~1ul << (63 - __builtin_clzl(mask))) +#else +#define zero_bytemask(mask) ((2ul << __builtin_ctzl(mask)) - 1) +#endif + #endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/arch/tile/include/uapi/asm/mman.h b/arch/tile/include/uapi/asm/mman.h index 81b8fc348d63..63ee13faf17d 100644 --- a/arch/tile/include/uapi/asm/mman.h +++ b/arch/tile/include/uapi/asm/mman.h @@ -36,6 +36,7 @@ */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ #endif /* _ASM_TILE_MMAN_H */ diff --git a/arch/tile/kernel/usb.c b/arch/tile/kernel/usb.c index f0da5a237e94..9f1e05e12255 100644 --- a/arch/tile/kernel/usb.c +++ b/arch/tile/kernel/usb.c @@ -22,6 +22,7 @@ #include <linux/platform_device.h> #include <linux/usb/tilegx.h> #include <linux/init.h> +#include <linux/module.h> #include <linux/types.h> static u64 ehci_dmamask = DMA_BIT_MASK(32); diff --git a/arch/um/Makefile b/arch/um/Makefile index 098ab3333e7c..e3abe6f3156d 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -70,8 +70,8 @@ KBUILD_AFLAGS += $(ARCH_INCLUDE) USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \ $(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \ - -D_FILE_OFFSET_BITS=64 -idirafter include \ - -D__KERNEL__ -D__UM_HOST__ + -D_FILE_OFFSET_BITS=64 -idirafter $(srctree)/include \ + -idirafter $(obj)/include -D__KERNEL__ -D__UM_HOST__ #This will adjust *FLAGS accordingly to the platform. include $(ARCH_DIR)/Makefile-os-$(OS) diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 149ec55f9c46..904f3ebf4220 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -25,4 +25,5 @@ generic-y += preempt.h generic-y += switch_to.h generic-y += topology.h generic-y += trace_clock.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index d8a9fce6ee2e..98783dd0fa2e 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -220,7 +220,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, show_regs(container_of(regs, struct pt_regs, regs)); panic("Segfault with no mm"); } - else if (!is_user && address < TASK_SIZE) { + else if (!is_user && address > PAGE_SIZE && address < TASK_SIZE) { show_regs(container_of(regs, struct pt_regs, regs)); panic("Kernel tried to access user memory at addr 0x%lx, ip 0x%lx", address, ip); diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index e3ee4a51ef63..3f02d4232812 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -96,7 +96,7 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) "ret = %d\n", -n); ret = n; } - CATCH_EINTR(waitpid(pid, NULL, __WCLONE)); + CATCH_EINTR(waitpid(pid, NULL, __WALL)); } out_free2: @@ -129,7 +129,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, return err; } if (stack_out == NULL) { - CATCH_EINTR(pid = waitpid(pid, &status, __WCLONE)); + CATCH_EINTR(pid = waitpid(pid, &status, __WALL)); if (pid < 0) { err = -errno; printk(UM_KERN_ERR "run_helper_thread - wait failed, " @@ -148,7 +148,7 @@ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, int helper_wait(int pid) { int ret, status; - int wflags = __WCLONE; + int wflags = __WALL; CATCH_EINTR(ret = waitpid(pid, &status, wflags)); if (ret < 0) { diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 928237a7b9ca..c9faddc61100 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -222,7 +222,7 @@ config I2C_BATTERY_BQ27200 tristate "I2C Battery BQ27200 Support" select I2C_PUV3 select POWER_SUPPLY - select BATTERY_BQ27x00 + select BATTERY_BQ27XXX config I2C_EEPROM_AT24 tristate "I2C EEPROMs AT24 support" diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 1fc7a286dc6f..256c45b3ae34 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -62,4 +62,5 @@ generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 328c8352480c..db3622f22b61 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1123,8 +1123,10 @@ config X86_REBOOTFIXUPS Say N otherwise. config MICROCODE - tristate "CPU microcode loading support" + bool "CPU microcode loading support" + default y depends on CPU_SUP_AMD || CPU_SUP_INTEL + depends on BLK_DEV_INITRD select FW_LOADER ---help--- @@ -1166,24 +1168,6 @@ config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE -config MICROCODE_INTEL_EARLY - bool - -config MICROCODE_AMD_EARLY - bool - -config MICROCODE_EARLY - bool "Early load microcode" - depends on MICROCODE=y && BLK_DEV_INITRD - select MICROCODE_INTEL_EARLY if MICROCODE_INTEL - select MICROCODE_AMD_EARLY if MICROCODE_AMD - default y - help - This option provides functionality to read additional microcode data - at the beginning of initrd image. The data tells kernel to load - microcode to CPU's as early as possible. No functional change if no - microcode data is glued to the initrd, therefore it's safe to say Y. - config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" ---help--- @@ -1308,6 +1292,7 @@ config HIGHMEM config X86_PAE bool "PAE (Physical Address Extension) Support" depends on X86_32 && !HIGHMEM4G + select SWIOTLB ---help--- PAE is required for NX support, and furthermore enables larger swapspace support for non-overcommit purposes. It @@ -2042,6 +2027,55 @@ config COMPAT_VDSO If unsure, say N: if you are compiling your own kernel, you are unlikely to be using a buggy version of glibc. +choice + prompt "vsyscall table for legacy applications" + depends on X86_64 + default LEGACY_VSYSCALL_EMULATE + help + Legacy user code that does not know how to find the vDSO expects + to be able to issue three syscalls by calling fixed addresses in + kernel space. Since this location is not randomized with ASLR, + it can be used to assist security vulnerability exploitation. + + This setting can be changed at boot time via the kernel command + line parameter vsyscall=[native|emulate|none]. + + On a system with recent enough glibc (2.14 or newer) and no + static binaries, you can say None without a performance penalty + to improve security. + + If unsure, select "Emulate". + + config LEGACY_VSYSCALL_NATIVE + bool "Native" + help + Actual executable code is located in the fixed vsyscall + address mapping, implementing time() efficiently. Since + this makes the mapping executable, it can be used during + security vulnerability exploitation (traditionally as + ROP gadgets). This configuration is not recommended. + + config LEGACY_VSYSCALL_EMULATE + bool "Emulate" + help + The kernel traps and emulates calls into the fixed + vsyscall address mapping. This makes the mapping + non-executable, but it still contains known contents, + which could be used in certain rare security vulnerability + exploits. This configuration is recommended when userspace + still uses the vsyscall area. + + config LEGACY_VSYSCALL_NONE + bool "None" + help + There will be no vsyscall mapping at all. This will + eliminate any risk of ASLR bypass due to the vsyscall + fixed address mapping. Attempts to use the vsyscalls + will be reported to dmesg, so that either old or + malicious userspace programs can be identified. + +endchoice + config CMDLINE_BOOL bool "Built-in kernel command line" ---help--- diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 6983314c8b37..3ba5ff2f2d08 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -3,10 +3,6 @@ choice prompt "Processor family" default M686 if X86_32 default GENERIC_CPU if X86_64 - -config M486 - bool "486" - depends on X86_32 ---help--- This is the processor type of your CPU. This information is used for optimizing purposes. In order to compile a kernel @@ -23,9 +19,9 @@ config M486 Here are the settings recommended for greatest speed: - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or - SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. + SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. - "586" for generic Pentium CPUs lacking the TSC - (time stamp counter) register. + (time stamp counter) register. - "Pentium-Classic" for the Intel Pentium. - "Pentium-MMX" for the Intel Pentium MMX. - "Pentium-Pro" for the Intel Pentium Pro. @@ -34,17 +30,31 @@ config M486 - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). + - "Opteron/Athlon64/Hammer/K8" for all K8 and newer AMD CPUs. - "Crusoe" for the Transmeta Crusoe series. - "Efficeon" for the Transmeta Efficeon series. - "Winchip-C6" for original IDT Winchip. - "Winchip-2" for IDT Winchips with 3dNow! capabilities. + - "AMD Elan" for the 32-bit AMD Elan embedded CPU. - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). - "Geode GX/LX" For AMD Geode GX and LX processors. - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above). - "VIA C7" for VIA C7. + - "Intel P4" for the Pentium 4/Netburst microarchitecture. + - "Core 2/newer Xeon" for all core2 and newer Intel CPUs. + - "Intel Atom" for the Atom-microarchitecture CPUs. + - "Generic-x86-64" for a kernel which runs on any x86-64 CPU. + + See each option's help text for additional details. If you don't know + what to do, choose "486". - If you don't know what to do, choose "486". +config M486 + bool "486" + depends on X86_32 + ---help--- + Select this for an 486-class CPU such as AMD/Cyrix/IBM/Intel + 486DX/DX2/DX4 or SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. config M586 bool "586/K5/5x86/6x86/6x86MX" diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d8c0d3266173..3e0baf726eef 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -65,10 +65,14 @@ config EARLY_PRINTK_EFI This is useful for kernel debugging when your machine crashes very early before the console code is initialized. +config X86_PTDUMP_CORE + def_bool n + config X86_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL select DEBUG_FS + select X86_PTDUMP_CORE ---help--- Say Y here if you want to show the kernel pagetable layout in a debugfs file. This information is only useful for kernel developers @@ -79,7 +83,8 @@ config X86_PTDUMP config EFI_PGT_DUMP bool "Dump the EFI pagetable" - depends on EFI && X86_PTDUMP + depends on EFI + select X86_PTDUMP_CORE ---help--- Enable this if you want to dump the EFI page table before enabling virtual mode. This can be used to debug miscellaneous @@ -105,6 +110,35 @@ config DEBUG_RODATA_TEST feature as well as for the change_page_attr() infrastructure. If in doubt, say "N" +config DEBUG_WX + bool "Warn on W+X mappings at boot" + depends on DEBUG_RODATA + default y + select X86_PTDUMP_CORE + ---help--- + Generate a warning if any W+X mappings are found at boot. + + This is useful for discovering cases where the kernel is leaving + W+X mappings after applying NX, as such mappings are a security risk. + + Look for a message in dmesg output like this: + + x86/mm: Checked W+X mappings: passed, no W+X pages found. + + or like this, if the check failed: + + x86/mm: Checked W+X mappings: FAILED, <N> W+X pages found. + + Note that even if the check fails, your kernel is possibly + still fine, as W+X mappings are not a security hole in + themselves, what they do is that they make the exploitation + of other unfixed kernel bugs easier. + + There is no runtime or memory usage effect of this option + once the kernel has booted up - it's a one time check. + + If in doubt, say "Y". + config DEBUG_SET_MODULE_RONX bool "Set loadable kernel module data as NX and text as RO" depends on MODULES diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 747860c696e1..4086abca0b32 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -159,15 +159,23 @@ endif sp-$(CONFIG_X86_32) := esp sp-$(CONFIG_X86_64) := rsp +# do binutils support CFI? +cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) +# is .cfi_signal_frame supported too? +cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) +cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) + # does binutils support specific instructions? asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1) avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) +sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1) +sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1) -KBUILD_AFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) -KBUILD_CFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) +KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(sha1_ni_instr) $(sha256_ni_instr) +KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(sha1_ni_instr) $(sha256_ni_instr) LDFLAGS := -m elf_$(UTS_MACHINE) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 0d553e54171b..2ee62dba0373 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -9,13 +9,13 @@ # Changed by many, many contributors over the years. # +KASAN_SANITIZE := n + # If you want to preset the SVGA mode, uncomment the next line and # set SVGA_MODE to whatever number you want. # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. # The number is the same as you would ordinarily press at bootup. -KASAN_SANITIZE := n - SVGA_MODE := -DSVGA_MODE=NORMAL_VGA targets := vmlinux.bin setup.bin setup.elf bzImage diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ee1b6d346b98..583d539a4197 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -624,7 +624,7 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, static efi_status_t __gop_query32(struct efi_graphics_output_protocol_32 *gop32, struct efi_graphics_output_mode_info **info, - unsigned long *size, u32 *fb_base) + unsigned long *size, u64 *fb_base) { struct efi_graphics_output_protocol_mode_32 *mode; efi_status_t status; @@ -650,7 +650,8 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, unsigned long nr_gops; u16 width, height; u32 pixels_per_scan_line; - u32 fb_base; + u32 ext_lfb_base; + u64 fb_base; struct efi_pixel_bitmask pixel_info; int pixel_format; efi_status_t status; @@ -667,6 +668,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, bool conout_found = false; void *dummy = NULL; u32 h = handles[i]; + u64 current_fb_base; status = efi_call_early(handle_protocol, h, proto, (void **)&gop32); @@ -678,7 +680,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query32(gop32, &info, &size, &fb_base); + status = __gop_query32(gop32, &info, &size, ¤t_fb_base); if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* * Systems that use the UEFI Console Splitter may @@ -692,6 +694,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, pixel_format = info->pixel_format; pixel_info = info->pixel_information; pixels_per_scan_line = info->pixels_per_scan_line; + fb_base = current_fb_base; /* * Once we've found a GOP supporting ConOut, @@ -713,6 +716,13 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, si->lfb_width = width; si->lfb_height = height; si->lfb_base = fb_base; + + ext_lfb_base = (u64)(unsigned long)fb_base >> 32; + if (ext_lfb_base) { + si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + si->ext_lfb_base = ext_lfb_base; + } + si->pages = 1; setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); @@ -727,7 +737,7 @@ out: static efi_status_t __gop_query64(struct efi_graphics_output_protocol_64 *gop64, struct efi_graphics_output_mode_info **info, - unsigned long *size, u32 *fb_base) + unsigned long *size, u64 *fb_base) { struct efi_graphics_output_protocol_mode_64 *mode; efi_status_t status; @@ -753,7 +763,8 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, unsigned long nr_gops; u16 width, height; u32 pixels_per_scan_line; - u32 fb_base; + u32 ext_lfb_base; + u64 fb_base; struct efi_pixel_bitmask pixel_info; int pixel_format; efi_status_t status; @@ -770,6 +781,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, bool conout_found = false; void *dummy = NULL; u64 h = handles[i]; + u64 current_fb_base; status = efi_call_early(handle_protocol, h, proto, (void **)&gop64); @@ -781,7 +793,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query64(gop64, &info, &size, &fb_base); + status = __gop_query64(gop64, &info, &size, ¤t_fb_base); if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* * Systems that use the UEFI Console Splitter may @@ -795,6 +807,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, pixel_format = info->pixel_format; pixel_info = info->pixel_information; pixels_per_scan_line = info->pixels_per_scan_line; + fb_base = current_fb_base; /* * Once we've found a GOP supporting ConOut, @@ -816,6 +829,13 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, si->lfb_width = width; si->lfb_height = height; si->lfb_base = fb_base; + + ext_lfb_base = (u64)(unsigned long)fb_base >> 32; + if (ext_lfb_base) { + si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + si->ext_lfb_base = ext_lfb_base; + } + si->pages = 1; setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 2d6b309c8e9a..6236b9ec4b76 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -154,7 +154,7 @@ extra_header_fields: #else .quad 0 # ImageBase #endif - .long CONFIG_PHYSICAL_ALIGN # SectionAlignment + .long 0x20 # SectionAlignment .long 0x20 # FileAlignment .word 0 # MajorOperatingSystemVersion .word 0 # MinorOperatingSystemVersion diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 9a2838cf0591..b9b912a44d61 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -5,6 +5,8 @@ avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ $(comma)4)$(comma)%ymm2,yes,no) +sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no) +sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no) obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o @@ -91,9 +93,15 @@ ifeq ($(avx2_supported),yes) sha1-ssse3-y += sha1_avx2_x86_64_asm.o poly1305-x86_64-y += poly1305-avx2-x86_64.o endif +ifeq ($(sha1_ni_supported),yes) +sha1-ssse3-y += sha1_ni_asm.o +endif crc32c-intel-y := crc32c-intel_glue.o crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o +ifeq ($(sha256_ni_supported),yes) +sha256-ssse3-y += sha256_ni_asm.o +endif sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 4c65c70e628b..d84456924563 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -567,7 +567,8 @@ static int __init camellia_aesni_init(void) return -ENODEV; } - if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 80a0e4389c9a..93d8f295784e 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -554,7 +554,13 @@ static int __init camellia_aesni_init(void) { const char *feature_name; - if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { + pr_info("AVX or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index be00aa48b2b5..8648158f3916 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -469,7 +469,8 @@ static int __init cast5_init(void) { const char *feature_name; - if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 5dbba7224221..fca459578c35 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -591,7 +591,8 @@ static int __init cast6_init(void) { const char *feature_name; - if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index effe2160b7c5..722bacea040e 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -130,7 +130,7 @@ static int __init chacha20_simd_mod_init(void) #ifdef CONFIG_AS_AVX2 chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 && - cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL); + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #endif return crypto_register_alg(&alg); } diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 225be06edc80..4fe27e074194 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -330,7 +330,7 @@ ENDPROC(crc_pcl) ## PCLMULQDQ tables ## Table is 128 entries x 2 words (8 bytes) each ################################################################ -.section .rotata, "a", %progbits +.section .rodata, "a", %progbits .align 8 K_table: .long 0x493c7d27, 0x00000001 diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index f7170d764f32..4264a3d59589 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -184,7 +184,7 @@ static int __init poly1305_simd_mod_init(void) #ifdef CONFIG_AS_AVX2 poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 && - cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL); + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); alg.descsize = sizeof(struct poly1305_simd_desc_ctx); if (poly1305_use_avx2) alg.descsize += 10 * sizeof(u32); diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 7d838dc4d888..6d198342e2de 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -542,7 +542,8 @@ static int __init init(void) pr_info("AVX2 instructions are not detected.\n"); return -ENODEV; } - if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index da7dafc9b16d..5dc37026c7ce 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -597,7 +597,8 @@ static int __init serpent_init(void) { const char *feature_name; - if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S new file mode 100644 index 000000000000..874a651b9e7d --- /dev/null +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -0,0 +1,302 @@ +/* + * Intel SHA Extensions optimized implementation of a SHA-1 update function + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Sean Gulley <sean.m.gulley@intel.com> + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <linux/linkage.h> + +#define DIGEST_PTR %rdi /* 1st arg */ +#define DATA_PTR %rsi /* 2nd arg */ +#define NUM_BLKS %rdx /* 3rd arg */ + +#define RSPSAVE %rax + +/* gcc conversion */ +#define FRAME_SIZE 32 /* space for 2x16 bytes */ + +#define ABCD %xmm0 +#define E0 %xmm1 /* Need two E's b/c they ping pong */ +#define E1 %xmm2 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 +#define SHUF_MASK %xmm7 + + +/* + * Intel SHA Extensions optimized implementation of a SHA-1 update function + * + * The function takes a pointer to the current hash values, a pointer to the + * input data, and a number of 64 byte blocks to process. Once all blocks have + * been processed, the digest pointer is updated with the resulting hash value. + * The function only processes complete blocks, there is no functionality to + * store partial blocks. All message padding and hash value initialization must + * be done outside the update function. + * + * The indented lines in the loop are instructions related to rounds processing. + * The non-indented lines are instructions related to the message schedule. + * + * void sha1_ni_transform(uint32_t *digest, const void *data, + uint32_t numBlocks) + * digest : pointer to digest + * data: pointer to input data + * numBlocks: Number of blocks to process + */ +.text +.align 32 +ENTRY(sha1_ni_transform) + mov %rsp, RSPSAVE + sub $FRAME_SIZE, %rsp + and $~0xF, %rsp + + shl $6, NUM_BLKS /* convert to bytes */ + jz .Ldone_hash + add DATA_PTR, NUM_BLKS /* pointer to end of data */ + + /* load initial hash values */ + pinsrd $3, 1*16(DIGEST_PTR), E0 + movdqu 0*16(DIGEST_PTR), ABCD + pand UPPER_WORD_MASK(%rip), E0 + pshufd $0x1B, ABCD, ABCD + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + +.Lloop0: + /* Save hash values for addition after rounds */ + movdqa E0, (0*16)(%rsp) + movdqa ABCD, (1*16)(%rsp) + + /* Rounds 0-3 */ + movdqu 0*16(DATA_PTR), MSG0 + pshufb SHUF_MASK, MSG0 + paddd MSG0, E0 + movdqa ABCD, E1 + sha1rnds4 $0, E0, ABCD + + /* Rounds 4-7 */ + movdqu 1*16(DATA_PTR), MSG1 + pshufb SHUF_MASK, MSG1 + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG1, MSG0 + + /* Rounds 8-11 */ + movdqu 2*16(DATA_PTR), MSG2 + pshufb SHUF_MASK, MSG2 + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 12-15 */ + movdqu 3*16(DATA_PTR), MSG3 + pshufb SHUF_MASK, MSG3 + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 16-19 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 20-23 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 24-27 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 28-31 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 32-35 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 36-39 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 40-43 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 44-47 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 48-51 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 52-55 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 56-59 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 60-63 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $3, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 64-67 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $3, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 68-71 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $3, E1, ABCD + pxor MSG1, MSG3 + + /* Rounds 72-75 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $3, E0, ABCD + + /* Rounds 76-79 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1rnds4 $3, E1, ABCD + + /* Add current hash values with previously saved */ + sha1nexte (0*16)(%rsp), E0 + paddd (1*16)(%rsp), ABCD + + /* Increment data pointer and loop if more to process */ + add $64, DATA_PTR + cmp NUM_BLKS, DATA_PTR + jne .Lloop0 + + /* Write hash values back in the correct order */ + pshufd $0x1B, ABCD, ABCD + movdqu ABCD, 0*16(DIGEST_PTR) + pextrd $3, E0, 1*16(DIGEST_PTR) + +.Ldone_hash: + mov RSPSAVE, %rsp + + ret +ENDPROC(sha1_ni_transform) + +.data + +.align 64 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x000102030405060708090a0b0c0d0e0f +UPPER_WORD_MASK: + .octa 0xFFFFFFFF000000000000000000000000 diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 7c48e8b20848..dd14616b7739 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -31,24 +31,11 @@ #include <crypto/sha1_base.h> #include <asm/fpu/api.h> +typedef void (sha1_transform_fn)(u32 *digest, const char *data, + unsigned int rounds); -asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, - unsigned int rounds); -#ifdef CONFIG_AS_AVX -asmlinkage void sha1_transform_avx(u32 *digest, const char *data, - unsigned int rounds); -#endif -#ifdef CONFIG_AS_AVX2 -#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ - -asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, - unsigned int rounds); -#endif - -static void (*sha1_transform_asm)(u32 *, const char *, unsigned int); - -static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha1_transform_fn *sha1_xform) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -61,14 +48,14 @@ static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_transform_asm); + (sha1_block_fn *)sha1_xform); kernel_fpu_end(); return 0; } -static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out, sha1_transform_fn *sha1_xform) { if (!irq_fpu_usable()) return crypto_sha1_finup(desc, data, len, out); @@ -76,32 +63,37 @@ static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); if (len) sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_transform_asm); - sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm); + (sha1_block_fn *)sha1_xform); + sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_xform); kernel_fpu_end(); return sha1_base_finish(desc, out); } -/* Add padding and return the message digest. */ -static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) +asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, + unsigned int rounds); + +static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - return sha1_ssse3_finup(desc, NULL, 0, out); + return sha1_update(desc, data, len, + (sha1_transform_fn *) sha1_transform_ssse3); } -#ifdef CONFIG_AS_AVX2 -static void sha1_apply_transform_avx2(u32 *digest, const char *data, - unsigned int rounds) +static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - /* Select the optimal transform based on data block size */ - if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) - sha1_transform_avx2(digest, data, rounds); - else - sha1_transform_avx(digest, data, rounds); + return sha1_finup(desc, data, len, out, + (sha1_transform_fn *) sha1_transform_ssse3); +} + +/* Add padding and return the message digest. */ +static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) +{ + return sha1_ssse3_finup(desc, NULL, 0, out); } -#endif -static struct shash_alg alg = { +static struct shash_alg sha1_ssse3_alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = sha1_ssse3_update, @@ -110,7 +102,7 @@ static struct shash_alg alg = { .descsize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", - .cra_driver_name= "sha1-ssse3", + .cra_driver_name = "sha1-ssse3", .cra_priority = 150, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA1_BLOCK_SIZE, @@ -118,10 +110,62 @@ static struct shash_alg alg = { } }; +static int register_sha1_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shash(&sha1_ssse3_alg); + return 0; +} + +static void unregister_sha1_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shash(&sha1_ssse3_alg); +} + #ifdef CONFIG_AS_AVX -static bool __init avx_usable(void) +asmlinkage void sha1_transform_avx(u32 *digest, const char *data, + unsigned int rounds); + +static int sha1_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + return sha1_update(desc, data, len, + (sha1_transform_fn *) sha1_transform_avx); +} + +static int sha1_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha1_finup(desc, data, len, out, + (sha1_transform_fn *) sha1_transform_avx); +} + +static int sha1_avx_final(struct shash_desc *desc, u8 *out) +{ + return sha1_avx_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha1_avx_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_base_init, + .update = sha1_avx_update, + .final = sha1_avx_final, + .finup = sha1_avx_finup, + .descsize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-avx", + .cra_priority = 160, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool avx_usable(void) +{ + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { if (cpu_has_avx) pr_info("AVX detected but unusable.\n"); return false; @@ -130,55 +174,197 @@ static bool __init avx_usable(void) return true; } -#ifdef CONFIG_AS_AVX2 -static bool __init avx2_usable(void) +static int register_sha1_avx(void) +{ + if (avx_usable()) + return crypto_register_shash(&sha1_avx_alg); + return 0; +} + +static void unregister_sha1_avx(void) { - if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) && - boot_cpu_has(X86_FEATURE_BMI2)) + if (avx_usable()) + crypto_unregister_shash(&sha1_avx_alg); +} + +#else /* CONFIG_AS_AVX */ +static inline int register_sha1_avx(void) { return 0; } +static inline void unregister_sha1_avx(void) { } +#endif /* CONFIG_AS_AVX */ + + +#if defined(CONFIG_AS_AVX2) && (CONFIG_AS_AVX) +#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ + +asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, + unsigned int rounds); + +static bool avx2_usable(void) +{ + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + && boot_cpu_has(X86_FEATURE_BMI1) + && boot_cpu_has(X86_FEATURE_BMI2)) return true; return false; } + +static void sha1_apply_transform_avx2(u32 *digest, const char *data, + unsigned int rounds) +{ + /* Select the optimal transform based on data block size */ + if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) + sha1_transform_avx2(digest, data, rounds); + else + sha1_transform_avx(digest, data, rounds); +} + +static int sha1_avx2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha1_update(desc, data, len, + (sha1_transform_fn *) sha1_apply_transform_avx2); +} + +static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha1_finup(desc, data, len, out, + (sha1_transform_fn *) sha1_apply_transform_avx2); +} + +static int sha1_avx2_final(struct shash_desc *desc, u8 *out) +{ + return sha1_avx2_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha1_avx2_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_base_init, + .update = sha1_avx2_update, + .final = sha1_avx2_final, + .finup = sha1_avx2_finup, + .descsize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-avx2", + .cra_priority = 170, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int register_sha1_avx2(void) +{ + if (avx2_usable()) + return crypto_register_shash(&sha1_avx2_alg); + return 0; +} + +static void unregister_sha1_avx2(void) +{ + if (avx2_usable()) + crypto_unregister_shash(&sha1_avx2_alg); +} + +#else +static inline int register_sha1_avx2(void) { return 0; } +static inline void unregister_sha1_avx2(void) { } #endif + +#ifdef CONFIG_AS_SHA1_NI +asmlinkage void sha1_ni_transform(u32 *digest, const char *data, + unsigned int rounds); + +static int sha1_ni_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha1_update(desc, data, len, + (sha1_transform_fn *) sha1_ni_transform); +} + +static int sha1_ni_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha1_finup(desc, data, len, out, + (sha1_transform_fn *) sha1_ni_transform); +} + +static int sha1_ni_final(struct shash_desc *desc, u8 *out) +{ + return sha1_ni_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha1_ni_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_base_init, + .update = sha1_ni_update, + .final = sha1_ni_final, + .finup = sha1_ni_finup, + .descsize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-ni", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int register_sha1_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + return crypto_register_shash(&sha1_ni_alg); + return 0; +} + +static void unregister_sha1_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + crypto_unregister_shash(&sha1_ni_alg); +} + +#else +static inline int register_sha1_ni(void) { return 0; } +static inline void unregister_sha1_ni(void) { } #endif static int __init sha1_ssse3_mod_init(void) { - char *algo_name; + if (register_sha1_ssse3()) + goto fail; - /* test for SSSE3 first */ - if (cpu_has_ssse3) { - sha1_transform_asm = sha1_transform_ssse3; - algo_name = "SSSE3"; + if (register_sha1_avx()) { + unregister_sha1_ssse3(); + goto fail; } -#ifdef CONFIG_AS_AVX - /* allow AVX to override SSSE3, it's a little faster */ - if (avx_usable()) { - sha1_transform_asm = sha1_transform_avx; - algo_name = "AVX"; -#ifdef CONFIG_AS_AVX2 - /* allow AVX2 to override AVX, it's a little faster */ - if (avx2_usable()) { - sha1_transform_asm = sha1_apply_transform_avx2; - algo_name = "AVX2"; - } -#endif + if (register_sha1_avx2()) { + unregister_sha1_avx(); + unregister_sha1_ssse3(); + goto fail; } -#endif - if (sha1_transform_asm) { - pr_info("Using %s optimized SHA-1 implementation\n", algo_name); - return crypto_register_shash(&alg); + if (register_sha1_ni()) { + unregister_sha1_avx2(); + unregister_sha1_avx(); + unregister_sha1_ssse3(); + goto fail; } - pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n"); + return 0; +fail: return -ENODEV; } static void __exit sha1_ssse3_mod_fini(void) { - crypto_unregister_shash(&alg); + unregister_sha1_ni(); + unregister_sha1_avx2(); + unregister_sha1_avx(); + unregister_sha1_ssse3(); } module_init(sha1_ssse3_mod_init); diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S new file mode 100644 index 000000000000..748cdf21a938 --- /dev/null +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -0,0 +1,353 @@ +/* + * Intel SHA Extensions optimized implementation of a SHA-256 update function + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Sean Gulley <sean.m.gulley@intel.com> + * Tim Chen <tim.c.chen@linux.intel.com> + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <linux/linkage.h> + +#define DIGEST_PTR %rdi /* 1st arg */ +#define DATA_PTR %rsi /* 2nd arg */ +#define NUM_BLKS %rdx /* 3rd arg */ + +#define SHA256CONSTANTS %rax + +#define MSG %xmm0 +#define STATE0 %xmm1 +#define STATE1 %xmm2 +#define MSGTMP0 %xmm3 +#define MSGTMP1 %xmm4 +#define MSGTMP2 %xmm5 +#define MSGTMP3 %xmm6 +#define MSGTMP4 %xmm7 + +#define SHUF_MASK %xmm8 + +#define ABEF_SAVE %xmm9 +#define CDGH_SAVE %xmm10 + +/* + * Intel SHA Extensions optimized implementation of a SHA-256 update function + * + * The function takes a pointer to the current hash values, a pointer to the + * input data, and a number of 64 byte blocks to process. Once all blocks have + * been processed, the digest pointer is updated with the resulting hash value. + * The function only processes complete blocks, there is no functionality to + * store partial blocks. All message padding and hash value initialization must + * be done outside the update function. + * + * The indented lines in the loop are instructions related to rounds processing. + * The non-indented lines are instructions related to the message schedule. + * + * void sha256_ni_transform(uint32_t *digest, const void *data, + uint32_t numBlocks); + * digest : pointer to digest + * data: pointer to input data + * numBlocks: Number of blocks to process + */ + +.text +.align 32 +ENTRY(sha256_ni_transform) + + shl $6, NUM_BLKS /* convert to bytes */ + jz .Ldone_hash + add DATA_PTR, NUM_BLKS /* pointer to end of data */ + + /* + * load initial hash values + * Need to reorder these appropriately + * DCBA, HGFE -> ABEF, CDGH + */ + movdqu 0*16(DIGEST_PTR), STATE0 + movdqu 1*16(DIGEST_PTR), STATE1 + + pshufd $0xB1, STATE0, STATE0 /* CDAB */ + pshufd $0x1B, STATE1, STATE1 /* EFGH */ + movdqa STATE0, MSGTMP4 + palignr $8, STATE1, STATE0 /* ABEF */ + pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + lea K256(%rip), SHA256CONSTANTS + +.Lloop0: + /* Save hash values for addition after rounds */ + movdqa STATE0, ABEF_SAVE + movdqa STATE1, CDGH_SAVE + + /* Rounds 0-3 */ + movdqu 0*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP0 + paddd 0*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 4-7 */ + movdqu 1*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP1 + paddd 1*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 8-11 */ + movdqu 2*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP2 + paddd 2*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 12-15 */ + movdqu 3*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP3 + paddd 3*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 16-19 */ + movdqa MSGTMP0, MSG + paddd 4*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 20-23 */ + movdqa MSGTMP1, MSG + paddd 5*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 24-27 */ + movdqa MSGTMP2, MSG + paddd 6*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 28-31 */ + movdqa MSGTMP3, MSG + paddd 7*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 32-35 */ + movdqa MSGTMP0, MSG + paddd 8*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 36-39 */ + movdqa MSGTMP1, MSG + paddd 9*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 40-43 */ + movdqa MSGTMP2, MSG + paddd 10*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 44-47 */ + movdqa MSGTMP3, MSG + paddd 11*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 48-51 */ + movdqa MSGTMP0, MSG + paddd 12*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 52-55 */ + movdqa MSGTMP1, MSG + paddd 13*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 56-59 */ + movdqa MSGTMP2, MSG + paddd 14*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 60-63 */ + movdqa MSGTMP3, MSG + paddd 15*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Add current hash values with previously saved */ + paddd ABEF_SAVE, STATE0 + paddd CDGH_SAVE, STATE1 + + /* Increment data pointer and loop if more to process */ + add $64, DATA_PTR + cmp NUM_BLKS, DATA_PTR + jne .Lloop0 + + /* Write hash values back in the correct order */ + pshufd $0x1B, STATE0, STATE0 /* FEBA */ + pshufd $0xB1, STATE1, STATE1 /* DCHG */ + movdqa STATE0, MSGTMP4 + pblendw $0xF0, STATE1, STATE0 /* DCBA */ + palignr $8, MSGTMP4, STATE1 /* HGFE */ + + movdqu STATE0, 0*16(DIGEST_PTR) + movdqu STATE1, 1*16(DIGEST_PTR) + +.Ldone_hash: + + ret +ENDPROC(sha256_ni_transform) + +.data +.align 64 +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index f8097fc0d1d1..5f4d6086dc59 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -42,19 +42,10 @@ asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, u64 rounds); -#ifdef CONFIG_AS_AVX -asmlinkage void sha256_transform_avx(u32 *digest, const char *data, - u64 rounds); -#endif -#ifdef CONFIG_AS_AVX2 -asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, - u64 rounds); -#endif - -static void (*sha256_transform_asm)(u32 *, const char *, u64); +typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds); -static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_transform_fn *sha256_xform) { struct sha256_state *sctx = shash_desc_ctx(desc); @@ -67,14 +58,14 @@ static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_transform_asm); + (sha256_block_fn *)sha256_xform); kernel_fpu_end(); return 0; } -static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out, sha256_transform_fn *sha256_xform) { if (!irq_fpu_usable()) return crypto_sha256_finup(desc, data, len, out); @@ -82,20 +73,32 @@ static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); if (len) sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_transform_asm); - sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm); + (sha256_block_fn *)sha256_xform); + sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_xform); kernel_fpu_end(); return sha256_base_finish(desc, out); } +static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha256_update(desc, data, len, sha256_transform_ssse3); +} + +static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_transform_ssse3); +} + /* Add padding and return the message digest. */ static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) { return sha256_ssse3_finup(desc, NULL, 0, out); } -static struct shash_alg algs[] = { { +static struct shash_alg sha256_ssse3_algs[] = { { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_base_init, .update = sha256_ssse3_update, @@ -127,10 +130,77 @@ static struct shash_alg algs[] = { { } } }; +static int register_sha256_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shashes(sha256_ssse3_algs, + ARRAY_SIZE(sha256_ssse3_algs)); + return 0; +} + +static void unregister_sha256_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(sha256_ssse3_algs, + ARRAY_SIZE(sha256_ssse3_algs)); +} + #ifdef CONFIG_AS_AVX -static bool __init avx_usable(void) +asmlinkage void sha256_transform_avx(u32 *digest, const char *data, + u64 rounds); + +static int sha256_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha256_update(desc, data, len, sha256_transform_avx); +} + +static int sha256_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_transform_avx); +} + +static int sha256_avx_final(struct shash_desc *desc, u8 *out) +{ + return sha256_avx_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha256_avx_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_avx_update, + .final = sha256_avx_final, + .finup = sha256_avx_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-avx", + .cra_priority = 160, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_avx_update, + .final = sha256_avx_final, + .finup = sha256_avx_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-avx", + .cra_priority = 160, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static bool avx_usable(void) { - if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { if (cpu_has_avx) pr_info("AVX detected but unusable.\n"); return false; @@ -138,47 +208,216 @@ static bool __init avx_usable(void) return true; } -#endif -static int __init sha256_ssse3_mod_init(void) +static int register_sha256_avx(void) { - /* test for SSSE3 first */ - if (cpu_has_ssse3) - sha256_transform_asm = sha256_transform_ssse3; + if (avx_usable()) + return crypto_register_shashes(sha256_avx_algs, + ARRAY_SIZE(sha256_avx_algs)); + return 0; +} -#ifdef CONFIG_AS_AVX - /* allow AVX to override SSSE3, it's a little faster */ - if (avx_usable()) { -#ifdef CONFIG_AS_AVX2 - if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) - sha256_transform_asm = sha256_transform_rorx; - else +static void unregister_sha256_avx(void) +{ + if (avx_usable()) + crypto_unregister_shashes(sha256_avx_algs, + ARRAY_SIZE(sha256_avx_algs)); +} + +#else +static inline int register_sha256_avx(void) { return 0; } +static inline void unregister_sha256_avx(void) { } #endif - sha256_transform_asm = sha256_transform_avx; + +#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) +asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, + u64 rounds); + +static int sha256_avx2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha256_update(desc, data, len, sha256_transform_rorx); +} + +static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_transform_rorx); +} + +static int sha256_avx2_final(struct shash_desc *desc, u8 *out) +{ + return sha256_avx2_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha256_avx2_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_avx2_update, + .final = sha256_avx2_final, + .finup = sha256_avx2_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-avx2", + .cra_priority = 170, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, } -#endif +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_avx2_update, + .final = sha256_avx2_final, + .finup = sha256_avx2_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-avx2", + .cra_priority = 170, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; - if (sha256_transform_asm) { -#ifdef CONFIG_AS_AVX - if (sha256_transform_asm == sha256_transform_avx) - pr_info("Using AVX optimized SHA-256 implementation\n"); -#ifdef CONFIG_AS_AVX2 - else if (sha256_transform_asm == sha256_transform_rorx) - pr_info("Using AVX2 optimized SHA-256 implementation\n"); +static bool avx2_usable(void) +{ + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_BMI2)) + return true; + + return false; +} + +static int register_sha256_avx2(void) +{ + if (avx2_usable()) + return crypto_register_shashes(sha256_avx2_algs, + ARRAY_SIZE(sha256_avx2_algs)); + return 0; +} + +static void unregister_sha256_avx2(void) +{ + if (avx2_usable()) + crypto_unregister_shashes(sha256_avx2_algs, + ARRAY_SIZE(sha256_avx2_algs)); +} + +#else +static inline int register_sha256_avx2(void) { return 0; } +static inline void unregister_sha256_avx2(void) { } #endif - else + +#ifdef CONFIG_AS_SHA256_NI +asmlinkage void sha256_ni_transform(u32 *digest, const char *data, + u64 rounds); /*unsigned int rounds);*/ + +static int sha256_ni_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha256_update(desc, data, len, sha256_ni_transform); +} + +static int sha256_ni_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_ni_transform); +} + +static int sha256_ni_final(struct shash_desc *desc, u8 *out) +{ + return sha256_ni_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha256_ni_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_ni_update, + .final = sha256_ni_final, + .finup = sha256_ni_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-ni", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_ni_update, + .final = sha256_ni_final, + .finup = sha256_ni_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ni", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int register_sha256_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + return crypto_register_shashes(sha256_ni_algs, + ARRAY_SIZE(sha256_ni_algs)); + return 0; +} + +static void unregister_sha256_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + crypto_unregister_shashes(sha256_ni_algs, + ARRAY_SIZE(sha256_ni_algs)); +} + +#else +static inline int register_sha256_ni(void) { return 0; } +static inline void unregister_sha256_ni(void) { } #endif - pr_info("Using SSSE3 optimized SHA-256 implementation\n"); - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); + +static int __init sha256_ssse3_mod_init(void) +{ + if (register_sha256_ssse3()) + goto fail; + + if (register_sha256_avx()) { + unregister_sha256_ssse3(); + goto fail; } - pr_info("Neither AVX nor SSSE3 is available/usable.\n"); + if (register_sha256_avx2()) { + unregister_sha256_avx(); + unregister_sha256_ssse3(); + goto fail; + } + + if (register_sha256_ni()) { + unregister_sha256_avx2(); + unregister_sha256_avx(); + unregister_sha256_ssse3(); + goto fail; + } + + return 0; +fail: return -ENODEV; } static void __exit sha256_ssse3_mod_fini(void) { - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + unregister_sha256_ni(); + unregister_sha256_avx2(); + unregister_sha256_avx(); + unregister_sha256_ssse3(); } module_init(sha256_ssse3_mod_init); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 2edad7b81870..34e5083d6f36 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -41,19 +41,11 @@ asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, u64 rounds); -#ifdef CONFIG_AS_AVX -asmlinkage void sha512_transform_avx(u64 *digest, const char *data, - u64 rounds); -#endif -#ifdef CONFIG_AS_AVX2 -asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, - u64 rounds); -#endif -static void (*sha512_transform_asm)(u64 *, const char *, u64); +typedef void (sha512_transform_fn)(u64 *digest, const char *data, u64 rounds); -static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha512_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha512_transform_fn *sha512_xform) { struct sha512_state *sctx = shash_desc_ctx(desc); @@ -66,14 +58,14 @@ static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_transform_asm); + (sha512_block_fn *)sha512_xform); kernel_fpu_end(); return 0; } -static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha512_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out, sha512_transform_fn *sha512_xform) { if (!irq_fpu_usable()) return crypto_sha512_finup(desc, data, len, out); @@ -81,20 +73,32 @@ static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); if (len) sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_transform_asm); - sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm); + (sha512_block_fn *)sha512_xform); + sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_xform); kernel_fpu_end(); return sha512_base_finish(desc, out); } +static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha512_update(desc, data, len, sha512_transform_ssse3); +} + +static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha512_finup(desc, data, len, out, sha512_transform_ssse3); +} + /* Add padding and return the message digest. */ static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) { return sha512_ssse3_finup(desc, NULL, 0, out); } -static struct shash_alg algs[] = { { +static struct shash_alg sha512_ssse3_algs[] = { { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_base_init, .update = sha512_ssse3_update, @@ -126,10 +130,27 @@ static struct shash_alg algs[] = { { } } }; +static int register_sha512_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shashes(sha512_ssse3_algs, + ARRAY_SIZE(sha512_ssse3_algs)); + return 0; +} + +static void unregister_sha512_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(sha512_ssse3_algs, + ARRAY_SIZE(sha512_ssse3_algs)); +} + #ifdef CONFIG_AS_AVX -static bool __init avx_usable(void) +asmlinkage void sha512_transform_avx(u64 *digest, const char *data, + u64 rounds); +static bool avx_usable(void) { - if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { if (cpu_has_avx) pr_info("AVX detected but unusable.\n"); return false; @@ -137,47 +158,185 @@ static bool __init avx_usable(void) return true; } -#endif -static int __init sha512_ssse3_mod_init(void) +static int sha512_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - /* test for SSSE3 first */ - if (cpu_has_ssse3) - sha512_transform_asm = sha512_transform_ssse3; + return sha512_update(desc, data, len, sha512_transform_avx); +} -#ifdef CONFIG_AS_AVX - /* allow AVX to override SSSE3, it's a little faster */ - if (avx_usable()) { -#ifdef CONFIG_AS_AVX2 - if (boot_cpu_has(X86_FEATURE_AVX2)) - sha512_transform_asm = sha512_transform_rorx; - else -#endif - sha512_transform_asm = sha512_transform_avx; +static int sha512_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha512_finup(desc, data, len, out, sha512_transform_avx); +} + +/* Add padding and return the message digest. */ +static int sha512_avx_final(struct shash_desc *desc, u8 *out) +{ + return sha512_avx_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha512_avx_algs[] = { { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_base_init, + .update = sha512_avx_update, + .final = sha512_avx_final, + .finup = sha512_avx_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-avx", + .cra_priority = 160, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, } -#endif +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_base_init, + .update = sha512_avx_update, + .final = sha512_avx_final, + .finup = sha512_avx_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-avx", + .cra_priority = 160, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; - if (sha512_transform_asm) { -#ifdef CONFIG_AS_AVX - if (sha512_transform_asm == sha512_transform_avx) - pr_info("Using AVX optimized SHA-512 implementation\n"); -#ifdef CONFIG_AS_AVX2 - else if (sha512_transform_asm == sha512_transform_rorx) - pr_info("Using AVX2 optimized SHA-512 implementation\n"); +static int register_sha512_avx(void) +{ + if (avx_usable()) + return crypto_register_shashes(sha512_avx_algs, + ARRAY_SIZE(sha512_avx_algs)); + return 0; +} + +static void unregister_sha512_avx(void) +{ + if (avx_usable()) + crypto_unregister_shashes(sha512_avx_algs, + ARRAY_SIZE(sha512_avx_algs)); +} +#else +static inline int register_sha512_avx(void) { return 0; } +static inline void unregister_sha512_avx(void) { } #endif - else + +#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) +asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, + u64 rounds); + +static int sha512_avx2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha512_update(desc, data, len, sha512_transform_rorx); +} + +static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha512_finup(desc, data, len, out, sha512_transform_rorx); +} + +/* Add padding and return the message digest. */ +static int sha512_avx2_final(struct shash_desc *desc, u8 *out) +{ + return sha512_avx2_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha512_avx2_algs[] = { { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_base_init, + .update = sha512_avx2_update, + .final = sha512_avx2_final, + .finup = sha512_avx2_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-avx2", + .cra_priority = 170, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_base_init, + .update = sha512_avx2_update, + .final = sha512_avx2_final, + .finup = sha512_avx2_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-avx2", + .cra_priority = 170, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static bool avx2_usable(void) +{ + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_BMI2)) + return true; + + return false; +} + +static int register_sha512_avx2(void) +{ + if (avx2_usable()) + return crypto_register_shashes(sha512_avx2_algs, + ARRAY_SIZE(sha512_avx2_algs)); + return 0; +} + +static void unregister_sha512_avx2(void) +{ + if (avx2_usable()) + crypto_unregister_shashes(sha512_avx2_algs, + ARRAY_SIZE(sha512_avx2_algs)); +} +#else +static inline int register_sha512_avx2(void) { return 0; } +static inline void unregister_sha512_avx2(void) { } #endif - pr_info("Using SSSE3 optimized SHA-512 implementation\n"); - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); + +static int __init sha512_ssse3_mod_init(void) +{ + + if (register_sha512_ssse3()) + goto fail; + + if (register_sha512_avx()) { + unregister_sha512_ssse3(); + goto fail; } - pr_info("Neither AVX nor SSSE3 is available/usable.\n"); + if (register_sha512_avx2()) { + unregister_sha512_avx(); + unregister_sha512_ssse3(); + goto fail; + } + + return 0; +fail: return -ENODEV; } static void __exit sha512_ssse3_mod_fini(void) { - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + unregister_sha512_avx2(); + unregister_sha512_avx(); + unregister_sha512_ssse3(); } module_init(sha512_ssse3_mod_init); diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index c2bd0ce718ee..b7a3904b953c 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -558,7 +558,7 @@ static int __init twofish_init(void) { const char *feature_name; - if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 80dcc9261ca3..a89fdbc1f0be 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -24,10 +24,19 @@ #include <asm/desc.h> #include <asm/traps.h> +#include <asm/vdso.h> +#include <asm/uaccess.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> +static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) +{ + unsigned long top_of_stack = + (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING; + return (struct thread_info *)(top_of_stack - THREAD_SIZE); +} + #ifdef CONFIG_CONTEXT_TRACKING /* Called on entry from user mode with IRQs off. */ __visible void enter_from_user_mode(void) @@ -66,13 +75,14 @@ static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) */ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) { + struct thread_info *ti = pt_regs_to_thread_info(regs); unsigned long ret = 0; u32 work; - BUG_ON(regs != task_pt_regs(current)); + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + BUG_ON(regs != task_pt_regs(current)); - work = ACCESS_ONCE(current_thread_info()->flags) & - _TIF_WORK_SYSCALL_ENTRY; + work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; #ifdef CONFIG_CONTEXT_TRACKING /* @@ -154,11 +164,12 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, unsigned long phase1_result) { + struct thread_info *ti = pt_regs_to_thread_info(regs); long ret = 0; - u32 work = ACCESS_ONCE(current_thread_info()->flags) & - _TIF_WORK_SYSCALL_ENTRY; + u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; - BUG_ON(regs != task_pt_regs(current)); + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + BUG_ON(regs != task_pt_regs(current)); /* * If we stepped into a sysenter/syscall insn, it trapped in @@ -207,19 +218,12 @@ long syscall_trace_enter(struct pt_regs *regs) return syscall_trace_enter_phase2(regs, arch, phase1_result); } -static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs) -{ - unsigned long top_of_stack = - (unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING; - return (struct thread_info *)(top_of_stack - THREAD_SIZE); -} +#define EXIT_TO_USERMODE_LOOP_FLAGS \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) -/* Called with IRQs disabled. */ -__visible void prepare_exit_to_usermode(struct pt_regs *regs) +static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) { - if (WARN_ON(!irqs_disabled())) - local_irq_disable(); - /* * In order to return to user mode, we need to have IRQs off with * none of _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_USER_RETURN_NOTIFY, @@ -229,14 +233,6 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs) * work to clear some of the flags can sleep. */ while (true) { - u32 cached_flags = - READ_ONCE(pt_regs_to_thread_info(regs)->flags); - - if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | - _TIF_UPROBE | _TIF_NEED_RESCHED | - _TIF_USER_RETURN_NOTIFY))) - break; - /* We have work to do. */ local_irq_enable(); @@ -260,50 +256,81 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs) /* Disable IRQs and retry */ local_irq_disable(); + + cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags); + + if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) + break; + } +} + +/* Called with IRQs disabled. */ +__visible inline void prepare_exit_to_usermode(struct pt_regs *regs) +{ + u32 cached_flags; + + if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) + local_irq_disable(); + + lockdep_sys_exit(); + + cached_flags = + READ_ONCE(pt_regs_to_thread_info(regs)->flags); + + if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) + exit_to_usermode_loop(regs, cached_flags); user_enter(); } +#define SYSCALL_EXIT_WORK_FLAGS \ + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) + +static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags) +{ + bool step; + + audit_syscall_exit(regs); + + if (cached_flags & _TIF_SYSCALL_TRACEPOINT) + trace_sys_exit(regs, regs->ax); + + /* + * If TIF_SYSCALL_EMU is set, we only get here because of + * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). + * We already reported this syscall instruction in + * syscall_trace_enter(). + */ + step = unlikely( + (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)) + == _TIF_SINGLESTEP); + if (step || cached_flags & _TIF_SYSCALL_TRACE) + tracehook_report_syscall_exit(regs, step); +} + /* * Called with IRQs on and fully valid regs. Returns with IRQs off in a * state such that we can immediately switch to user mode. */ -__visible void syscall_return_slowpath(struct pt_regs *regs) +__visible inline void syscall_return_slowpath(struct pt_regs *regs) { struct thread_info *ti = pt_regs_to_thread_info(regs); u32 cached_flags = READ_ONCE(ti->flags); - bool step; CT_WARN_ON(ct_state() != CONTEXT_KERNEL); - if (WARN(irqs_disabled(), "syscall %ld left IRQs disabled", - regs->orig_ax)) + if (IS_ENABLED(CONFIG_PROVE_LOCKING) && + WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax)) local_irq_enable(); /* * First do one-time work. If these work items are enabled, we * want to run them exactly once per syscall exit with IRQs on. */ - if (cached_flags & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | - _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)) { - audit_syscall_exit(regs); - - if (cached_flags & _TIF_SYSCALL_TRACEPOINT) - trace_sys_exit(regs, regs->ax); - - /* - * If TIF_SYSCALL_EMU is set, we only get here because of - * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). - * We already reported this syscall instruction in - * syscall_trace_enter(). - */ - step = unlikely( - (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)) - == _TIF_SINGLESTEP); - if (step || cached_flags & _TIF_SYSCALL_TRACE) - tracehook_report_syscall_exit(regs, step); - } + if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS)) + syscall_slow_exit_work(regs, cached_flags); #ifdef CONFIG_COMPAT /* @@ -316,3 +343,144 @@ __visible void syscall_return_slowpath(struct pt_regs *regs) local_irq_disable(); prepare_exit_to_usermode(regs); } + +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +/* + * Does a 32-bit syscall. Called with IRQs on and does all entry and + * exit work and returns with IRQs off. This function is extremely hot + * in workloads that use it, and it's usually called from + * do_fast_syscall_32, so forcibly inline it to improve performance. + */ +#ifdef CONFIG_X86_32 +/* 32-bit kernels use a trap gate for INT80, and the asm code calls here. */ +__visible +#else +/* 64-bit kernels use do_syscall_32_irqs_off() instead. */ +static +#endif +__always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) +{ + struct thread_info *ti = pt_regs_to_thread_info(regs); + unsigned int nr = (unsigned int)regs->orig_ax; + +#ifdef CONFIG_IA32_EMULATION + ti->status |= TS_COMPAT; +#endif + + if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { + /* + * Subtlety here: if ptrace pokes something larger than + * 2^32-1 into orig_ax, this truncates it. This may or + * may not be necessary, but it matches the old asm + * behavior. + */ + nr = syscall_trace_enter(regs); + } + + if (likely(nr < IA32_NR_syscalls)) { + /* + * It's possible that a 32-bit syscall implementation + * takes a 64-bit parameter but nonetheless assumes that + * the high bits are zero. Make sure we zero-extend all + * of the args. + */ + regs->ax = ia32_sys_call_table[nr]( + (unsigned int)regs->bx, (unsigned int)regs->cx, + (unsigned int)regs->dx, (unsigned int)regs->si, + (unsigned int)regs->di, (unsigned int)regs->bp); + } + + syscall_return_slowpath(regs); +} + +#ifdef CONFIG_X86_64 +/* Handles INT80 on 64-bit kernels */ +__visible void do_syscall_32_irqs_off(struct pt_regs *regs) +{ + local_irq_enable(); + do_syscall_32_irqs_on(regs); +} +#endif + +/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ +__visible long do_fast_syscall_32(struct pt_regs *regs) +{ + /* + * Called using the internal vDSO SYSENTER/SYSCALL32 calling + * convention. Adjust regs so it looks like we entered using int80. + */ + + unsigned long landing_pad = (unsigned long)current->mm->context.vdso + + vdso_image_32.sym_int80_landing_pad; + + /* + * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward + * so that 'regs->ip -= 2' lands back on an int $0x80 instruction. + * Fix it up. + */ + regs->ip = landing_pad; + + /* + * Fetch ECX from where the vDSO stashed it. + * + * WARNING: We are in CONTEXT_USER and RCU isn't paying attention! + */ + local_irq_enable(); + if ( +#ifdef CONFIG_X86_64 + /* + * Micro-optimization: the pointer we're following is explicitly + * 32 bits, so it can't be out of range. + */ + __get_user(*(u32 *)®s->cx, + (u32 __user __force *)(unsigned long)(u32)regs->sp) +#else + get_user(*(u32 *)®s->cx, + (u32 __user __force *)(unsigned long)(u32)regs->sp) +#endif + ) { + + /* User code screwed up. */ + local_irq_disable(); + regs->ax = -EFAULT; +#ifdef CONFIG_CONTEXT_TRACKING + enter_from_user_mode(); +#endif + prepare_exit_to_usermode(regs); + return 0; /* Keep it simple: use IRET. */ + } + + /* Now this is just like a normal syscall. */ + do_syscall_32_irqs_on(regs); + +#ifdef CONFIG_X86_64 + /* + * Opportunistic SYSRETL: if possible, try to return using SYSRETL. + * SYSRETL is available on all 64-bit CPUs, so we don't need to + * bother with SYSEXIT. + * + * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP, + * because the ECX fixup above will ensure that this is essentially + * never the case. + */ + return regs->cs == __USER32_CS && regs->ss == __USER_DS && + regs->ip == landing_pad && + (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0; +#else + /* + * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT. + * + * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP, + * because the ECX fixup above will ensure that this is essentially + * never the case. + * + * We don't allow syscalls at all from VM86 mode, but we still + * need to check VM, because we might be returning from sys_vm86. + */ + return static_cpu_has(X86_FEATURE_SEP) && + regs->cs == __USER_CS && regs->ss == __USER_DS && + regs->ip == landing_pad && + (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0; +#endif +} +#endif diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index b2909bf8cf70..3eb572ed3d7a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -3,7 +3,7 @@ * * entry_32.S contains the system-call and low-level fault and trap handling routines. * - * Stack layout in 'syscall_exit': + * Stack layout while running C code: * ptrace needs to have all registers on the stack. * If the order here is changed, it needs to be * updated in fork.c:copy_process(), signal.c:do_signal(), @@ -153,13 +153,13 @@ #endif /* CONFIG_X86_32_LAZY_GS */ -.macro SAVE_ALL +.macro SAVE_ALL pt_regs_ax=%eax cld PUSH_GS pushl %fs pushl %es pushl %ds - pushl %eax + pushl \pt_regs_ax pushl %ebp pushl %edi pushl %esi @@ -211,7 +211,11 @@ ENTRY(ret_from_fork) popl %eax pushl $0x0202 # Reset kernel eflags popfl - jmp syscall_exit + + /* When we fork, we trace the syscall return in the child, too. */ + movl %esp, %eax + call syscall_return_slowpath + jmp restore_all END(ret_from_fork) ENTRY(ret_from_kernel_thread) @@ -224,7 +228,15 @@ ENTRY(ret_from_kernel_thread) movl PT_EBP(%esp), %eax call *PT_EBX(%esp) movl $0, PT_EAX(%esp) - jmp syscall_exit + + /* + * Kernel threads return to userspace as if returning from a syscall. + * We should check whether anything actually uses this path and, if so, + * consider switching it over to ret_from_fork. + */ + movl %esp, %eax + call syscall_return_slowpath + jmp restore_all ENDPROC(ret_from_kernel_thread) /* @@ -255,7 +267,6 @@ ret_from_intr: jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) - LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl %esp, %eax @@ -276,76 +287,47 @@ need_resched: END(resume_kernel) #endif -/* - * SYSENTER_RETURN points to after the SYSENTER instruction - * in the vsyscall page. See vsyscall-sysentry.S, which defines - * the symbol. - */ - # SYSENTER call handler stub ENTRY(entry_SYSENTER_32) movl TSS_sysenter_sp0(%esp), %esp sysenter_past_esp: + pushl $__USER_DS /* pt_regs->ss */ + pushl %ecx /* pt_regs->cx */ + pushfl /* pt_regs->flags (except IF = 0) */ + orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ + pushl $__USER_CS /* pt_regs->cs */ + pushl $0 /* pt_regs->ip = 0 (placeholder) */ + pushl %eax /* pt_regs->orig_ax */ + SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ + /* - * Interrupts are disabled here, but we can't trace it until - * enough kernel state to call TRACE_IRQS_OFF can be called - but - * we immediately enable interrupts at that point anyway. - */ - pushl $__USER_DS - pushl %ebp - pushfl - orl $X86_EFLAGS_IF, (%esp) - pushl $__USER_CS - /* - * Push current_thread_info()->sysenter_return to the stack. - * A tiny bit of offset fixup is necessary: TI_sysenter_return - * is relative to thread_info, which is at the bottom of the - * kernel stack page. 4*4 means the 4 words pushed above; - * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack; - * and THREAD_SIZE takes us to the bottom. + * User mode is traced as though IRQs are on, and SYSENTER + * turned them off. */ - pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) - - pushl %eax - SAVE_ALL - ENABLE_INTERRUPTS(CLBR_NONE) - -/* - * Load the potential sixth argument from user stack. - * Careful about security. - */ - cmpl $__PAGE_OFFSET-3, %ebp - jae syscall_fault - ASM_STAC -1: movl (%ebp), %ebp - ASM_CLAC - movl %ebp, PT_EBP(%esp) - _ASM_EXTABLE(1b, syscall_fault) + TRACE_IRQS_OFF - GET_THREAD_INFO(%ebp) + movl %esp, %eax + call do_fast_syscall_32 + testl %eax, %eax + jz .Lsyscall_32_done - testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) - jnz syscall_trace_entry -sysenter_do_call: - cmpl $(NR_syscalls), %eax - jae sysenter_badsys - call *sys_call_table(, %eax, 4) -sysenter_after_call: - movl %eax, PT_EAX(%esp) - LOCKDEP_SYS_EXIT - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testl $_TIF_ALLWORK_MASK, %ecx - jnz syscall_exit_work_irqs_off -sysenter_exit: -/* if something modifies registers it must also disable sysexit */ - movl PT_EIP(%esp), %edx - movl PT_OLDESP(%esp), %ecx - xorl %ebp, %ebp - TRACE_IRQS_ON +/* Opportunistic SYSEXIT */ + TRACE_IRQS_ON /* User mode traces as IRQs on. */ + movl PT_EIP(%esp), %edx /* pt_regs->ip */ + movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */ 1: mov PT_FS(%esp), %fs PTGS_TO_GS + popl %ebx /* pt_regs->bx */ + addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */ + popl %esi /* pt_regs->si */ + popl %edi /* pt_regs->di */ + popl %ebp /* pt_regs->bp */ + popl %eax /* pt_regs->ax */ + + /* + * Return back to the vDSO, which will pop ecx and edx. + * Don't bother with DS and ES (they already contain __USER_DS). + */ ENABLE_INTERRUPTS_SYSEXIT .pushsection .fixup, "ax" @@ -359,21 +341,18 @@ ENDPROC(entry_SYSENTER_32) # system call handler stub ENTRY(entry_INT80_32) ASM_CLAC - pushl %eax # save orig_eax - SAVE_ALL - GET_THREAD_INFO(%ebp) - # system call tracing in operation / emulation - testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp) - jnz syscall_trace_entry - cmpl $(NR_syscalls), %eax - jae syscall_badsys -syscall_call: - call *sys_call_table(, %eax, 4) -syscall_after_call: - movl %eax, PT_EAX(%esp) # store the return value -syscall_exit: - LOCKDEP_SYS_EXIT - jmp syscall_exit_work + pushl %eax /* pt_regs->orig_ax */ + SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ + + /* + * User mode is traced as though IRQs are on. Unlike the 64-bit + * case, INT80 is a trap gate on 32-bit kernels, so interrupts + * are already on (unless user code is messing around with iopl). + */ + + movl %esp, %eax + call do_syscall_32_irqs_on +.Lsyscall_32_done: restore_all: TRACE_IRQS_IRET @@ -450,47 +429,6 @@ ldt_ss: #endif ENDPROC(entry_INT80_32) - # perform syscall exit tracing - ALIGN -syscall_trace_entry: - movl $-ENOSYS, PT_EAX(%esp) - movl %esp, %eax - call syscall_trace_enter - /* What it returned is what we'll actually use. */ - cmpl $(NR_syscalls), %eax - jnae syscall_call - jmp syscall_exit -END(syscall_trace_entry) - - # perform syscall exit tracing - ALIGN -syscall_exit_work_irqs_off: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) - -syscall_exit_work: - movl %esp, %eax - call syscall_return_slowpath - jmp restore_all -END(syscall_exit_work) - -syscall_fault: - ASM_CLAC - GET_THREAD_INFO(%ebp) - movl $-EFAULT, PT_EAX(%esp) - jmp resume_userspace -END(syscall_fault) - -syscall_badsys: - movl $-ENOSYS, %eax - jmp syscall_after_call -END(syscall_badsys) - -sysenter_badsys: - movl $-ENOSYS, %eax - jmp sysenter_after_call -END(sysenter_badsys) - .macro FIXUP_ESPFIX_STACK /* * Switch back for ESPFIX stack to the normal zerobased stack diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 055a01de7c8d..53616ca03244 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -391,20 +391,16 @@ GLOBAL(stub_execveat) jmp return_from_execve END(stub_execveat) -#if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION) +#if defined(CONFIG_X86_X32_ABI) .align 8 GLOBAL(stub_x32_execve) -GLOBAL(stub32_execve) call compat_sys_execve jmp return_from_execve -END(stub32_execve) END(stub_x32_execve) .align 8 GLOBAL(stub_x32_execveat) -GLOBAL(stub32_execveat) call compat_sys_execveat jmp return_from_execve -END(stub32_execveat) END(stub_x32_execveat) #endif @@ -557,7 +553,6 @@ ret_from_intr: jz retint_kernel /* Interrupt came from user space */ - LOCKDEP_SYS_EXIT_IRQ GLOBAL(retint_user) mov %rsp,%rdi call prepare_exit_to_usermode @@ -587,7 +582,7 @@ retint_kernel: * At this label, code paths which return to kernel and to user, * which come from interrupts/exception and from syscalls, merge. */ -restore_regs_and_iret: +GLOBAL(restore_regs_and_iret) RESTORE_EXTRA_REGS restore_c_regs_and_iret: RESTORE_C_REGS diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index a9360d40fb7f..c3201830a85e 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -16,16 +16,6 @@ #include <linux/linkage.h> #include <linux/err.h> -/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ -#include <linux/elf-em.h> -#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) -#define __AUDIT_ARCH_LE 0x40000000 - -#ifndef CONFIG_AUDITSYSCALL -# define sysexit_audit ia32_ret_from_sys_call_irqs_off -# define sysretl_audit ia32_ret_from_sys_call_irqs_off -#endif - .section .entry.text, "ax" #ifdef CONFIG_PARAVIRT @@ -58,219 +48,87 @@ ENDPROC(native_usergs_sysret32) * with the int 0x80 path. */ ENTRY(entry_SYSENTER_compat) - /* - * Interrupts are off on entry. - * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, - * it is too small to ever cause noticeable irq latency. - */ + /* Interrupts are off on entry. */ SWAPGS_UNSAFE_STACK movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - ENABLE_INTERRUPTS(CLBR_NONE) - /* Zero-extending 32-bit regs, do not remove */ - movl %ebp, %ebp + /* + * User tracing code (ptrace or signal handlers) might assume that + * the saved RAX contains a 32-bit number when we're invoking a 32-bit + * syscall. Just in case the high bits are nonzero, zero-extend + * the syscall number. (This could almost certainly be deleted + * with no ill effects.) + */ movl %eax, %eax - movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d - /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ - pushq %rbp /* pt_regs->sp */ - pushfq /* pt_regs->flags */ + pushq %rcx /* pt_regs->sp */ + + /* + * Push flags. This is nasty. First, interrupts are currently + * off, but we need pt_regs->flags to have IF set. Second, even + * if TF was set when SYSENTER started, it's clear by now. We fix + * that later using TIF_SINGLESTEP. + */ + pushfq /* pt_regs->flags (except IF = 0) */ + orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ + ASM_CLAC /* Clear AC after saving FLAGS */ + pushq $__USER32_CS /* pt_regs->cs */ - pushq %r10 /* pt_regs->ip = thread_info->sysenter_return */ + xorq %r8,%r8 + pushq %r8 /* pt_regs->ip = 0 (placeholder) */ pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ - pushq %rcx /* pt_regs->cx */ + pushq %rcx /* pt_regs->cx (will be overwritten) */ pushq $-ENOSYS /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 = 0 */ + pushq %r8 /* pt_regs->r9 = 0 */ + pushq %r8 /* pt_regs->r10 = 0 */ + pushq %r8 /* pt_regs->r11 = 0 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r8 /* pt_regs->r12 = 0 */ + pushq %r8 /* pt_regs->r13 = 0 */ + pushq %r8 /* pt_regs->r14 = 0 */ + pushq %r8 /* pt_regs->r15 = 0 */ cld - sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */ - - /* - * no need to do an access_ok check here because rbp has been - * 32-bit zero extended - */ - ASM_STAC -1: movl (%rbp), %ebp - _ASM_EXTABLE(1b, ia32_badarg) - ASM_CLAC /* * Sysenter doesn't filter flags, so we need to clear NT * ourselves. To save a few cycles, we can check whether * NT was set instead of doing an unconditional popfq. + * This needs to happen before enabling interrupts so that + * we don't get preempted with NT set. + * + * NB.: sysenter_fix_flags is a label with the code under it moved + * out-of-line as an optimization: NT is unlikely to be set in the + * majority of the cases and instead of polluting the I$ unnecessarily, + * we're keeping that code behind a branch which will predict as + * not-taken and therefore its instructions won't be fetched. */ testl $X86_EFLAGS_NT, EFLAGS(%rsp) jnz sysenter_fix_flags sysenter_flags_fixed: - orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz sysenter_tracesys - -sysenter_do_call: - /* 32-bit syscall -> 64-bit C ABI argument conversion */ - movl %edi, %r8d /* arg5 */ - movl %ebp, %r9d /* arg6 */ - xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */ - movl %ebx, %edi /* arg1 */ - movl %edx, %edx /* arg3 (zero extension) */ -sysenter_dispatch: - cmpq $(IA32_NR_syscalls-1), %rax - ja 1f - call *ia32_sys_call_table(, %rax, 8) - movq %rax, RAX(%rsp) -1: - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz sysexit_audit -sysexit_from_sys_call: /* - * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an - * NMI between STI and SYSEXIT has poorly specified behavior, - * and and NMI followed by an IRQ with usergs is fatal. So - * we just pretend we're using SYSEXIT but we really use - * SYSRETL instead. - * - * This code path is still called 'sysexit' because it pairs - * with 'sysenter' and it uses the SYSENTER calling convention. + * User mode is traced as though IRQs are on, and SYSENTER + * turned them off. */ - andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - movl RIP(%rsp), %ecx /* User %eip */ - movq RAX(%rsp), %rax - movl RSI(%rsp), %esi - movl RDI(%rsp), %edi - xorl %edx, %edx /* Do not leak kernel information */ - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r10 - movl EFLAGS(%rsp), %r11d /* User eflags */ - TRACE_IRQS_ON - - /* - * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT, - * since it avoids a dicey window with interrupts enabled. - */ - movl RSP(%rsp), %esp - - /* - * USERGS_SYSRET32 does: - * gsbase = user's gs base - * eip = ecx - * rflags = r11 - * cs = __USER32_CS - * ss = __USER_DS - * - * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does: - * - * pop %ebp - * pop %edx - * pop %ecx - * - * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to - * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's - * address (already known to user code), and R12-R15 are - * callee-saved and therefore don't contain any interesting - * kernel data. - */ - USERGS_SYSRET32 - -#ifdef CONFIG_AUDITSYSCALL - .macro auditsys_entry_common - /* - * At this point, registers hold syscall args in the 32-bit syscall ABI: - * EAX is syscall number, the 6 args are in EBX,ECX,EDX,ESI,EDI,EBP. - * - * We want to pass them to __audit_syscall_entry(), which is a 64-bit - * C function with 5 parameters, so shuffle them to match what - * the function expects: RDI,RSI,RDX,RCX,R8. - */ - movl %esi, %r8d /* arg5 (R8 ) <= 4th syscall arg (ESI) */ - xchg %ecx, %edx /* arg4 (RCX) <= 3rd syscall arg (EDX) */ - /* arg3 (RDX) <= 2nd syscall arg (ECX) */ - movl %ebx, %esi /* arg2 (RSI) <= 1st syscall arg (EBX) */ - movl %eax, %edi /* arg1 (RDI) <= syscall number (EAX) */ - call __audit_syscall_entry - - /* - * We are going to jump back to the syscall dispatch code. - * Prepare syscall args as required by the 64-bit C ABI. - * Registers clobbered by __audit_syscall_entry() are - * loaded from pt_regs on stack: - */ - movl ORIG_RAX(%rsp), %eax /* syscall number */ - movl %ebx, %edi /* arg1 */ - movl RCX(%rsp), %esi /* arg2 */ - movl RDX(%rsp), %edx /* arg3 */ - movl RSI(%rsp), %ecx /* arg4 */ - movl RDI(%rsp), %r8d /* arg5 */ - .endm - - .macro auditsys_exit exit - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz ia32_ret_from_sys_call - movl %eax, %esi /* second arg, syscall return value */ - cmpl $-MAX_ERRNO, %eax /* is it an error ? */ - jbe 1f - movslq %eax, %rsi /* if error sign extend to 64 bits */ -1: setbe %al /* 1 if error, 0 if not */ - movzbl %al, %edi /* zero-extend that into %edi */ - call __audit_syscall_exit - movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi - DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jz \exit - xorl %eax, %eax /* Do not leak kernel information */ - movq %rax, R11(%rsp) - movq %rax, R10(%rsp) - movq %rax, R9(%rsp) - movq %rax, R8(%rsp) - jmp int_ret_from_sys_call_irqs_off - .endm -sysenter_auditsys: - auditsys_entry_common - movl %ebp, %r9d /* reload 6th syscall arg */ - jmp sysenter_dispatch - -sysexit_audit: - auditsys_exit sysexit_from_sys_call -#endif + movq %rsp, %rdi + call do_fast_syscall_32 + testl %eax, %eax + jz .Lsyscall_32_done + jmp sysret32_from_system_call sysenter_fix_flags: - pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) + pushq $X86_EFLAGS_FIXED popfq jmp sysenter_flags_fixed - -sysenter_tracesys: -#ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jz sysenter_auditsys -#endif - SAVE_EXTRA_REGS - xorl %eax, %eax /* Do not leak kernel information */ - movq %rax, R11(%rsp) - movq %rax, R10(%rsp) - movq %rax, R9(%rsp) - movq %rax, R8(%rsp) - movq %rsp, %rdi /* &pt_regs -> arg1 */ - call syscall_trace_enter - - /* Reload arg registers from stack. (see sysenter_tracesys) */ - movl RCX(%rsp), %ecx - movl RDX(%rsp), %edx - movl RSI(%rsp), %esi - movl RDI(%rsp), %edi - movl %eax, %eax /* zero extension */ - - RESTORE_EXTRA_REGS - jmp sysenter_do_call ENDPROC(entry_SYSENTER_compat) /* @@ -298,21 +156,14 @@ ENDPROC(entry_SYSENTER_compat) * edi arg5 * esp user stack * 0(%esp) arg6 - * - * This is purely a fast path. For anything complicated we use the int 0x80 - * path below. We set up a complete hardware stack frame to share code - * with the int 0x80 path. */ ENTRY(entry_SYSCALL_compat) - /* - * Interrupts are off on entry. - * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, - * it is too small to ever cause noticeable irq latency. - */ + /* Interrupts are off on entry. */ SWAPGS_UNSAFE_STACK + + /* Stash user ESP and switch to the kernel stack. */ movl %esp, %r8d movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - ENABLE_INTERRUPTS(CLBR_NONE) /* Zero-extending 32-bit regs, do not remove */ movl %eax, %eax @@ -327,162 +178,67 @@ ENTRY(entry_SYSCALL_compat) pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ - pushq %rbp /* pt_regs->cx */ - movl %ebp, %ecx + pushq %rcx /* pt_regs->cx (will be overwritten) */ pushq $-ENOSYS /* pt_regs->ax */ - sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */ + xorq %r8,%r8 + pushq %r8 /* pt_regs->r8 = 0 */ + pushq %r8 /* pt_regs->r9 = 0 */ + pushq %r8 /* pt_regs->r10 = 0 */ + pushq %r8 /* pt_regs->r11 = 0 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r8 /* pt_regs->r12 = 0 */ + pushq %r8 /* pt_regs->r13 = 0 */ + pushq %r8 /* pt_regs->r14 = 0 */ + pushq %r8 /* pt_regs->r15 = 0 */ /* - * No need to do an access_ok check here because r8 has been - * 32-bit zero extended: + * User mode is traced as though IRQs are on, and SYSENTER + * turned them off. */ - ASM_STAC -1: movl (%r8), %r9d - _ASM_EXTABLE(1b, ia32_badarg) - ASM_CLAC - orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz cstar_tracesys - -cstar_do_call: - /* 32-bit syscall -> 64-bit C ABI argument conversion */ - movl %edi, %r8d /* arg5 */ - /* r9 already loaded */ /* arg6 */ - xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */ - movl %ebx, %edi /* arg1 */ - movl %edx, %edx /* arg3 (zero extension) */ - -cstar_dispatch: - cmpq $(IA32_NR_syscalls-1), %rax - ja 1f - - call *ia32_sys_call_table(, %rax, 8) - movq %rax, RAX(%rsp) -1: - DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz sysretl_audit -sysretl_from_sys_call: - andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - movl RDX(%rsp), %edx - movl RSI(%rsp), %esi - movl RDI(%rsp), %edi - movl RIP(%rsp), %ecx - movl EFLAGS(%rsp), %r11d - movq RAX(%rsp), %rax - xorq %r10, %r10 - xorq %r9, %r9 - xorq %r8, %r8 - TRACE_IRQS_ON - movl RSP(%rsp), %esp - /* - * 64-bit->32-bit SYSRET restores eip from ecx, - * eflags from r11 (but RF and VM bits are forced to 0), - * cs and ss are loaded from MSRs. - * (Note: 32-bit->32-bit SYSRET is different: since r11 - * does not exist, it merely sets eflags.IF=1). + movq %rsp, %rdi + call do_fast_syscall_32 + testl %eax, %eax + jz .Lsyscall_32_done + + /* Opportunistic SYSRET */ +sysret32_from_system_call: + TRACE_IRQS_ON /* User mode traces as IRQs on. */ + movq RBX(%rsp), %rbx /* pt_regs->rbx */ + movq RBP(%rsp), %rbp /* pt_regs->rbp */ + movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ + movq RIP(%rsp), %rcx /* pt_regs->ip (in rcx) */ + addq $RAX, %rsp /* Skip r8-r15 */ + popq %rax /* pt_regs->rax */ + popq %rdx /* Skip pt_regs->cx */ + popq %rdx /* pt_regs->dx */ + popq %rsi /* pt_regs->si */ + popq %rdi /* pt_regs->di */ + + /* + * USERGS_SYSRET32 does: + * GSBASE = user's GS base + * EIP = ECX + * RFLAGS = R11 + * CS = __USER32_CS + * SS = __USER_DS + * + * ECX will not match pt_regs->cx, but we're returning to a vDSO + * trampoline that will fix up RCX, so this is okay. * - * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss - * descriptor is not reinitialized. This means that we must - * avoid SYSRET with SS == NULL, which could happen if we schedule, - * exit the kernel, and re-enter using an interrupt vector. (All - * interrupt entries on x86_64 set SS to NULL.) We prevent that - * from happening by reloading SS in __switch_to. - */ - USERGS_SYSRET32 - -#ifdef CONFIG_AUDITSYSCALL -cstar_auditsys: - movl %r9d, R9(%rsp) /* register to be clobbered by call */ - auditsys_entry_common - movl R9(%rsp), %r9d /* reload 6th syscall arg */ - jmp cstar_dispatch - -sysretl_audit: - auditsys_exit sysretl_from_sys_call -#endif - -cstar_tracesys: -#ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jz cstar_auditsys -#endif - xchgl %r9d, %ebp - SAVE_EXTRA_REGS - xorl %eax, %eax /* Do not leak kernel information */ - movq %rax, R11(%rsp) - movq %rax, R10(%rsp) - movq %r9, R9(%rsp) - movq %rax, R8(%rsp) - movq %rsp, %rdi /* &pt_regs -> arg1 */ - call syscall_trace_enter - movl R9(%rsp), %r9d - - /* Reload arg registers from stack. (see sysenter_tracesys) */ - movl RCX(%rsp), %ecx - movl RDX(%rsp), %edx - movl RSI(%rsp), %esi - movl RDI(%rsp), %edi - movl %eax, %eax /* zero extension */ - - RESTORE_EXTRA_REGS - xchgl %ebp, %r9d - jmp cstar_do_call + * R12-R15 are callee-saved, so they contain whatever was in them + * when the system call started, which is already known to user + * code. We zero R8-R10 to avoid info leaks. + */ + xorq %r8, %r8 + xorq %r9, %r9 + xorq %r10, %r10 + movq RSP-ORIG_RAX(%rsp), %rsp + USERGS_SYSRET32 END(entry_SYSCALL_compat) -ia32_badarg: - /* - * So far, we've entered kernel mode, set AC, turned on IRQs, and - * saved C regs except r8-r11. We haven't done any of the other - * standard entry work, though. We want to bail, but we shouldn't - * treat this as a syscall entry since we don't even know what the - * args are. Instead, treat this as a non-syscall entry, finish - * the entry work, and immediately exit after setting AX = -EFAULT. - * - * We're really just being polite here. Killing the task outright - * would be a reasonable action, too. Given that the only valid - * way to have gotten here is through the vDSO, and we already know - * that the stack pointer is bad, the task isn't going to survive - * for long no matter what we do. - */ - - ASM_CLAC /* undo STAC */ - movq $-EFAULT, RAX(%rsp) /* return -EFAULT if possible */ - - /* Fill in the rest of pt_regs */ - xorl %eax, %eax - movq %rax, R11(%rsp) - movq %rax, R10(%rsp) - movq %rax, R9(%rsp) - movq %rax, R8(%rsp) - SAVE_EXTRA_REGS - - /* Turn IRQs back off. */ - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - - /* Now finish entering normal kernel mode. */ -#ifdef CONFIG_CONTEXT_TRACKING - call enter_from_user_mode -#endif - - /* And exit again. */ - jmp retint_user - -ia32_ret_from_sys_call_irqs_off: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - -ia32_ret_from_sys_call: - xorl %eax, %eax /* Do not leak kernel information */ - movq %rax, R11(%rsp) - movq %rax, R10(%rsp) - movq %rax, R9(%rsp) - movq %rax, R8(%rsp) - jmp int_ret_from_sys_call - /* * Emulated IA32 system calls via int 0x80. * @@ -507,14 +263,17 @@ ia32_ret_from_sys_call: ENTRY(entry_INT80_compat) /* * Interrupts are off on entry. - * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, - * it is too small to ever cause noticeable irq latency. */ PARAVIRT_ADJUST_EXCEPTION_FRAME SWAPGS - ENABLE_INTERRUPTS(CLBR_NONE) - /* Zero-extending 32-bit regs, do not remove */ + /* + * User tracing code (ptrace or signal handlers) might assume that + * the saved RAX contains a 32-bit number when we're invoking a 32-bit + * syscall. Just in case the high bits are nonzero, zero-extend + * the syscall number. (This could almost certainly be deleted + * with no ill effects.) + */ movl %eax, %eax /* Construct struct pt_regs on stack (iret frame is already on stack) */ @@ -524,67 +283,37 @@ ENTRY(entry_INT80_compat) pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 */ - pushq $0 /* pt_regs->r9 */ - pushq $0 /* pt_regs->r10 */ - pushq $0 /* pt_regs->r11 */ + xorq %r8,%r8 + pushq %r8 /* pt_regs->r8 = 0 */ + pushq %r8 /* pt_regs->r9 = 0 */ + pushq %r8 /* pt_regs->r10 = 0 */ + pushq %r8 /* pt_regs->r11 = 0 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ cld - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ - - orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) - testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) - jnz ia32_tracesys - -ia32_do_call: - /* 32-bit syscall -> 64-bit C ABI argument conversion */ - movl %edi, %r8d /* arg5 */ - movl %ebp, %r9d /* arg6 */ - xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */ - movl %ebx, %edi /* arg1 */ - movl %edx, %edx /* arg3 (zero extension) */ - cmpq $(IA32_NR_syscalls-1), %rax - ja 1f - call *ia32_sys_call_table(, %rax, 8) - movq %rax, RAX(%rsp) -1: - jmp int_ret_from_sys_call - -ia32_tracesys: - SAVE_EXTRA_REGS - movq %rsp, %rdi /* &pt_regs -> arg1 */ - call syscall_trace_enter /* - * Reload arg registers from stack in case ptrace changed them. - * Don't reload %eax because syscall_trace_enter() returned - * the %rax value we should see. But do truncate it to 32 bits. - * If it's -1 to make us punt the syscall, then (u32)-1 is still - * an appropriately invalid value. + * User mode is traced as though IRQs are on, and the interrupt + * gate turned them off. */ - movl RCX(%rsp), %ecx - movl RDX(%rsp), %edx - movl RSI(%rsp), %esi - movl RDI(%rsp), %edi - movl %eax, %eax /* zero extension */ - RESTORE_EXTRA_REGS - jmp ia32_do_call -END(entry_INT80_compat) + TRACE_IRQS_OFF - .macro PTREGSCALL label, func - ALIGN -GLOBAL(\label) - leaq \func(%rip), %rax - jmp ia32_ptregs_common - .endm + movq %rsp, %rdi + call do_syscall_32_irqs_off +.Lsyscall_32_done: - PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn - PTREGSCALL stub32_sigreturn, sys32_sigreturn - PTREGSCALL stub32_fork, sys_fork - PTREGSCALL stub32_vfork, sys_vfork + /* Go back to user mode. */ + TRACE_IRQS_ON + SWAPGS + jmp restore_regs_and_iret +END(entry_INT80_compat) ALIGN GLOBAL(stub32_clone) - leaq sys_clone(%rip), %rax /* * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr). * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val). @@ -593,12 +322,4 @@ GLOBAL(stub32_clone) * so we need to swap arguments here before calling it: */ xchg %r8, %rcx - jmp ia32_ptregs_common - - ALIGN -ia32_ptregs_common: - SAVE_EXTRA_REGS 8 - call *%rax - RESTORE_EXTRA_REGS 8 - ret -END(ia32_ptregs_common) + jmp sys_clone diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 8ea34f94e973..9a6649857106 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -4,24 +4,21 @@ #include <linux/sys.h> #include <linux/cache.h> #include <asm/asm-offsets.h> +#include <asm/syscall.h> #ifdef CONFIG_IA32_EMULATION #define SYM(sym, compat) compat #else #define SYM(sym, compat) sym -#define ia32_sys_call_table sys_call_table -#define __NR_syscall_compat_max __NR_syscall_max #endif -#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ; +#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long SYM(sym, compat)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; #include <asm/syscalls_32.h> #undef __SYSCALL_I386 #define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat), -typedef asmlinkage void (*sys_call_ptr_t)(void); - -extern asmlinkage void sys_ni_syscall(void); +extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = { /* diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index 4ac730b37f0b..41283d22be7a 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -14,13 +14,13 @@ # define __SYSCALL_X32(nr, sym, compat) /* nothing */ #endif -#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; +#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; #include <asm/syscalls_64.h> #undef __SYSCALL_64 #define __SYSCALL_64(nr, sym, compat) [nr] = sym, -extern void sys_ni_syscall(void); +extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 7663c455b9f6..f17705e1332c 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -8,7 +8,7 @@ # 0 i386 restart_syscall sys_restart_syscall 1 i386 exit sys_exit -2 i386 fork sys_fork stub32_fork +2 i386 fork sys_fork sys_fork 3 i386 read sys_read 4 i386 write sys_write 5 i386 open sys_open compat_sys_open @@ -17,7 +17,7 @@ 8 i386 creat sys_creat 9 i386 link sys_link 10 i386 unlink sys_unlink -11 i386 execve sys_execve stub32_execve +11 i386 execve sys_execve compat_sys_execve 12 i386 chdir sys_chdir 13 i386 time sys_time compat_sys_time 14 i386 mknod sys_mknod @@ -125,7 +125,7 @@ 116 i386 sysinfo sys_sysinfo compat_sys_sysinfo 117 i386 ipc sys_ipc compat_sys_ipc 118 i386 fsync sys_fsync -119 i386 sigreturn sys_sigreturn stub32_sigreturn +119 i386 sigreturn sys_sigreturn sys32_sigreturn 120 i386 clone sys_clone stub32_clone 121 i386 setdomainname sys_setdomainname 122 i386 uname sys_newuname @@ -179,7 +179,7 @@ 170 i386 setresgid sys_setresgid16 171 i386 getresgid sys_getresgid16 172 i386 prctl sys_prctl -173 i386 rt_sigreturn sys_rt_sigreturn stub32_rt_sigreturn +173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn 174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 175 i386 rt_sigprocmask sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending @@ -196,7 +196,7 @@ 187 i386 sendfile sys_sendfile compat_sys_sendfile 188 i386 getpmsg 189 i386 putpmsg -190 i386 vfork sys_vfork stub32_vfork +190 i386 vfork sys_vfork sys_vfork 191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit 192 i386 mmap2 sys_mmap_pgoff 193 i386 truncate64 sys_truncate64 sys32_truncate64 @@ -364,7 +364,7 @@ 355 i386 getrandom sys_getrandom 356 i386 memfd_create sys_memfd_create 357 i386 bpf sys_bpf -358 i386 execveat sys_execveat stub32_execveat +358 i386 execveat sys_execveat compat_sys_execveat 359 i386 socket sys_socket 360 i386 socketpair sys_socketpair 361 i386 bind sys_bind @@ -382,3 +382,4 @@ 373 i386 shutdown sys_shutdown 374 i386 userfaultfd sys_userfaultfd 375 i386 membarrier sys_membarrier +376 i386 mlock2 sys_mlock2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 278842fdf1f6..314a90bfc09c 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -331,6 +331,7 @@ 322 64 execveat stub_execveat 323 common userfaultfd sys_userfaultfd 324 common membarrier sys_membarrier +325 common mlock2 sys_mlock2 # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index a3d0767a6b29..265c0ed68118 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -19,9 +19,7 @@ obj-y += vma.o # vDSO images to build vdso_img-$(VDSO64-y) += 64 vdso_img-$(VDSOX32-y) += x32 -vdso_img-$(VDSO32-y) += 32-int80 -vdso_img-$(CONFIG_IA32_EMULATION) += 32-syscall -vdso_img-$(VDSO32-y) += 32-sysenter +vdso_img-$(VDSO32-y) += 32 obj-$(VDSO32-y) += vdso32-setup.o @@ -69,7 +67,7 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(vobjs): KBUILD_CFLAGS += $(CFL) @@ -122,15 +120,6 @@ $(obj)/%.so: $(obj)/%.so.dbg $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE $(call if_changed,vdso) -# -# Build multiple 32-bit vDSO images to choose from at boot time. -# -vdso32.so-$(VDSO32-y) += int80 -vdso32.so-$(CONFIG_IA32_EMULATION) += syscall -vdso32.so-$(VDSO32-y) += sysenter - -vdso32-images = $(vdso32.so-y:%=vdso32-%.so) - CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 @@ -139,14 +128,12 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ targets += vdso32/vdso32.lds -targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) +targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o targets += vdso32/vclock_gettime.o -$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) - -KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) -$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32 +KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO +$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) +$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32 KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) @@ -157,13 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) +$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) -$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ - $(obj)/vdso32/vdso32.lds \ - $(obj)/vdso32/vclock_gettime.o \ - $(obj)/vdso32/note.o \ - $(obj)/vdso32/%.o +$(obj)/vdso32.so.dbg: FORCE \ + $(obj)/vdso32/vdso32.lds \ + $(obj)/vdso32/vclock_gettime.o \ + $(obj)/vdso32/note.o \ + $(obj)/vdso32/system_call.o $(call if_changed,vdso) # @@ -206,4 +193,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE PHONY += vdso_install $(vdso_img_insttargets) vdso_install: $(vdso_img_insttargets) FORCE -clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so* +clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so* diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 8627db24a7f6..785d9922b106 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -98,10 +98,10 @@ struct vdso_sym required_syms[] = { "VDSO_FAKE_SECTION_TABLE_END", false }, {"VDSO32_NOTE_MASK", true}, - {"VDSO32_SYSENTER_RETURN", true}, {"__kernel_vsyscall", true}, {"__kernel_sigreturn", true}, {"__kernel_rt_sigreturn", true}, + {"int80_landing_pad", true}, }; __attribute__((format(printf, 1, 2))) __attribute__((noreturn)) diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index e904c270573b..08a317a9ae4b 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -48,35 +48,9 @@ __setup("vdso32=", vdso32_setup); __setup_param("vdso=", vdso_setup, vdso32_setup, 0); #endif -#ifdef CONFIG_X86_64 - -#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32)) -#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32)) - -#else /* CONFIG_X86_32 */ - -#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) -#define vdso32_syscall() (0) - -#endif /* CONFIG_X86_64 */ - -#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) -const struct vdso_image *selected_vdso32; -#endif - int __init sysenter_setup(void) { -#ifdef CONFIG_COMPAT - if (vdso32_syscall()) - selected_vdso32 = &vdso_image_32_syscall; - else -#endif - if (vdso32_sysenter()) - selected_vdso32 = &vdso_image_32_sysenter; - else - selected_vdso32 = &vdso_image_32_int80; - - init_vdso_image(selected_vdso32); + init_vdso_image(&vdso_image_32); return 0; } diff --git a/arch/x86/entry/vdso/vdso32/int80.S b/arch/x86/entry/vdso/vdso32/int80.S deleted file mode 100644 index b15b7c01aedb..000000000000 --- a/arch/x86/entry/vdso/vdso32/int80.S +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Code for the vDSO. This version uses the old int $0x80 method. - * - * First get the common code for the sigreturn entry points. - * This must come first. - */ -#include "sigreturn.S" - - .text - .globl __kernel_vsyscall - .type __kernel_vsyscall,@function - ALIGN -__kernel_vsyscall: -.LSTART_vsyscall: - int $0x80 - ret -.LEND_vsyscall: - .size __kernel_vsyscall,.-.LSTART_vsyscall - .previous - - .section .eh_frame,"a",@progbits -.LSTARTFRAMEDLSI: - .long .LENDCIEDLSI-.LSTARTCIEDLSI -.LSTARTCIEDLSI: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zR" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0x0c /* DW_CFA_def_cfa */ - .uleb128 4 - .uleb128 4 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .uleb128 1 - .align 4 -.LENDCIEDLSI: - .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */ -.LSTARTFDEDLSI: - .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */ - .long .LSTART_vsyscall-. /* PC-relative start address */ - .long .LEND_vsyscall-.LSTART_vsyscall - .uleb128 0 - .align 4 -.LENDFDEDLSI: - .previous - - /* - * Pad out the segment to match the size of the sysenter.S version. - */ -VDSO32_vsyscall_eh_frame_size = 0x40 - .section .data,"aw",@progbits - .space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0 - .previous diff --git a/arch/x86/entry/vdso/vdso32/syscall.S b/arch/x86/entry/vdso/vdso32/syscall.S deleted file mode 100644 index 6b286bb5251c..000000000000 --- a/arch/x86/entry/vdso/vdso32/syscall.S +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Code for the vDSO. This version uses the syscall instruction. - * - * First get the common code for the sigreturn entry points. - * This must come first. - */ -#define SYSCALL_ENTER_KERNEL syscall -#include "sigreturn.S" - -#include <asm/segment.h> - - .text - .globl __kernel_vsyscall - .type __kernel_vsyscall,@function - ALIGN -__kernel_vsyscall: -.LSTART_vsyscall: - push %ebp -.Lpush_ebp: - movl %ecx, %ebp - syscall - movl %ebp, %ecx - popl %ebp -.Lpop_ebp: - ret -.LEND_vsyscall: - .size __kernel_vsyscall,.-.LSTART_vsyscall - - .section .eh_frame,"a",@progbits -.LSTARTFRAME: - .long .LENDCIE-.LSTARTCIE -.LSTARTCIE: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zR" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0x0c /* DW_CFA_def_cfa */ - .uleb128 4 - .uleb128 4 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .uleb128 1 - .align 4 -.LENDCIE: - - .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */ -.LSTARTFDE1: - .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */ - .long .LSTART_vsyscall-. /* PC-relative start address */ - .long .LEND_vsyscall-.LSTART_vsyscall - .uleb128 0 /* Augmentation length */ - /* What follows are the instructions for the table generation. - We have to record all changes of the stack pointer. */ - .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .uleb128 8 - .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */ - .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */ - .byte 0xc5 /* DW_CFA_restore %ebp */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .uleb128 4 - .align 4 -.LENDFDE1: - .previous - - /* - * Pad out the segment to match the size of the sysenter.S version. - */ -VDSO32_vsyscall_eh_frame_size = 0x40 - .section .data,"aw",@progbits - .space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0 - .previous diff --git a/arch/x86/entry/vdso/vdso32/sysenter.S b/arch/x86/entry/vdso/vdso32/sysenter.S deleted file mode 100644 index e354bceee0e0..000000000000 --- a/arch/x86/entry/vdso/vdso32/sysenter.S +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Code for the vDSO. This version uses the sysenter instruction. - * - * First get the common code for the sigreturn entry points. - * This must come first. - */ -#include "sigreturn.S" - -/* - * The caller puts arg2 in %ecx, which gets pushed. The kernel will use - * %ecx itself for arg2. The pushing is because the sysexit instruction - * (found in entry.S) requires that we clobber %ecx with the desired %esp. - * User code might expect that %ecx is unclobbered though, as it would be - * for returning via the iret instruction, so we must push and pop. - * - * The caller puts arg3 in %edx, which the sysexit instruction requires - * for %eip. Thus, exactly as for arg2, we must push and pop. - * - * Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter - * instruction clobbers %esp, the user's %esp won't even survive entry - * into the kernel. We store %esp in %ebp. Code in entry.S must fetch - * arg6 from the stack. - * - * You can not use this vsyscall for the clone() syscall because the - * three words on the parent stack do not get copied to the child. - */ - .text - .globl __kernel_vsyscall - .type __kernel_vsyscall,@function - ALIGN -__kernel_vsyscall: -.LSTART_vsyscall: - push %ecx -.Lpush_ecx: - push %edx -.Lpush_edx: - push %ebp -.Lenter_kernel: - movl %esp,%ebp - sysenter - - /* 7: align return point with nop's to make disassembly easier */ - .space 7,0x90 - - /* 14: System call restart point is here! (SYSENTER_RETURN-2) */ - int $0x80 - /* 16: System call normal return point is here! */ -VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */ - pop %ebp -.Lpop_ebp: - pop %edx -.Lpop_edx: - pop %ecx -.Lpop_ecx: - ret -.LEND_vsyscall: - .size __kernel_vsyscall,.-.LSTART_vsyscall - .previous - - .section .eh_frame,"a",@progbits -.LSTARTFRAMEDLSI: - .long .LENDCIEDLSI-.LSTARTCIEDLSI -.LSTARTCIEDLSI: - .long 0 /* CIE ID */ - .byte 1 /* Version number */ - .string "zR" /* NUL-terminated augmentation string */ - .uleb128 1 /* Code alignment factor */ - .sleb128 -4 /* Data alignment factor */ - .byte 8 /* Return address register column */ - .uleb128 1 /* Augmentation value length */ - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ - .byte 0x0c /* DW_CFA_def_cfa */ - .uleb128 4 - .uleb128 4 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ - .uleb128 1 - .align 4 -.LENDCIEDLSI: - .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */ -.LSTARTFDEDLSI: - .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */ - .long .LSTART_vsyscall-. /* PC-relative start address */ - .long .LEND_vsyscall-.LSTART_vsyscall - .uleb128 0 - /* What follows are the instructions for the table generation. - We have to record all changes of the stack pointer. */ - .byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x08 /* RA at offset 8 now */ - .byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x0c /* RA at offset 12 now */ - .byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x10 /* RA at offset 16 now */ - .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */ - /* Finally the epilogue. */ - .byte 0x40 + (.Lpop_ebp-.Lenter_kernel) /* DW_CFA_advance_loc */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x0c /* RA at offset 12 now */ - .byte 0xc5 /* DW_CFA_restore %ebp */ - .byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x08 /* RA at offset 8 now */ - .byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */ - .byte 0x0e /* DW_CFA_def_cfa_offset */ - .byte 0x04 /* RA at offset 4 now */ - .align 4 -.LENDFDEDLSI: - .previous - - /* - * Emit a symbol with the size of this .eh_frame data, - * to verify it matches the other versions. - */ -VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI) diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S new file mode 100644 index 000000000000..93bd8452383f --- /dev/null +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -0,0 +1,57 @@ +/* + * Code for the vDSO. This version uses the old int $0x80 method. +*/ + +#include <asm/dwarf2.h> +#include <asm/cpufeature.h> +#include <asm/alternative-asm.h> + +/* + * First get the common code for the sigreturn entry points. + * This must come first. + */ +#include "sigreturn.S" + + .text + .globl __kernel_vsyscall + .type __kernel_vsyscall,@function + ALIGN +__kernel_vsyscall: + CFI_STARTPROC + /* + * Reshuffle regs so that all of any of the entry instructions + * will preserve enough state. + */ + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 + movl %esp, %ecx + +#ifdef CONFIG_X86_64 + /* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */ + ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \ + "syscall", X86_FEATURE_SYSCALL32 +#else + ALTERNATIVE "", "sysenter", X86_FEATURE_SEP +#endif + + /* Enter using int $0x80 */ + movl (%esp), %ecx + int $0x80 +GLOBAL(int80_landing_pad) + + /* Restore ECX and EDX in case they were clobbered. */ + popl %ecx + CFI_RESTORE ecx + CFI_ADJUST_CFA_OFFSET -4 + popl %edx + CFI_RESTORE edx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + + .size __kernel_vsyscall,.-__kernel_vsyscall + .previous diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c index 175cc72c0f68..87a86e017f0e 100644 --- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c +++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -14,11 +14,13 @@ */ #undef CONFIG_64BIT #undef CONFIG_X86_64 +#undef CONFIG_PGTABLE_LEVELS #undef CONFIG_ILLEGAL_POINTER_VALUE #undef CONFIG_SPARSEMEM_VMEMMAP #undef CONFIG_NR_CPUS #define CONFIG_X86_32 1 +#define CONFIG_PGTABLE_LEVELS 2 #define CONFIG_PAGE_OFFSET 0 #define CONFIG_ILLEGAL_POINTER_VALUE 0 #define CONFIG_NR_CPUS 1 diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 434543145d78..64df47148160 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -180,21 +180,10 @@ up_fail: #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static int load_vdso32(void) { - int ret; - if (vdso32_enabled != 1) /* Other values all mean "disabled" */ return 0; - ret = map_vdso(selected_vdso32, false); - if (ret) - return ret; - - if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN) - current_thread_info()->sysenter_return = - current->mm->context.vdso + - selected_vdso32->sym_VDSO32_SYSENTER_RETURN; - - return 0; + return map_vdso(&vdso_image_32, false); } #endif diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index b160c0c6baed..174c2549939d 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -38,7 +38,14 @@ #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; +static enum { EMULATE, NATIVE, NONE } vsyscall_mode = +#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE) + NATIVE; +#elif defined(CONFIG_LEGACY_VSYSCALL_NONE) + NONE; +#else + EMULATE; +#endif static int __init vsyscall_setup(char *str) { diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index a0a19b7ba22d..0552884da18d 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -26,7 +26,7 @@ #include <asm/ptrace.h> #include <asm/ia32_unistd.h> #include <asm/user32.h> -#include <asm/sigcontext32.h> +#include <uapi/asm/sigcontext.h> #include <asm/proto.h> #include <asm/vdso.h> #include <asm/sigframe.h> @@ -68,7 +68,7 @@ } static int ia32_restore_sigcontext(struct pt_regs *regs, - struct sigcontext_ia32 __user *sc) + struct sigcontext_32 __user *sc) { unsigned int tmpflags, err = 0; void __user *buf; @@ -170,7 +170,7 @@ badframe: * Set up a signal frame. */ -static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, +static int ia32_setup_sigcontext(struct sigcontext_32 __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned int mask) { @@ -234,7 +234,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long fx_aligned, math_size; sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); - *fpstate = (struct _fpstate_ia32 __user *) sp; + *fpstate = (struct _fpstate_32 __user *) sp; if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, math_size) < 0) return (void __user *) -1L; @@ -289,7 +289,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, /* Return stub is in 32bit vsyscall page */ if (current->mm->context.vdso) restorer = current->mm->context.vdso + - selected_vdso32->sym___kernel_sigreturn; + vdso_image_32.sym___kernel_sigreturn; else restorer = &frame->retcode; } @@ -368,7 +368,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, restorer = ksig->ka.sa.sa_restorer; else restorer = current->mm->context.vdso + - selected_vdso32->sym___kernel_rt_sigreturn; + vdso_image_32.sym___kernel_rt_sigreturn; put_user_ex(ptr_to_compat(restorer), &frame->pretcode); /* diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 3a45668f6dc3..94c18ebfd68c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -32,6 +32,10 @@ #include <asm/mpspec.h> #include <asm/realmode.h> +#ifdef CONFIG_ACPI_APEI +# include <asm/pgtable_types.h> +#endif + #ifdef CONFIG_ACPI extern int acpi_lapic; extern int acpi_ioapic; @@ -147,4 +151,23 @@ extern int x86_acpi_numa_init(void); #define acpi_unlazy_tlb(x) leave_mm(x) +#ifdef CONFIG_ACPI_APEI +static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) +{ + /* + * We currently have no way to look up the EFI memory map + * attributes for a region in a consistent way, because the + * memmap is discarded after efi_free_boot_services(). So if + * you call efi_mem_attributes() during boot and at runtime, + * you could theoretically see different attributes. + * + * Since we are yet to see any x86 platforms that require + * anything other than PAGE_KERNEL (some arm64 platforms + * require the equivalent of PAGE_KERNEL_NOCACHE), return that + * until we know differently. + */ + return PAGE_KERNEL; +} +#endif + #endif /* _ASM_X86_ACPI_H */ diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 1a5da2e63aee..3c56ef1ae068 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -81,7 +81,7 @@ static inline struct amd_northbridge *node_to_amd_nb(int node) return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } -static inline u16 amd_get_node_id(struct pci_dev *pdev) +static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) { struct pci_dev *misc; int i; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index ebf6d5e5668c..a30316bf801a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -115,6 +115,59 @@ static inline bool apic_is_x2apic_enabled(void) return msr & X2APIC_ENABLE; } +extern void enable_IR_x2apic(void); + +extern int get_physical_broadcast(void); + +extern int lapic_get_maxlvt(void); +extern void clear_local_APIC(void); +extern void disconnect_bsp_APIC(int virt_wire_setup); +extern void disable_local_APIC(void); +extern void lapic_shutdown(void); +extern void sync_Arb_IDs(void); +extern void init_bsp_APIC(void); +extern void setup_local_APIC(void); +extern void init_apic_mappings(void); +void register_lapic_address(unsigned long address); +extern void setup_boot_APIC_clock(void); +extern void setup_secondary_APIC_clock(void); +extern int APIC_init_uniprocessor(void); + +#ifdef CONFIG_X86_64 +static inline int apic_force_enable(unsigned long addr) +{ + return -1; +} +#else +extern int apic_force_enable(unsigned long addr); +#endif + +extern int apic_bsp_setup(bool upmode); +extern void apic_ap_setup(void); + +/* + * On 32bit this is mach-xxx local + */ +#ifdef CONFIG_X86_64 +extern int apic_is_clustered_box(void); +#else +static inline int apic_is_clustered_box(void) +{ + return 0; +} +#endif + +extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); + +#else /* !CONFIG_X86_LOCAL_APIC */ +static inline void lapic_shutdown(void) { } +#define local_apic_timer_c2_ok 1 +static inline void init_apic_mappings(void) { } +static inline void disable_local_APIC(void) { } +# define setup_boot_APIC_clock x86_init_noop +# define setup_secondary_APIC_clock x86_init_noop +#endif /* !CONFIG_X86_LOCAL_APIC */ + #ifdef CONFIG_X86_X2APIC /* * Make previous memory operations globally visible before @@ -186,67 +239,14 @@ static inline int x2apic_enabled(void) } #define x2apic_supported() (cpu_has_x2apic) -#else +#else /* !CONFIG_X86_X2APIC */ static inline void check_x2apic(void) { } static inline void x2apic_setup(void) { } static inline int x2apic_enabled(void) { return 0; } #define x2apic_mode (0) #define x2apic_supported() (0) -#endif - -extern void enable_IR_x2apic(void); - -extern int get_physical_broadcast(void); - -extern int lapic_get_maxlvt(void); -extern void clear_local_APIC(void); -extern void disconnect_bsp_APIC(int virt_wire_setup); -extern void disable_local_APIC(void); -extern void lapic_shutdown(void); -extern void sync_Arb_IDs(void); -extern void init_bsp_APIC(void); -extern void setup_local_APIC(void); -extern void init_apic_mappings(void); -void register_lapic_address(unsigned long address); -extern void setup_boot_APIC_clock(void); -extern void setup_secondary_APIC_clock(void); -extern int APIC_init_uniprocessor(void); - -#ifdef CONFIG_X86_64 -static inline int apic_force_enable(unsigned long addr) -{ - return -1; -} -#else -extern int apic_force_enable(unsigned long addr); -#endif - -extern int apic_bsp_setup(bool upmode); -extern void apic_ap_setup(void); - -/* - * On 32bit this is mach-xxx local - */ -#ifdef CONFIG_X86_64 -extern int apic_is_clustered_box(void); -#else -static inline int apic_is_clustered_box(void) -{ - return 0; -} -#endif - -extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); - -#else /* !CONFIG_X86_LOCAL_APIC */ -static inline void lapic_shutdown(void) { } -#define local_apic_timer_c2_ok 1 -static inline void init_apic_mappings(void) { } -static inline void disable_local_APIC(void) { } -# define setup_boot_APIC_clock x86_init_noop -# define setup_secondary_APIC_clock x86_init_noop -#endif /* !CONFIG_X86_LOCAL_APIC */ +#endif /* !CONFIG_X86_X2APIC */ #ifdef CONFIG_X86_64 #define SET_APIC_ID(x) (apic->set_apic_id(x)) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index fb52aa644aab..ae5fb83e6d91 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -24,7 +24,7 @@ */ static __always_inline int atomic_read(const atomic_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } /** @@ -36,7 +36,7 @@ static __always_inline int atomic_read(const atomic_t *v) */ static __always_inline void atomic_set(atomic_t *v, int i) { - v->counter = i; + WRITE_ONCE(v->counter, i); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 50e33eff58de..037351022f54 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -18,7 +18,7 @@ */ static inline long atomic64_read(const atomic64_t *v) { - return ACCESS_ONCE((v)->counter); + return READ_ONCE((v)->counter); } /** @@ -30,7 +30,7 @@ static inline long atomic64_read(const atomic64_t *v) */ static inline void atomic64_set(atomic64_t *v, long i) { - v->counter = i; + WRITE_ONCE(v->counter, i); } /** diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index e6cf2ad350d1..e4f8010f22e0 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -12,7 +12,7 @@ #include <asm/disabled-features.h> #endif -#define NCAPINTS 13 /* N 32-bit words worth of info */ +#define NCAPINTS 14 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -193,7 +193,7 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */ -#define X86_FEATURE_HWP_NOITFY ( 7*32+ 11) /* Intel HWP_NOTIFY */ +#define X86_FEATURE_HWP_NOTIFY ( 7*32+ 11) /* Intel HWP_NOTIFY */ #define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */ #define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */ #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */ @@ -255,6 +255,9 @@ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ +/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ +#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h new file mode 100644 index 000000000000..b7a1ab865d68 --- /dev/null +++ b/arch/x86/include/asm/dwarf2.h @@ -0,0 +1,84 @@ +#ifndef _ASM_X86_DWARF2_H +#define _ASM_X86_DWARF2_H + +#ifndef __ASSEMBLY__ +#warning "asm/dwarf2.h should be only included in pure assembly files" +#endif + +/* + * Macros for dwarf2 CFI unwind table entries. + * See "as.info" for details on these pseudo ops. Unfortunately + * they are only supported in very new binutils, so define them + * away for older version. + */ + +#ifdef CONFIG_AS_CFI + +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register +#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset +#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset +#define CFI_OFFSET .cfi_offset +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_REGISTER .cfi_register +#define CFI_RESTORE .cfi_restore +#define CFI_REMEMBER_STATE .cfi_remember_state +#define CFI_RESTORE_STATE .cfi_restore_state +#define CFI_UNDEFINED .cfi_undefined +#define CFI_ESCAPE .cfi_escape + +#ifdef CONFIG_AS_CFI_SIGNAL_FRAME +#define CFI_SIGNAL_FRAME .cfi_signal_frame +#else +#define CFI_SIGNAL_FRAME +#endif + +#if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) +#ifndef BUILD_VDSO + /* + * Emit CFI data in .debug_frame sections, not .eh_frame sections. + * The latter we currently just discard since we don't do DWARF + * unwinding at runtime. So only the offline DWARF information is + * useful to anyone. Note we should not use this directive if + * vmlinux.lds.S gets changed so it doesn't discard .eh_frame. + */ + .cfi_sections .debug_frame +#else + /* + * For the vDSO, emit both runtime unwind information and debug + * symbols for the .dbg file. + */ + .cfi_sections .eh_frame, .debug_frame +#endif +#endif + +#else + +/* + * Due to the structure of pre-exisiting code, don't use assembler line + * comment character # to ignore the arguments. Instead, use a dummy macro. + */ +.macro cfi_ignore a=0, b=0, c=0, d=0 +.endm + +#define CFI_STARTPROC cfi_ignore +#define CFI_ENDPROC cfi_ignore +#define CFI_DEF_CFA cfi_ignore +#define CFI_DEF_CFA_REGISTER cfi_ignore +#define CFI_DEF_CFA_OFFSET cfi_ignore +#define CFI_ADJUST_CFA_OFFSET cfi_ignore +#define CFI_OFFSET cfi_ignore +#define CFI_REL_OFFSET cfi_ignore +#define CFI_REGISTER cfi_ignore +#define CFI_RESTORE cfi_ignore +#define CFI_REMEMBER_STATE cfi_ignore +#define CFI_RESTORE_STATE cfi_ignore +#define CFI_UNDEFINED cfi_ignore +#define CFI_ESCAPE cfi_ignore +#define CFI_SIGNAL_FRAME cfi_ignore + +#endif + +#endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index ab5f1d447ef9..0010c78c4998 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -86,6 +86,7 @@ extern u64 asmlinkage efi_call(void *fp, ...); extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); +#ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. __memset function is present * only in kernel binary. Since the EFI stub linked into a separate binary it @@ -95,6 +96,7 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, #undef memcpy #undef memset #undef memmove +#endif #endif /* CONFIG_X86_32 */ @@ -103,6 +105,7 @@ extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); extern pgd_t * __init efi_call_phys_prolog(void); extern void __init efi_call_phys_epilog(pgd_t *save_pgd); +extern void __init efi_print_memmap(void); extern void __init efi_unmap_memmap(void); extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 141c561f4664..1514753fd435 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -171,11 +171,11 @@ do { \ static inline void elf_common_init(struct thread_struct *t, struct pt_regs *regs, const u16 ds) { - /* Commented-out registers are cleared in stub_execve */ - /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0; - regs->si = regs->di /*= regs->bp*/ = 0; + /* ax gets execve's return value. */ + /*regs->ax = */ regs->bx = regs->cx = regs->dx = 0; + regs->si = regs->di = regs->bp = 0; regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; - /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/ + regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; t->fs = t->gs = 0; t->fsindex = t->gsindex = 0; t->ds = t->es = ds; @@ -328,7 +328,7 @@ else \ #define VDSO_ENTRY \ ((unsigned long)current->mm->context.vdso + \ - selected_vdso32->sym___kernel_vsyscall) + vdso_image_32.sym___kernel_vsyscall) struct linux_binprm; diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 7358e9d61f1e..0e970d00dfcd 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -5,7 +5,7 @@ #define _ASM_X86_FPU_SIGNAL_H #ifdef CONFIG_X86_64 -# include <asm/sigcontext32.h> +# include <uapi/asm/sigcontext.h> # include <asm/user32.h> struct ksignal; int ia32_setup_rt_frame(int sig, struct ksignal *ksig, diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index c49c5173158e..1c6f6ac52ad0 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -95,63 +95,122 @@ struct swregs_state { /* * List of XSAVE features Linux knows about: */ -enum xfeature_bit { - XSTATE_BIT_FP, - XSTATE_BIT_SSE, - XSTATE_BIT_YMM, - XSTATE_BIT_BNDREGS, - XSTATE_BIT_BNDCSR, - XSTATE_BIT_OPMASK, - XSTATE_BIT_ZMM_Hi256, - XSTATE_BIT_Hi16_ZMM, - - XFEATURES_NR_MAX, +enum xfeature { + XFEATURE_FP, + XFEATURE_SSE, + /* + * Values above here are "legacy states". + * Those below are "extended states". + */ + XFEATURE_YMM, + XFEATURE_BNDREGS, + XFEATURE_BNDCSR, + XFEATURE_OPMASK, + XFEATURE_ZMM_Hi256, + XFEATURE_Hi16_ZMM, + + XFEATURE_MAX, }; -#define XSTATE_FP (1 << XSTATE_BIT_FP) -#define XSTATE_SSE (1 << XSTATE_BIT_SSE) -#define XSTATE_YMM (1 << XSTATE_BIT_YMM) -#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) -#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) -#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) -#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) -#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) +#define XFEATURE_MASK_FP (1 << XFEATURE_FP) +#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE) +#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM) +#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS) +#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR) +#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK) +#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256) +#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) + +#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) +#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ + | XFEATURE_MASK_ZMM_Hi256 \ + | XFEATURE_MASK_Hi16_ZMM) + +#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM -#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) -#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) +struct reg_128_bit { + u8 regbytes[128/8]; +}; +struct reg_256_bit { + u8 regbytes[256/8]; +}; +struct reg_512_bit { + u8 regbytes[512/8]; +}; /* + * State component 2: + * * There are 16x 256-bit AVX registers named YMM0-YMM15. * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) - * and are stored in 'struct fxregs_state::xmm_space[]'. + * and are stored in 'struct fxregs_state::xmm_space[]' in the + * "legacy" area. * - * The high 128 bits are stored here: - * 16x 128 bits == 256 bytes. + * The high 128 bits are stored here. */ struct ymmh_struct { - u8 ymmh_space[256]; -}; - -/* We don't support LWP yet: */ -struct lwp_struct { - u8 reserved[128]; -}; + struct reg_128_bit hi_ymm[16]; +} __packed; /* Intel MPX support: */ -struct bndreg { + +struct mpx_bndreg { u64 lower_bound; u64 upper_bound; } __packed; +/* + * State component 3 is used for the 4 128-bit bounds registers + */ +struct mpx_bndreg_state { + struct mpx_bndreg bndreg[4]; +} __packed; -struct bndcsr { +/* + * State component 4 is used for the 64-bit user-mode MPX + * configuration register BNDCFGU and the 64-bit MPX status + * register BNDSTATUS. We call the pair "BNDCSR". + */ +struct mpx_bndcsr { u64 bndcfgu; u64 bndstatus; } __packed; -struct mpx_struct { - struct bndreg bndreg[4]; - struct bndcsr bndcsr; -}; +/* + * The BNDCSR state is padded out to be 64-bytes in size. + */ +struct mpx_bndcsr_state { + union { + struct mpx_bndcsr bndcsr; + u8 pad_to_64_bytes[64]; + }; +} __packed; + +/* AVX-512 Components: */ + +/* + * State component 5 is used for the 8 64-bit opmask registers + * k0-k7 (opmask state). + */ +struct avx_512_opmask_state { + u64 opmask_reg[8]; +} __packed; + +/* + * State component 6 is used for the upper 256 bits of the + * registers ZMM0-ZMM15. These 16 256-bit values are denoted + * ZMM0_H-ZMM15_H (ZMM_Hi256 state). + */ +struct avx_512_zmm_uppers_state { + struct reg_256_bit zmm_upper[16]; +} __packed; + +/* + * State component 7 is used for the 16 512-bit registers + * ZMM16-ZMM31 (Hi16_ZMM state). + */ +struct avx_512_hi16_state { + struct reg_512_bit hi16_zmm[16]; +} __packed; struct xstate_header { u64 xfeatures; @@ -159,22 +218,19 @@ struct xstate_header { u64 reserved[6]; } __attribute__((packed)); -/* New processor state extensions should be added here: */ -#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \ - sizeof(struct lwp_struct) + \ - sizeof(struct mpx_struct) ) /* * This is our most modern FPU state format, as saved by the XSAVE * and restored by the XRSTOR instructions. * * It consists of a legacy fxregs portion, an xstate header and - * subsequent fixed size areas as defined by the xstate header. - * Not all CPUs support all the extensions. + * subsequent areas as defined by the xstate header. Not all CPUs + * support all the extensions, so the size of the extended area + * can vary quite a bit between CPUs. */ struct xregs_state { struct fxregs_state i387; struct xstate_header header; - u8 __reserved[XSTATE_RESERVE]; + u8 extended_state_area[0]; } __attribute__ ((packed, aligned (64))); /* @@ -182,7 +238,9 @@ struct xregs_state { * put together, so that we can pick the right one runtime. * * The size of the structure is determined by the largest - * member - which is the xsave area: + * member - which is the xsave area. The padding is there + * to ensure that statically-allocated task_structs (just + * the init_task today) have enough space. */ union fpregs_state { struct fregs_state fsave; diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 4656b25bb9a7..3a6c89b70307 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -6,7 +6,7 @@ #include <linux/uaccess.h> /* Bit 63 of XCR0 is reserved for future expansion */ -#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) +#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) #define XSTATE_CPUID 0x0000000d @@ -19,14 +19,18 @@ #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) /* Supported features which support lazy state saving */ -#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ - | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) +#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM) /* Supported features which require eager state saving */ -#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) +#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) /* All currently supported features */ -#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) +#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) #ifdef CONFIG_X86_64 #define REX_PREFIX "0x48, " @@ -40,6 +44,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); +void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 5fa9fb0f8809..cc285ec4b2c1 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -63,10 +63,10 @@ /* hpet memory map physical address */ extern unsigned long hpet_address; extern unsigned long force_hpet_address; -extern int boot_hpet_disable; +extern bool boot_hpet_disable; extern u8 hpet_blockid; -extern int hpet_force_user; -extern u8 hpet_msi_disable; +extern bool hpet_force_user; +extern bool hpet_msi_disable; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern void hpet_disable(void); diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 28019765442e..a9bdf5569ab3 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -10,7 +10,7 @@ * 32 bit structures for IA32 support. */ -#include <asm/sigcontext32.h> +#include <uapi/asm/sigcontext.h> /* signal.h */ @@ -18,7 +18,7 @@ struct ucontext_ia32 { unsigned int uc_flags; unsigned int uc_link; compat_stack_t uc_stack; - struct sigcontext_ia32 uc_mcontext; + struct sigcontext_32 uc_mcontext; compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 046c7fb1ca43..a210eba2727c 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -33,6 +33,11 @@ enum irq_remap_cap { IRQ_POSTING_CAP = 0, }; +struct vcpu_data { + u64 pi_desc_addr; /* Physical address of PI Descriptor */ + u32 vector; /* Guest vector of the interrupt */ +}; + #ifdef CONFIG_IRQ_REMAP extern bool irq_remapping_cap(enum irq_remap_cap cap); @@ -58,11 +63,6 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void) return x86_vector_domain; } -struct vcpu_data { - u64 pi_desc_addr; /* Physical address of PI Descriptor */ - u32 vector; /* Guest vector of the interrupt */ -}; - #else /* CONFIG_IRQ_REMAP */ static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; } diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index b130d59406fb..e5f5dc9787d5 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -29,11 +29,5 @@ extern void show_trace(struct task_struct *t, struct pt_regs *regs, extern void __show_regs(struct pt_regs *regs, int all); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -#ifdef CONFIG_KEXEC_CORE -extern int in_crash_kexec; -#else -/* no crash dump is ever in progress if no crash kernel can be kexec'd */ -#define in_crash_kexec 0 -#endif #endif /* _ASM_X86_KDEBUG_H */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index e16466ec473c..e9cd7befcb76 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -112,6 +112,16 @@ struct x86_emulate_ops { struct x86_exception *fault); /* + * read_phys: Read bytes of standard (non-emulated/special) memory. + * Used for descriptor reading. + * @addr: [IN ] Physical address from which to read. + * @val: [OUT] Value read from memory. + * @bytes: [IN ] Number of bytes to read from memory. + */ + int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr, + void *val, unsigned int bytes); + + /* * write_std: Write bytes of standard (non-emulated/special) memory. * Used for descriptor writing. * @addr: [IN ] Linear address to which to write. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2beee0382088..9265196e877f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -24,6 +24,7 @@ #include <linux/perf_event.h> #include <linux/pvclock_gtod.h> #include <linux/clocksource.h> +#include <linux/irqbypass.h> #include <asm/pvclock-abi.h> #include <asm/desc.h> @@ -176,6 +177,8 @@ enum { */ #define KVM_APIC_PV_EOI_PENDING 1 +struct kvm_kernel_irq_routing_entry; + /* * We don't want allocation failures within the mmu code, so we preallocate * enough memory for a single page fault in a cache. @@ -374,6 +377,7 @@ struct kvm_mtrr { /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { u64 hv_vapic; + s64 runtime_offset; }; struct kvm_vcpu_arch { @@ -396,6 +400,7 @@ struct kvm_vcpu_arch { u64 efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ + u64 eoi_exit_bitmap[4]; unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; @@ -573,6 +578,9 @@ struct kvm_vcpu_arch { struct { bool pv_unhalted; } pv; + + int pending_ioapic_eoi; + int pending_external_vector; }; struct kvm_lpage_info { @@ -683,6 +691,9 @@ struct kvm_arch { u32 bsp_vcpu_id; u64 disabled_quirks; + + bool irqchip_split; + u8 nr_reserved_ioapic_pins; }; struct kvm_vm_stat { @@ -819,10 +830,10 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - int (*vm_has_apicv)(struct kvm *kvm); + int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm *kvm, int isr); - void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); @@ -887,6 +898,20 @@ struct kvm_x86_ops { gfn_t offset, unsigned long mask); /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; + + /* + * Architecture specific hooks for vCPU blocking due to + * HLT instruction. + * Returns for .pre_block(): + * - 0 means continue to block the vCPU. + * - 1 means we cannot block the vCPU since some event + * happens during this period, such as, 'ON' bit in + * posted-interrupts descriptor is set. + */ + int (*pre_block)(struct kvm_vcpu *vcpu); + void (*post_block)(struct kvm_vcpu *vcpu); + int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); }; struct kvm_arch_async_pf { @@ -1226,11 +1251,18 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); int kvm_is_in_guest(void); -int __x86_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); -int x86_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem); +int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); +int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu); + +void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm_lapic_irq *irq); + +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 2dbc0bf2b9f3..2ea4527e462f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -123,19 +123,27 @@ struct mca_config { }; struct mce_vendor_flags { - /* - * overflow recovery cpuid bit indicates that overflow - * conditions are not fatal - */ - __u64 overflow_recov : 1, - - /* - * SUCCOR stands for S/W UnCorrectable error COntainment - * and Recovery. It indicates support for data poisoning - * in HW and deferred error interrupts. - */ - succor : 1, - __reserved_0 : 62; + /* + * Indicates that overflow conditions are not fatal, when set. + */ + __u64 overflow_recov : 1, + + /* + * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and + * Recovery. It indicates support for data poisoning in HW and deferred + * error interrupts. + */ + succor : 1, + + /* + * (AMD) SMCA: This bit indicates support for Scalable MCA which expands + * the register space for each MCA bank and also increases number of + * banks. Also, to accommodate the new banks and registers, the MCA + * register space is moved to a new MSR range. + */ + smca : 1, + + __reserved_0 : 61; }; extern struct mce_vendor_flags mce_flags; diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 9e6278c7140e..34e62b1dcfce 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -27,7 +27,6 @@ struct cpu_signature { struct device; enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; -extern bool dis_ucode_ldr; struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, @@ -55,6 +54,12 @@ struct ucode_cpu_info { }; extern struct ucode_cpu_info ucode_cpu_info[]; +#ifdef CONFIG_MICROCODE +int __init microcode_init(void); +#else +static inline int __init microcode_init(void) { return 0; }; +#endif + #ifdef CONFIG_MICROCODE_INTEL extern struct microcode_ops * __init init_intel_microcode(void); #else @@ -75,7 +80,6 @@ static inline struct microcode_ops * __init init_amd_microcode(void) static inline void __exit exit_amd_microcode(void) {} #endif -#ifdef CONFIG_MICROCODE_EARLY #define MAX_UCODE_COUNT 128 #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) @@ -150,22 +154,18 @@ static inline unsigned int x86_model(unsigned int sig) return model; } +#ifdef CONFIG_MICROCODE extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); extern int __init save_microcode_in_initrd(void); void reload_early_microcode(void); extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); #else -static inline void __init load_ucode_bsp(void) {} -static inline void load_ucode_ap(void) {} -static inline int __init save_microcode_in_initrd(void) -{ - return 0; -} -static inline void reload_early_microcode(void) {} -static inline bool get_builtin_firmware(struct cpio_data *cd, const char *name) -{ - return false; -} +static inline void __init load_ucode_bsp(void) { } +static inline void load_ucode_ap(void) { } +static inline int __init save_microcode_in_initrd(void) { return 0; } +static inline void reload_early_microcode(void) { } +static inline bool +get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } #endif #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index ac6d328977a6..adfc847a395e 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -64,7 +64,7 @@ extern enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, s #define PATCH_MAX_SIZE PAGE_SIZE extern u8 amd_ucode_patch[PATCH_MAX_SIZE]; -#ifdef CONFIG_MICROCODE_AMD_EARLY +#ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); extern void load_ucode_amd_ap(void); extern int __init save_microcode_in_initrd_amd(void); @@ -76,4 +76,5 @@ static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; } void reload_ucode_amd(void) {} #endif +extern bool check_current_patch_level(u32 *rev, bool early); #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 7991c606125d..8559b0102ea1 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -57,7 +57,7 @@ extern int has_newer_microcode(void *mc, unsigned int csig, int cpf, int rev); extern int microcode_sanity_check(void *mc, int print_err); extern int find_matching_signature(void *mc, unsigned int csig, int cpf); -#ifdef CONFIG_MICROCODE_INTEL_EARLY +#ifdef CONFIG_MICROCODE_INTEL extern void __init load_ucode_intel_bsp(void); extern void load_ucode_intel_ap(void); extern void show_ucode_info_early(void); @@ -71,13 +71,9 @@ static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; static inline void reload_ucode_intel(void) {} #endif -#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) +#ifdef CONFIG_HOTPLUG_CPU extern int save_mc_for_early(u8 *mc); #else -static inline int save_mc_for_early(u8 *mc) -{ - return 0; -} +static inline int save_mc_for_early(u8 *mc) { return 0; } #endif - #endif /* _ASM_X86_MICROCODE_INTEL_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b98b471a3b7e..9f3905697f12 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -141,6 +141,8 @@ #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define MSR_PEBS_FRONTEND 0x000003f7 + #define MSR_IA32_POWER_CTL 0x000001fc #define MSR_IA32_MC0_CTL 0x00000400 @@ -204,6 +206,13 @@ #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 +/* Config TDP MSRs */ +#define MSR_CONFIG_TDP_NOMINAL 0x00000648 +#define MSR_CONFIG_TDP_LEVEL1 0x00000649 +#define MSR_CONFIG_TDP_LEVEL2 0x0000064A +#define MSR_CONFIG_TDP_CONTROL 0x0000064B +#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C + /* Hardware P state interface */ #define MSR_PPERF 0x0000064e #define MSR_PERF_LIMIT_REASONS 0x0000064f diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h index 1c6f7f6212c1..c64373a2d731 100644 --- a/arch/x86/include/asm/numachip/numachip.h +++ b/arch/x86/include/asm/numachip/numachip.h @@ -14,6 +14,7 @@ #ifndef _ASM_X86_NUMACHIP_NUMACHIP_H #define _ASM_X86_NUMACHIP_NUMACHIP_H +extern u8 numachip_system; extern int __init pci_numachip_init(void); #endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */ diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h index 660f843df928..29719eecdc2e 100644 --- a/arch/x86/include/asm/numachip/numachip_csr.h +++ b/arch/x86/include/asm/numachip/numachip_csr.h @@ -14,12 +14,8 @@ #ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H #define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H -#include <linux/numa.h> -#include <linux/percpu.h> +#include <linux/smp.h> #include <linux/io.h> -#include <linux/swab.h> -#include <asm/types.h> -#include <asm/processor.h> #define CSR_NODE_SHIFT 16 #define CSR_NODE_BITS(p) (((unsigned long)(p)) << CSR_NODE_SHIFT) @@ -27,11 +23,8 @@ /* 32K CSR space, b15 indicates geo/non-geo */ #define CSR_OFFSET_MASK 0x7fffUL - -/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */ -#define NUMACHIP_GCSR_BASE 0x3fff00000000ULL -#define NUMACHIP_GCSR_LIM 0x3fff0fffffffULL -#define NUMACHIP_GCSR_SIZE (NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1) +#define CSR_G0_NODE_IDS (0x008 + (0 << 12)) +#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12)) /* * Local CSR space starts in global CSR space with "nodeid" = 0xfff0, however @@ -41,12 +34,7 @@ #define NUMACHIP_LCSR_BASE 0x3ffffe000000ULL #define NUMACHIP_LCSR_LIM 0x3fffffffffffULL #define NUMACHIP_LCSR_SIZE (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1) - -static inline void *gcsr_address(int node, unsigned long offset) -{ - return __va(NUMACHIP_GCSR_BASE | (1UL << 15) | - CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK)); -} +#define NUMACHIP_LAPIC_BITS 8 static inline void *lcsr_address(unsigned long offset) { @@ -54,114 +42,57 @@ static inline void *lcsr_address(unsigned long offset) CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK)); } -static inline unsigned int read_gcsr(int node, unsigned long offset) +static inline unsigned int read_lcsr(unsigned long offset) { - return swab32(readl(gcsr_address(node, offset))); + return swab32(readl(lcsr_address(offset))); } -static inline void write_gcsr(int node, unsigned long offset, unsigned int val) +static inline void write_lcsr(unsigned long offset, unsigned int val) { - writel(swab32(val), gcsr_address(node, offset)); + writel(swab32(val), lcsr_address(offset)); } -static inline unsigned int read_lcsr(unsigned long offset) +/* + * On NumaChip2, local CSR space is 16MB and starts at fixed offset below 4G + */ + +#define NUMACHIP2_LCSR_BASE 0xf0000000UL +#define NUMACHIP2_LCSR_SIZE 0x1000000UL +#define NUMACHIP2_APIC_ICR 0x100000 +#define NUMACHIP2_TIMER_DEADLINE 0x200000 +#define NUMACHIP2_TIMER_INT 0x200008 +#define NUMACHIP2_TIMER_NOW 0x200018 +#define NUMACHIP2_TIMER_RESET 0x200020 + +static inline void __iomem *numachip2_lcsr_address(unsigned long offset) { - return swab32(readl(lcsr_address(offset))); + return (void __iomem *)__va(NUMACHIP2_LCSR_BASE | + (offset & (NUMACHIP2_LCSR_SIZE - 1))); } -static inline void write_lcsr(unsigned long offset, unsigned int val) +static inline u32 numachip2_read32_lcsr(unsigned long offset) { - writel(swab32(val), lcsr_address(offset)); + return readl(numachip2_lcsr_address(offset)); } -/* ========================================================================= */ -/* CSR_G0_STATE_CLEAR */ -/* ========================================================================= */ - -#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12)) -union numachip_csr_g0_state_clear { - unsigned int v; - struct numachip_csr_g0_state_clear_s { - unsigned int _state:2; - unsigned int _rsvd_2_6:5; - unsigned int _lost:1; - unsigned int _rsvd_8_31:24; - } s; -}; - -/* ========================================================================= */ -/* CSR_G0_NODE_IDS */ -/* ========================================================================= */ +static inline u64 numachip2_read64_lcsr(unsigned long offset) +{ + return readq(numachip2_lcsr_address(offset)); +} -#define CSR_G0_NODE_IDS (0x008 + (0 << 12)) -union numachip_csr_g0_node_ids { - unsigned int v; - struct numachip_csr_g0_node_ids_s { - unsigned int _initialid:16; - unsigned int _nodeid:12; - unsigned int _rsvd_28_31:4; - } s; -}; - -/* ========================================================================= */ -/* CSR_G3_EXT_IRQ_GEN */ -/* ========================================================================= */ +static inline void numachip2_write32_lcsr(unsigned long offset, u32 val) +{ + writel(val, numachip2_lcsr_address(offset)); +} -#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12)) -union numachip_csr_g3_ext_irq_gen { - unsigned int v; - struct numachip_csr_g3_ext_irq_gen_s { - unsigned int _vector:8; - unsigned int _msgtype:3; - unsigned int _index:5; - unsigned int _destination_apic_id:16; - } s; -}; - -/* ========================================================================= */ -/* CSR_G3_EXT_IRQ_STATUS */ -/* ========================================================================= */ - -#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12)) -union numachip_csr_g3_ext_irq_status { - unsigned int v; - struct numachip_csr_g3_ext_irq_status_s { - unsigned int _result:32; - } s; -}; - -/* ========================================================================= */ -/* CSR_G3_EXT_IRQ_DEST */ -/* ========================================================================= */ - -#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12)) -union numachip_csr_g3_ext_irq_dest { - unsigned int v; - struct numachip_csr_g3_ext_irq_dest_s { - unsigned int _irq:8; - unsigned int _rsvd_8_31:24; - } s; -}; - -/* ========================================================================= */ -/* CSR_G3_NC_ATT_MAP_SELECT */ -/* ========================================================================= */ - -#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12)) -union numachip_csr_g3_nc_att_map_select { - unsigned int v; - struct numachip_csr_g3_nc_att_map_select_s { - unsigned int _upper_address_bits:4; - unsigned int _select_ram:4; - unsigned int _rsvd_8_31:24; - } s; -}; - -/* ========================================================================= */ -/* CSR_G3_NC_ATT_MAP_SELECT_0-255 */ -/* ========================================================================= */ - -#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12)) +static inline void numachip2_write64_lcsr(unsigned long offset, u64 val) +{ + writeq(val, numachip2_lcsr_address(offset)); +} -#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */ +static inline unsigned int numachip2_timer(void) +{ + return (smp_processor_id() % 48) << 6; +} +#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 4edd53b79a81..4928cf0d5af0 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -26,9 +26,6 @@ #define MCE_STACK 4 #define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ -#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) - /* * Set __PAGE_OFFSET to the most negative possible address + * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c7c712f2648b..c5b7fb2774d0 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -20,6 +20,9 @@ #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) +#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) + #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 867da5bbb4a3..6ec0c8b2e9df 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -19,6 +19,13 @@ #include <asm/x86_init.h> void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); +void ptdump_walk_pgd_level_checkwx(void); + +#ifdef CONFIG_DEBUG_WX +#define debug_checkwx() ptdump_walk_pgd_level_checkwx() +#else +#define debug_checkwx() do { } while (0) +#endif /* * ZERO_PAGE is a global shared page that is always zero: used @@ -142,12 +149,12 @@ static inline unsigned long pte_pfn(pte_t pte) static inline unsigned long pmd_pfn(pmd_t pmd) { - return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; + return (pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT; } static inline unsigned long pud_pfn(pud_t pud) { - return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT; + return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT; } #define pte_page(pte) pfn_to_page(pte_pfn(pte)) @@ -318,6 +325,16 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); } +static inline pte_t pte_clear_soft_dirty(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); +} + +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY); +} + #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ /* @@ -379,7 +396,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) return __pgprot(preservebits | addbits); } -#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) +#define pte_pgprot(x) __pgprot(pte_flags(x)) +#define pmd_pgprot(x) __pgprot(pmd_flags(x)) +#define pud_pgprot(x) __pgprot(pud_flags(x)) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) @@ -502,14 +521,15 @@ static inline int pmd_none(pmd_t pmd) static inline unsigned long pmd_page_vaddr(pmd_t pmd) { - return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK); + return (unsigned long)__va(pmd_val(pmd) & pmd_pfn_mask(pmd)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT) +#define pmd_page(pmd) \ + pfn_to_page((pmd_val(pmd) & pmd_pfn_mask(pmd)) >> PAGE_SHIFT) /* * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] @@ -570,14 +590,15 @@ static inline int pud_present(pud_t pud) static inline unsigned long pud_page_vaddr(pud_t pud) { - return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK); + return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud)); } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) +#define pud_page(pud) \ + pfn_to_page((pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT) /* Find an entry in the second-level page table.. */ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 13f310bfc09a..dd5b0aa9dd2f 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -209,10 +209,10 @@ enum page_cache_mode { #include <linux/types.h> -/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ +/* Extracts the PFN from a (pte|pmd|pud|pgd)val_t of a 4KB page */ #define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) -/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ +/* Extracts the flags from a (pte|pmd|pud|pgd)val_t of a 4KB page */ #define PTE_FLAGS_MASK (~PTE_PFN_MASK) typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; @@ -276,14 +276,46 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) } #endif +static inline pudval_t pud_pfn_mask(pud_t pud) +{ + if (native_pud_val(pud) & _PAGE_PSE) + return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK; + else + return PTE_PFN_MASK; +} + +static inline pudval_t pud_flags_mask(pud_t pud) +{ + if (native_pud_val(pud) & _PAGE_PSE) + return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK); + else + return ~PTE_PFN_MASK; +} + static inline pudval_t pud_flags(pud_t pud) { - return native_pud_val(pud) & PTE_FLAGS_MASK; + return native_pud_val(pud) & pud_flags_mask(pud); +} + +static inline pmdval_t pmd_pfn_mask(pmd_t pmd) +{ + if (native_pmd_val(pmd) & _PAGE_PSE) + return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK; + else + return PTE_PFN_MASK; +} + +static inline pmdval_t pmd_flags_mask(pmd_t pmd) +{ + if (native_pmd_val(pmd) & _PAGE_PSE) + return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK); + else + return ~PTE_PFN_MASK; } static inline pmdval_t pmd_flags(pmd_t pmd) { - return native_pmd_val(pmd) & PTE_FLAGS_MASK; + return native_pmd_val(pmd) & pmd_flags_mask(pmd); } static inline pte_t native_make_pte(pteval_t val) diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index b12f81022a6b..01bcde84d3e4 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -30,12 +30,9 @@ static __always_inline void preempt_count_set(int pc) /* * must be macros to avoid header recursion hell */ -#define init_task_preempt_count(p) do { \ - task_thread_info(p)->saved_preempt_count = PREEMPT_DISABLED; \ -} while (0) +#define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->saved_preempt_count = PREEMPT_ENABLED; \ per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ } while (0) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 19577dd325fa..67522256c7ff 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -11,7 +11,7 @@ struct vm86; #include <asm/math_emu.h> #include <asm/segment.h> #include <asm/types.h> -#include <asm/sigcontext.h> +#include <uapi/asm/sigcontext.h> #include <asm/current.h> #include <asm/cpufeature.h> #include <asm/page.h> @@ -556,12 +556,12 @@ static inline unsigned int cpuid_edx(unsigned int op) } /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) +static __always_inline void rep_nop(void) { asm volatile("rep; nop" ::: "memory"); } -static inline void cpu_relax(void) +static __always_inline void cpu_relax(void) { rep_nop(); } diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 655e07a48f6c..67f08230103a 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h @@ -41,6 +41,7 @@ struct pvclock_wall_clock { #define PVCLOCK_TSC_STABLE_BIT (1 << 0) #define PVCLOCK_GUEST_STOPPED (1 << 1) +/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */ #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 9dfce4e0417d..e6cd2c489dbb 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -1,79 +1,8 @@ #ifndef _ASM_X86_SIGCONTEXT_H #define _ASM_X86_SIGCONTEXT_H -#include <uapi/asm/sigcontext.h> - -#ifdef __i386__ -struct sigcontext { - unsigned short gs, __gsh; - unsigned short fs, __fsh; - unsigned short es, __esh; - unsigned short ds, __dsh; - unsigned long di; - unsigned long si; - unsigned long bp; - unsigned long sp; - unsigned long bx; - unsigned long dx; - unsigned long cx; - unsigned long ax; - unsigned long trapno; - unsigned long err; - unsigned long ip; - unsigned short cs, __csh; - unsigned long flags; - unsigned long sp_at_signal; - unsigned short ss, __ssh; +/* This is a legacy header - all kernel code includes <uapi/asm/sigcontext.h> directly. */ - /* - * fpstate is really (struct _fpstate *) or (struct _xstate *) - * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved - * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the definition of - * (struct _fpx_sw_bytes) - */ - void __user *fpstate; /* zero when no FPU/extended context */ - unsigned long oldmask; - unsigned long cr2; -}; -#else /* __i386__ */ -struct sigcontext { - unsigned long r8; - unsigned long r9; - unsigned long r10; - unsigned long r11; - unsigned long r12; - unsigned long r13; - unsigned long r14; - unsigned long r15; - unsigned long di; - unsigned long si; - unsigned long bp; - unsigned long bx; - unsigned long dx; - unsigned long ax; - unsigned long cx; - unsigned long sp; - unsigned long ip; - unsigned long flags; - unsigned short cs; - unsigned short gs; - unsigned short fs; - unsigned short __pad0; - unsigned long err; - unsigned long trapno; - unsigned long oldmask; - unsigned long cr2; +#include <uapi/asm/sigcontext.h> - /* - * fpstate is really (struct _fpstate *) or (struct _xstate *) - * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved - * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end - * of extended memory layout. See comments at the definition of - * (struct _fpx_sw_bytes) - */ - void __user *fpstate; /* zero when no FPU/extended context */ - unsigned long reserved1[8]; -}; -#endif /* !__i386__ */ #endif /* _ASM_X86_SIGCONTEXT_H */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 1f3175bb994e..34edd1650bae 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_SIGFRAME_H #define _ASM_X86_SIGFRAME_H -#include <asm/sigcontext.h> +#include <uapi/asm/sigcontext.h> #include <asm/siginfo.h> #include <asm/ucontext.h> #include <linux/compat.h> @@ -9,8 +9,6 @@ #ifdef CONFIG_X86_32 #define sigframe_ia32 sigframe #define rt_sigframe_ia32 rt_sigframe -#define sigcontext_ia32 sigcontext -#define _fpstate_ia32 _fpstate #define ucontext_ia32 ucontext #else /* !CONFIG_X86_32 */ @@ -24,7 +22,7 @@ struct sigframe_ia32 { u32 pretcode; int sig; - struct sigcontext_ia32 sc; + struct sigcontext_32 sc; /* * fpstate is unused. fpstate is moved/allocated after * retcode[] below. This movement allows to have the FP state and the @@ -33,7 +31,7 @@ struct sigframe_ia32 { * the offset of extramask[] in the sigframe and thus prevent any * legacy application accessing/modifying it. */ - struct _fpstate_ia32 fpstate_unused; + struct _fpstate_32 fpstate_unused; #ifdef CONFIG_IA32_EMULATION unsigned int extramask[_COMPAT_NSIG_WORDS-1]; #else /* !CONFIG_IA32_EMULATION */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index c481be78fcf1..2138c9ae19ee 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -34,7 +34,7 @@ extern void do_signal(struct pt_regs *regs); #define __ARCH_HAS_SA_RESTORER -#include <asm/sigcontext.h> +#include <uapi/asm/sigcontext.h> #ifdef __i386__ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index e4661196994e..ff8b9a17dc4b 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -27,12 +27,11 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t function. */ #define __HAVE_ARCH_MEMCPY 1 +extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); #ifndef CONFIG_KMEMCHECK -#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 -extern void *memcpy(void *to, const void *from, size_t len); -#else +#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 #define memcpy(dst, src, len) \ ({ \ size_t __len = (len); \ diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index d7f3b3b78ac3..751bf4b7bf11 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -79,12 +79,12 @@ do { \ #else /* CONFIG_X86_32 */ /* frame pointer must be last for get_wchan */ -#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" -#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" +#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" +#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t" #define __EXTRA_CLOBBER \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ - "r12", "r13", "r14", "r15" + "r12", "r13", "r14", "r15", "flags" #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ @@ -100,7 +100,11 @@ do { \ #define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ -/* Save restore flags to clear handle leaking NT */ +/* + * There is no need to save or restore flags, because flags are always + * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL + * has no effect. + */ #define switch_to(prev, next, last) \ asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index d6a756ae04c8..999b7cd2e78c 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -20,9 +20,21 @@ #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> -typedef void (*sys_call_ptr_t)(void); +typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned long, unsigned long); extern const sys_call_ptr_t sys_call_table[]; +#if defined(CONFIG_X86_32) +#define ia32_sys_call_table sys_call_table +#define __NR_syscall_compat_max __NR_syscall_max +#define IA32_NR_syscalls NR_syscalls +#endif + +#if defined(CONFIG_IA32_EMULATION) +extern const sys_call_ptr_t ia32_sys_call_table[]; +#endif + /* * Only the low 32 bits of orig_ax are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8afdc3e44247..c7b551028740 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -57,9 +57,7 @@ struct thread_info { __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int saved_preempt_count; mm_segment_t addr_limit; - void __user *sysenter_return; unsigned int sig_on_uaccess_error:1; unsigned int uaccess_err:1; /* uaccess failed */ }; @@ -69,7 +67,6 @@ struct thread_info { .task = &tsk, \ .flags = 0, \ .cpu = 0, \ - .saved_preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ } diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h index 173dd3ba108c..0f492fc50bce 100644 --- a/arch/x86/include/asm/trace/mpx.h +++ b/arch/x86/include/asm/trace/mpx.h @@ -11,7 +11,7 @@ TRACE_EVENT(mpx_bounds_register_exception, TP_PROTO(void *addr_referenced, - const struct bndreg *bndreg), + const struct mpx_bndreg *bndreg), TP_ARGS(addr_referenced, bndreg), TP_STRUCT__entry( @@ -44,7 +44,7 @@ TRACE_EVENT(mpx_bounds_register_exception, TRACE_EVENT(bounds_exception_mpx, - TP_PROTO(const struct bndcsr *bndcsr), + TP_PROTO(const struct mpx_bndcsr *bndcsr), TP_ARGS(bndcsr), TP_STRUCT__entry( @@ -116,7 +116,8 @@ TRACE_EVENT(mpx_new_bounds_table, /* * This gets used outside of MPX-specific code, so we need a stub. */ -static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr) +static inline +void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr) { } diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a8df874f3e88..09b1b0ab94b7 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -51,13 +51,13 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un * limit, not add it to the address). */ if (__builtin_constant_p(size)) - return addr > limit - size; + return unlikely(addr > limit - size); /* Arbitrary sizes? Be careful about overflow */ addr += size; - if (addr < size) + if (unlikely(addr < size)) return true; - return addr > limit; + return unlikely(addr > limit); } #define __range_not_ok(addr, size, limit) \ @@ -182,7 +182,7 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) : "=a" (__ret_gu), "=r" (__val_gu) \ : "0" (ptr), "i" (sizeof(*(ptr)))); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ - __ret_gu; \ + __builtin_expect(__ret_gu, 0); \ }) #define __put_user_x(size, x, ptr, __ret_pu) \ @@ -278,7 +278,7 @@ extern void __put_user_8(void); __put_user_x(X, __pu_val, ptr, __ret_pu); \ break; \ } \ - __ret_pu; \ + __builtin_expect(__ret_pu, 0); \ }) #define __put_user_size(x, ptr, size, retval, errret) \ @@ -401,7 +401,7 @@ do { \ ({ \ int __pu_err; \ __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ - __pu_err; \ + __builtin_expect(__pu_err, 0); \ }) #define __get_user_nocheck(x, ptr, size) \ @@ -410,7 +410,7 @@ do { \ unsigned long __gu_val; \ __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - __gu_err; \ + __builtin_expect(__gu_err, 0); \ }) /* FIXME: this hack is definitely wrong -AK */ diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index a00ad8f2a657..ea7074784cc4 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -609,7 +609,7 @@ struct uv_cpu_nmi_s { DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); -#define uv_hub_nmi (uv_cpu_nmi.hub) +#define uv_hub_nmi this_cpu_read(uv_cpu_nmi.hub) #define uv_cpu_nmi_per(cpu) (per_cpu(uv_cpu_nmi, cpu)) #define uv_hub_nmi_per(cpu) (uv_cpu_nmi_per(cpu).hub) diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 8021bd28c0f1..756de9190aec 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -26,7 +26,7 @@ struct vdso_image { long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; long sym___kernel_vsyscall; - long sym_VDSO32_SYSENTER_RETURN; + long sym_int80_landing_pad; }; #ifdef CONFIG_X86_64 @@ -38,13 +38,7 @@ extern const struct vdso_image vdso_image_x32; #endif #if defined CONFIG_X86_32 || defined CONFIG_COMPAT -extern const struct vdso_image vdso_image_32_int80; -#ifdef CONFIG_COMPAT -extern const struct vdso_image vdso_image_32_syscall; -#endif -extern const struct vdso_image vdso_image_32_sysenter; - -extern const struct vdso_image *selected_vdso32; +extern const struct vdso_image vdso_image_32; #endif extern void __init init_vdso_image(const struct vdso_image *image); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 448b7ca61aee..aa336ff3e03e 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -72,7 +72,7 @@ #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_XSAVES 0x00100000 - +#define SECONDARY_EXEC_PCOMMIT 0x00200000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 @@ -416,6 +416,7 @@ enum vmcs_field { #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) +#define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */ #define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */ #define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 83aea8055119..4c20dd333412 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -336,10 +336,10 @@ HYPERVISOR_update_descriptor(u64 ma, u64 desc) return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); } -static inline int +static inline long HYPERVISOR_memory_op(unsigned int cmd, void *arg) { - return _hypercall2(int, memory_op, cmd, arg); + return _hypercall2(long, memory_op, cmd, arg); } static inline int diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index d866959e5685..8b2d4bea9962 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -57,4 +57,9 @@ static inline bool xen_x2apic_para_available(void) } #endif +#ifdef CONFIG_HOTPLUG_CPU +void xen_arch_register_cpu(int num); +void xen_arch_unregister_cpu(int num); +#endif + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 0679e11d2cf7..f5fb840b43e8 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -12,7 +12,7 @@ #include <asm/pgtable.h> #include <xen/interface/xen.h> -#include <xen/grant_table.h> +#include <xen/interface/grant_table.h> #include <xen/features.h> /* Xen machine address */ @@ -43,6 +43,8 @@ extern unsigned long *xen_p2m_addr; extern unsigned long xen_p2m_size; extern unsigned long xen_max_p2m_pfn; +extern int xen_alloc_p2m_entry(unsigned long pfn); + extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); @@ -296,8 +298,8 @@ void make_lowmem_page_readwrite(void *vaddr); #define xen_unmap(cookie) iounmap((cookie)) static inline bool xen_arch_need_swiotlb(struct device *dev, - unsigned long pfn, - unsigned long bfn) + phys_addr_t phys, + dma_addr_t dev_addr) { return false; } diff --git a/arch/x86/include/uapi/asm/bitsperlong.h b/arch/x86/include/uapi/asm/bitsperlong.h index b0ae1c4dc791..217909b4d6f5 100644 --- a/arch/x86/include/uapi/asm/bitsperlong.h +++ b/arch/x86/include/uapi/asm/bitsperlong.h @@ -1,7 +1,7 @@ #ifndef __ASM_X86_BITSPERLONG_H #define __ASM_X86_BITSPERLONG_H -#ifdef __x86_64__ +#if defined(__x86_64__) && !defined(__ILP32__) # define __BITS_PER_LONG 64 #else # define __BITS_PER_LONG 32 diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index f0412c50c47b..040d4083c24f 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -153,6 +153,12 @@ /* MSR used to provide vcpu index */ #define HV_X64_MSR_VP_INDEX 0x40000002 +/* MSR used to reset the guest OS. */ +#define HV_X64_MSR_RESET 0x40000003 + +/* MSR used to provide vcpu runtime in 100ns units */ +#define HV_X64_MSR_VP_RUNTIME 0x40000010 + /* MSR used to read the per-partition time reference counter */ #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 @@ -251,4 +257,16 @@ typedef struct _HV_REFERENCE_TSC_PAGE { __s64 tsc_offset; } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; +/* Define the number of synthetic interrupt sources. */ +#define HV_SYNIC_SINT_COUNT (16) +/* Define the expected SynIC version. */ +#define HV_SYNIC_VERSION_1 (0x1) + +#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) +#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) +#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) +#define HV_SYNIC_SINT_MASKED (1ULL << 16) +#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) +#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) + #endif diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 76880ede9a35..03429da2fa80 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -2,7 +2,7 @@ #define _UAPI_ASM_X86_MCE_H #include <linux/types.h> -#include <asm/ioctls.h> +#include <linux/ioctl.h> /* Fields are zero when not available */ struct mce { diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 40836a9a7250..d485232f1e9f 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -1,221 +1,360 @@ #ifndef _UAPI_ASM_X86_SIGCONTEXT_H #define _UAPI_ASM_X86_SIGCONTEXT_H +/* + * Linux signal context definitions. The sigcontext includes a complex + * hierarchy of CPU and FPU state, available to user-space (on the stack) when + * a signal handler is executed. + * + * As over the years this ABI grew from its very simple roots towards + * supporting more and more CPU state organically, some of the details (which + * were rather clever hacks back in the days) became a bit quirky by today. + * + * The current ABI includes flexible provisions for future extensions, so we + * won't have to grow new quirks for quite some time. Promise! + */ + #include <linux/compiler.h> #include <linux/types.h> -#define FP_XSTATE_MAGIC1 0x46505853U -#define FP_XSTATE_MAGIC2 0x46505845U -#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2) +#define FP_XSTATE_MAGIC1 0x46505853U +#define FP_XSTATE_MAGIC2 0x46505845U +#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2) /* - * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame - * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes - * are used to extended the fpstate pointer in the sigcontext, which now - * includes the extended state information along with fpstate information. + * Bytes 464..511 in the current 512-byte layout of the FXSAVE/FXRSTOR frame + * are reserved for SW usage. On CPUs supporting XSAVE/XRSTOR, these bytes are + * used to extend the fpstate pointer in the sigcontext, which now includes the + * extended state information along with fpstate information. + * + * If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then there's a + * sw_reserved.extended_size bytes large extended context area present. (The + * last 32-bit word of this extended area (at the + * fpstate+extended_size-FP_XSTATE_MAGIC2_SIZE address) is set to + * FP_XSTATE_MAGIC2 so that you can sanity check your size calculations.) * - * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved - * area and FP_XSTATE_MAGIC2 at the end of memory layout - * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the - * extended state information in the memory layout pointed by the fpstate - * pointer in sigcontext. + * This extended area typically grows with newer CPUs that have larger and + * larger XSAVE areas. */ struct _fpx_sw_bytes { - __u32 magic1; /* FP_XSTATE_MAGIC1 */ - __u32 extended_size; /* total size of the layout referred by - * fpstate pointer in the sigcontext. - */ - __u64 xfeatures; - /* feature bit mask (including fp/sse/extended - * state) that is present in the memory - * layout. - */ - __u32 xstate_size; /* actual xsave state size, based on the - * features saved in the layout. - * 'extended_size' will be greater than - * 'xstate_size'. - */ - __u32 padding[7]; /* for future use. */ + /* + * If set to FP_XSTATE_MAGIC1 then this is an xstate context. + * 0 if a legacy frame. + */ + __u32 magic1; + + /* + * Total size of the fpstate area: + * + * - if magic1 == 0 then it's sizeof(struct _fpstate) + * - if magic1 == FP_XSTATE_MAGIC1 then it's sizeof(struct _xstate) + * plus extensions (if any) + */ + __u32 extended_size; + + /* + * Feature bit mask (including FP/SSE/extended state) that is present + * in the memory layout: + */ + __u64 xfeatures; + + /* + * Actual XSAVE state size, based on the xfeatures saved in the layout. + * 'extended_size' is greater than 'xstate_size': + */ + __u32 xstate_size; + + /* For future use: */ + __u32 padding[7]; }; -#ifdef __i386__ /* - * As documented in the iBCS2 standard.. - * - * The first part of "struct _fpstate" is just the normal i387 - * hardware setup, the extra "status" word is used to save the - * coprocessor status word before entering the handler. + * As documented in the iBCS2 standard: * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@valinux.com>, May 2000 + * The first part of "struct _fpstate" is just the normal i387 hardware setup, + * the extra "status" word is used to save the coprocessor status word before + * entering the handler. * - * The FPU state data structure has had to grow to accommodate the - * extended FPU state required by the Streaming SIMD Extensions. - * There is no documented standard to accomplish this at the moment. + * The FPU state data structure has had to grow to accommodate the extended FPU + * state required by the Streaming SIMD Extensions. There is no documented + * standard to accomplish this at the moment. */ + +/* 10-byte legacy floating point register: */ struct _fpreg { - unsigned short significand[4]; - unsigned short exponent; + __u16 significand[4]; + __u16 exponent; }; +/* 16-byte floating point register: */ struct _fpxreg { - unsigned short significand[4]; - unsigned short exponent; - unsigned short padding[3]; + __u16 significand[4]; + __u16 exponent; + __u16 padding[3]; }; +/* 16-byte XMM register: */ struct _xmmreg { - unsigned long element[4]; + __u32 element[4]; }; -struct _fpstate { - /* Regular FPU environment */ - unsigned long cw; - unsigned long sw; - unsigned long tag; - unsigned long ipoff; - unsigned long cssel; - unsigned long dataoff; - unsigned long datasel; - struct _fpreg _st[8]; - unsigned short status; - unsigned short magic; /* 0xffff = regular FPU data only */ +#define X86_FXSR_MAGIC 0x0000 + +/* + * The 32-bit FPU frame: + */ +struct _fpstate_32 { + /* Legacy FPU environment: */ + __u32 cw; + __u32 sw; + __u32 tag; + __u32 ipoff; + __u32 cssel; + __u32 dataoff; + __u32 datasel; + struct _fpreg _st[8]; + __u16 status; + __u16 magic; /* 0xffff: regular FPU data only */ + /* 0x0000: FXSR FPU data */ /* FXSR FPU environment */ - unsigned long _fxsr_env[6]; /* FXSR FPU env is ignored */ - unsigned long mxcsr; - unsigned long reserved; - struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ - struct _xmmreg _xmm[8]; - unsigned long padding1[44]; + __u32 _fxsr_env[6]; /* FXSR FPU env is ignored */ + __u32 mxcsr; + __u32 reserved; + struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ + struct _xmmreg _xmm[8]; /* First 8 XMM registers */ + union { + __u32 padding1[44]; /* Second 8 XMM registers plus padding */ + __u32 padding[44]; /* Alias name for old user-space */ + }; union { - unsigned long padding2[12]; - struct _fpx_sw_bytes sw_reserved; /* represents the extended - * state info */ + __u32 padding2[12]; + struct _fpx_sw_bytes sw_reserved; /* Potential extended state is encoded here */ }; }; -#define X86_FXSR_MAGIC 0x0000 - -#ifndef __KERNEL__ /* - * User-space might still rely on the old definition: + * The 64-bit FPU frame. (FXSAVE format and later) + * + * Note1: If sw_reserved.magic1 == FP_XSTATE_MAGIC1 then the structure is + * larger: 'struct _xstate'. Note that 'struct _xstate' embedds + * 'struct _fpstate' so that you can always assume the _fpstate portion + * exists so that you can check the magic value. + * + * Note2: Reserved fields may someday contain valuable data. Always + * save/restore them when you change signal frames. */ -struct sigcontext { - unsigned short gs, __gsh; - unsigned short fs, __fsh; - unsigned short es, __esh; - unsigned short ds, __dsh; - unsigned long edi; - unsigned long esi; - unsigned long ebp; - unsigned long esp; - unsigned long ebx; - unsigned long edx; - unsigned long ecx; - unsigned long eax; - unsigned long trapno; - unsigned long err; - unsigned long eip; - unsigned short cs, __csh; - unsigned long eflags; - unsigned long esp_at_signal; - unsigned short ss, __ssh; - struct _fpstate __user *fpstate; - unsigned long oldmask; - unsigned long cr2; -}; -#endif /* !__KERNEL__ */ - -#else /* __i386__ */ - -/* FXSAVE frame */ -/* Note: reserved1/2 may someday contain valuable data. Always save/restore - them when you change signal frames. */ -struct _fpstate { - __u16 cwd; - __u16 swd; - __u16 twd; /* Note this is not the same as the - 32bit/x87/FSAVE twd */ - __u16 fop; - __u64 rip; - __u64 rdp; - __u32 mxcsr; - __u32 mxcsr_mask; - __u32 st_space[32]; /* 8*16 bytes for each FP-reg */ - __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */ - __u32 reserved2[12]; +struct _fpstate_64 { + __u16 cwd; + __u16 swd; + /* Note this is not the same as the 32-bit/x87/FSAVE twd: */ + __u16 twd; + __u16 fop; + __u64 rip; + __u64 rdp; + __u32 mxcsr; + __u32 mxcsr_mask; + __u32 st_space[32]; /* 8x FP registers, 16 bytes each */ + __u32 xmm_space[64]; /* 16x XMM registers, 16 bytes each */ + __u32 reserved2[12]; union { - __u32 reserved3[12]; - struct _fpx_sw_bytes sw_reserved; /* represents the extended - * state information */ + __u32 reserved3[12]; + struct _fpx_sw_bytes sw_reserved; /* Potential extended state is encoded here */ }; }; -#ifndef __KERNEL__ -/* - * User-space might still rely on the old definition: - */ -struct sigcontext { - __u64 r8; - __u64 r9; - __u64 r10; - __u64 r11; - __u64 r12; - __u64 r13; - __u64 r14; - __u64 r15; - __u64 rdi; - __u64 rsi; - __u64 rbp; - __u64 rbx; - __u64 rdx; - __u64 rax; - __u64 rcx; - __u64 rsp; - __u64 rip; - __u64 eflags; /* RFLAGS */ - __u16 cs; - __u16 gs; - __u16 fs; - __u16 __pad0; - __u64 err; - __u64 trapno; - __u64 oldmask; - __u64 cr2; - struct _fpstate __user *fpstate; /* zero when no FPU context */ -#ifdef __ILP32__ - __u32 __fpstate_pad; +#ifdef __i386__ +# define _fpstate _fpstate_32 +#else +# define _fpstate _fpstate_64 #endif - __u64 reserved1[8]; -}; -#endif /* !__KERNEL__ */ - -#endif /* !__i386__ */ struct _header { - __u64 xfeatures; - __u64 reserved1[2]; - __u64 reserved2[5]; + __u64 xfeatures; + __u64 reserved1[2]; + __u64 reserved2[5]; }; struct _ymmh_state { - /* 16 * 16 bytes for each YMMH-reg */ - __u32 ymmh_space[64]; + /* 16x YMM registers, 16 bytes each: */ + __u32 ymmh_space[64]; }; /* - * Extended state pointed by the fpstate pointer in the sigcontext. - * In addition to the fpstate, information encoded in the xstate_hdr - * indicates the presence of other extended state information - * supported by the processor and OS. + * Extended state pointed to by sigcontext::fpstate. + * + * In addition to the fpstate, information encoded in _xstate::xstate_hdr + * indicates the presence of other extended state information supported + * by the CPU and kernel: */ struct _xstate { - struct _fpstate fpstate; - struct _header xstate_hdr; - struct _ymmh_state ymmh; - /* new processor state extensions go here */ + struct _fpstate fpstate; + struct _header xstate_hdr; + struct _ymmh_state ymmh; + /* New processor state extensions go here: */ +}; + +/* + * The 32-bit signal frame: + */ +struct sigcontext_32 { + __u16 gs, __gsh; + __u16 fs, __fsh; + __u16 es, __esh; + __u16 ds, __dsh; + __u32 di; + __u32 si; + __u32 bp; + __u32 sp; + __u32 bx; + __u32 dx; + __u32 cx; + __u32 ax; + __u32 trapno; + __u32 err; + __u32 ip; + __u16 cs, __csh; + __u32 flags; + __u32 sp_at_signal; + __u16 ss, __ssh; + + /* + * fpstate is really (struct _fpstate *) or (struct _xstate *) + * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved + * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end + * of extended memory layout. See comments at the definition of + * (struct _fpx_sw_bytes) + */ + __u32 fpstate; /* Zero when no FPU/extended context */ + __u32 oldmask; + __u32 cr2; +}; + +/* + * The 64-bit signal frame: + */ +struct sigcontext_64 { + __u64 r8; + __u64 r9; + __u64 r10; + __u64 r11; + __u64 r12; + __u64 r13; + __u64 r14; + __u64 r15; + __u64 di; + __u64 si; + __u64 bp; + __u64 bx; + __u64 dx; + __u64 ax; + __u64 cx; + __u64 sp; + __u64 ip; + __u64 flags; + __u16 cs; + __u16 gs; + __u16 fs; + __u16 __pad0; + __u64 err; + __u64 trapno; + __u64 oldmask; + __u64 cr2; + + /* + * fpstate is really (struct _fpstate *) or (struct _xstate *) + * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved + * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end + * of extended memory layout. See comments at the definition of + * (struct _fpx_sw_bytes) + */ + __u64 fpstate; /* Zero when no FPU/extended context */ + __u64 reserved1[8]; +}; + +/* + * Create the real 'struct sigcontext' type: + */ +#ifdef __KERNEL__ +# ifdef __i386__ +# define sigcontext sigcontext_32 +# else +# define sigcontext sigcontext_64 +# endif +#endif + +/* + * The old user-space sigcontext definition, just in case user-space still + * relies on it. The kernel definition (in asm/sigcontext.h) has unified + * field names but otherwise the same layout. + */ +#ifndef __KERNEL__ + +#define _fpstate_ia32 _fpstate_32 +#define sigcontext_ia32 sigcontext_32 + + +# ifdef __i386__ +struct sigcontext { + __u16 gs, __gsh; + __u16 fs, __fsh; + __u16 es, __esh; + __u16 ds, __dsh; + __u32 edi; + __u32 esi; + __u32 ebp; + __u32 esp; + __u32 ebx; + __u32 edx; + __u32 ecx; + __u32 eax; + __u32 trapno; + __u32 err; + __u32 eip; + __u16 cs, __csh; + __u32 eflags; + __u32 esp_at_signal; + __u16 ss, __ssh; + struct _fpstate __user *fpstate; + __u32 oldmask; + __u32 cr2; }; +# else /* __x86_64__: */ +struct sigcontext { + __u64 r8; + __u64 r9; + __u64 r10; + __u64 r11; + __u64 r12; + __u64 r13; + __u64 r14; + __u64 r15; + __u64 rdi; + __u64 rsi; + __u64 rbp; + __u64 rbx; + __u64 rdx; + __u64 rax; + __u64 rcx; + __u64 rsp; + __u64 rip; + __u64 eflags; /* RFLAGS */ + __u16 cs; + __u16 gs; + __u16 fs; + __u16 __pad0; + __u64 err; + __u64 trapno; + __u64 oldmask; + __u64 cr2; + struct _fpstate __user *fpstate; /* Zero when no FPU context */ +# ifdef __ILP32__ + __u32 __fpstate_pad; +# endif + __u64 reserved1[8]; +}; +# endif /* __x86_64__ */ +#endif /* !__KERNEL__ */ #endif /* _UAPI_ASM_X86_SIGCONTEXT_H */ diff --git a/arch/x86/include/uapi/asm/sigcontext32.h b/arch/x86/include/uapi/asm/sigcontext32.h index ad1478c4ae12..a92b0f0dc09e 100644 --- a/arch/x86/include/uapi/asm/sigcontext32.h +++ b/arch/x86/include/uapi/asm/sigcontext32.h @@ -1,77 +1,8 @@ #ifndef _ASM_X86_SIGCONTEXT32_H #define _ASM_X86_SIGCONTEXT32_H -#include <linux/types.h> +/* This is a legacy file - all the type definitions are in sigcontext.h: */ -/* signal context for 32bit programs. */ - -#define X86_FXSR_MAGIC 0x0000 - -struct _fpreg { - unsigned short significand[4]; - unsigned short exponent; -}; - -struct _fpxreg { - unsigned short significand[4]; - unsigned short exponent; - unsigned short padding[3]; -}; - -struct _xmmreg { - __u32 element[4]; -}; - -/* FSAVE frame with extensions */ -struct _fpstate_ia32 { - /* Regular FPU environment */ - __u32 cw; - __u32 sw; - __u32 tag; /* not compatible to 64bit twd */ - __u32 ipoff; - __u32 cssel; - __u32 dataoff; - __u32 datasel; - struct _fpreg _st[8]; - unsigned short status; - unsigned short magic; /* 0xffff = regular FPU data only */ - - /* FXSR FPU environment */ - __u32 _fxsr_env[6]; - __u32 mxcsr; - __u32 reserved; - struct _fpxreg _fxsr_st[8]; - struct _xmmreg _xmm[8]; /* It's actually 16 */ - __u32 padding[44]; - union { - __u32 padding2[12]; - struct _fpx_sw_bytes sw_reserved; - }; -}; - -struct sigcontext_ia32 { - unsigned short gs, __gsh; - unsigned short fs, __fsh; - unsigned short es, __esh; - unsigned short ds, __dsh; - unsigned int di; - unsigned int si; - unsigned int bp; - unsigned int sp; - unsigned int bx; - unsigned int dx; - unsigned int cx; - unsigned int ax; - unsigned int trapno; - unsigned int err; - unsigned int ip; - unsigned short cs, __csh; - unsigned int flags; - unsigned int sp_at_signal; - unsigned short ss, __ssh; - unsigned int fpstate; /* really (struct _fpstate_ia32 *) */ - unsigned int oldmask; - unsigned int cr2; -}; +#include <asm/sigcontext.h> #endif /* _ASM_X86_SIGCONTEXT32_H */ diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 37fee272618f..5b15d94a33f8 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -78,6 +78,7 @@ #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 +#define EXIT_REASON_PCOMMIT 65 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -126,7 +127,8 @@ { EXIT_REASON_INVVPID, "INVVPID" }, \ { EXIT_REASON_INVPCID, "INVPCID" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ - { EXIT_REASON_XRSTORS, "XRSTORS" } + { EXIT_REASON_XRSTORS, "XRSTORS" }, \ + { EXIT_REASON_PCOMMIT, "PCOMMIT" } #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ded848c20e05..e75907601a41 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -976,6 +976,8 @@ static int __init acpi_parse_madt_lapic_entries(void) { int count; int x2count = 0; + int ret; + struct acpi_subtable_proc madt_proc[2]; if (!cpu_has_apic) return -ENODEV; @@ -999,10 +1001,22 @@ static int __init acpi_parse_madt_lapic_entries(void) acpi_parse_sapic, MAX_LOCAL_APIC); if (!count) { - x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, - acpi_parse_x2apic, MAX_LOCAL_APIC); - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, - acpi_parse_lapic, MAX_LOCAL_APIC); + memset(madt_proc, 0, sizeof(madt_proc)); + madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; + madt_proc[0].handler = acpi_parse_lapic; + madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; + madt_proc[1].handler = acpi_parse_x2apic; + ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, + sizeof(struct acpi_table_madt), + madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); + if (ret < 0) { + printk(KERN_ERR PREFIX + "Error parsing LAPIC/X2APIC entries\n"); + return ret; + } + + x2count = madt_proc[0].count; + count = madt_proc[1].count; } if (!count && !x2count) { printk(KERN_ERR PREFIX "No LAPIC entries present\n"); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 24e94ce454e2..2f69e3b184f6 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1431,7 +1431,7 @@ enum { }; static int x2apic_state; -static inline void __x2apic_disable(void) +static void __x2apic_disable(void) { u64 msr; @@ -1447,7 +1447,7 @@ static inline void __x2apic_disable(void) printk_once(KERN_INFO "x2apic disabled\n"); } -static inline void __x2apic_enable(void) +static void __x2apic_enable(void) { u64 msr; @@ -1807,7 +1807,7 @@ int apic_version[MAX_LOCAL_APIC]; /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -static inline void __smp_spurious_interrupt(u8 vector) +static void __smp_spurious_interrupt(u8 vector) { u32 v; @@ -1848,7 +1848,7 @@ __visible void smp_trace_spurious_interrupt(struct pt_regs *regs) /* * This interrupt should never happen with our APIC/SMP architecture */ -static inline void __smp_error_interrupt(struct pt_regs *regs) +static void __smp_error_interrupt(struct pt_regs *regs) { u32 v; u32 i = 0; diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index b548fd3b764b..38dd5efdd04c 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -11,30 +11,21 @@ * */ -#include <linux/errno.h> -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/ctype.h> #include <linux/init.h> -#include <linux/hardirq.h> -#include <linux/delay.h> #include <asm/numachip/numachip.h> #include <asm/numachip/numachip_csr.h> -#include <asm/smp.h> -#include <asm/apic.h> #include <asm/ipi.h> #include <asm/apic_flat_64.h> #include <asm/pgtable.h> +#include <asm/pci_x86.h> -static int numachip_system __read_mostly; +u8 numachip_system __read_mostly; +static const struct apic apic_numachip1; +static const struct apic apic_numachip2; +static void (*numachip_apic_icr_write)(int apicid, unsigned int val) __read_mostly; -static const struct apic apic_numachip; - -static unsigned int get_apic_id(unsigned long x) +static unsigned int numachip1_get_apic_id(unsigned long x) { unsigned long value; unsigned int id = (x >> 24) & 0xff; @@ -47,7 +38,7 @@ static unsigned int get_apic_id(unsigned long x) return id; } -static unsigned long set_apic_id(unsigned int id) +static unsigned long numachip1_set_apic_id(unsigned int id) { unsigned long x; @@ -55,9 +46,17 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int read_xapic_id(void) +static unsigned int numachip2_get_apic_id(unsigned long x) +{ + u64 mcfg; + + rdmsrl(MSR_FAM10H_MMIO_CONF_BASE, mcfg); + return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24); +} + +static unsigned long numachip2_set_apic_id(unsigned int id) { - return get_apic_id(apic_read(APIC_ID)); + return id << 24; } static int numachip_apic_id_valid(int apicid) @@ -68,7 +67,7 @@ static int numachip_apic_id_valid(int apicid) static int numachip_apic_id_registered(void) { - return physid_isset(read_xapic_id(), phys_cpu_present_map); + return 1; } static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) @@ -76,36 +75,48 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) return initial_apic_id >> index_msb; } -static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static void numachip1_apic_icr_write(int apicid, unsigned int val) { - union numachip_csr_g3_ext_irq_gen int_gen; - - int_gen.s._destination_apic_id = phys_apicid; - int_gen.s._vector = 0; - int_gen.s._msgtype = APIC_DM_INIT >> 8; - int_gen.s._index = 0; - - write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); + write_lcsr(CSR_G3_EXT_IRQ_GEN, (apicid << 16) | val); +} - int_gen.s._msgtype = APIC_DM_STARTUP >> 8; - int_gen.s._vector = start_rip >> 12; +static void numachip2_apic_icr_write(int apicid, unsigned int val) +{ + numachip2_write32_lcsr(NUMACHIP2_APIC_ICR, (apicid << 12) | val); +} - write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); +static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) +{ + numachip_apic_icr_write(phys_apicid, APIC_DM_INIT); + numachip_apic_icr_write(phys_apicid, APIC_DM_STARTUP | + (start_rip >> 12)); return 0; } static void numachip_send_IPI_one(int cpu, int vector) { - union numachip_csr_g3_ext_irq_gen int_gen; - int apicid = per_cpu(x86_cpu_to_apicid, cpu); - - int_gen.s._destination_apic_id = apicid; - int_gen.s._vector = vector; - int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8; - int_gen.s._index = 0; + int local_apicid, apicid = per_cpu(x86_cpu_to_apicid, cpu); + unsigned int dmode; + + preempt_disable(); + local_apicid = __this_cpu_read(x86_cpu_to_apicid); + + /* Send via local APIC where non-local part matches */ + if (!((apicid ^ local_apicid) >> NUMACHIP_LAPIC_BITS)) { + unsigned long flags; + + local_irq_save(flags); + __default_send_IPI_dest_field(apicid, vector, + APIC_DEST_PHYSICAL); + local_irq_restore(flags); + preempt_enable(); + return; + } + preempt_enable(); - write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); + dmode = (vector == NMI_VECTOR) ? APIC_DM_NMI : APIC_DM_FIXED; + numachip_apic_icr_write(apicid, dmode | vector); } static void numachip_send_IPI_mask(const struct cpumask *mask, int vector) @@ -149,9 +160,14 @@ static void numachip_send_IPI_self(int vector) apic_write(APIC_SELF_IPI, vector); } -static int __init numachip_probe(void) +static int __init numachip1_probe(void) { - return apic == &apic_numachip; + return apic == &apic_numachip1; +} + +static int __init numachip2_probe(void) +{ + return apic == &apic_numachip2; } static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) @@ -172,34 +188,118 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) static int __init numachip_system_init(void) { - if (!numachip_system) + /* Map the LCSR area and set up the apic_icr_write function */ + switch (numachip_system) { + case 1: + init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE); + numachip_apic_icr_write = numachip1_apic_icr_write; + x86_init.pci.arch_init = pci_numachip_init; + break; + case 2: + init_extra_mapping_uc(NUMACHIP2_LCSR_BASE, NUMACHIP2_LCSR_SIZE); + numachip_apic_icr_write = numachip2_apic_icr_write; + + /* Use MCFG config cycles rather than locked CF8 cycles */ + raw_pci_ops = &pci_mmcfg; + break; + default: return 0; - - init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE); - init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE); + } x86_cpuinit.fixup_cpu_id = fixup_cpu_id; - x86_init.pci.arch_init = pci_numachip_init; return 0; } early_initcall(numachip_system_init); -static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int numachip1_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (!strncmp(oem_id, "NUMASC", 6)) { - numachip_system = 1; - return 1; - } + if ((strncmp(oem_id, "NUMASC", 6) != 0) || + (strncmp(oem_table_id, "NCONNECT", 8) != 0)) + return 0; - return 0; + numachip_system = 1; + + return 1; +} + +static int numachip2_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if ((strncmp(oem_id, "NUMASC", 6) != 0) || + (strncmp(oem_table_id, "NCONECT2", 8) != 0)) + return 0; + + numachip_system = 2; + + return 1; +} + +/* APIC IPIs are queued */ +static void numachip_apic_wait_icr_idle(void) +{ } -static const struct apic apic_numachip __refconst = { +/* APIC NMI IPIs are queued */ +static u32 numachip_safe_apic_wait_icr_idle(void) +{ + return 0; +} +static const struct apic apic_numachip1 __refconst = { .name = "NumaConnect system", - .probe = numachip_probe, - .acpi_madt_oem_check = numachip_acpi_madt_oem_check, + .probe = numachip1_probe, + .acpi_madt_oem_check = numachip1_acpi_madt_oem_check, + .apic_id_valid = numachip_apic_id_valid, + .apic_id_registered = numachip_apic_id_registered, + + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = 0, /* physical */ + + .target_cpus = online_target_cpus, + .disable_esr = 0, + .dest_logical = 0, + .check_apicid_used = NULL, + + .vector_allocation_domain = default_vector_allocation_domain, + .init_apic_ldr = flat_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, + .phys_pkg_id = numachip_phys_pkg_id, + + .get_apic_id = numachip1_get_apic_id, + .set_apic_id = numachip1_set_apic_id, + .apic_id_mask = 0xffU << 24, + + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + + .send_IPI_mask = numachip_send_IPI_mask, + .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, + .send_IPI_allbutself = numachip_send_IPI_allbutself, + .send_IPI_all = numachip_send_IPI_all, + .send_IPI_self = numachip_send_IPI_self, + + .wakeup_secondary_cpu = numachip_wakeup_secondary, + .inquire_remote_apic = NULL, /* REMRD not supported */ + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .eoi_write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = numachip_apic_wait_icr_idle, + .safe_wait_icr_idle = numachip_safe_apic_wait_icr_idle, +}; + +apic_driver(apic_numachip1); + +static const struct apic apic_numachip2 __refconst = { + .name = "NumaConnect2 system", + .probe = numachip2_probe, + .acpi_madt_oem_check = numachip2_acpi_madt_oem_check, .apic_id_valid = numachip_apic_id_valid, .apic_id_registered = numachip_apic_id_registered, @@ -221,8 +321,8 @@ static const struct apic apic_numachip __refconst = { .check_phys_apicid_present = default_check_phys_apicid_present, .phys_pkg_id = numachip_phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, + .get_apic_id = numachip2_get_apic_id, + .set_apic_id = numachip2_set_apic_id, .apic_id_mask = 0xffU << 24, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, @@ -241,8 +341,8 @@ static const struct apic apic_numachip __refconst = { .eoi_write = native_apic_mem_write, .icr_read = native_apic_icr_read, .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, + .wait_icr_idle = numachip_apic_wait_icr_idle, + .safe_wait_icr_idle = numachip_safe_apic_wait_icr_idle, }; -apic_driver(apic_numachip); +apic_driver(apic_numachip2); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5c60bb162622..f25321894ad2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -529,7 +529,7 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector) } } -void eoi_ioapic_pin(int vector, struct mp_chip_data *data) +static void eoi_ioapic_pin(int vector, struct mp_chip_data *data) { unsigned long flags; struct irq_pin_list *entry; @@ -2547,7 +2547,9 @@ void __init setup_ioapic_dest(void) mask = apic->target_cpus(); chip = irq_data_get_irq_chip(idata); - chip->irq_set_affinity(idata, mask, false); + /* Might be lapic_chip for irq 0 */ + if (chip->irq_set_affinity) + chip->irq_set_affinity(idata, mask, false); } } #endif @@ -2907,6 +2909,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, struct irq_data *irq_data; struct mp_chip_data *data; struct irq_alloc_info *info = arg; + unsigned long flags; if (!info || nr_irqs > 1) return -EINVAL; @@ -2939,11 +2942,14 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, cfg = irqd_cfg(irq_data); add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin); + + local_irq_save(flags); if (info->ioapic_entry) mp_setup_entry(cfg, data, info->ioapic_entry); mp_register_handler(virq, data->trigger); if (virq < nr_legacy_irqs()) legacy_pic->mask(virq); + local_irq_restore(flags); apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n", diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 8e3d22a1af94..439df975bc7a 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -43,18 +43,15 @@ void common(void) { #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) BLANK(); - OFFSET(IA32_SIGCONTEXT_ax, sigcontext_ia32, ax); - OFFSET(IA32_SIGCONTEXT_bx, sigcontext_ia32, bx); - OFFSET(IA32_SIGCONTEXT_cx, sigcontext_ia32, cx); - OFFSET(IA32_SIGCONTEXT_dx, sigcontext_ia32, dx); - OFFSET(IA32_SIGCONTEXT_si, sigcontext_ia32, si); - OFFSET(IA32_SIGCONTEXT_di, sigcontext_ia32, di); - OFFSET(IA32_SIGCONTEXT_bp, sigcontext_ia32, bp); - OFFSET(IA32_SIGCONTEXT_sp, sigcontext_ia32, sp); - OFFSET(IA32_SIGCONTEXT_ip, sigcontext_ia32, ip); - - BLANK(); - OFFSET(TI_sysenter_return, thread_info, sysenter_return); + OFFSET(IA32_SIGCONTEXT_ax, sigcontext_32, ax); + OFFSET(IA32_SIGCONTEXT_bx, sigcontext_32, bx); + OFFSET(IA32_SIGCONTEXT_cx, sigcontext_32, cx); + OFFSET(IA32_SIGCONTEXT_dx, sigcontext_32, dx); + OFFSET(IA32_SIGCONTEXT_si, sigcontext_32, si); + OFFSET(IA32_SIGCONTEXT_di, sigcontext_32, di); + OFFSET(IA32_SIGCONTEXT_bp, sigcontext_32, bp); + OFFSET(IA32_SIGCONTEXT_sp, sigcontext_32, sp); + OFFSET(IA32_SIGCONTEXT_ip, sigcontext_32, ip); BLANK(); OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4eb065c6bed2..58031303e304 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ perf_event_intel_uncore_snb.o \ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index de22ea7ff82f..4ddd780aeac9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -670,6 +670,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; + c->x86_capability[13] = cpuid_ebx(0x80000008); } #ifdef CONFIG_X86_32 else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index be4febc58b94..e38d338a6447 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -157,7 +157,7 @@ struct _cpuid4_info_regs { struct amd_northbridge *nb; }; -unsigned short num_cache_leaves; +static unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same information to the user. This makes some assumptions about the machine: @@ -326,7 +326,7 @@ static void amd_calc_l3_indices(struct amd_northbridge *nb) * * @returns: the disabled index if used or negative value if slot free. */ -int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) +static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) { unsigned int reg = 0; @@ -403,8 +403,8 @@ static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu, * * @return: 0 on success, error status on failure */ -int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot, - unsigned long index) +static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, + unsigned slot, unsigned long index) { int ret = 0; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9d014b82a124..c5b0d562dbf5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1586,6 +1586,8 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) winchip_mcheck_init(c); return 1; break; + default: + return 0; } return 0; @@ -1605,6 +1607,8 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) mce_amd_feature_init(c); mce_flags.overflow_recov = !!(ebx & BIT(0)); mce_flags.succor = !!(ebx & BIT(1)); + mce_flags.smca = !!(ebx & BIT(3)); + break; } @@ -2042,7 +2046,7 @@ int __init mcheck_init(void) * Disable machine checks on suspend and shutdown. We can't really handle * them later. */ -static int mce_disable_error_reporting(void) +static void mce_disable_error_reporting(void) { int i; @@ -2052,17 +2056,32 @@ static int mce_disable_error_reporting(void) if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), 0); } - return 0; + return; +} + +static void vendor_disable_error_reporting(void) +{ + /* + * Don't clear on Intel CPUs. Some of these MSRs are socket-wide. + * Disabling them for just a single offlined CPU is bad, since it will + * inhibit reporting for all shared resources on the socket like the + * last level cache (LLC), the integrated memory controller (iMC), etc. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return; + + mce_disable_error_reporting(); } static int mce_syscore_suspend(void) { - return mce_disable_error_reporting(); + vendor_disable_error_reporting(); + return 0; } static void mce_syscore_shutdown(void) { - mce_disable_error_reporting(); + vendor_disable_error_reporting(); } /* @@ -2342,19 +2361,14 @@ static void mce_device_remove(unsigned int cpu) static void mce_disable_cpu(void *h) { unsigned long action = *(unsigned long *)h; - int i; if (!mce_available(raw_cpu_ptr(&cpu_info))) return; if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); - for (i = 0; i < mca_cfg.banks; i++) { - struct mce_bank *b = &mce_banks[i]; - if (b->init) - wrmsrl(MSR_IA32_MCx_CTL(i), 0); - } + vendor_disable_error_reporting(); } static void mce_reenable_cpu(void *h) diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 1af51b1586d7..2c5aaf8c2e2f 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -503,14 +503,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c) return; } - /* Check whether a vector already exists */ - if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG - "CPU%d: Thermal LVT vector (%#x) already installed\n", - cpu, (h & APIC_VECTOR_MASK)); - return; - } - /* early Pentium M models use different method for enabling TM2 */ if (cpu_has(c, X86_FEATURE_TM2)) { if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) { diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile index 285c85427c32..220b1a508513 100644 --- a/arch/x86/kernel/cpu/microcode/Makefile +++ b/arch/x86/kernel/cpu/microcode/Makefile @@ -2,6 +2,3 @@ microcode-y := core.o obj-$(CONFIG_MICROCODE) += microcode.o microcode-$(CONFIG_MICROCODE_INTEL) += intel.o intel_lib.o microcode-$(CONFIG_MICROCODE_AMD) += amd.o -obj-$(CONFIG_MICROCODE_EARLY) += core_early.o -obj-$(CONFIG_MICROCODE_INTEL_EARLY) += intel_early.o -obj-$(CONFIG_MICROCODE_AMD_EARLY) += amd_early.o diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 12829c3ced3c..2233f8a76615 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -1,5 +1,9 @@ /* * AMD CPU Microcode Update Driver for Linux + * + * This driver allows to upgrade microcode on F10h AMD + * CPUs and later. + * * Copyright (C) 2008-2011 Advanced Micro Devices Inc. * * Author: Peter Oruba <peter.oruba@amd.com> @@ -7,34 +11,31 @@ * Based on work by: * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> * - * Maintainers: - * Andreas Herrmann <herrmann.der.user@googlemail.com> - * Borislav Petkov <bp@alien8.de> + * early loader: + * Copyright (C) 2013 Advanced Micro Devices, Inc. * - * This driver allows to upgrade microcode on F10h AMD - * CPUs and later. + * Author: Jacob Shin <jacob.shin@amd.com> + * Fixes: Borislav Petkov <bp@suse.de> * * Licensed under the terms of the GNU General Public * License version 2. See file COPYING for details. */ +#define pr_fmt(fmt) "microcode: " fmt -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - +#include <linux/earlycpio.h> #include <linux/firmware.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> +#include <linux/initrd.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/pci.h> +#include <asm/microcode_amd.h> #include <asm/microcode.h> #include <asm/processor.h> +#include <asm/setup.h> +#include <asm/cpu.h> #include <asm/msr.h> -#include <asm/microcode_amd.h> - -MODULE_DESCRIPTION("AMD Microcode Update Driver"); -MODULE_AUTHOR("Peter Oruba"); -MODULE_LICENSE("GPL v2"); static struct equiv_cpu_entry *equiv_cpu_table; @@ -47,6 +48,432 @@ struct ucode_patch { static LIST_HEAD(pcache); +/* + * This points to the current valid container of microcode patches which we will + * save from the initrd before jettisoning its contents. + */ +static u8 *container; +static size_t container_size; + +static u32 ucode_new_rev; +u8 amd_ucode_patch[PATCH_MAX_SIZE]; +static u16 this_equiv_id; + +static struct cpio_data ucode_cpio; + +/* + * Microcode patch container file is prepended to the initrd in cpio format. + * See Documentation/x86/early-microcode.txt + */ +static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin"; + +static struct cpio_data __init find_ucode_in_initrd(void) +{ + long offset = 0; + char *path; + void *start; + size_t size; + +#ifdef CONFIG_X86_32 + struct boot_params *p; + + /* + * On 32-bit, early load occurs before paging is turned on so we need + * to use physical addresses. + */ + p = (struct boot_params *)__pa_nodebug(&boot_params); + path = (char *)__pa_nodebug(ucode_path); + start = (void *)p->hdr.ramdisk_image; + size = p->hdr.ramdisk_size; +#else + path = ucode_path; + start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); + size = boot_params.hdr.ramdisk_size; +#endif + + return find_cpio_data(path, start, size, &offset); +} + +static size_t compute_container_size(u8 *data, u32 total_size) +{ + size_t size = 0; + u32 *header = (u32 *)data; + + if (header[0] != UCODE_MAGIC || + header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ + header[2] == 0) /* size */ + return size; + + size = header[2] + CONTAINER_HDR_SZ; + total_size -= size; + data += size; + + while (total_size) { + u16 patch_size; + + header = (u32 *)data; + + if (header[0] != UCODE_UCODE_TYPE) + break; + + /* + * Sanity-check patch size. + */ + patch_size = header[1]; + if (patch_size > PATCH_MAX_SIZE) + break; + + size += patch_size + SECTION_HDR_SIZE; + data += patch_size + SECTION_HDR_SIZE; + total_size -= patch_size + SECTION_HDR_SIZE; + } + + return size; +} + +/* + * Early load occurs before we can vmalloc(). So we look for the microcode + * patch container file in initrd, traverse equivalent cpu table, look for a + * matching microcode patch, and update, all in initrd memory in place. + * When vmalloc() is available for use later -- on 64-bit during first AP load, + * and on 32-bit during save_microcode_in_initrd_amd() -- we can call + * load_microcode_amd() to save equivalent cpu table and microcode patches in + * kernel heap memory. + */ +static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch) +{ + struct equiv_cpu_entry *eq; + size_t *cont_sz; + u32 *header; + u8 *data, **cont; + u8 (*patch)[PATCH_MAX_SIZE]; + u16 eq_id = 0; + int offset, left; + u32 rev, eax, ebx, ecx, edx; + u32 *new_rev; + +#ifdef CONFIG_X86_32 + new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); + cont_sz = (size_t *)__pa_nodebug(&container_size); + cont = (u8 **)__pa_nodebug(&container); + patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch); +#else + new_rev = &ucode_new_rev; + cont_sz = &container_size; + cont = &container; + patch = &amd_ucode_patch; +#endif + + data = ucode; + left = size; + header = (u32 *)data; + + /* find equiv cpu table */ + if (header[0] != UCODE_MAGIC || + header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ + header[2] == 0) /* size */ + return; + + eax = 0x00000001; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + + while (left > 0) { + eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); + + *cont = data; + + /* Advance past the container header */ + offset = header[2] + CONTAINER_HDR_SZ; + data += offset; + left -= offset; + + eq_id = find_equiv_id(eq, eax); + if (eq_id) { + this_equiv_id = eq_id; + *cont_sz = compute_container_size(*cont, left + offset); + + /* + * truncate how much we need to iterate over in the + * ucode update loop below + */ + left = *cont_sz - offset; + break; + } + + /* + * support multiple container files appended together. if this + * one does not have a matching equivalent cpu entry, we fast + * forward to the next container file. + */ + while (left > 0) { + header = (u32 *)data; + if (header[0] == UCODE_MAGIC && + header[1] == UCODE_EQUIV_CPU_TABLE_TYPE) + break; + + offset = header[1] + SECTION_HDR_SIZE; + data += offset; + left -= offset; + } + + /* mark where the next microcode container file starts */ + offset = data - (u8 *)ucode; + ucode = data; + } + + if (!eq_id) { + *cont = NULL; + *cont_sz = 0; + return; + } + + if (check_current_patch_level(&rev, true)) + return; + + while (left > 0) { + struct microcode_amd *mc; + + header = (u32 *)data; + if (header[0] != UCODE_UCODE_TYPE || /* type */ + header[1] == 0) /* size */ + break; + + mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); + + if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) { + + if (!__apply_microcode_amd(mc)) { + rev = mc->hdr.patch_id; + *new_rev = rev; + + if (save_patch) + memcpy(patch, mc, + min_t(u32, header[1], PATCH_MAX_SIZE)); + } + } + + offset = header[1] + SECTION_HDR_SIZE; + data += offset; + left -= offset; + } +} + +static bool __init load_builtin_amd_microcode(struct cpio_data *cp, + unsigned int family) +{ +#ifdef CONFIG_X86_64 + char fw_name[36] = "amd-ucode/microcode_amd.bin"; + + if (family >= 0x15) + snprintf(fw_name, sizeof(fw_name), + "amd-ucode/microcode_amd_fam%.2xh.bin", family); + + return get_builtin_firmware(cp, fw_name); +#else + return false; +#endif +} + +void __init load_ucode_amd_bsp(unsigned int family) +{ + struct cpio_data cp; + void **data; + size_t *size; + +#ifdef CONFIG_X86_32 + data = (void **)__pa_nodebug(&ucode_cpio.data); + size = (size_t *)__pa_nodebug(&ucode_cpio.size); +#else + data = &ucode_cpio.data; + size = &ucode_cpio.size; +#endif + + cp = find_ucode_in_initrd(); + if (!cp.data) { + if (!load_builtin_amd_microcode(&cp, family)) + return; + } + + *data = cp.data; + *size = cp.size; + + apply_ucode_in_initrd(cp.data, cp.size, true); +} + +#ifdef CONFIG_X86_32 +/* + * On 32-bit, since AP's early load occurs before paging is turned on, we + * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during + * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During + * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, + * which is used upon resume from suspend. + */ +void load_ucode_amd_ap(void) +{ + struct microcode_amd *mc; + size_t *usize; + void **ucode; + + mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); + if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { + __apply_microcode_amd(mc); + return; + } + + ucode = (void *)__pa_nodebug(&container); + usize = (size_t *)__pa_nodebug(&container_size); + + if (!*ucode || !*usize) + return; + + apply_ucode_in_initrd(*ucode, *usize, false); +} + +static void __init collect_cpu_sig_on_bsp(void *arg) +{ + unsigned int cpu = smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + uci->cpu_sig.sig = cpuid_eax(0x00000001); +} + +static void __init get_bsp_sig(void) +{ + unsigned int bsp = boot_cpu_data.cpu_index; + struct ucode_cpu_info *uci = ucode_cpu_info + bsp; + + if (!uci->cpu_sig.sig) + smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); +} +#else +void load_ucode_amd_ap(void) +{ + unsigned int cpu = smp_processor_id(); + struct equiv_cpu_entry *eq; + struct microcode_amd *mc; + u32 rev, eax; + u16 eq_id; + + /* Exit if called on the BSP. */ + if (!cpu) + return; + + if (!container) + return; + + /* + * 64-bit runs with paging enabled, thus early==false. + */ + if (check_current_patch_level(&rev, false)) + return; + + eax = cpuid_eax(0x00000001); + eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ); + + eq_id = find_equiv_id(eq, eax); + if (!eq_id) + return; + + if (eq_id == this_equiv_id) { + mc = (struct microcode_amd *)amd_ucode_patch; + + if (mc && rev < mc->hdr.patch_id) { + if (!__apply_microcode_amd(mc)) + ucode_new_rev = mc->hdr.patch_id; + } + + } else { + if (!ucode_cpio.data) + return; + + /* + * AP has a different equivalence ID than BSP, looks like + * mixed-steppings silicon so go through the ucode blob anew. + */ + apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false); + } +} +#endif + +int __init save_microcode_in_initrd_amd(void) +{ + unsigned long cont; + int retval = 0; + enum ucode_state ret; + u8 *cont_va; + u32 eax; + + if (!container) + return -EINVAL; + +#ifdef CONFIG_X86_32 + get_bsp_sig(); + cont = (unsigned long)container; + cont_va = __va(container); +#else + /* + * We need the physical address of the container for both bitness since + * boot_params.hdr.ramdisk_image is a physical address. + */ + cont = __pa(container); + cont_va = container; +#endif + + /* + * Take into account the fact that the ramdisk might get relocated and + * therefore we need to recompute the container's position in virtual + * memory space. + */ + if (relocated_ramdisk) + container = (u8 *)(__va(relocated_ramdisk) + + (cont - boot_params.hdr.ramdisk_image)); + else + container = cont_va; + + if (ucode_new_rev) + pr_info("microcode: updated early to new patch_level=0x%08x\n", + ucode_new_rev); + + eax = cpuid_eax(0x00000001); + eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); + + ret = load_microcode_amd(smp_processor_id(), eax, container, container_size); + if (ret != UCODE_OK) + retval = -EINVAL; + + /* + * This will be freed any msec now, stash patches for the current + * family and switch to patch cache for cpu hotplug, etc later. + */ + container = NULL; + container_size = 0; + + return retval; +} + +void reload_ucode_amd(void) +{ + struct microcode_amd *mc; + u32 rev; + + /* + * early==false because this is a syscore ->resume path and by + * that time paging is long enabled. + */ + if (check_current_patch_level(&rev, false)) + return; + + mc = (struct microcode_amd *)amd_ucode_patch; + + if (mc && rev < mc->hdr.patch_id) { + if (!__apply_microcode_amd(mc)) { + ucode_new_rev = mc->hdr.patch_id; + pr_info("microcode: reload patch_level=0x%08x\n", + ucode_new_rev); + } + } +} static u16 __find_equiv_id(unsigned int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -177,6 +604,53 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, return patch_size; } +/* + * Those patch levels cannot be updated to newer ones and thus should be final. + */ +static u32 final_levels[] = { + 0x01000098, + 0x0100009f, + 0x010000af, + 0, /* T-101 terminator */ +}; + +/* + * Check the current patch level on this CPU. + * + * @rev: Use it to return the patch level. It is set to 0 in the case of + * error. + * + * Returns: + * - true: if update should stop + * - false: otherwise + */ +bool check_current_patch_level(u32 *rev, bool early) +{ + u32 lvl, dummy, i; + bool ret = false; + u32 *levels; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); + + if (IS_ENABLED(CONFIG_X86_32) && early) + levels = (u32 *)__pa_nodebug(&final_levels); + else + levels = final_levels; + + for (i = 0; levels[i]; i++) { + if (lvl == levels[i]) { + lvl = 0; + ret = true; + break; + } + } + + if (rev) + *rev = lvl; + + return ret; +} + int __apply_microcode_amd(struct microcode_amd *mc_amd) { u32 rev, dummy; @@ -197,7 +671,7 @@ int apply_microcode_amd(int cpu) struct microcode_amd *mc_amd; struct ucode_cpu_info *uci; struct ucode_patch *p; - u32 rev, dummy; + u32 rev; BUG_ON(raw_smp_processor_id() != cpu); @@ -210,7 +684,8 @@ int apply_microcode_amd(int cpu) mc_amd = p->data; uci->mc = p->data; - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + if (check_current_patch_level(&rev, false)) + return -1; /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { @@ -387,7 +862,7 @@ enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t s if (ret != UCODE_OK) cleanup(); -#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32) +#ifdef CONFIG_X86_32 /* save BSP's matching patch for early load */ if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { struct ucode_patch *p = find_patch(cpu); @@ -475,7 +950,7 @@ static struct microcode_ops microcode_amd_ops = { struct microcode_ops * __init init_amd_microcode(void) { - struct cpuinfo_x86 *c = &cpu_data(0); + struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { pr_warning("AMD CPU family 0x%x not supported\n", c->x86); diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c deleted file mode 100644 index e8a215a9a345..000000000000 --- a/arch/x86/kernel/cpu/microcode/amd_early.c +++ /dev/null @@ -1,440 +0,0 @@ -/* - * Copyright (C) 2013 Advanced Micro Devices, Inc. - * - * Author: Jacob Shin <jacob.shin@amd.com> - * Fixes: Borislav Petkov <bp@suse.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/earlycpio.h> -#include <linux/initrd.h> - -#include <asm/cpu.h> -#include <asm/setup.h> -#include <asm/microcode_amd.h> - -/* - * This points to the current valid container of microcode patches which we will - * save from the initrd before jettisoning its contents. - */ -static u8 *container; -static size_t container_size; - -static u32 ucode_new_rev; -u8 amd_ucode_patch[PATCH_MAX_SIZE]; -static u16 this_equiv_id; - -static struct cpio_data ucode_cpio; - -/* - * Microcode patch container file is prepended to the initrd in cpio format. - * See Documentation/x86/early-microcode.txt - */ -static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin"; - -static struct cpio_data __init find_ucode_in_initrd(void) -{ - long offset = 0; - char *path; - void *start; - size_t size; - -#ifdef CONFIG_X86_32 - struct boot_params *p; - - /* - * On 32-bit, early load occurs before paging is turned on so we need - * to use physical addresses. - */ - p = (struct boot_params *)__pa_nodebug(&boot_params); - path = (char *)__pa_nodebug(ucode_path); - start = (void *)p->hdr.ramdisk_image; - size = p->hdr.ramdisk_size; -#else - path = ucode_path; - start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); - size = boot_params.hdr.ramdisk_size; -#endif - - return find_cpio_data(path, start, size, &offset); -} - -static size_t compute_container_size(u8 *data, u32 total_size) -{ - size_t size = 0; - u32 *header = (u32 *)data; - - if (header[0] != UCODE_MAGIC || - header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ - header[2] == 0) /* size */ - return size; - - size = header[2] + CONTAINER_HDR_SZ; - total_size -= size; - data += size; - - while (total_size) { - u16 patch_size; - - header = (u32 *)data; - - if (header[0] != UCODE_UCODE_TYPE) - break; - - /* - * Sanity-check patch size. - */ - patch_size = header[1]; - if (patch_size > PATCH_MAX_SIZE) - break; - - size += patch_size + SECTION_HDR_SIZE; - data += patch_size + SECTION_HDR_SIZE; - total_size -= patch_size + SECTION_HDR_SIZE; - } - - return size; -} - -/* - * Early load occurs before we can vmalloc(). So we look for the microcode - * patch container file in initrd, traverse equivalent cpu table, look for a - * matching microcode patch, and update, all in initrd memory in place. - * When vmalloc() is available for use later -- on 64-bit during first AP load, - * and on 32-bit during save_microcode_in_initrd_amd() -- we can call - * load_microcode_amd() to save equivalent cpu table and microcode patches in - * kernel heap memory. - */ -static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch) -{ - struct equiv_cpu_entry *eq; - size_t *cont_sz; - u32 *header; - u8 *data, **cont; - u8 (*patch)[PATCH_MAX_SIZE]; - u16 eq_id = 0; - int offset, left; - u32 rev, eax, ebx, ecx, edx; - u32 *new_rev; - -#ifdef CONFIG_X86_32 - new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); - cont_sz = (size_t *)__pa_nodebug(&container_size); - cont = (u8 **)__pa_nodebug(&container); - patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch); -#else - new_rev = &ucode_new_rev; - cont_sz = &container_size; - cont = &container; - patch = &amd_ucode_patch; -#endif - - data = ucode; - left = size; - header = (u32 *)data; - - /* find equiv cpu table */ - if (header[0] != UCODE_MAGIC || - header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ - header[2] == 0) /* size */ - return; - - eax = 0x00000001; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - while (left > 0) { - eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); - - *cont = data; - - /* Advance past the container header */ - offset = header[2] + CONTAINER_HDR_SZ; - data += offset; - left -= offset; - - eq_id = find_equiv_id(eq, eax); - if (eq_id) { - this_equiv_id = eq_id; - *cont_sz = compute_container_size(*cont, left + offset); - - /* - * truncate how much we need to iterate over in the - * ucode update loop below - */ - left = *cont_sz - offset; - break; - } - - /* - * support multiple container files appended together. if this - * one does not have a matching equivalent cpu entry, we fast - * forward to the next container file. - */ - while (left > 0) { - header = (u32 *)data; - if (header[0] == UCODE_MAGIC && - header[1] == UCODE_EQUIV_CPU_TABLE_TYPE) - break; - - offset = header[1] + SECTION_HDR_SIZE; - data += offset; - left -= offset; - } - - /* mark where the next microcode container file starts */ - offset = data - (u8 *)ucode; - ucode = data; - } - - if (!eq_id) { - *cont = NULL; - *cont_sz = 0; - return; - } - - /* find ucode and update if needed */ - - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); - - while (left > 0) { - struct microcode_amd *mc; - - header = (u32 *)data; - if (header[0] != UCODE_UCODE_TYPE || /* type */ - header[1] == 0) /* size */ - break; - - mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); - - if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) { - - if (!__apply_microcode_amd(mc)) { - rev = mc->hdr.patch_id; - *new_rev = rev; - - if (save_patch) - memcpy(patch, mc, - min_t(u32, header[1], PATCH_MAX_SIZE)); - } - } - - offset = header[1] + SECTION_HDR_SIZE; - data += offset; - left -= offset; - } -} - -static bool __init load_builtin_amd_microcode(struct cpio_data *cp, - unsigned int family) -{ -#ifdef CONFIG_X86_64 - char fw_name[36] = "amd-ucode/microcode_amd.bin"; - - if (family >= 0x15) - snprintf(fw_name, sizeof(fw_name), - "amd-ucode/microcode_amd_fam%.2xh.bin", family); - - return get_builtin_firmware(cp, fw_name); -#else - return false; -#endif -} - -void __init load_ucode_amd_bsp(unsigned int family) -{ - struct cpio_data cp; - void **data; - size_t *size; - -#ifdef CONFIG_X86_32 - data = (void **)__pa_nodebug(&ucode_cpio.data); - size = (size_t *)__pa_nodebug(&ucode_cpio.size); -#else - data = &ucode_cpio.data; - size = &ucode_cpio.size; -#endif - - cp = find_ucode_in_initrd(); - if (!cp.data) { - if (!load_builtin_amd_microcode(&cp, family)) - return; - } - - *data = cp.data; - *size = cp.size; - - apply_ucode_in_initrd(cp.data, cp.size, true); -} - -#ifdef CONFIG_X86_32 -/* - * On 32-bit, since AP's early load occurs before paging is turned on, we - * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during - * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During - * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, - * which is used upon resume from suspend. - */ -void load_ucode_amd_ap(void) -{ - struct microcode_amd *mc; - size_t *usize; - void **ucode; - - mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); - if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { - __apply_microcode_amd(mc); - return; - } - - ucode = (void *)__pa_nodebug(&container); - usize = (size_t *)__pa_nodebug(&container_size); - - if (!*ucode || !*usize) - return; - - apply_ucode_in_initrd(*ucode, *usize, false); -} - -static void __init collect_cpu_sig_on_bsp(void *arg) -{ - unsigned int cpu = smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - uci->cpu_sig.sig = cpuid_eax(0x00000001); -} - -static void __init get_bsp_sig(void) -{ - unsigned int bsp = boot_cpu_data.cpu_index; - struct ucode_cpu_info *uci = ucode_cpu_info + bsp; - - if (!uci->cpu_sig.sig) - smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); -} -#else -void load_ucode_amd_ap(void) -{ - unsigned int cpu = smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct equiv_cpu_entry *eq; - struct microcode_amd *mc; - u32 rev, eax; - u16 eq_id; - - /* Exit if called on the BSP. */ - if (!cpu) - return; - - if (!container) - return; - - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); - - uci->cpu_sig.rev = rev; - uci->cpu_sig.sig = eax; - - eax = cpuid_eax(0x00000001); - eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ); - - eq_id = find_equiv_id(eq, eax); - if (!eq_id) - return; - - if (eq_id == this_equiv_id) { - mc = (struct microcode_amd *)amd_ucode_patch; - - if (mc && rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) - ucode_new_rev = mc->hdr.patch_id; - } - - } else { - if (!ucode_cpio.data) - return; - - /* - * AP has a different equivalence ID than BSP, looks like - * mixed-steppings silicon so go through the ucode blob anew. - */ - apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false); - } -} -#endif - -int __init save_microcode_in_initrd_amd(void) -{ - unsigned long cont; - int retval = 0; - enum ucode_state ret; - u8 *cont_va; - u32 eax; - - if (!container) - return -EINVAL; - -#ifdef CONFIG_X86_32 - get_bsp_sig(); - cont = (unsigned long)container; - cont_va = __va(container); -#else - /* - * We need the physical address of the container for both bitness since - * boot_params.hdr.ramdisk_image is a physical address. - */ - cont = __pa(container); - cont_va = container; -#endif - - /* - * Take into account the fact that the ramdisk might get relocated and - * therefore we need to recompute the container's position in virtual - * memory space. - */ - if (relocated_ramdisk) - container = (u8 *)(__va(relocated_ramdisk) + - (cont - boot_params.hdr.ramdisk_image)); - else - container = cont_va; - - if (ucode_new_rev) - pr_info("microcode: updated early to new patch_level=0x%08x\n", - ucode_new_rev); - - eax = cpuid_eax(0x00000001); - eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); - - ret = load_microcode_amd(smp_processor_id(), eax, container, container_size); - if (ret != UCODE_OK) - retval = -EINVAL; - - /* - * This will be freed any msec now, stash patches for the current - * family and switch to patch cache for cpu hotplug, etc later. - */ - container = NULL; - container_size = 0; - - return retval; -} - -void reload_ucode_amd(void) -{ - struct microcode_amd *mc; - u32 rev, eax; - - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); - - mc = (struct microcode_amd *)amd_ucode_patch; - - if (mc && rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) { - ucode_new_rev = mc->hdr.patch_id; - pr_info("microcode: reload patch_level=0x%08x\n", - ucode_new_rev); - } - } -} diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 9e3f3c7dd5d7..7fc27f1cca58 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -5,6 +5,12 @@ * 2006 Shaohua Li <shaohua.li@intel.com> * 2013-2015 Borislav Petkov <bp@alien8.de> * + * X86 CPU microcode early update for Linux: + * + * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> + * H Peter Anvin" <hpa@zytor.com> + * (C) 2015 Borislav Petkov <bp@alien8.de> + * * This driver allows to upgrade microcode on x86 processors. * * This program is free software; you can redistribute it and/or @@ -13,34 +19,39 @@ * 2 of the License, or (at your option) any later version. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define pr_fmt(fmt) "microcode: " fmt #include <linux/platform_device.h> +#include <linux/syscore_ops.h> #include <linux/miscdevice.h> #include <linux/capability.h> +#include <linux/firmware.h> #include <linux/kernel.h> -#include <linux/module.h> #include <linux/mutex.h> #include <linux/cpu.h> #include <linux/fs.h> #include <linux/mm.h> -#include <linux/syscore_ops.h> -#include <asm/microcode.h> -#include <asm/processor.h> +#include <asm/microcode_intel.h> #include <asm/cpu_device_id.h> +#include <asm/microcode_amd.h> #include <asm/perf_event.h> +#include <asm/microcode.h> +#include <asm/processor.h> +#include <asm/cmdline.h> -MODULE_DESCRIPTION("Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); -MODULE_LICENSE("GPL"); - -#define MICROCODE_VERSION "2.00" +#define MICROCODE_VERSION "2.01" static struct microcode_ops *microcode_ops; -bool dis_ucode_ldr; -module_param(dis_ucode_ldr, bool, 0); +static bool dis_ucode_ldr; + +static int __init disable_loader(char *str) +{ + dis_ucode_ldr = true; + return 1; +} +__setup("dis_ucode_ldr", disable_loader); /* * Synchronization. @@ -68,6 +79,150 @@ struct cpu_info_ctx { int err; }; +static bool __init check_loader_disabled_bsp(void) +{ +#ifdef CONFIG_X86_32 + const char *cmdline = (const char *)__pa_nodebug(boot_command_line); + const char *opt = "dis_ucode_ldr"; + const char *option = (const char *)__pa_nodebug(opt); + bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr); + +#else /* CONFIG_X86_64 */ + const char *cmdline = boot_command_line; + const char *option = "dis_ucode_ldr"; + bool *res = &dis_ucode_ldr; +#endif + + if (cmdline_find_option_bool(cmdline, option)) + *res = true; + + return *res; +} + +extern struct builtin_fw __start_builtin_fw[]; +extern struct builtin_fw __end_builtin_fw[]; + +bool get_builtin_firmware(struct cpio_data *cd, const char *name) +{ +#ifdef CONFIG_FW_LOADER + struct builtin_fw *b_fw; + + for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) { + if (!strcmp(name, b_fw->name)) { + cd->size = b_fw->size; + cd->data = b_fw->data; + return true; + } + } +#endif + return false; +} + +void __init load_ucode_bsp(void) +{ + int vendor; + unsigned int family; + + if (check_loader_disabled_bsp()) + return; + + if (!have_cpuid_p()) + return; + + vendor = x86_vendor(); + family = x86_family(); + + switch (vendor) { + case X86_VENDOR_INTEL: + if (family >= 6) + load_ucode_intel_bsp(); + break; + case X86_VENDOR_AMD: + if (family >= 0x10) + load_ucode_amd_bsp(family); + break; + default: + break; + } +} + +static bool check_loader_disabled_ap(void) +{ +#ifdef CONFIG_X86_32 + return *((bool *)__pa_nodebug(&dis_ucode_ldr)); +#else + return dis_ucode_ldr; +#endif +} + +void load_ucode_ap(void) +{ + int vendor, family; + + if (check_loader_disabled_ap()) + return; + + if (!have_cpuid_p()) + return; + + vendor = x86_vendor(); + family = x86_family(); + + switch (vendor) { + case X86_VENDOR_INTEL: + if (family >= 6) + load_ucode_intel_ap(); + break; + case X86_VENDOR_AMD: + if (family >= 0x10) + load_ucode_amd_ap(); + break; + default: + break; + } +} + +int __init save_microcode_in_initrd(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + if (c->x86 >= 6) + save_microcode_in_initrd_intel(); + break; + case X86_VENDOR_AMD: + if (c->x86 >= 0x10) + save_microcode_in_initrd_amd(); + break; + default: + break; + } + + return 0; +} + +void reload_early_microcode(void) +{ + int vendor, family; + + vendor = x86_vendor(); + family = x86_family(); + + switch (vendor) { + case X86_VENDOR_INTEL: + if (family >= 6) + reload_ucode_intel(); + break; + case X86_VENDOR_AMD: + if (family >= 0x10) + reload_ucode_amd(); + break; + default: + break; + } +} + static void collect_cpu_info_local(void *arg) { struct cpu_info_ctx *ctx = arg; @@ -210,9 +365,6 @@ static void __exit microcode_dev_exit(void) { misc_deregister(µcode_dev); } - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -MODULE_ALIAS("devname:cpu/microcode"); #else #define microcode_dev_init() 0 #define microcode_dev_exit() do { } while (0) @@ -463,20 +615,6 @@ static struct notifier_block mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; -#ifdef MODULE -/* Autoload on Intel and AMD systems */ -static const struct x86_cpu_id __initconst microcode_id[] = { -#ifdef CONFIG_MICROCODE_INTEL - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, -#endif -#ifdef CONFIG_MICROCODE_AMD - { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, }, -#endif - {} -}; -MODULE_DEVICE_TABLE(x86cpu, microcode_id); -#endif - static struct attribute *cpu_root_microcode_attrs[] = { &dev_attr_reload.attr, NULL @@ -487,9 +625,9 @@ static struct attribute_group cpu_root_microcode_group = { .attrs = cpu_root_microcode_attrs, }; -static int __init microcode_init(void) +int __init microcode_init(void) { - struct cpuinfo_x86 *c = &cpu_data(0); + struct cpuinfo_x86 *c = &boot_cpu_data; int error; if (paravirt_enabled() || dis_ucode_ldr) @@ -560,35 +698,3 @@ static int __init microcode_init(void) return error; } -module_init(microcode_init); - -static void __exit microcode_exit(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - unregister_syscore_ops(&mc_syscore_ops); - - sysfs_remove_group(&cpu_subsys.dev_root->kobj, - &cpu_root_microcode_group); - - get_online_cpus(); - mutex_lock(µcode_mutex); - - subsys_interface_unregister(&mc_cpu_interface); - - mutex_unlock(µcode_mutex); - put_online_cpus(); - - platform_device_unregister(microcode_pdev); - - microcode_ops = NULL; - - if (c->x86_vendor == X86_VENDOR_AMD) - exit_amd_microcode(); - - pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); -} -module_exit(microcode_exit); diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c deleted file mode 100644 index 8ebc421d6299..000000000000 --- a/arch/x86/kernel/cpu/microcode/core_early.c +++ /dev/null @@ -1,170 +0,0 @@ -/* - * X86 CPU microcode early update for Linux - * - * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> - * H Peter Anvin" <hpa@zytor.com> - * (C) 2015 Borislav Petkov <bp@alien8.de> - * - * This driver allows to early upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture - * Software Developer's Manual. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <linux/module.h> -#include <linux/firmware.h> -#include <asm/microcode.h> -#include <asm/microcode_intel.h> -#include <asm/microcode_amd.h> -#include <asm/processor.h> -#include <asm/cmdline.h> - -static bool __init check_loader_disabled_bsp(void) -{ -#ifdef CONFIG_X86_32 - const char *cmdline = (const char *)__pa_nodebug(boot_command_line); - const char *opt = "dis_ucode_ldr"; - const char *option = (const char *)__pa_nodebug(opt); - bool *res = (bool *)__pa_nodebug(&dis_ucode_ldr); - -#else /* CONFIG_X86_64 */ - const char *cmdline = boot_command_line; - const char *option = "dis_ucode_ldr"; - bool *res = &dis_ucode_ldr; -#endif - - if (cmdline_find_option_bool(cmdline, option)) - *res = true; - - return *res; -} - -extern struct builtin_fw __start_builtin_fw[]; -extern struct builtin_fw __end_builtin_fw[]; - -bool get_builtin_firmware(struct cpio_data *cd, const char *name) -{ -#ifdef CONFIG_FW_LOADER - struct builtin_fw *b_fw; - - for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) { - if (!strcmp(name, b_fw->name)) { - cd->size = b_fw->size; - cd->data = b_fw->data; - return true; - } - } -#endif - return false; -} - -void __init load_ucode_bsp(void) -{ - int vendor; - unsigned int family; - - if (check_loader_disabled_bsp()) - return; - - if (!have_cpuid_p()) - return; - - vendor = x86_vendor(); - family = x86_family(); - - switch (vendor) { - case X86_VENDOR_INTEL: - if (family >= 6) - load_ucode_intel_bsp(); - break; - case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_bsp(family); - break; - default: - break; - } -} - -static bool check_loader_disabled_ap(void) -{ -#ifdef CONFIG_X86_32 - return *((bool *)__pa_nodebug(&dis_ucode_ldr)); -#else - return dis_ucode_ldr; -#endif -} - -void load_ucode_ap(void) -{ - int vendor, family; - - if (check_loader_disabled_ap()) - return; - - if (!have_cpuid_p()) - return; - - vendor = x86_vendor(); - family = x86_family(); - - switch (vendor) { - case X86_VENDOR_INTEL: - if (family >= 6) - load_ucode_intel_ap(); - break; - case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_ap(); - break; - default: - break; - } -} - -int __init save_microcode_in_initrd(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - if (c->x86 >= 6) - save_microcode_in_initrd_intel(); - break; - case X86_VENDOR_AMD: - if (c->x86 >= 0x10) - save_microcode_in_initrd_amd(); - break; - default: - break; - } - - return 0; -} - -void reload_early_microcode(void) -{ - int vendor, family; - - vendor = x86_vendor(); - family = x86_family(); - - switch (vendor) { - case X86_VENDOR_INTEL: - if (family >= 6) - reload_ucode_intel(); - break; - case X86_VENDOR_AMD: - if (family >= 0x10) - reload_ucode_amd(); - break; - default: - break; - } -} diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 969dc17eb1b4..ce47402eb2f9 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -4,27 +4,804 @@ * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> * 2006 Shaohua Li <shaohua.li@intel.com> * + * Intel CPU microcode early update for Linux + * + * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> + * H Peter Anvin" <hpa@zytor.com> + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +/* + * This needs to be before all headers so that pr_debug in printk.h doesn't turn + * printk calls into no_printk(). + * + *#define DEBUG + */ +#define pr_fmt(fmt) "microcode: " fmt +#include <linux/earlycpio.h> #include <linux/firmware.h> #include <linux/uaccess.h> -#include <linux/kernel.h> -#include <linux/module.h> #include <linux/vmalloc.h> +#include <linux/initrd.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/mm.h> #include <asm/microcode_intel.h> #include <asm/processor.h> +#include <asm/tlbflush.h> +#include <asm/setup.h> #include <asm/msr.h> -MODULE_DESCRIPTION("Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); -MODULE_LICENSE("GPL"); +static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; +static struct mc_saved_data { + unsigned int mc_saved_count; + struct microcode_intel **mc_saved; +} mc_saved_data; + +static enum ucode_state +load_microcode_early(struct microcode_intel **saved, + unsigned int num_saved, struct ucode_cpu_info *uci) +{ + struct microcode_intel *ucode_ptr, *new_mc = NULL; + struct microcode_header_intel *mc_hdr; + int new_rev, ret, i; + + new_rev = uci->cpu_sig.rev; + + for (i = 0; i < num_saved; i++) { + ucode_ptr = saved[i]; + mc_hdr = (struct microcode_header_intel *)ucode_ptr; + + ret = has_newer_microcode(ucode_ptr, + uci->cpu_sig.sig, + uci->cpu_sig.pf, + new_rev); + if (!ret) + continue; + + new_rev = mc_hdr->rev; + new_mc = ucode_ptr; + } + + if (!new_mc) + return UCODE_NFOUND; + + uci->mc = (struct microcode_intel *)new_mc; + return UCODE_OK; +} + +static inline void +copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd, + unsigned long off, int num_saved) +{ + int i; + + for (i = 0; i < num_saved; i++) + mc_saved[i] = (struct microcode_intel *)(initrd[i] + off); +} + +#ifdef CONFIG_X86_32 +static void +microcode_phys(struct microcode_intel **mc_saved_tmp, + struct mc_saved_data *mc_saved_data) +{ + int i; + struct microcode_intel ***mc_saved; + + mc_saved = (struct microcode_intel ***) + __pa_nodebug(&mc_saved_data->mc_saved); + for (i = 0; i < mc_saved_data->mc_saved_count; i++) { + struct microcode_intel *p; + + p = *(struct microcode_intel **) + __pa_nodebug(mc_saved_data->mc_saved + i); + mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); + } +} +#endif + +static enum ucode_state +load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, + unsigned long initrd_start, struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int count = mc_saved_data->mc_saved_count; + + if (!mc_saved_data->mc_saved) { + copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count); + + return load_microcode_early(mc_saved_tmp, count, uci); + } else { +#ifdef CONFIG_X86_32 + microcode_phys(mc_saved_tmp, mc_saved_data); + return load_microcode_early(mc_saved_tmp, count, uci); +#else + return load_microcode_early(mc_saved_data->mc_saved, + count, uci); +#endif + } +} + +/* + * Given CPU signature and a microcode patch, this function finds if the + * microcode patch has matching family and model with the CPU. + */ +static enum ucode_state +matching_model_microcode(struct microcode_header_intel *mc_header, + unsigned long sig) +{ + unsigned int fam, model; + unsigned int fam_ucode, model_ucode; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + unsigned long data_size = get_datasize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + + fam = __x86_family(sig); + model = x86_model(sig); + + fam_ucode = __x86_family(mc_header->sig); + model_ucode = x86_model(mc_header->sig); + + if (fam == fam_ucode && model == model_ucode) + return UCODE_OK; + + /* Look for ext. headers: */ + if (total_size <= data_size + MC_HEADER_SIZE) + return UCODE_NFOUND; + + ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + ext_sigcount = ext_header->count; + + for (i = 0; i < ext_sigcount; i++) { + fam_ucode = __x86_family(ext_sig->sig); + model_ucode = x86_model(ext_sig->sig); + + if (fam == fam_ucode && model == model_ucode) + return UCODE_OK; + + ext_sig++; + } + return UCODE_NFOUND; +} + +static int +save_microcode(struct mc_saved_data *mc_saved_data, + struct microcode_intel **mc_saved_src, + unsigned int mc_saved_count) +{ + int i, j; + struct microcode_intel **saved_ptr; + int ret; + + if (!mc_saved_count) + return -EINVAL; + + /* + * Copy new microcode data. + */ + saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL); + if (!saved_ptr) + return -ENOMEM; + + for (i = 0; i < mc_saved_count; i++) { + struct microcode_header_intel *mc_hdr; + struct microcode_intel *mc; + unsigned long size; + + if (!mc_saved_src[i]) { + ret = -EINVAL; + goto err; + } + + mc = mc_saved_src[i]; + mc_hdr = &mc->hdr; + size = get_totalsize(mc_hdr); + + saved_ptr[i] = kmalloc(size, GFP_KERNEL); + if (!saved_ptr[i]) { + ret = -ENOMEM; + goto err; + } + + memcpy(saved_ptr[i], mc, size); + } + + /* + * Point to newly saved microcode. + */ + mc_saved_data->mc_saved = saved_ptr; + mc_saved_data->mc_saved_count = mc_saved_count; + + return 0; + +err: + for (j = 0; j <= i; j++) + kfree(saved_ptr[j]); + kfree(saved_ptr); + + return ret; +} + +/* + * A microcode patch in ucode_ptr is saved into mc_saved + * - if it has matching signature and newer revision compared to an existing + * patch mc_saved. + * - or if it is a newly discovered microcode patch. + * + * The microcode patch should have matching model with CPU. + * + * Returns: The updated number @num_saved of saved microcode patches. + */ +static unsigned int _save_mc(struct microcode_intel **mc_saved, + u8 *ucode_ptr, unsigned int num_saved) +{ + struct microcode_header_intel *mc_hdr, *mc_saved_hdr; + unsigned int sig, pf; + int found = 0, i; + + mc_hdr = (struct microcode_header_intel *)ucode_ptr; + + for (i = 0; i < num_saved; i++) { + mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i]; + sig = mc_saved_hdr->sig; + pf = mc_saved_hdr->pf; + + if (!find_matching_signature(ucode_ptr, sig, pf)) + continue; + + found = 1; + + if (mc_hdr->rev <= mc_saved_hdr->rev) + continue; + + /* + * Found an older ucode saved earlier. Replace it with + * this newer one. + */ + mc_saved[i] = (struct microcode_intel *)ucode_ptr; + break; + } + + /* Newly detected microcode, save it to memory. */ + if (i >= num_saved && !found) + mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr; + + return num_saved; +} + +/* + * Get microcode matching with BSP's model. Only CPUs with the same model as + * BSP can stay in the platform. + */ +static enum ucode_state __init +get_matching_model_microcode(int cpu, unsigned long start, + void *data, size_t size, + struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + struct ucode_cpu_info *uci) +{ + u8 *ucode_ptr = data; + unsigned int leftover = size; + enum ucode_state state = UCODE_OK; + unsigned int mc_size; + struct microcode_header_intel *mc_header; + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int mc_saved_count = mc_saved_data->mc_saved_count; + int i; + + while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { + + if (leftover < sizeof(mc_header)) + break; + + mc_header = (struct microcode_header_intel *)ucode_ptr; + + mc_size = get_totalsize(mc_header); + if (!mc_size || mc_size > leftover || + microcode_sanity_check(ucode_ptr, 0) < 0) + break; + + leftover -= mc_size; + + /* + * Since APs with same family and model as the BSP may boot in + * the platform, we need to find and save microcode patches + * with the same family and model as the BSP. + */ + if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != + UCODE_OK) { + ucode_ptr += mc_size; + continue; + } + + mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count); + + ucode_ptr += mc_size; + } + + if (leftover) { + state = UCODE_ERROR; + goto out; + } + + if (mc_saved_count == 0) { + state = UCODE_NFOUND; + goto out; + } + + for (i = 0; i < mc_saved_count; i++) + mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; + + mc_saved_data->mc_saved_count = mc_saved_count; +out: + return state; +} + +static int collect_cpu_info_early(struct ucode_cpu_info *uci) +{ + unsigned int val[2]; + unsigned int family, model; + struct cpu_signature csig; + unsigned int eax, ebx, ecx, edx; + + csig.sig = 0; + csig.pf = 0; + csig.rev = 0; + + memset(uci, 0, sizeof(*uci)); + + eax = 0x00000001; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + csig.sig = eax; + + family = __x86_family(csig.sig); + model = x86_model(csig.sig); + + if ((model >= 5) || (family > 6)) { + /* get processor flags from MSR 0x17 */ + native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + csig.pf = 1 << ((val[1] >> 18) & 7); + } + native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + + csig.rev = val[1]; + + uci->cpu_sig = csig; + uci->valid = 1; + + return 0; +} + +static void show_saved_mc(void) +{ +#ifdef DEBUG + int i, j; + unsigned int sig, pf, rev, total_size, data_size, date; + struct ucode_cpu_info uci; + + if (mc_saved_data.mc_saved_count == 0) { + pr_debug("no microcode data saved.\n"); + return; + } + pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); + + collect_cpu_info_early(&uci); + + sig = uci.cpu_sig.sig; + pf = uci.cpu_sig.pf; + rev = uci.cpu_sig.rev; + pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev); + + for (i = 0; i < mc_saved_data.mc_saved_count; i++) { + struct microcode_header_intel *mc_saved_header; + struct extended_sigtable *ext_header; + int ext_sigcount; + struct extended_signature *ext_sig; + + mc_saved_header = (struct microcode_header_intel *) + mc_saved_data.mc_saved[i]; + sig = mc_saved_header->sig; + pf = mc_saved_header->pf; + rev = mc_saved_header->rev; + total_size = get_totalsize(mc_saved_header); + data_size = get_datasize(mc_saved_header); + date = mc_saved_header->date; + + pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n", + i, sig, pf, rev, total_size, + date & 0xffff, + date >> 24, + (date >> 16) & 0xff); + + /* Look for ext. headers: */ + if (total_size <= data_size + MC_HEADER_SIZE) + continue; + + ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + + for (j = 0; j < ext_sigcount; j++) { + sig = ext_sig->sig; + pf = ext_sig->pf; + + pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n", + j, sig, pf); + + ext_sig++; + } + + } +#endif +} + +#ifdef CONFIG_HOTPLUG_CPU +static DEFINE_MUTEX(x86_cpu_microcode_mutex); +/* + * Save this mc into mc_saved_data. So it will be loaded early when a CPU is + * hot added or resumes. + * + * Please make sure this mc should be a valid microcode patch before calling + * this function. + */ +int save_mc_for_early(u8 *mc) +{ + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int mc_saved_count_init; + unsigned int mc_saved_count; + struct microcode_intel **mc_saved; + int ret = 0; + int i; + + /* + * Hold hotplug lock so mc_saved_data is not accessed by a CPU in + * hotplug. + */ + mutex_lock(&x86_cpu_microcode_mutex); + + mc_saved_count_init = mc_saved_data.mc_saved_count; + mc_saved_count = mc_saved_data.mc_saved_count; + mc_saved = mc_saved_data.mc_saved; + + if (mc_saved && mc_saved_count) + memcpy(mc_saved_tmp, mc_saved, + mc_saved_count * sizeof(struct microcode_intel *)); + /* + * Save the microcode patch mc in mc_save_tmp structure if it's a newer + * version. + */ + mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count); + + /* + * Save the mc_save_tmp in global mc_saved_data. + */ + ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); + if (ret) { + pr_err("Cannot save microcode patch.\n"); + goto out; + } + + show_saved_mc(); + + /* + * Free old saved microcode data. + */ + if (mc_saved) { + for (i = 0; i < mc_saved_count_init; i++) + kfree(mc_saved[i]); + kfree(mc_saved); + } + +out: + mutex_unlock(&x86_cpu_microcode_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(save_mc_for_early); +#endif + +static bool __init load_builtin_intel_microcode(struct cpio_data *cp) +{ +#ifdef CONFIG_X86_64 + unsigned int eax = 0x00000001, ebx, ecx = 0, edx; + unsigned int family, model, stepping; + char name[30]; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + family = __x86_family(eax); + model = x86_model(eax); + stepping = eax & 0xf; + + sprintf(name, "intel-ucode/%02x-%02x-%02x", family, model, stepping); + + return get_builtin_firmware(cp, name); +#else + return false; +#endif +} + +static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; +static __init enum ucode_state +scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, + unsigned long start, unsigned long size, + struct ucode_cpu_info *uci) +{ + struct cpio_data cd; + long offset = 0; +#ifdef CONFIG_X86_32 + char *p = (char *)__pa_nodebug(ucode_name); +#else + char *p = ucode_name; +#endif + + cd.data = NULL; + cd.size = 0; + + cd = find_cpio_data(p, (void *)start, size, &offset); + if (!cd.data) { + if (!load_builtin_intel_microcode(&cd)) + return UCODE_ERROR; + } + + return get_matching_model_microcode(0, start, cd.data, cd.size, + mc_saved_data, initrd, uci); +} + +/* + * Print ucode update info. + */ +static void +print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) +{ + int cpu = smp_processor_id(); + + pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", + cpu, + uci->cpu_sig.rev, + date & 0xffff, + date >> 24, + (date >> 16) & 0xff); +} + +#ifdef CONFIG_X86_32 + +static int delay_ucode_info; +static int current_mc_date; + +/* + * Print early updated ucode info after printk works. This is delayed info dump. + */ +void show_ucode_info_early(void) +{ + struct ucode_cpu_info uci; + + if (delay_ucode_info) { + collect_cpu_info_early(&uci); + print_ucode_info(&uci, current_mc_date); + delay_ucode_info = 0; + } +} + +/* + * At this point, we can not call printk() yet. Keep microcode patch number in + * mc_saved_data.mc_saved and delay printing microcode info in + * show_ucode_info_early() until printk() works. + */ +static void print_ucode(struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + int *delay_ucode_info_p; + int *current_mc_date_p; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return; + + delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); + current_mc_date_p = (int *)__pa_nodebug(¤t_mc_date); + + *delay_ucode_info_p = 1; + *current_mc_date_p = mc_intel->hdr.date; +} +#else + +/* + * Flush global tlb. We only do this in x86_64 where paging has been enabled + * already and PGE should be enabled as well. + */ +static inline void flush_tlb_early(void) +{ + __native_flush_tlb_global_irq_disabled(); +} + +static inline void print_ucode(struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return; + + print_ucode_info(uci, mc_intel->hdr.date); +} +#endif + +static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) +{ + struct microcode_intel *mc_intel; + unsigned int val[2]; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return 0; + + /* write microcode via MSR 0x79 */ + native_wrmsr(MSR_IA32_UCODE_WRITE, + (unsigned long) mc_intel->bits, + (unsigned long) mc_intel->bits >> 16 >> 16); + native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + if (val[1] != mc_intel->hdr.rev) + return -1; + +#ifdef CONFIG_X86_64 + /* Flush global tlb. This is precaution. */ + flush_tlb_early(); +#endif + uci->cpu_sig.rev = val[1]; + + if (early) + print_ucode(uci); + else + print_ucode_info(uci, mc_intel->hdr.date); + + return 0; +} + +/* + * This function converts microcode patch offsets previously stored in + * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. + */ +int __init save_microcode_in_initrd_intel(void) +{ + unsigned int count = mc_saved_data.mc_saved_count; + struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; + int ret = 0; + + if (count == 0) + return ret; + + copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count); + ret = save_microcode(&mc_saved_data, mc_saved, count); + if (ret) + pr_err("Cannot save microcode patches from initrd.\n"); + + show_saved_mc(); + + return ret; +} + +static void __init +_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, + unsigned long *initrd, + unsigned long start, unsigned long size) +{ + struct ucode_cpu_info uci; + enum ucode_state ret; + + collect_cpu_info_early(&uci); + + ret = scan_microcode(mc_saved_data, initrd, start, size, &uci); + if (ret != UCODE_OK) + return; + + ret = load_microcode(mc_saved_data, initrd, start, &uci); + if (ret != UCODE_OK) + return; + + apply_microcode_early(&uci, true); +} + +void __init load_ucode_intel_bsp(void) +{ + u64 start, size; +#ifdef CONFIG_X86_32 + struct boot_params *p; + + p = (struct boot_params *)__pa_nodebug(&boot_params); + start = p->hdr.ramdisk_image; + size = p->hdr.ramdisk_size; + + _load_ucode_intel_bsp( + (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), + (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), + start, size); +#else + start = boot_params.hdr.ramdisk_image + PAGE_OFFSET; + size = boot_params.hdr.ramdisk_size; + + _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size); +#endif +} + +void load_ucode_intel_ap(void) +{ + struct mc_saved_data *mc_saved_data_p; + struct ucode_cpu_info uci; + unsigned long *mc_saved_in_initrd_p; + unsigned long initrd_start_addr; + enum ucode_state ret; +#ifdef CONFIG_X86_32 + unsigned long *initrd_start_p; + + mc_saved_in_initrd_p = + (unsigned long *)__pa_nodebug(mc_saved_in_initrd); + mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); + initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); + initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); +#else + mc_saved_data_p = &mc_saved_data; + mc_saved_in_initrd_p = mc_saved_in_initrd; + initrd_start_addr = initrd_start; +#endif + + /* + * If there is no valid ucode previously saved in memory, no need to + * update ucode on this AP. + */ + if (mc_saved_data_p->mc_saved_count == 0) + return; + + collect_cpu_info_early(&uci); + ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, + initrd_start_addr, &uci); + + if (ret != UCODE_OK) + return; + + apply_microcode_early(&uci, true); +} + +void reload_ucode_intel(void) +{ + struct ucode_cpu_info uci; + enum ucode_state ret; + + if (!mc_saved_data.mc_saved_count) + return; + + collect_cpu_info_early(&uci); + + ret = load_microcode_early(mc_saved_data.mc_saved, + mc_saved_data.mc_saved_count, &uci); + if (ret != UCODE_OK) + return; + + apply_microcode_early(&uci, false); +} static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) { @@ -264,7 +1041,7 @@ static struct microcode_ops microcode_intel_ops = { struct microcode_ops * __init init_intel_microcode(void) { - struct cpuinfo_x86 *c = &cpu_data(0); + struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || cpu_has(c, X86_FEATURE_IA64)) { diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c deleted file mode 100644 index 37ea89c11520..000000000000 --- a/arch/x86/kernel/cpu/microcode/intel_early.c +++ /dev/null @@ -1,808 +0,0 @@ -/* - * Intel CPU microcode early update for Linux - * - * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> - * H Peter Anvin" <hpa@zytor.com> - * - * This allows to early upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture - * Software Developer's Manual. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* - * This needs to be before all headers so that pr_debug in printk.h doesn't turn - * printk calls into no_printk(). - * - *#define DEBUG - */ - -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/earlycpio.h> -#include <linux/initrd.h> -#include <linux/cpu.h> -#include <asm/msr.h> -#include <asm/microcode_intel.h> -#include <asm/processor.h> -#include <asm/tlbflush.h> -#include <asm/setup.h> - -#undef pr_fmt -#define pr_fmt(fmt) "microcode: " fmt - -static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; -static struct mc_saved_data { - unsigned int mc_saved_count; - struct microcode_intel **mc_saved; -} mc_saved_data; - -static enum ucode_state -load_microcode_early(struct microcode_intel **saved, - unsigned int num_saved, struct ucode_cpu_info *uci) -{ - struct microcode_intel *ucode_ptr, *new_mc = NULL; - struct microcode_header_intel *mc_hdr; - int new_rev, ret, i; - - new_rev = uci->cpu_sig.rev; - - for (i = 0; i < num_saved; i++) { - ucode_ptr = saved[i]; - mc_hdr = (struct microcode_header_intel *)ucode_ptr; - - ret = has_newer_microcode(ucode_ptr, - uci->cpu_sig.sig, - uci->cpu_sig.pf, - new_rev); - if (!ret) - continue; - - new_rev = mc_hdr->rev; - new_mc = ucode_ptr; - } - - if (!new_mc) - return UCODE_NFOUND; - - uci->mc = (struct microcode_intel *)new_mc; - return UCODE_OK; -} - -static inline void -copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd, - unsigned long off, int num_saved) -{ - int i; - - for (i = 0; i < num_saved; i++) - mc_saved[i] = (struct microcode_intel *)(initrd[i] + off); -} - -#ifdef CONFIG_X86_32 -static void -microcode_phys(struct microcode_intel **mc_saved_tmp, - struct mc_saved_data *mc_saved_data) -{ - int i; - struct microcode_intel ***mc_saved; - - mc_saved = (struct microcode_intel ***) - __pa_nodebug(&mc_saved_data->mc_saved); - for (i = 0; i < mc_saved_data->mc_saved_count; i++) { - struct microcode_intel *p; - - p = *(struct microcode_intel **) - __pa_nodebug(mc_saved_data->mc_saved + i); - mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); - } -} -#endif - -static enum ucode_state -load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, - unsigned long initrd_start, struct ucode_cpu_info *uci) -{ - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int count = mc_saved_data->mc_saved_count; - - if (!mc_saved_data->mc_saved) { - copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count); - - return load_microcode_early(mc_saved_tmp, count, uci); - } else { -#ifdef CONFIG_X86_32 - microcode_phys(mc_saved_tmp, mc_saved_data); - return load_microcode_early(mc_saved_tmp, count, uci); -#else - return load_microcode_early(mc_saved_data->mc_saved, - count, uci); -#endif - } -} - -/* - * Given CPU signature and a microcode patch, this function finds if the - * microcode patch has matching family and model with the CPU. - */ -static enum ucode_state -matching_model_microcode(struct microcode_header_intel *mc_header, - unsigned long sig) -{ - unsigned int fam, model; - unsigned int fam_ucode, model_ucode; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - unsigned long data_size = get_datasize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - - fam = __x86_family(sig); - model = x86_model(sig); - - fam_ucode = __x86_family(mc_header->sig); - model_ucode = x86_model(mc_header->sig); - - if (fam == fam_ucode && model == model_ucode) - return UCODE_OK; - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - return UCODE_NFOUND; - - ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - ext_sigcount = ext_header->count; - - for (i = 0; i < ext_sigcount; i++) { - fam_ucode = __x86_family(ext_sig->sig); - model_ucode = x86_model(ext_sig->sig); - - if (fam == fam_ucode && model == model_ucode) - return UCODE_OK; - - ext_sig++; - } - return UCODE_NFOUND; -} - -static int -save_microcode(struct mc_saved_data *mc_saved_data, - struct microcode_intel **mc_saved_src, - unsigned int mc_saved_count) -{ - int i, j; - struct microcode_intel **saved_ptr; - int ret; - - if (!mc_saved_count) - return -EINVAL; - - /* - * Copy new microcode data. - */ - saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL); - if (!saved_ptr) - return -ENOMEM; - - for (i = 0; i < mc_saved_count; i++) { - struct microcode_header_intel *mc_hdr; - struct microcode_intel *mc; - unsigned long size; - - if (!mc_saved_src[i]) { - ret = -EINVAL; - goto err; - } - - mc = mc_saved_src[i]; - mc_hdr = &mc->hdr; - size = get_totalsize(mc_hdr); - - saved_ptr[i] = kmalloc(size, GFP_KERNEL); - if (!saved_ptr[i]) { - ret = -ENOMEM; - goto err; - } - - memcpy(saved_ptr[i], mc, size); - } - - /* - * Point to newly saved microcode. - */ - mc_saved_data->mc_saved = saved_ptr; - mc_saved_data->mc_saved_count = mc_saved_count; - - return 0; - -err: - for (j = 0; j <= i; j++) - kfree(saved_ptr[j]); - kfree(saved_ptr); - - return ret; -} - -/* - * A microcode patch in ucode_ptr is saved into mc_saved - * - if it has matching signature and newer revision compared to an existing - * patch mc_saved. - * - or if it is a newly discovered microcode patch. - * - * The microcode patch should have matching model with CPU. - * - * Returns: The updated number @num_saved of saved microcode patches. - */ -static unsigned int _save_mc(struct microcode_intel **mc_saved, - u8 *ucode_ptr, unsigned int num_saved) -{ - struct microcode_header_intel *mc_hdr, *mc_saved_hdr; - unsigned int sig, pf; - int found = 0, i; - - mc_hdr = (struct microcode_header_intel *)ucode_ptr; - - for (i = 0; i < num_saved; i++) { - mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i]; - sig = mc_saved_hdr->sig; - pf = mc_saved_hdr->pf; - - if (!find_matching_signature(ucode_ptr, sig, pf)) - continue; - - found = 1; - - if (mc_hdr->rev <= mc_saved_hdr->rev) - continue; - - /* - * Found an older ucode saved earlier. Replace it with - * this newer one. - */ - mc_saved[i] = (struct microcode_intel *)ucode_ptr; - break; - } - - /* Newly detected microcode, save it to memory. */ - if (i >= num_saved && !found) - mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr; - - return num_saved; -} - -/* - * Get microcode matching with BSP's model. Only CPUs with the same model as - * BSP can stay in the platform. - */ -static enum ucode_state __init -get_matching_model_microcode(int cpu, unsigned long start, - void *data, size_t size, - struct mc_saved_data *mc_saved_data, - unsigned long *mc_saved_in_initrd, - struct ucode_cpu_info *uci) -{ - u8 *ucode_ptr = data; - unsigned int leftover = size; - enum ucode_state state = UCODE_OK; - unsigned int mc_size; - struct microcode_header_intel *mc_header; - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int mc_saved_count = mc_saved_data->mc_saved_count; - int i; - - while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { - - if (leftover < sizeof(mc_header)) - break; - - mc_header = (struct microcode_header_intel *)ucode_ptr; - - mc_size = get_totalsize(mc_header); - if (!mc_size || mc_size > leftover || - microcode_sanity_check(ucode_ptr, 0) < 0) - break; - - leftover -= mc_size; - - /* - * Since APs with same family and model as the BSP may boot in - * the platform, we need to find and save microcode patches - * with the same family and model as the BSP. - */ - if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != - UCODE_OK) { - ucode_ptr += mc_size; - continue; - } - - mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count); - - ucode_ptr += mc_size; - } - - if (leftover) { - state = UCODE_ERROR; - goto out; - } - - if (mc_saved_count == 0) { - state = UCODE_NFOUND; - goto out; - } - - for (i = 0; i < mc_saved_count; i++) - mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; - - mc_saved_data->mc_saved_count = mc_saved_count; -out: - return state; -} - -static int collect_cpu_info_early(struct ucode_cpu_info *uci) -{ - unsigned int val[2]; - unsigned int family, model; - struct cpu_signature csig; - unsigned int eax, ebx, ecx, edx; - - csig.sig = 0; - csig.pf = 0; - csig.rev = 0; - - memset(uci, 0, sizeof(*uci)); - - eax = 0x00000001; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - csig.sig = eax; - - family = __x86_family(csig.sig); - model = x86_model(csig.sig); - - if ((model >= 5) || (family > 6)) { - /* get processor flags from MSR 0x17 */ - native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - csig.pf = 1 << ((val[1] >> 18) & 7); - } - native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - csig.rev = val[1]; - - uci->cpu_sig = csig; - uci->valid = 1; - - return 0; -} - -#ifdef DEBUG -static void show_saved_mc(void) -{ - int i, j; - unsigned int sig, pf, rev, total_size, data_size, date; - struct ucode_cpu_info uci; - - if (mc_saved_data.mc_saved_count == 0) { - pr_debug("no microcode data saved.\n"); - return; - } - pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); - - collect_cpu_info_early(&uci); - - sig = uci.cpu_sig.sig; - pf = uci.cpu_sig.pf; - rev = uci.cpu_sig.rev; - pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev); - - for (i = 0; i < mc_saved_data.mc_saved_count; i++) { - struct microcode_header_intel *mc_saved_header; - struct extended_sigtable *ext_header; - int ext_sigcount; - struct extended_signature *ext_sig; - - mc_saved_header = (struct microcode_header_intel *) - mc_saved_data.mc_saved[i]; - sig = mc_saved_header->sig; - pf = mc_saved_header->pf; - rev = mc_saved_header->rev; - total_size = get_totalsize(mc_saved_header); - data_size = get_datasize(mc_saved_header); - date = mc_saved_header->date; - - pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n", - i, sig, pf, rev, total_size, - date & 0xffff, - date >> 24, - (date >> 16) & 0xff); - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - continue; - - ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - - for (j = 0; j < ext_sigcount; j++) { - sig = ext_sig->sig; - pf = ext_sig->pf; - - pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n", - j, sig, pf); - - ext_sig++; - } - - } -} -#else -static inline void show_saved_mc(void) -{ -} -#endif - -#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) -static DEFINE_MUTEX(x86_cpu_microcode_mutex); -/* - * Save this mc into mc_saved_data. So it will be loaded early when a CPU is - * hot added or resumes. - * - * Please make sure this mc should be a valid microcode patch before calling - * this function. - */ -int save_mc_for_early(u8 *mc) -{ - struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; - unsigned int mc_saved_count_init; - unsigned int mc_saved_count; - struct microcode_intel **mc_saved; - int ret = 0; - int i; - - /* - * Hold hotplug lock so mc_saved_data is not accessed by a CPU in - * hotplug. - */ - mutex_lock(&x86_cpu_microcode_mutex); - - mc_saved_count_init = mc_saved_data.mc_saved_count; - mc_saved_count = mc_saved_data.mc_saved_count; - mc_saved = mc_saved_data.mc_saved; - - if (mc_saved && mc_saved_count) - memcpy(mc_saved_tmp, mc_saved, - mc_saved_count * sizeof(struct microcode_intel *)); - /* - * Save the microcode patch mc in mc_save_tmp structure if it's a newer - * version. - */ - mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count); - - /* - * Save the mc_save_tmp in global mc_saved_data. - */ - ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); - if (ret) { - pr_err("Cannot save microcode patch.\n"); - goto out; - } - - show_saved_mc(); - - /* - * Free old saved microcode data. - */ - if (mc_saved) { - for (i = 0; i < mc_saved_count_init; i++) - kfree(mc_saved[i]); - kfree(mc_saved); - } - -out: - mutex_unlock(&x86_cpu_microcode_mutex); - - return ret; -} -EXPORT_SYMBOL_GPL(save_mc_for_early); -#endif - -static bool __init load_builtin_intel_microcode(struct cpio_data *cp) -{ -#ifdef CONFIG_X86_64 - unsigned int eax = 0x00000001, ebx, ecx = 0, edx; - unsigned int family, model, stepping; - char name[30]; - - native_cpuid(&eax, &ebx, &ecx, &edx); - - family = __x86_family(eax); - model = x86_model(eax); - stepping = eax & 0xf; - - sprintf(name, "intel-ucode/%02x-%02x-%02x", family, model, stepping); - - return get_builtin_firmware(cp, name); -#else - return false; -#endif -} - -static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; -static __init enum ucode_state -scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd, - unsigned long start, unsigned long size, - struct ucode_cpu_info *uci) -{ - struct cpio_data cd; - long offset = 0; -#ifdef CONFIG_X86_32 - char *p = (char *)__pa_nodebug(ucode_name); -#else - char *p = ucode_name; -#endif - - cd.data = NULL; - cd.size = 0; - - cd = find_cpio_data(p, (void *)start, size, &offset); - if (!cd.data) { - if (!load_builtin_intel_microcode(&cd)) - return UCODE_ERROR; - } - - return get_matching_model_microcode(0, start, cd.data, cd.size, - mc_saved_data, initrd, uci); -} - -/* - * Print ucode update info. - */ -static void -print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) -{ - int cpu = smp_processor_id(); - - pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", - cpu, - uci->cpu_sig.rev, - date & 0xffff, - date >> 24, - (date >> 16) & 0xff); -} - -#ifdef CONFIG_X86_32 - -static int delay_ucode_info; -static int current_mc_date; - -/* - * Print early updated ucode info after printk works. This is delayed info dump. - */ -void show_ucode_info_early(void) -{ - struct ucode_cpu_info uci; - - if (delay_ucode_info) { - collect_cpu_info_early(&uci); - print_ucode_info(&uci, current_mc_date); - delay_ucode_info = 0; - } -} - -/* - * At this point, we can not call printk() yet. Keep microcode patch number in - * mc_saved_data.mc_saved and delay printing microcode info in - * show_ucode_info_early() until printk() works. - */ -static void print_ucode(struct ucode_cpu_info *uci) -{ - struct microcode_intel *mc_intel; - int *delay_ucode_info_p; - int *current_mc_date_p; - - mc_intel = uci->mc; - if (mc_intel == NULL) - return; - - delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); - current_mc_date_p = (int *)__pa_nodebug(¤t_mc_date); - - *delay_ucode_info_p = 1; - *current_mc_date_p = mc_intel->hdr.date; -} -#else - -/* - * Flush global tlb. We only do this in x86_64 where paging has been enabled - * already and PGE should be enabled as well. - */ -static inline void flush_tlb_early(void) -{ - __native_flush_tlb_global_irq_disabled(); -} - -static inline void print_ucode(struct ucode_cpu_info *uci) -{ - struct microcode_intel *mc_intel; - - mc_intel = uci->mc; - if (mc_intel == NULL) - return; - - print_ucode_info(uci, mc_intel->hdr.date); -} -#endif - -static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) -{ - struct microcode_intel *mc_intel; - unsigned int val[2]; - - mc_intel = uci->mc; - if (mc_intel == NULL) - return 0; - - /* write microcode via MSR 0x79 */ - native_wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) mc_intel->bits, - (unsigned long) mc_intel->bits >> 16 >> 16); - native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (val[1] != mc_intel->hdr.rev) - return -1; - -#ifdef CONFIG_X86_64 - /* Flush global tlb. This is precaution. */ - flush_tlb_early(); -#endif - uci->cpu_sig.rev = val[1]; - - if (early) - print_ucode(uci); - else - print_ucode_info(uci, mc_intel->hdr.date); - - return 0; -} - -/* - * This function converts microcode patch offsets previously stored in - * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. - */ -int __init save_microcode_in_initrd_intel(void) -{ - unsigned int count = mc_saved_data.mc_saved_count; - struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; - int ret = 0; - - if (count == 0) - return ret; - - copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count); - ret = save_microcode(&mc_saved_data, mc_saved, count); - if (ret) - pr_err("Cannot save microcode patches from initrd.\n"); - - show_saved_mc(); - - return ret; -} - -static void __init -_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, - unsigned long *initrd, - unsigned long start, unsigned long size) -{ - struct ucode_cpu_info uci; - enum ucode_state ret; - - collect_cpu_info_early(&uci); - - ret = scan_microcode(mc_saved_data, initrd, start, size, &uci); - if (ret != UCODE_OK) - return; - - ret = load_microcode(mc_saved_data, initrd, start, &uci); - if (ret != UCODE_OK) - return; - - apply_microcode_early(&uci, true); -} - -void __init load_ucode_intel_bsp(void) -{ - u64 start, size; -#ifdef CONFIG_X86_32 - struct boot_params *p; - - p = (struct boot_params *)__pa_nodebug(&boot_params); - start = p->hdr.ramdisk_image; - size = p->hdr.ramdisk_size; - - _load_ucode_intel_bsp( - (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), - (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), - start, size); -#else - start = boot_params.hdr.ramdisk_image + PAGE_OFFSET; - size = boot_params.hdr.ramdisk_size; - - _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size); -#endif -} - -void load_ucode_intel_ap(void) -{ - struct mc_saved_data *mc_saved_data_p; - struct ucode_cpu_info uci; - unsigned long *mc_saved_in_initrd_p; - unsigned long initrd_start_addr; - enum ucode_state ret; -#ifdef CONFIG_X86_32 - unsigned long *initrd_start_p; - - mc_saved_in_initrd_p = - (unsigned long *)__pa_nodebug(mc_saved_in_initrd); - mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); - initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); - initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); -#else - mc_saved_data_p = &mc_saved_data; - mc_saved_in_initrd_p = mc_saved_in_initrd; - initrd_start_addr = initrd_start; -#endif - - /* - * If there is no valid ucode previously saved in memory, no need to - * update ucode on this AP. - */ - if (mc_saved_data_p->mc_saved_count == 0) - return; - - collect_cpu_info_early(&uci); - ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, - initrd_start_addr, &uci); - - if (ret != UCODE_OK) - return; - - apply_microcode_early(&uci, true); -} - -void reload_ucode_intel(void) -{ - struct ucode_cpu_info uci; - enum ucode_state ret; - - if (!mc_saved_data.mc_saved_count) - return; - - collect_cpu_info_early(&uci); - - ret = load_microcode_early(mc_saved_data.mc_saved, - mc_saved_data.mc_saved_count, &uci); - if (ret != UCODE_OK) - return; - - apply_microcode_early(&uci, false); -} diff --git a/arch/x86/kernel/cpu/microcode/intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c index 1883d252ff7d..b96896bcbdaf 100644 --- a/arch/x86/kernel/cpu/microcode/intel_lib.c +++ b/arch/x86/kernel/cpu/microcode/intel_lib.c @@ -25,7 +25,6 @@ #include <linux/firmware.h> #include <linux/uaccess.h> #include <linux/kernel.h> -#include <linux/module.h> #include <asm/microcode_intel.h> #include <asm/processor.h> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 381c8b9b3a33..20e242ea1bc4 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -34,11 +34,10 @@ struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); -static void (*hv_kexec_handler)(void); -static void (*hv_crash_handler)(struct pt_regs *regs); - #if IS_ENABLED(CONFIG_HYPERV) static void (*vmbus_handler)(void); +static void (*hv_kexec_handler)(void); +static void (*hv_crash_handler)(struct pt_regs *regs); void hyperv_vector_handler(struct pt_regs *regs) { @@ -96,8 +95,8 @@ void hv_remove_crash_handler(void) hv_crash_handler = NULL; } EXPORT_SYMBOL_GPL(hv_remove_crash_handler); -#endif +#ifdef CONFIG_KEXEC_CORE static void hv_machine_shutdown(void) { if (kexec_in_progress && hv_kexec_handler) @@ -111,7 +110,8 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) hv_crash_handler(regs); native_machine_crash_shutdown(regs); } - +#endif /* CONFIG_KEXEC_CORE */ +#endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) { @@ -186,8 +186,10 @@ static void __init ms_hyperv_init_platform(void) no_timer_check = 1; #endif +#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE) machine_ops.shutdown = hv_machine_shutdown; machine_ops.crash_shutdown = hv_machine_crash_shutdown; +#endif mark_tsc_unstable("running on Hyper-V"); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 66dd3fe99b82..4562cf070c27 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1175,7 +1175,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) * skip the schedulability test here, it will be performed * at commit time (->commit_txn) as a whole. */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); @@ -1326,7 +1326,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) * XXX assumes any ->del() called during a TXN will only be on * an event added during that same TXN. */ - if (cpuc->group_flag & PERF_EVENT_TXN) + if (cpuc->txn_flags & PERF_PMU_TXN_ADD) return; /* @@ -1748,11 +1748,22 @@ static inline void x86_pmu_read(struct perf_event *event) * Start group events scheduling transaction * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time + * + * We only support PERF_PMU_TXN_ADD transactions. Save the + * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD + * transactions. */ -static void x86_pmu_start_txn(struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON_ONCE(cpuc->txn_flags); /* txn already in flight */ + + cpuc->txn_flags = txn_flags; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); - __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN); __this_cpu_write(cpu_hw_events.n_txn, 0); } @@ -1763,7 +1774,16 @@ static void x86_pmu_start_txn(struct pmu *pmu) */ static void x86_pmu_cancel_txn(struct pmu *pmu) { - __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); + unsigned int txn_flags; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + txn_flags = cpuc->txn_flags; + cpuc->txn_flags = 0; + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + /* * Truncate collected array by the number of events added in this * transaction. See x86_pmu_add() and x86_pmu_*_txn(). @@ -1786,6 +1806,13 @@ static int x86_pmu_commit_txn(struct pmu *pmu) int assign[X86_PMC_IDX_MAX]; int n, ret; + WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */ + + if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) { + cpuc->txn_flags = 0; + return 0; + } + n = cpuc->n_events; if (!x86_pmu_initialized()) @@ -1801,7 +1828,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu) */ memcpy(cpuc->assign, assign, n*sizeof(int)); - cpuc->group_flag &= ~PERF_EVENT_TXN; + cpuc->txn_flags = 0; perf_pmu_enable(pmu); return 0; } diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 5edf6d868fc1..499f533dd3cc 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -47,6 +47,7 @@ enum extra_reg_type { EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ EXTRA_REG_LBR = 2, /* lbr_select */ EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ + EXTRA_REG_FE = 4, /* fe_* */ EXTRA_REG_MAX /* number of entries needed */ }; @@ -195,7 +196,7 @@ struct cpu_hw_events { int n_excl; /* the number of exclusive events */ - unsigned int group_flag; + unsigned int txn_flags; int is_fake; /* diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3fefebfbdf4b..f63360be2238 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -205,6 +205,11 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + /* + * Note the low 8 bits eventsel code is not a continuous field, containing + * some #GPing bits. These are masked out. + */ + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), EVENT_EXTRA_END }; @@ -250,7 +255,7 @@ struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ - INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ + INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ EVENT_CONSTRAINT_END }; @@ -2891,6 +2896,8 @@ PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); +PMU_FORMAT_ATTR(frontend, "config1:0-23"); + static struct attribute *intel_arch3_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -2907,6 +2914,11 @@ static struct attribute *intel_arch3_formats_attr[] = { NULL, }; +static struct attribute *skl_format_attr[] = { + &format_attr_frontend.attr, + NULL, +}; + static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, @@ -3516,7 +3528,8 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, + skl_format_attr); WARN_ON(!x86_pmu.format_attrs); x86_pmu.cpu_events = hsw_events_attrs; pr_cont("Skylake events, "); diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index d1c0f254afbe..2cad71d1b14c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -495,6 +495,19 @@ static int bts_event_init(struct perf_event *event) if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; + /* + * BTS leaks kernel addresses even when CPL0 tracing is + * disabled, so disallow intel_bts driver for unprivileged + * users on paranoid systems since it provides trace data + * to the user in a zero-copy fashion. + * + * Note that the default paranoia setting permits unprivileged + * users to profile the kernel. + */ + if (event->attr.exclude_kernel && perf_paranoid_kernel() && + !capable(CAP_SYS_ADMIN)) + return -EACCES; + ret = x86_reserve_hardware(); if (ret) { x86_del_exclusive(x86_lbr_exclusive_bts); diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/kernel/cpu/perf_event_intel_cstate.c new file mode 100644 index 000000000000..75a38b5a2e26 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_cstate.c @@ -0,0 +1,694 @@ +/* + * perf_event_intel_cstate.c: support cstate residency counters + * + * Copyright (C) 2015, Intel Corp. + * Author: Kan Liang (kan.liang@intel.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + */ + +/* + * This file export cstate related free running (read-only) counters + * for perf. These counters may be use simultaneously by other tools, + * such as turbostat. However, it still make sense to implement them + * in perf. Because we can conveniently collect them together with + * other events, and allow to use them from tools without special MSR + * access code. + * + * The events only support system-wide mode counting. There is no + * sampling support because it is not supported by the hardware. + * + * According to counters' scope and category, two PMUs are registered + * with the perf_event core subsystem. + * - 'cstate_core': The counter is available for each physical core. + * The counters include CORE_C*_RESIDENCY. + * - 'cstate_pkg': The counter is available for each physical package. + * The counters include PKG_C*_RESIDENCY. + * + * All of these counters are specified in the Intel® 64 and IA-32 + * Architectures Software Developer.s Manual Vol3b. + * + * Model specific counters: + * MSR_CORE_C1_RES: CORE C1 Residency Counter + * perf code: 0x00 + * Available model: SLM,AMT + * Scope: Core (each processor core has a MSR) + * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter + * perf code: 0x01 + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Scope: Core + * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Scope: Core + * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter + * perf code: 0x03 + * Available model: SNB,IVB,HSW,BDW,SKL + * Scope: Core + * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. + * perf code: 0x00 + * Available model: SNB,IVB,HSW,BDW,SKL + * Scope: Package (physical package) + * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. + * perf code: 0x01 + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Scope: Package (physical package) + * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. + * perf code: 0x02 + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Scope: Package (physical package) + * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. + * perf code: 0x03 + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Scope: Package (physical package) + * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. + * perf code: 0x04 + * Available model: HSW ULT only + * Scope: Package (physical package) + * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. + * perf code: 0x05 + * Available model: HSW ULT only + * Scope: Package (physical package) + * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. + * perf code: 0x06 + * Available model: HSW ULT only + * Scope: Package (physical package) + * + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/perf_event.h> +#include <asm/cpu_device_id.h> +#include "perf_event.h" + +#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct kobj_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __cstate_##_var##_show, NULL) + +static ssize_t cstate_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf); + +struct perf_cstate_msr { + u64 msr; + struct perf_pmu_events_attr *attr; + bool (*test)(int idx); +}; + + +/* cstate_core PMU */ + +static struct pmu cstate_core_pmu; +static bool has_cstate_core; + +enum perf_cstate_core_id { + /* + * cstate_core events + */ + PERF_CSTATE_CORE_C1_RES = 0, + PERF_CSTATE_CORE_C3_RES, + PERF_CSTATE_CORE_C6_RES, + PERF_CSTATE_CORE_C7_RES, + + PERF_CSTATE_CORE_EVENT_MAX, +}; + +bool test_core(int idx) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + boot_cpu_data.x86 != 6) + return false; + + switch (boot_cpu_data.x86_model) { + case 30: /* 45nm Nehalem */ + case 26: /* 45nm Nehalem-EP */ + case 46: /* 45nm Nehalem-EX */ + + case 37: /* 32nm Westmere */ + case 44: /* 32nm Westmere-EP */ + case 47: /* 32nm Westmere-EX */ + if (idx == PERF_CSTATE_CORE_C3_RES || + idx == PERF_CSTATE_CORE_C6_RES) + return true; + break; + case 42: /* 32nm SandyBridge */ + case 45: /* 32nm SandyBridge-E/EN/EP */ + + case 58: /* 22nm IvyBridge */ + case 62: /* 22nm IvyBridge-EP/EX */ + + case 60: /* 22nm Haswell Core */ + case 63: /* 22nm Haswell Server */ + case 69: /* 22nm Haswell ULT */ + case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + + case 61: /* 14nm Broadwell Core-M */ + case 86: /* 14nm Broadwell Xeon D */ + case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ + case 79: /* 14nm Broadwell Server */ + + case 78: /* 14nm Skylake Mobile */ + case 94: /* 14nm Skylake Desktop */ + if (idx == PERF_CSTATE_CORE_C3_RES || + idx == PERF_CSTATE_CORE_C6_RES || + idx == PERF_CSTATE_CORE_C7_RES) + return true; + break; + case 55: /* 22nm Atom "Silvermont" */ + case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case 76: /* 14nm Atom "Airmont" */ + if (idx == PERF_CSTATE_CORE_C1_RES || + idx == PERF_CSTATE_CORE_C6_RES) + return true; + break; + } + + return false; +} + +PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03"); + +static struct perf_cstate_msr core_msr[] = { + [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, }, + [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, }, + [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, }, + [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, }, +}; + +static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = { + NULL, +}; + +static struct attribute_group core_events_attr_group = { + .name = "events", + .attrs = core_events_attrs, +}; + +DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63"); +static struct attribute *core_format_attrs[] = { + &format_attr_core_event.attr, + NULL, +}; + +static struct attribute_group core_format_attr_group = { + .name = "format", + .attrs = core_format_attrs, +}; + +static cpumask_t cstate_core_cpu_mask; +static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL); + +static struct attribute *cstate_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group cpumask_attr_group = { + .attrs = cstate_cpumask_attrs, +}; + +static const struct attribute_group *core_attr_groups[] = { + &core_events_attr_group, + &core_format_attr_group, + &cpumask_attr_group, + NULL, +}; + +/* cstate_core PMU end */ + + +/* cstate_pkg PMU */ + +static struct pmu cstate_pkg_pmu; +static bool has_cstate_pkg; + +enum perf_cstate_pkg_id { + /* + * cstate_pkg events + */ + PERF_CSTATE_PKG_C2_RES = 0, + PERF_CSTATE_PKG_C3_RES, + PERF_CSTATE_PKG_C6_RES, + PERF_CSTATE_PKG_C7_RES, + PERF_CSTATE_PKG_C8_RES, + PERF_CSTATE_PKG_C9_RES, + PERF_CSTATE_PKG_C10_RES, + + PERF_CSTATE_PKG_EVENT_MAX, +}; + +bool test_pkg(int idx) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + boot_cpu_data.x86 != 6) + return false; + + switch (boot_cpu_data.x86_model) { + case 30: /* 45nm Nehalem */ + case 26: /* 45nm Nehalem-EP */ + case 46: /* 45nm Nehalem-EX */ + + case 37: /* 32nm Westmere */ + case 44: /* 32nm Westmere-EP */ + case 47: /* 32nm Westmere-EX */ + if (idx == PERF_CSTATE_CORE_C3_RES || + idx == PERF_CSTATE_CORE_C6_RES || + idx == PERF_CSTATE_CORE_C7_RES) + return true; + break; + case 42: /* 32nm SandyBridge */ + case 45: /* 32nm SandyBridge-E/EN/EP */ + + case 58: /* 22nm IvyBridge */ + case 62: /* 22nm IvyBridge-EP/EX */ + + case 60: /* 22nm Haswell Core */ + case 63: /* 22nm Haswell Server */ + case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + + case 61: /* 14nm Broadwell Core-M */ + case 86: /* 14nm Broadwell Xeon D */ + case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ + case 79: /* 14nm Broadwell Server */ + + case 78: /* 14nm Skylake Mobile */ + case 94: /* 14nm Skylake Desktop */ + if (idx == PERF_CSTATE_PKG_C2_RES || + idx == PERF_CSTATE_PKG_C3_RES || + idx == PERF_CSTATE_PKG_C6_RES || + idx == PERF_CSTATE_PKG_C7_RES) + return true; + break; + case 55: /* 22nm Atom "Silvermont" */ + case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case 76: /* 14nm Atom "Airmont" */ + if (idx == PERF_CSTATE_CORE_C6_RES) + return true; + break; + case 69: /* 22nm Haswell ULT */ + if (idx == PERF_CSTATE_PKG_C2_RES || + idx == PERF_CSTATE_PKG_C3_RES || + idx == PERF_CSTATE_PKG_C6_RES || + idx == PERF_CSTATE_PKG_C7_RES || + idx == PERF_CSTATE_PKG_C8_RES || + idx == PERF_CSTATE_PKG_C9_RES || + idx == PERF_CSTATE_PKG_C10_RES) + return true; + break; + } + + return false; +} + +PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03"); +PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04"); +PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05"); +PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06"); + +static struct perf_cstate_msr pkg_msr[] = { + [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, }, + [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, }, + [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, }, + [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, }, + [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, }, + [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, }, + [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, }, +}; + +static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = { + NULL, +}; + +static struct attribute_group pkg_events_attr_group = { + .name = "events", + .attrs = pkg_events_attrs, +}; + +DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63"); +static struct attribute *pkg_format_attrs[] = { + &format_attr_pkg_event.attr, + NULL, +}; +static struct attribute_group pkg_format_attr_group = { + .name = "format", + .attrs = pkg_format_attrs, +}; + +static cpumask_t cstate_pkg_cpu_mask; + +static const struct attribute_group *pkg_attr_groups[] = { + &pkg_events_attr_group, + &pkg_format_attr_group, + &cpumask_attr_group, + NULL, +}; + +/* cstate_pkg PMU end*/ + +static ssize_t cstate_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pmu *pmu = dev_get_drvdata(dev); + + if (pmu == &cstate_core_pmu) + return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask); + else if (pmu == &cstate_pkg_pmu) + return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask); + else + return 0; +} + +static int cstate_pmu_event_init(struct perf_event *event) +{ + u64 cfg = event->attr.config; + int ret = 0; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + event->attr.sample_period) /* no sampling */ + return -EINVAL; + + if (event->pmu == &cstate_core_pmu) { + if (cfg >= PERF_CSTATE_CORE_EVENT_MAX) + return -EINVAL; + if (!core_msr[cfg].attr) + return -EINVAL; + event->hw.event_base = core_msr[cfg].msr; + } else if (event->pmu == &cstate_pkg_pmu) { + if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) + return -EINVAL; + if (!pkg_msr[cfg].attr) + return -EINVAL; + event->hw.event_base = pkg_msr[cfg].msr; + } else + return -ENOENT; + + /* must be done before validate_group */ + event->hw.config = cfg; + event->hw.idx = -1; + + return ret; +} + +static inline u64 cstate_pmu_read_counter(struct perf_event *event) +{ + u64 val; + + rdmsrl(event->hw.event_base, val); + return val; +} + +static void cstate_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev_raw_count, new_raw_count; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = cstate_pmu_read_counter(event); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + local64_add(new_raw_count - prev_raw_count, &event->count); +} + +static void cstate_pmu_event_start(struct perf_event *event, int mode) +{ + local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event)); +} + +static void cstate_pmu_event_stop(struct perf_event *event, int mode) +{ + cstate_pmu_event_update(event); +} + +static void cstate_pmu_event_del(struct perf_event *event, int mode) +{ + cstate_pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int cstate_pmu_event_add(struct perf_event *event, int mode) +{ + if (mode & PERF_EF_START) + cstate_pmu_event_start(event, mode); + + return 0; +} + +static void cstate_cpu_exit(int cpu) +{ + int i, id, target; + + /* cpu exit for cstate core */ + if (has_cstate_core) { + id = topology_core_id(cpu); + target = -1; + + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (id == topology_core_id(i)) { + target = i; + break; + } + } + if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0) + cpumask_set_cpu(target, &cstate_core_cpu_mask); + WARN_ON(cpumask_empty(&cstate_core_cpu_mask)); + if (target >= 0) + perf_pmu_migrate_context(&cstate_core_pmu, cpu, target); + } + + /* cpu exit for cstate pkg */ + if (has_cstate_pkg) { + id = topology_physical_package_id(cpu); + target = -1; + + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (id == topology_physical_package_id(i)) { + target = i; + break; + } + } + if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0) + cpumask_set_cpu(target, &cstate_pkg_cpu_mask); + WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask)); + if (target >= 0) + perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); + } +} + +static void cstate_cpu_init(int cpu) +{ + int i, id; + + /* cpu init for cstate core */ + if (has_cstate_core) { + id = topology_core_id(cpu); + for_each_cpu(i, &cstate_core_cpu_mask) { + if (id == topology_core_id(i)) + break; + } + if (i >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cstate_core_cpu_mask); + } + + /* cpu init for cstate pkg */ + if (has_cstate_pkg) { + id = topology_physical_package_id(cpu); + for_each_cpu(i, &cstate_pkg_cpu_mask) { + if (id == topology_physical_package_id(i)) + break; + } + if (i >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); + } +} + +static int cstate_cpu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + break; + case CPU_STARTING: + cstate_cpu_init(cpu); + break; + case CPU_UP_CANCELED: + case CPU_DYING: + break; + case CPU_ONLINE: + case CPU_DEAD: + break; + case CPU_DOWN_PREPARE: + cstate_cpu_exit(cpu); + break; + default: + break; + } + + return NOTIFY_OK; +} + +/* + * Probe the cstate events and insert the available one into sysfs attrs + * Return false if there is no available events. + */ +static bool cstate_probe_msr(struct perf_cstate_msr *msr, + struct attribute **events_attrs, + int max_event_nr) +{ + int i, j = 0; + u64 val; + + /* Probe the cstate events. */ + for (i = 0; i < max_event_nr; i++) { + if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) + msr[i].attr = NULL; + } + + /* List remaining events in the sysfs attrs. */ + for (i = 0; i < max_event_nr; i++) { + if (msr[i].attr) + events_attrs[j++] = &msr[i].attr->attr.attr; + } + events_attrs[j] = NULL; + + return (j > 0) ? true : false; +} + +static int __init cstate_init(void) +{ + /* SLM has different MSR for PKG C6 */ + switch (boot_cpu_data.x86_model) { + case 55: + case 76: + case 77: + pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; + } + + if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX)) + has_cstate_core = true; + + if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX)) + has_cstate_pkg = true; + + return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; +} + +static void __init cstate_cpumask_init(void) +{ + int cpu; + + cpu_notifier_register_begin(); + + for_each_online_cpu(cpu) + cstate_cpu_init(cpu); + + __perf_cpu_notifier(cstate_cpu_notifier); + + cpu_notifier_register_done(); +} + +static struct pmu cstate_core_pmu = { + .attr_groups = core_attr_groups, + .name = "cstate_core", + .task_ctx_nr = perf_invalid_context, + .event_init = cstate_pmu_event_init, + .add = cstate_pmu_event_add, /* must have */ + .del = cstate_pmu_event_del, /* must have */ + .start = cstate_pmu_event_start, + .stop = cstate_pmu_event_stop, + .read = cstate_pmu_event_update, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, +}; + +static struct pmu cstate_pkg_pmu = { + .attr_groups = pkg_attr_groups, + .name = "cstate_pkg", + .task_ctx_nr = perf_invalid_context, + .event_init = cstate_pmu_event_init, + .add = cstate_pmu_event_add, /* must have */ + .del = cstate_pmu_event_del, /* must have */ + .start = cstate_pmu_event_start, + .stop = cstate_pmu_event_stop, + .read = cstate_pmu_event_update, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, +}; + +static void __init cstate_pmus_register(void) +{ + int err; + + if (has_cstate_core) { + err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); + if (WARN_ON(err)) + pr_info("Failed to register PMU %s error %d\n", + cstate_core_pmu.name, err); + } + + if (has_cstate_pkg) { + err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1); + if (WARN_ON(err)) + pr_info("Failed to register PMU %s error %d\n", + cstate_pkg_pmu.name, err); + } +} + +static int __init cstate_pmu_init(void) +{ + int err; + + if (cpu_has_hypervisor) + return -ENODEV; + + err = cstate_init(); + if (err) + return err; + + cstate_cpumask_init(); + + cstate_pmus_register(); + + return 0; +} + +device_initcall(cstate_pmu_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 84f236ab96b0..5db1c7755548 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -510,10 +510,11 @@ int intel_pmu_drain_bts_buffer(void) u64 flags; }; struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; - struct bts_record *at, *top; + struct bts_record *at, *base, *top; struct perf_output_handle handle; struct perf_event_header header; struct perf_sample_data data; + unsigned long skip = 0; struct pt_regs regs; if (!event) @@ -522,10 +523,10 @@ int intel_pmu_drain_bts_buffer(void) if (!x86_pmu.bts_active) return 0; - at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; - top = (struct bts_record *)(unsigned long)ds->bts_index; + base = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; - if (top <= at) + if (top <= base) return 0; memset(®s, 0, sizeof(regs)); @@ -535,16 +536,43 @@ int intel_pmu_drain_bts_buffer(void) perf_sample_data_init(&data, 0, event->hw.last_period); /* + * BTS leaks kernel addresses in branches across the cpl boundary, + * such as traps or system calls, so unless the user is asking for + * kernel tracing (and right now it's not possible), we'd need to + * filter them out. But first we need to count how many of those we + * have in the current batch. This is an extra O(n) pass, however, + * it's much faster than the other one especially considering that + * n <= 2560 (BTS_BUFFER_SIZE / BTS_RECORD_SIZE * 15/16; see the + * alloc_bts_buffer()). + */ + for (at = base; at < top; at++) { + /* + * Note that right now *this* BTS code only works if + * attr::exclude_kernel is set, but let's keep this extra + * check here in case that changes. + */ + if (event->attr.exclude_kernel && + (kernel_ip(at->from) || kernel_ip(at->to))) + skip++; + } + + /* * Prepare a generic sample, i.e. fill in the invariant fields. * We will overwrite the from and to address before we output * the sample. */ perf_prepare_sample(&header, &data, event, ®s); - if (perf_output_begin(&handle, event, header.size * (top - at))) + if (perf_output_begin(&handle, event, header.size * + (top - base - skip))) return 1; - for (; at < top; at++) { + for (at = base; at < top; at++) { + /* Filter out any records that contain kernel addresses. */ + if (event->attr.exclude_kernel && + (kernel_ip(at->from) || kernel_ip(at->to))) + continue; + data.ip = at->from; data.addr = at->to; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index b2c9475b7ff2..bfd0b717e944 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -151,10 +151,10 @@ static void __intel_pmu_lbr_enable(bool pmi) * No need to reprogram LBR_SELECT in a PMI, as it * did not change. */ - if (cpuc->lbr_sel && !pmi) { + if (cpuc->lbr_sel) lbr_select = cpuc->lbr_sel->config; + if (!pmi) wrmsrl(MSR_LBR_SELECT, lbr_select); - } rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); orig_debugctl = debugctl; @@ -555,6 +555,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP) mask |= X86_BR_IND_JMP; + if (br_type & PERF_SAMPLE_BRANCH_CALL) + mask |= X86_BR_CALL | X86_BR_ZERO_CALL; /* * stash actual user request into reg, it may * be used by fixup code for some CPU @@ -890,6 +892,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, }; static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { @@ -905,6 +908,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = { [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] = LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN | LBR_CALL_STACK, [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_CALL_SHIFT] = LBR_REL_CALL, }; /* core */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index 42169283448b..868e1194337f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -139,9 +139,6 @@ static int __init pt_pmu_hw_init(void) long i; attrs = NULL; - ret = -ENODEV; - if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT)) - goto fail; for (i = 0; i < PT_CPUID_LEAVES; i++) { cpuid_count(20, i, @@ -1130,6 +1127,10 @@ static __init int pt_init(void) int ret, cpu, prior_warn = 0; BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE); + + if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT)) + return -ENODEV; + get_online_cpus(); for_each_online_cpu(cpu) { u64 ctl; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 560e5255b15e..61215a69b03d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -7,7 +7,8 @@ struct intel_uncore_type **uncore_pci_uncores = empty_uncore; static bool pcidrv_registered; struct pci_driver *uncore_pci_driver; /* pci bus to socket mapping */ -int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, }; +DEFINE_RAW_SPINLOCK(pci2phy_map_lock); +struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; static DEFINE_RAW_SPINLOCK(uncore_box_lock); @@ -20,6 +21,59 @@ static struct event_constraint uncore_constraint_fixed = struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); +int uncore_pcibus_to_physid(struct pci_bus *bus) +{ + struct pci2phy_map *map; + int phys_id = -1; + + raw_spin_lock(&pci2phy_map_lock); + list_for_each_entry(map, &pci2phy_map_head, list) { + if (map->segment == pci_domain_nr(bus)) { + phys_id = map->pbus_to_physid[bus->number]; + break; + } + } + raw_spin_unlock(&pci2phy_map_lock); + + return phys_id; +} + +struct pci2phy_map *__find_pci2phy_map(int segment) +{ + struct pci2phy_map *map, *alloc = NULL; + int i; + + lockdep_assert_held(&pci2phy_map_lock); + +lookup: + list_for_each_entry(map, &pci2phy_map_head, list) { + if (map->segment == segment) + goto end; + } + + if (!alloc) { + raw_spin_unlock(&pci2phy_map_lock); + alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL); + raw_spin_lock(&pci2phy_map_lock); + + if (!alloc) + return NULL; + + goto lookup; + } + + map = alloc; + alloc = NULL; + map->segment = segment; + for (i = 0; i < 256; i++) + map->pbus_to_physid[i] = -1; + list_add_tail(&map->list, &pci2phy_map_head); + +end: + kfree(alloc); + return map; +} + ssize_t uncore_event_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -809,7 +863,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id int phys_id; bool first_box = false; - phys_id = uncore_pcibus_to_physid[pdev->bus->number]; + phys_id = uncore_pcibus_to_physid(pdev->bus); if (phys_id < 0) return -ENODEV; @@ -856,9 +910,10 @@ static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box = pci_get_drvdata(pdev); struct intel_uncore_pmu *pmu; - int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number]; + int i, cpu, phys_id; bool last_box = false; + phys_id = uncore_pcibus_to_physid(pdev->bus); box = pci_get_drvdata(pdev); if (!box) { for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 72c54c2e5b1a..2f0a4a98e16b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -117,6 +117,15 @@ struct uncore_event_desc { const char *config; }; +struct pci2phy_map { + struct list_head list; + int segment; + int pbus_to_physid[256]; +}; + +int uncore_pcibus_to_physid(struct pci_bus *bus); +struct pci2phy_map *__find_pci2phy_map(int segment); + ssize_t uncore_event_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf); @@ -317,7 +326,8 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); extern struct intel_uncore_type **uncore_msr_uncores; extern struct intel_uncore_type **uncore_pci_uncores; extern struct pci_driver *uncore_pci_driver; -extern int uncore_pcibus_to_physid[256]; +extern raw_spinlock_t pci2phy_map_lock; +extern struct list_head pci2phy_map_head; extern struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX]; extern struct event_constraint uncore_constraint_empty; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index f78574b3cb55..845256158a10 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -420,15 +420,25 @@ static void snb_uncore_imc_event_del(struct perf_event *event, int flags) static int snb_pci2phy_map_init(int devid) { struct pci_dev *dev = NULL; - int bus; + struct pci2phy_map *map; + int bus, segment; dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); if (!dev) return -ENOTTY; bus = dev->bus->number; - - uncore_pcibus_to_physid[bus] = 0; + segment = pci_domain_nr(dev->bus); + + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + pci_dev_put(dev); + return -ENOMEM; + } + map->pbus_to_physid[bus] = 0; + raw_spin_unlock(&pci2phy_map_lock); pci_dev_put(dev); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 694510a887dc..f0f4fcba252e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -1087,7 +1087,8 @@ static struct pci_driver snbep_uncore_pci_driver = { static int snbep_pci2phy_map_init(int devid) { struct pci_dev *ubox_dev = NULL; - int i, bus, nodeid; + int i, bus, nodeid, segment; + struct pci2phy_map *map; int err = 0; u32 config = 0; @@ -1106,16 +1107,27 @@ static int snbep_pci2phy_map_init(int devid) err = pci_read_config_dword(ubox_dev, 0x54, &config); if (err) break; + + segment = pci_domain_nr(ubox_dev->bus); + raw_spin_lock(&pci2phy_map_lock); + map = __find_pci2phy_map(segment); + if (!map) { + raw_spin_unlock(&pci2phy_map_lock); + err = -ENOMEM; + break; + } + /* * every three bits in the Node ID mapping register maps * to a particular node. */ for (i = 0; i < 8; i++) { if (nodeid == ((config >> (3 * i)) & 0x7)) { - uncore_pcibus_to_physid[bus] = i; + map->pbus_to_physid[bus] = i; break; } } + raw_spin_unlock(&pci2phy_map_lock); } if (!err) { @@ -1123,13 +1135,17 @@ static int snbep_pci2phy_map_init(int devid) * For PCI bus with no UBOX device, find the next bus * that has UBOX device and use its mapping. */ - i = -1; - for (bus = 255; bus >= 0; bus--) { - if (uncore_pcibus_to_physid[bus] >= 0) - i = uncore_pcibus_to_physid[bus]; - else - uncore_pcibus_to_physid[bus] = i; + raw_spin_lock(&pci2phy_map_lock); + list_for_each_entry(map, &pci2phy_map_head, list) { + i = -1; + for (bus = 255; bus >= 0; bus--) { + if (map->pbus_to_physid[bus] >= 0) + i = map->pbus_to_physid[bus]; + else + map->pbus_to_physid[bus] = i; + } } + raw_spin_unlock(&pci2phy_map_lock); } pci_dev_put(ubox_dev); @@ -2444,7 +2460,7 @@ static struct intel_uncore_type *bdx_pci_uncores[] = { NULL, }; -static DEFINE_PCI_DEVICE_TABLE(bdx_uncore_pci_ids) = { +static const struct pci_device_id bdx_uncore_pci_ids[] = { { /* Home Agent 0 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f30), .driver_data = UNCORE_PCI_DEV_DATA(BDX_PCI_UNCORE_HA, 0), diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c index 086b12eae794..f32ac13934f2 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ b/arch/x86/kernel/cpu/perf_event_msr.c @@ -10,12 +10,12 @@ enum perf_msr_id { PERF_MSR_EVENT_MAX, }; -bool test_aperfmperf(int idx) +static bool test_aperfmperf(int idx) { return boot_cpu_has(X86_FEATURE_APERFMPERF); } -bool test_intel(int idx) +static bool test_intel(int idx) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 3d423a101fae..608fb26c7254 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -37,7 +37,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, { X86_FEATURE_HWP, CR_EAX, 7, 0x00000006, 0 }, - { X86_FEATURE_HWP_NOITFY, CR_EAX, 8, 0x00000006, 0 }, + { X86_FEATURE_HWP_NOTIFY, CR_EAX, 8, 0x00000006, 0 }, { X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 }, { X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 }, { X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 }, diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index e068d6683dba..2c1910f6717e 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -75,8 +75,6 @@ struct crash_memmap_data { unsigned int type; }; -int in_crash_kexec; - /* * This is used to VMCLEAR all VMCSs loaded on the * processor. And when loading kvm_intel module, the @@ -132,7 +130,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) static void kdump_nmi_shootdown_cpus(void) { - in_crash_kexec = 1; nmi_shootdown_cpus(kdump_nmi_callback); disable_local_APIC(); @@ -185,10 +182,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(unsigned long start_pfn, - unsigned long nr_pfn, void *arg) +static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) { - int *nr_ranges = arg; + unsigned int *nr_ranges = arg; (*nr_ranges)++; return 0; @@ -214,7 +210,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced, ced->image = image; - walk_system_ram_range(0, -1, &nr_ranges, + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); ced->max_nr_ranges = nr_ranges; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a102564d08eb..569c1e4f96fe 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -911,7 +911,7 @@ void __init finish_e820_parsing(void) } } -static inline const char *e820_type_to_string(int e820_type) +static const char *e820_type_to_string(int e820_type) { switch (e820_type) { case E820_RESERVED_KERN: diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 9f9cc682e561..db9a675e751b 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -584,7 +584,7 @@ static void __init intel_graphics_stolen(int num, int slot, int func) static void __init force_disable_hpet(int num, int slot, int func) { #ifdef CONFIG_HPET_TIMER - boot_hpet_disable = 1; + boot_hpet_disable = true; pr_info("x86/hpet: Will disable the HPET for this platform because it's not reliable\n"); #endif } diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index eec40f595ab9..21bf92490a7b 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -195,14 +195,14 @@ static __init void early_serial_init(char *s) #ifdef CONFIG_PCI static void mem32_serial_out(unsigned long addr, int offset, int value) { - u32 *vaddr = (u32 *)addr; + u32 __iomem *vaddr = (u32 __iomem *)addr; /* shift implied by pointer type */ writel(value, vaddr + offset); } static unsigned int mem32_serial_in(unsigned long addr, int offset) { - u32 *vaddr = (u32 *)addr; + u32 __iomem *vaddr = (u32 __iomem *)addr; /* shift implied by pointer type */ return readl(vaddr + offset); } @@ -316,7 +316,7 @@ static struct console early_serial_console = { .index = -1, }; -static inline void early_console_register(struct console *con, int keep_early) +static void early_console_register(struct console *con, int keep_early) { if (con->index != -1) { printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n", diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index d14e9ac3235a..be39b5fde4b9 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -290,11 +290,11 @@ static void __init fpu__init_system_ctx_switch(void) if (cpu_has_xsaveopt && eagerfpu != DISABLE) eagerfpu = ENABLE; - if (xfeatures_mask & XSTATE_EAGER) { + if (xfeatures_mask & XFEATURE_MASK_EAGER) { if (eagerfpu == DISABLE) { pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", - xfeatures_mask & XSTATE_EAGER); - xfeatures_mask &= ~XSTATE_EAGER; + xfeatures_mask & XFEATURE_MASK_EAGER); + xfeatures_mask &= ~XFEATURE_MASK_EAGER; } else { eagerfpu = ENABLE; } @@ -354,17 +354,7 @@ static int __init x86_noxsave_setup(char *s) if (strlen(s)) return 0; - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - setup_clear_cpu_cap(X86_FEATURE_XSAVEC); - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - setup_clear_cpu_cap(X86_FEATURE_AVX512F); - setup_clear_cpu_cap(X86_FEATURE_AVX512PF); - setup_clear_cpu_cap(X86_FEATURE_AVX512ER); - setup_clear_cpu_cap(X86_FEATURE_AVX512CD); - setup_clear_cpu_cap(X86_FEATURE_MPX); + fpu__xstate_clear_all_cpu_caps(); return 1; } diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index dc60810c1c74..0bc3490420c5 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -66,7 +66,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, * presence of FP and SSE state. */ if (cpu_has_xsave) - fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE; + fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; return ret; } @@ -326,7 +326,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, * presence of FP. */ if (cpu_has_xsave) - fpu->state.xsave.header.xfeatures |= XSTATE_FP; + fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP; return ret; } diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 50ec9af1bd51..ef29b742cea7 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -56,7 +56,7 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) if (use_fxsr()) { struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; struct user_i387_ia32_struct env; - struct _fpstate_ia32 __user *fp = buf; + struct _fpstate_32 __user *fp = buf; convert_from_fxsr(&env, tsk); @@ -107,7 +107,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) * header as well as change any contents in the memory layout. * xrestore as part of sigreturn will capture all the changes. */ - xfeatures |= XSTATE_FPSSE; + xfeatures |= XFEATURE_MASK_FPSSE; err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); @@ -165,7 +165,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(current, NULL, 0, sizeof(struct user_i387_ia32_struct), NULL, - (struct _fpstate_ia32 __user *) buf) ? -1 : 1; + (struct _fpstate_32 __user *) buf) ? -1 : 1; if (fpregs_active()) { /* Save the live register state to the user directly. */ @@ -207,7 +207,7 @@ sanitize_restored_xstate(struct task_struct *tsk, * layout and not enabled by the OS. */ if (fx_only) - header->xfeatures = XSTATE_FPSSE; + header->xfeatures = XFEATURE_MASK_FPSSE; else header->xfeatures &= (xfeatures_mask & xfeatures); } @@ -230,7 +230,7 @@ static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_ { if (use_xsave()) { if ((unsigned long)buf % 64 || fx_only) { - u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; + u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); return copy_user_to_fxregs(buf); } else { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 62fc001c7846..6454f2731b56 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -31,12 +31,28 @@ static const char *xfeature_names[] = */ u64 xfeatures_mask __read_mostly; -static unsigned int xstate_offsets[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1}; -static unsigned int xstate_sizes[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1}; +static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; +static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; -/* The number of supported xfeatures in xfeatures_mask: */ -static unsigned int xfeatures_nr; +/* + * Clear all of the X86_FEATURE_* bits that are unavailable + * when the CPU has no XSAVE support. + */ +void fpu__xstate_clear_all_cpu_caps(void) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + setup_clear_cpu_cap(X86_FEATURE_XSAVEC); + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + setup_clear_cpu_cap(X86_FEATURE_AVX); + setup_clear_cpu_cap(X86_FEATURE_AVX2); + setup_clear_cpu_cap(X86_FEATURE_AVX512F); + setup_clear_cpu_cap(X86_FEATURE_AVX512PF); + setup_clear_cpu_cap(X86_FEATURE_AVX512ER); + setup_clear_cpu_cap(X86_FEATURE_AVX512CD); + setup_clear_cpu_cap(X86_FEATURE_MPX); +} /* * Return whether the system supports a given xfeature. @@ -53,7 +69,7 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) /* * So we use FLS here to be able to print the most advanced * feature that was requested but is missing. So if a driver - * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the + * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the * missing AVX feature - this is the most informative message * to users: */ @@ -112,7 +128,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) /* * FP is in init state */ - if (!(xfeatures & XSTATE_FP)) { + if (!(xfeatures & XFEATURE_MASK_FP)) { fx->cwd = 0x37f; fx->swd = 0; fx->twd = 0; @@ -125,7 +141,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) /* * SSE is in init state */ - if (!(xfeatures & XSTATE_SSE)) + if (!(xfeatures & XFEATURE_MASK_SSE)) memset(&fx->xmm_space[0], 0, 256); /* @@ -169,25 +185,43 @@ void fpu__init_cpu_xstate(void) } /* + * Note that in the future we will likely need a pair of + * functions here: one for user xstates and the other for + * system xstates. For now, they are the same. + */ +static int xfeature_enabled(enum xfeature xfeature) +{ + return !!(xfeatures_mask & (1UL << xfeature)); +} + +/* * Record the offsets and sizes of various xstates contained * in the XSAVE state memory layout. - * - * ( Note that certain features might be non-present, for them - * we'll have 0 offset and 0 size. ) */ static void __init setup_xstate_features(void) { - u32 eax, ebx, ecx, edx, leaf; - - xfeatures_nr = fls64(xfeatures_mask); - - for (leaf = 2; leaf < xfeatures_nr; leaf++) { - cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); - - xstate_offsets[leaf] = ebx; - xstate_sizes[leaf] = eax; + u32 eax, ebx, ecx, edx, i; + /* start at the beginnning of the "extended state" */ + unsigned int last_good_offset = offsetof(struct xregs_state, + extended_state_area); + + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { + if (!xfeature_enabled(i)) + continue; + + cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); + xstate_offsets[i] = ebx; + xstate_sizes[i] = eax; + /* + * In our xstate size checks, we assume that the + * highest-numbered xstate feature has the + * highest offset in the buffer. Ensure it does. + */ + WARN_ONCE(last_good_offset > xstate_offsets[i], + "x86/fpu: misordered xstate at %d\n", last_good_offset); + last_good_offset = xstate_offsets[i]; - printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax); + printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax); } } @@ -204,14 +238,14 @@ static void __init print_xstate_feature(u64 xstate_mask) */ static void __init print_xstate_features(void) { - print_xstate_feature(XSTATE_FP); - print_xstate_feature(XSTATE_SSE); - print_xstate_feature(XSTATE_YMM); - print_xstate_feature(XSTATE_BNDREGS); - print_xstate_feature(XSTATE_BNDCSR); - print_xstate_feature(XSTATE_OPMASK); - print_xstate_feature(XSTATE_ZMM_Hi256); - print_xstate_feature(XSTATE_Hi16_ZMM); + print_xstate_feature(XFEATURE_MASK_FP); + print_xstate_feature(XFEATURE_MASK_SSE); + print_xstate_feature(XFEATURE_MASK_YMM); + print_xstate_feature(XFEATURE_MASK_BNDREGS); + print_xstate_feature(XFEATURE_MASK_BNDCSR); + print_xstate_feature(XFEATURE_MASK_OPMASK); + print_xstate_feature(XFEATURE_MASK_ZMM_Hi256); + print_xstate_feature(XFEATURE_MASK_Hi16_ZMM); } /* @@ -233,8 +267,8 @@ static void __init setup_xstate_comp(void) xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); if (!cpu_has_xsaves) { - for (i = 2; i < xfeatures_nr; i++) { - if (test_bit(i, (unsigned long *)&xfeatures_mask)) { + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { + if (xfeature_enabled(i)) { xstate_comp_offsets[i] = xstate_offsets[i]; xstate_comp_sizes[i] = xstate_sizes[i]; } @@ -242,15 +276,16 @@ static void __init setup_xstate_comp(void) return; } - xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; + xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] = + FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = 2; i < xfeatures_nr; i++) { - if (test_bit(i, (unsigned long *)&xfeatures_mask)) + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { + if (xfeature_enabled(i)) xstate_comp_sizes[i] = xstate_sizes[i]; else xstate_comp_sizes[i] = 0; - if (i > 2) + if (i > FIRST_EXTENDED_XFEATURE) xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + xstate_comp_sizes[i-1]; @@ -290,27 +325,280 @@ static void __init setup_init_fpu_buf(void) copy_xregs_to_kernel_booting(&init_fpstate.xsave); } +static int xfeature_is_supervisor(int xfeature_nr) +{ + /* + * We currently do not support supervisor states, but if + * we did, we could find out like this. + * + * SDM says: If state component i is a user state component, + * ECX[0] return 0; if state component i is a supervisor + * state component, ECX[0] returns 1. + u32 eax, ebx, ecx, edx; + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx; + return !!(ecx & 1); + */ + return 0; +} +/* +static int xfeature_is_user(int xfeature_nr) +{ + return !xfeature_is_supervisor(xfeature_nr); +} +*/ + +/* + * This check is important because it is easy to get XSTATE_* + * confused with XSTATE_BIT_*. + */ +#define CHECK_XFEATURE(nr) do { \ + WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ + WARN_ON(nr >= XFEATURE_MAX); \ +} while (0) + +/* + * We could cache this like xstate_size[], but we only use + * it here, so it would be a waste of space. + */ +static int xfeature_is_aligned(int xfeature_nr) +{ + u32 eax, ebx, ecx, edx; + + CHECK_XFEATURE(xfeature_nr); + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + /* + * The value returned by ECX[1] indicates the alignment + * of state component i when the compacted format + * of the extended region of an XSAVE area is used + */ + return !!(ecx & 2); +} + +static int xfeature_uncompacted_offset(int xfeature_nr) +{ + u32 eax, ebx, ecx, edx; + + CHECK_XFEATURE(xfeature_nr); + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + return ebx; +} + +static int xfeature_size(int xfeature_nr) +{ + u32 eax, ebx, ecx, edx; + + CHECK_XFEATURE(xfeature_nr); + cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); + return eax; +} + +/* + * 'XSAVES' implies two different things: + * 1. saving of supervisor/system state + * 2. using the compacted format + * + * Use this function when dealing with the compacted format so + * that it is obvious which aspect of 'XSAVES' is being handled + * by the calling code. + */ +static int using_compacted_format(void) +{ + return cpu_has_xsaves; +} + +static void __xstate_dump_leaves(void) +{ + int i; + u32 eax, ebx, ecx, edx; + static int should_dump = 1; + + if (!should_dump) + return; + should_dump = 0; + /* + * Dump out a few leaves past the ones that we support + * just in case there are some goodies up there + */ + for (i = 0; i < XFEATURE_MAX + 10; i++) { + cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); + pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n", + XSTATE_CPUID, i, eax, ebx, ecx, edx); + } +} + +#define XSTATE_WARN_ON(x) do { \ + if (WARN_ONCE(x, "XSAVE consistency problem, dumping leaves")) { \ + __xstate_dump_leaves(); \ + } \ +} while (0) + +#define XCHECK_SZ(sz, nr, nr_macro, __struct) do { \ + if ((nr == nr_macro) && \ + WARN_ONCE(sz != sizeof(__struct), \ + "%s: struct is %zu bytes, cpu state %d bytes\n", \ + __stringify(nr_macro), sizeof(__struct), sz)) { \ + __xstate_dump_leaves(); \ + } \ +} while (0) + +/* + * We have a C struct for each 'xstate'. We need to ensure + * that our software representation matches what the CPU + * tells us about the state's size. + */ +static void check_xstate_against_struct(int nr) +{ + /* + * Ask the CPU for the size of the state. + */ + int sz = xfeature_size(nr); + /* + * Match each CPU state with the corresponding software + * structure. + */ + XCHECK_SZ(sz, nr, XFEATURE_YMM, struct ymmh_struct); + XCHECK_SZ(sz, nr, XFEATURE_BNDREGS, struct mpx_bndreg_state); + XCHECK_SZ(sz, nr, XFEATURE_BNDCSR, struct mpx_bndcsr_state); + XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state); + XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state); + XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state); + + /* + * Make *SURE* to add any feature numbers in below if + * there are "holes" in the xsave state component + * numbers. + */ + if ((nr < XFEATURE_YMM) || + (nr >= XFEATURE_MAX)) { + WARN_ONCE(1, "no structure for xstate: %d\n", nr); + XSTATE_WARN_ON(1); + } +} + +/* + * This essentially double-checks what the cpu told us about + * how large the XSAVE buffer needs to be. We are recalculating + * it to be safe. + */ +static void do_extra_xstate_size_checks(void) +{ + int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + int i; + + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { + if (!xfeature_enabled(i)) + continue; + + check_xstate_against_struct(i); + /* + * Supervisor state components can be managed only by + * XSAVES, which is compacted-format only. + */ + if (!using_compacted_format()) + XSTATE_WARN_ON(xfeature_is_supervisor(i)); + + /* Align from the end of the previous feature */ + if (xfeature_is_aligned(i)) + paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64); + /* + * The offset of a given state in the non-compacted + * format is given to us in a CPUID leaf. We check + * them for being ordered (increasing offsets) in + * setup_xstate_features(). + */ + if (!using_compacted_format()) + paranoid_xstate_size = xfeature_uncompacted_offset(i); + /* + * The compacted-format offset always depends on where + * the previous state ended. + */ + paranoid_xstate_size += xfeature_size(i); + } + XSTATE_WARN_ON(paranoid_xstate_size != xstate_size); +} + /* * Calculate total size of enabled xstates in XCR0/xfeatures_mask. + * + * Note the SDM's wording here. "sub-function 0" only enumerates + * the size of the *user* states. If we use it to size a buffer + * that we use 'XSAVES' on, we could potentially overflow the + * buffer because 'XSAVES' saves system states too. + * + * Note that we do not currently set any bits on IA32_XSS so + * 'XCR0 | IA32_XSS == XCR0' for now. */ -static void __init init_xstate_size(void) +static unsigned int __init calculate_xstate_size(void) { unsigned int eax, ebx, ecx, edx; - int i; + unsigned int calculated_xstate_size; if (!cpu_has_xsaves) { + /* + * - CPUID function 0DH, sub-function 0: + * EBX enumerates the size (in bytes) required by + * the XSAVE instruction for an XSAVE area + * containing all the *user* state components + * corresponding to bits currently set in XCR0. + */ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; - return; + calculated_xstate_size = ebx; + } else { + /* + * - CPUID function 0DH, sub-function 1: + * EBX enumerates the size (in bytes) required by + * the XSAVES instruction for an XSAVE area + * containing all the state components + * corresponding to bits currently set in + * XCR0 | IA32_XSS. + */ + cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); + calculated_xstate_size = ebx; } + return calculated_xstate_size; +} - xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = 2; i < 64; i++) { - if (test_bit(i, (unsigned long *)&xfeatures_mask)) { - cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); - xstate_size += eax; - } - } +/* + * Will the runtime-enumerated 'xstate_size' fit in the init + * task's statically-allocated buffer? + */ +static bool is_supported_xstate_size(unsigned int test_xstate_size) +{ + if (test_xstate_size <= sizeof(union fpregs_state)) + return true; + + pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n", + sizeof(union fpregs_state), test_xstate_size); + return false; +} + +static int init_xstate_size(void) +{ + /* Recompute the context size for enabled features: */ + unsigned int possible_xstate_size = calculate_xstate_size(); + + /* Ensure we have the space to store all enabled: */ + if (!is_supported_xstate_size(possible_xstate_size)) + return -EINVAL; + + /* + * The size is OK, we are definitely going to use xsave, + * make it known to the world that we need more space. + */ + xstate_size = possible_xstate_size; + do_extra_xstate_size_checks(); + return 0; +} + +/* + * We enabled the XSAVE hardware, but something went wrong and + * we can not use it. Disable it. + */ +static void fpu__init_disable_system_xstate(void) +{ + xfeatures_mask = 0; + cr4_clear_bits(X86_CR4_OSXSAVE); + fpu__xstate_clear_all_cpu_caps(); } /* @@ -321,6 +609,7 @@ void __init fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; static int on_boot_cpu = 1; + int err; WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -338,7 +627,7 @@ void __init fpu__init_system_xstate(void) cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); xfeatures_mask = eax + ((u64)edx << 32); - if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { + if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); BUG(); } @@ -348,16 +637,19 @@ void __init fpu__init_system_xstate(void) /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); - - /* Recompute the context size for enabled features: */ - init_xstate_size(); + err = init_xstate_size(); + if (err) { + /* something went wrong, boot without any XSAVE support */ + fpu__init_disable_system_xstate(); + return; + } update_regset_xstate_info(xstate_size, xfeatures_mask); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp(); - pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", + pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask, xstate_size, cpu_has_xsaves ? "compacted" : "standard"); @@ -388,7 +680,7 @@ void fpu__resume_cpu(void) * Inputs: * xstate: the thread's storage area for all FPU data * xstate_feature: state which is defined in xsave.h (e.g. - * XSTATE_FP, XSTATE_SSE, etc...) + * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...) * Output: * address of the state in the xsave area, or NULL if the * field is not present in the xsave buffer. @@ -439,8 +731,8 @@ EXPORT_SYMBOL_GPL(get_xsave_addr); * Note that this only works on the current task. * * Inputs: - * @xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP, - * XSTATE_SSE, etc...) + * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, + * XFEATURE_MASK_SSE, etc...) * Output: * address of the state in the xsave area or NULL if the state * is not present or is in its 'init state'. diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 0e2d96ffd158..6bc9ae24b6d2 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -152,7 +152,7 @@ ENTRY(startup_32) movl %eax, pa(olpc_ofw_pgd) #endif -#ifdef CONFIG_MICROCODE_EARLY +#ifdef CONFIG_MICROCODE /* Early load ucode on BSP. */ call load_ucode_bsp #endif @@ -311,12 +311,11 @@ ENTRY(startup_32_smp) movl %eax,%ss leal -__PAGE_OFFSET(%ecx),%esp -#ifdef CONFIG_MICROCODE_EARLY +#ifdef CONFIG_MICROCODE /* Early load ucode on AP. */ call load_ucode_ap #endif - default_entry: #define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 88b4da373081..b8e6ff5cd5d0 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -37,10 +37,10 @@ */ unsigned long hpet_address; u8 hpet_blockid; /* OS timer block num */ -u8 hpet_msi_disable; +bool hpet_msi_disable; #ifdef CONFIG_PCI_MSI -static unsigned long hpet_num_timers; +static unsigned int hpet_num_timers; #endif static void __iomem *hpet_virt_address; @@ -86,9 +86,9 @@ static inline void hpet_clear_mapping(void) /* * HPET command line enable / disable */ -int boot_hpet_disable; -int hpet_force_user; -static int hpet_verbose; +bool boot_hpet_disable; +bool hpet_force_user; +static bool hpet_verbose; static int __init hpet_setup(char *str) { @@ -98,11 +98,11 @@ static int __init hpet_setup(char *str) if (next) *next++ = 0; if (!strncmp("disable", str, 7)) - boot_hpet_disable = 1; + boot_hpet_disable = true; if (!strncmp("force", str, 5)) - hpet_force_user = 1; + hpet_force_user = true; if (!strncmp("verbose", str, 7)) - hpet_verbose = 1; + hpet_verbose = true; str = next; } return 1; @@ -111,7 +111,7 @@ __setup("hpet=", hpet_setup); static int __init disable_hpet(char *str) { - boot_hpet_disable = 1; + boot_hpet_disable = true; return 1; } __setup("nohpet", disable_hpet); @@ -124,7 +124,7 @@ static inline int is_hpet_capable(void) /* * HPET timer interrupt enable / disable */ -static int hpet_legacy_int_enabled; +static bool hpet_legacy_int_enabled; /** * is_hpet_enabled - check whether the hpet timer interrupt is enabled @@ -230,7 +230,7 @@ static struct clock_event_device hpet_clockevent; static void hpet_stop_counter(void) { - unsigned long cfg = hpet_readl(HPET_CFG); + u32 cfg = hpet_readl(HPET_CFG); cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } @@ -272,7 +272,7 @@ static void hpet_enable_legacy_int(void) cfg |= HPET_CFG_LEGACY; hpet_writel(cfg, HPET_CFG); - hpet_legacy_int_enabled = 1; + hpet_legacy_int_enabled = true; } static void hpet_legacy_clockevent_register(void) @@ -983,7 +983,7 @@ void hpet_disable(void) cfg = *hpet_boot_cfg; else if (hpet_legacy_int_enabled) { cfg &= ~HPET_CFG_LEGACY; - hpet_legacy_int_enabled = 0; + hpet_legacy_int_enabled = false; } cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); @@ -1121,8 +1121,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); static void hpet_disable_rtc_channel(void) { - unsigned long cfg; - cfg = hpet_readl(HPET_T1_CFG); + u32 cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_ENABLE; hpet_writel(cfg, HPET_T1_CFG); } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index c767cf2bc80a..206d0b90a3ab 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -72,7 +72,7 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) { stack_overflow_check(regs); - if (unlikely(IS_ERR_OR_NULL(desc))) + if (IS_ERR_OR_NULL(desc)) return false; generic_handle_irq_desc(desc); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index d6178d9791db..44256a62702b 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -511,26 +511,31 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) return NOTIFY_STOP; } -static int was_in_debug_nmi[NR_CPUS]; +static DECLARE_BITMAP(was_in_debug_nmi, NR_CPUS); static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs) { + int cpu; + switch (cmd) { case NMI_LOCAL: if (atomic_read(&kgdb_active) != -1) { /* KGDB CPU roundup */ - kgdb_nmicallback(raw_smp_processor_id(), regs); - was_in_debug_nmi[raw_smp_processor_id()] = 1; + cpu = raw_smp_processor_id(); + kgdb_nmicallback(cpu, regs); + set_bit(cpu, was_in_debug_nmi); touch_nmi_watchdog(); + return NMI_HANDLED; } break; case NMI_UNKNOWN: - if (was_in_debug_nmi[raw_smp_processor_id()]) { - was_in_debug_nmi[raw_smp_processor_id()] = 0; + cpu = raw_smp_processor_id(); + + if (__test_and_clear_bit(cpu, was_in_debug_nmi)) return NMI_HANDLED; - } + break; default: /* do nothing */ diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2c7aafa70702..2bd81e302427 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -32,6 +32,7 @@ static int kvmclock = 1; static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; +static cycle_t kvm_sched_clock_offset; static int parse_no_kvmclock(char *arg) { @@ -92,6 +93,29 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs) return kvm_clock_read(); } +static cycle_t kvm_sched_clock_read(void) +{ + return kvm_clock_read() - kvm_sched_clock_offset; +} + +static inline void kvm_sched_clock_init(bool stable) +{ + if (!stable) { + pv_time_ops.sched_clock = kvm_clock_read; + return; + } + + kvm_sched_clock_offset = kvm_clock_read(); + pv_time_ops.sched_clock = kvm_sched_clock_read; + set_sched_clock_stable(); + + printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", + kvm_sched_clock_offset); + + BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) > + sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time)); +} + /* * If we don't do that, there is the possibility that the guest * will calibrate under heavy load - thus, getting a lower lpj - @@ -248,7 +272,17 @@ void __init kvmclock_init(void) memblock_free(mem, size); return; } - pv_time_ops.sched_clock = kvm_clock_read; + + if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) + pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); + + cpu = get_cpu(); + vcpu_time = &hv_clock[cpu].pvti; + flags = pvclock_read_flags(vcpu_time); + + kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT); + put_cpu(); + x86_platform.calibrate_tsc = kvm_get_tsc_khz; x86_platform.get_wallclock = kvm_get_wallclock; x86_platform.set_wallclock = kvm_set_wallclock; @@ -265,16 +299,6 @@ void __init kvmclock_init(void) kvm_get_preset_lpj(); clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); pv_info.name = "KVM"; - - if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) - pvclock_set_flags(~0); - - cpu = get_cpu(); - vcpu_time = &hv_clock[cpu].pvti; - flags = pvclock_read_flags(vcpu_time); - if (flags & PVCLOCK_COUNTS_FROM_ZERO) - set_sched_clock_stable(); - put_cpu(); } int __init kvm_setup_vsyscall_timeinfo(void) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1b55de1267cf..cd99433b8ba1 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -131,11 +131,12 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) { + if (!*dev) + *dev = &x86_dma_fallback_dev; + *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); - if (!*dev) - *dev = &x86_dma_fallback_dev; if (!is_device_dma_capable(*dev)) return false; return true; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6d0e62ae8516..9f7c21c22477 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -84,6 +84,9 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { memcpy(dst, src, arch_task_struct_size); +#ifdef CONFIG_VM86 + dst->thread.vm86 = NULL; +#endif return fpu__copy(&dst->thread.fpu, &src->thread.fpu); } @@ -506,3 +509,58 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) return randomize_range(mm->brk, range_end, 0) ? : mm->brk; } +/* + * Called from fs/proc with a reference on @p to find the function + * which called into schedule(). This needs to be done carefully + * because the task might wake up and we might look at a stack + * changing under us. + */ +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long start, bottom, top, sp, fp, ip; + int count = 0; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + start = (unsigned long)task_stack_page(p); + if (!start) + return 0; + + /* + * Layout of the stack page: + * + * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack + * ----------- bottom = start + sizeof(thread_info) + * thread_info + * ----------- start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: + * ... IP FP ... IP FP + * + * We need to read FP and IP, so we need to adjust the upper + * bound by another unsigned long. + */ + top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; + top -= 2 * sizeof(unsigned long); + bottom = start + sizeof(struct thread_info); + + sp = READ_ONCE(p->thread.sp); + if (sp < bottom || sp > top) + return 0; + + fp = READ_ONCE_NOCHECK(*(unsigned long *)sp); + do { + if (fp < bottom || fp > top) + return 0; + ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long))); + if (!in_sched_functions(ip)) + return ip; + fp = READ_ONCE_NOCHECK(*(unsigned long *)fp); + } while (count++ < 16 && p->state != TASK_RUNNING); + return 0; +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index c13df2c735f8..9f950917528b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -280,14 +280,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) set_iopl_mask(next->iopl); /* - * If it were not for PREEMPT_ACTIVE we could guarantee that the - * preempt_count of all tasks was equal here and this would not be - * needed. - */ - task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); - this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); - - /* * Now maybe handle debug registers and/or IO bitmaps */ if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || @@ -324,31 +316,3 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } - -#define top_esp (THREAD_SIZE - sizeof(unsigned long)) -#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) - -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long bp, sp, ip; - unsigned long stack_page; - int count = 0; - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack_page = (unsigned long)task_stack_page(p); - sp = p->thread.sp; - if (!stack_page || sp < stack_page || sp > top_esp+stack_page) - return 0; - /* include/asm-i386/system.h:switch_to() pushes bp last. */ - bp = *(unsigned long *) sp; - do { - if (bp < stack_page || bp > top_ebp+stack_page) - return 0; - ip = *(unsigned long *) (bp+4); - if (!in_sched_functions(ip)) - return ip; - bp = *(unsigned long *) bp; - } while (count++ < 16); - return 0; -} - diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3c1bbcf12924..e835d263a33b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -332,7 +332,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch FS and GS. * - * These are even more complicated than FS and GS: they have + * These are even more complicated than DS and ES: they have * 64-bit bases are that controlled by arch_prctl. Those bases * only differ from the values in the GDT or LDT if the selector * is 0. @@ -401,14 +401,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ this_cpu_write(current_task, next_p); - /* - * If it were not for PREEMPT_ACTIVE we could guarantee that the - * preempt_count of all tasks was equal here and this would not be - * needed. - */ - task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); - this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); - /* Reload esp0 and ss1. This changes current_thread_info(). */ load_sp0(tss, next); @@ -499,30 +491,6 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long stack; - u64 fp, ip; - int count = 0; - - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) - return 0; - fp = *(u64 *)(p->thread.sp); - do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) - return 0; - ip = *(u64 *)(fp+8); - if (!in_sched_functions(ip)) - return ip; - fp = *(u64 *)fp; - } while (count++ < 16); - return 0; -} - long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) { int ret = 0; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 176a0f99d4da..cc457ff818ad 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -524,7 +524,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU, */ static void force_disable_hpet_msi(struct pci_dev *unused) { - hpet_msi_disable = 1; + hpet_msi_disable = true; } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index fdb7f2a2d328..a1e4da98c8f0 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -111,6 +111,7 @@ #include <asm/mce.h> #include <asm/alternative.h> #include <asm/prom.h> +#include <asm/microcode.h> /* * max_low_pfn_mapped: highest direct mapped pfn under 4GB @@ -480,34 +481,34 @@ static void __init memblock_x86_reserve_range_setup_data(void) #ifdef CONFIG_KEXEC_CORE +/* 16M alignment for crash kernel regions */ +#define CRASH_ALIGN (16 << 20) + /* * Keep the crash kernel below this limit. On 32 bits earlier kernels * would limit the kernel to the low 512 MiB due to mapping restrictions. * On 64bit, old kexec-tools need to under 896MiB. */ #ifdef CONFIG_X86_32 -# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20) -# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20) +# define CRASH_ADDR_LOW_MAX (512 << 20) +# define CRASH_ADDR_HIGH_MAX (512 << 20) #else -# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20) -# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM +# define CRASH_ADDR_LOW_MAX (896UL << 20) +# define CRASH_ADDR_HIGH_MAX MAXMEM #endif -static void __init reserve_crashkernel_low(void) +static int __init reserve_crashkernel_low(void) { #ifdef CONFIG_X86_64 - const unsigned long long alignment = 16<<20; /* 16M */ - unsigned long long low_base = 0, low_size = 0; + unsigned long long base, low_base = 0, low_size = 0; unsigned long total_low_mem; - unsigned long long base; - bool auto_set = false; int ret; - total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); + total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); + /* crashkernel=Y,low */ - ret = parse_crashkernel_low(boot_command_line, total_low_mem, - &low_size, &base); - if (ret != 0) { + ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); + if (ret) { /* * two parts from lib/swiotlb.c: * -swiotlb size: user-specified with swiotlb= or default. @@ -517,52 +518,52 @@ static void __init reserve_crashkernel_low(void) * make sure we allocate enough extra low memory so that we * don't run out of DMA buffers for 32-bit devices. */ - low_size = max(swiotlb_size_or_default() + (8UL<<20), 256UL<<20); - auto_set = true; + low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20); } else { /* passed with crashkernel=0,low ? */ if (!low_size) - return; + return 0; } - low_base = memblock_find_in_range(low_size, (1ULL<<32), - low_size, alignment); - + low_base = memblock_find_in_range(low_size, 1ULL << 32, low_size, CRASH_ALIGN); if (!low_base) { - if (!auto_set) - pr_info("crashkernel low reservation failed - No suitable area found.\n"); + pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", + (unsigned long)(low_size >> 20)); + return -ENOMEM; + } - return; + ret = memblock_reserve(low_base, low_size); + if (ret) { + pr_err("%s: Error reserving crashkernel low memblock.\n", __func__); + return ret; } - memblock_reserve(low_base, low_size); pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", - (unsigned long)(low_size >> 20), - (unsigned long)(low_base >> 20), - (unsigned long)(total_low_mem >> 20)); + (unsigned long)(low_size >> 20), + (unsigned long)(low_base >> 20), + (unsigned long)(total_low_mem >> 20)); + crashk_low_res.start = low_base; crashk_low_res.end = low_base + low_size - 1; insert_resource(&iomem_resource, &crashk_low_res); #endif + return 0; } static void __init reserve_crashkernel(void) { - const unsigned long long alignment = 16<<20; /* 16M */ - unsigned long long total_mem; - unsigned long long crash_size, crash_base; + unsigned long long crash_size, crash_base, total_mem; bool high = false; int ret; total_mem = memblock_phys_mem_size(); /* crashkernel=XM */ - ret = parse_crashkernel(boot_command_line, total_mem, - &crash_size, &crash_base); + ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); if (ret != 0 || crash_size <= 0) { /* crashkernel=X,high */ ret = parse_crashkernel_high(boot_command_line, total_mem, - &crash_size, &crash_base); + &crash_size, &crash_base); if (ret != 0 || crash_size <= 0) return; high = true; @@ -573,11 +574,10 @@ static void __init reserve_crashkernel(void) /* * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX */ - crash_base = memblock_find_in_range(alignment, - high ? CRASH_KERNEL_ADDR_HIGH_MAX : - CRASH_KERNEL_ADDR_LOW_MAX, - crash_size, alignment); - + crash_base = memblock_find_in_range(CRASH_ALIGN, + high ? CRASH_ADDR_HIGH_MAX + : CRASH_ADDR_LOW_MAX, + crash_size, CRASH_ALIGN); if (!crash_base) { pr_info("crashkernel reservation failed - No suitable area found.\n"); return; @@ -587,26 +587,32 @@ static void __init reserve_crashkernel(void) unsigned long long start; start = memblock_find_in_range(crash_base, - crash_base + crash_size, crash_size, 1<<20); + crash_base + crash_size, + crash_size, 1 << 20); if (start != crash_base) { pr_info("crashkernel reservation failed - memory is in use.\n"); return; } } - memblock_reserve(crash_base, crash_size); + ret = memblock_reserve(crash_base, crash_size); + if (ret) { + pr_err("%s: Error reserving crashkernel memblock.\n", __func__); + return; + } - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crash_base >> 20), - (unsigned long)(total_mem >> 20)); + if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { + memblock_free(crash_base, crash_size); + return; + } + + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(total_mem >> 20)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); - - if (crash_base >= (1ULL<<32)) - reserve_crashkernel_low(); } #else static void __init reserve_crashkernel(void) @@ -1079,8 +1085,10 @@ void __init setup_arch(char **cmdline_p) memblock_set_current_limit(ISA_END_ADDRESS); memblock_x86_fill(); - if (efi_enabled(EFI_BOOT)) + if (efi_enabled(EFI_BOOT)) { + efi_fake_memmap(); efi_find_mirror(); + } /* * The EFI specification says that boot service code won't be called @@ -1173,6 +1181,14 @@ void __init setup_arch(char **cmdline_p) clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); + + /* + * sync back low identity map too. It is used for example + * in the 32-bit EFI stub. + */ + clone_pgd_range(initial_page_table, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); #endif tboot_probe(); @@ -1234,6 +1250,8 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_apply_memmap_quirks(); #endif + + microcode_init(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index da52e6bb5c7f..b7ffb7c00075 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -63,6 +63,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { + unsigned long buf_val; void __user *buf; unsigned int tmpflags; unsigned int err = 0; @@ -107,7 +108,8 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); regs->orig_ax = -1; /* disable syscall checks */ - get_user_ex(buf, &sc->fpstate); + get_user_ex(buf_val, &sc->fpstate); + buf = (void __user *)buf_val; } get_user_catch(err); err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32)); @@ -196,7 +198,7 @@ static unsigned long align_sigframe(unsigned long sp) return sp; } -static inline void __user * +static void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { @@ -299,7 +301,7 @@ __setup_frame(int sig, struct ksignal *ksig, sigset_t *set, if (current->mm->context.vdso) restorer = current->mm->context.vdso + - selected_vdso32->sym___kernel_sigreturn; + vdso_image_32.sym___kernel_sigreturn; else restorer = &frame->retcode; if (ksig->ka.sa.sa_flags & SA_RESTORER) @@ -363,7 +365,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, /* Set up to return from userspace. */ restorer = current->mm->context.vdso + - selected_vdso32->sym___kernel_rt_sigreturn; + vdso_image_32.sym___kernel_rt_sigreturn; if (ksig->ka.sa.sa_flags & SA_RESTORER) restorer = ksig->ka.sa.sa_restorer; put_user_ex(restorer, &frame->pretcode); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e0c198e5f920..892ee2e5ecbc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -509,7 +509,7 @@ void __inquire_remote_apic(int apicid) */ #define UDELAY_10MS_DEFAULT 10000 -static unsigned int init_udelay = UDELAY_10MS_DEFAULT; +static unsigned int init_udelay = INT_MAX; static int __init cpu_init_udelay(char *str) { @@ -522,13 +522,16 @@ early_param("cpu_init_udelay", cpu_init_udelay); static void __init smp_quirk_init_udelay(void) { /* if cmdline changed it from default, leave it alone */ - if (init_udelay != UDELAY_10MS_DEFAULT) + if (init_udelay != INT_MAX) return; /* if modern processor, use no delay */ if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) || ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) init_udelay = 0; + + /* else, use legacy delay */ + init_udelay = UDELAY_10MS_DEFAULT; } /* @@ -657,7 +660,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) /* * Give the other CPU some time to accept the IPI. */ - if (init_udelay) + if (init_udelay == 0) + udelay(10); + else udelay(300); pr_debug("Startup point 1\n"); @@ -668,7 +673,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) /* * Give the other CPU some time to accept the IPI. */ - if (init_udelay) + if (init_udelay == 0) + udelay(10); + else udelay(200); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 346eec73f7db..ade185a46b1d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -361,7 +361,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { - const struct bndcsr *bndcsr; + const struct mpx_bndcsr *bndcsr; siginfo_t *info; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); @@ -384,7 +384,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) * which is all zeros which indicates MPX was not * responsible for the exception. */ - bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); if (!bndcsr) goto exit_trap; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c3f7602cd038..c7c4d9c51e99 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -168,21 +168,20 @@ static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div - * into a shift. + * into a shift. The larger SC is, the more accurate the conversion, but + * cyc2ns_scale needs to be a 32-bit value so that 32-bit multiplication + * (64-bit result) can be used. * - * We can use khz divisor instead of mhz to keep a better precision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * We can use khz divisor instead of mhz to keep a better precision. * (mathieu.desnoyers@polymtl.ca) * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - static void cyc2ns_data_init(struct cyc2ns_data *data) { data->cyc2ns_mul = 0; - data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; + data->cyc2ns_shift = 0; data->cyc2ns_offset = 0; data->__count = 0; } @@ -216,14 +215,14 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) if (likely(data == tail)) { ns = data->cyc2ns_offset; - ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); + ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); } else { data->__count++; barrier(); ns = data->cyc2ns_offset; - ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); + ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); barrier(); @@ -257,12 +256,22 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) * time function is continuous; see the comment near struct * cyc2ns_data. */ - data->cyc2ns_mul = - DIV_ROUND_CLOSEST(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, - cpu_khz); - data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; + clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, cpu_khz, + NSEC_PER_MSEC, 0); + + /* + * cyc2ns_shift is exported via arch_perf_update_userpage() where it is + * not expected to be greater than 31 due to the original published + * conversion algorithm shifting a 32-bit value (now specifies a 64-bit + * value) - refer perf_event_mmap_page documentation in perf_event.h. + */ + if (data->cyc2ns_shift == 32) { + data->cyc2ns_shift = 31; + data->cyc2ns_mul >>= 1; + } + data->cyc2ns_offset = ns_now - - mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); + mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift); cyc2ns_write_end(cpu, data); diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index d8a1d56276e1..639a6e34500c 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -28,6 +28,8 @@ config KVM select ANON_INODES select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD + select IRQ_BYPASS_MANAGER + select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c index d090ecf08809..9dc091acd5fb 100644 --- a/arch/x86/kvm/assigned-dev.c +++ b/arch/x86/kvm/assigned-dev.c @@ -21,6 +21,7 @@ #include <linux/fs.h> #include "irq.h" #include "assigned-dev.h" +#include "trace/events/kvm.h" struct kvm_assigned_dev_kernel { struct kvm_irq_ack_notifier ack_notifier; @@ -131,7 +132,42 @@ static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) return IRQ_HANDLED; } -#ifdef __KVM_HAVE_MSI +/* + * Deliver an IRQ in an atomic context if we can, or return a failure, + * user can retry in a process context. + * Return value: + * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. + * Other values - No need to retry. + */ +static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, + int level) +{ + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + struct kvm_kernel_irq_routing_entry *e; + int ret = -EINVAL; + int idx; + + trace_kvm_set_irq(irq, level, irq_source_id); + + /* + * Injection into either PIC or IOAPIC might need to scan all CPUs, + * which would need to be retried from thread context; when same GSI + * is connected to both PIC and IOAPIC, we'd have to report a + * partial failure here. + * Since there's no easy way to do this, we only support injecting MSI + * which is limited to 1:1 GSI mapping. + */ + idx = srcu_read_lock(&kvm->irq_srcu); + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { + e = &entries[0]; + ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, + irq, level); + } + srcu_read_unlock(&kvm->irq_srcu, idx); + return ret; +} + + static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; @@ -150,9 +186,7 @@ static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) return IRQ_HANDLED; } -#endif -#ifdef __KVM_HAVE_MSIX static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; @@ -183,7 +217,6 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) return IRQ_HANDLED; } -#endif /* Ack the irq line for an assigned device */ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) @@ -386,7 +419,6 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, return 0; } -#ifdef __KVM_HAVE_MSI static int assigned_device_enable_host_msi(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev) { @@ -408,9 +440,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, return 0; } -#endif -#ifdef __KVM_HAVE_MSIX static int assigned_device_enable_host_msix(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev) { @@ -443,8 +473,6 @@ err: return r; } -#endif - static int assigned_device_enable_guest_intx(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev, struct kvm_assigned_irq *irq) @@ -454,7 +482,6 @@ static int assigned_device_enable_guest_intx(struct kvm *kvm, return 0; } -#ifdef __KVM_HAVE_MSI static int assigned_device_enable_guest_msi(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev, struct kvm_assigned_irq *irq) @@ -463,9 +490,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm, dev->ack_notifier.gsi = -1; return 0; } -#endif -#ifdef __KVM_HAVE_MSIX static int assigned_device_enable_guest_msix(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev, struct kvm_assigned_irq *irq) @@ -474,7 +499,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm, dev->ack_notifier.gsi = -1; return 0; } -#endif static int assign_host_irq(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev, @@ -492,16 +516,12 @@ static int assign_host_irq(struct kvm *kvm, case KVM_DEV_IRQ_HOST_INTX: r = assigned_device_enable_host_intx(kvm, dev); break; -#ifdef __KVM_HAVE_MSI case KVM_DEV_IRQ_HOST_MSI: r = assigned_device_enable_host_msi(kvm, dev); break; -#endif -#ifdef __KVM_HAVE_MSIX case KVM_DEV_IRQ_HOST_MSIX: r = assigned_device_enable_host_msix(kvm, dev); break; -#endif default: r = -EINVAL; } @@ -534,16 +554,12 @@ static int assign_guest_irq(struct kvm *kvm, case KVM_DEV_IRQ_GUEST_INTX: r = assigned_device_enable_guest_intx(kvm, dev, irq); break; -#ifdef __KVM_HAVE_MSI case KVM_DEV_IRQ_GUEST_MSI: r = assigned_device_enable_guest_msi(kvm, dev, irq); break; -#endif -#ifdef __KVM_HAVE_MSIX case KVM_DEV_IRQ_GUEST_MSIX: r = assigned_device_enable_guest_msix(kvm, dev, irq); break; -#endif default: r = -EINVAL; } @@ -826,7 +842,6 @@ out: } -#ifdef __KVM_HAVE_MSIX static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, struct kvm_assigned_msix_nr *entry_nr) { @@ -906,7 +921,6 @@ msix_entry_out: return r; } -#endif static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, struct kvm_assigned_pci_dev *assigned_dev) @@ -1012,7 +1026,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, goto out; break; } -#ifdef __KVM_HAVE_MSIX case KVM_ASSIGN_SET_MSIX_NR: { struct kvm_assigned_msix_nr entry_nr; r = -EFAULT; @@ -1033,7 +1046,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, goto out; break; } -#endif case KVM_ASSIGN_SET_INTX_MASK: { struct kvm_assigned_pci_dev assigned_dev; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2fbea2544f24..6525e926f566 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -30,7 +30,7 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted) int feature_bit = 0; u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; - xstate_bv &= XSTATE_EXTEND_MASK; + xstate_bv &= XFEATURE_MASK_EXTEND; while (xstate_bv) { if (xstate_bv & 0x1) { u32 eax, ebx, ecx, edx, offset; @@ -51,7 +51,7 @@ u64 kvm_supported_xcr0(void) u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; if (!kvm_x86_ops->mpx_supported()) - xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR); + xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); return xcr0; } @@ -348,7 +348,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | - F(AVX512CD); + F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT); /* cpuid 0xD.1.eax */ const u32 kvm_supported_word10_x86_features = diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index dd05b9cef6ae..06332cb7e7d1 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -133,4 +133,41 @@ static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu) best = kvm_find_cpuid_entry(vcpu, 7, 0); return best && (best->ebx & bit(X86_FEATURE_MPX)); } + +static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->ebx & bit(X86_FEATURE_PCOMMIT)); +} + +static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); + return best && (best->edx & bit(X86_FEATURE_RDTSCP)); +} + +/* + * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 + */ +#define BIT_NRIPS 3 + +static inline bool guest_cpuid_has_nrips(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x8000000a, 0); + + /* + * NRIPS is a scattered cpuid feature, so we can't use + * X86_FEATURE_NRIPS here (X86_FEATURE_NRIPS would be bit + * position 8, not 3). + */ + return best && (best->edx & bit(BIT_NRIPS)); +} +#undef BIT_NRIPS + #endif diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b372a7557c16..1505587d06e9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2272,8 +2272,8 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) #define GET_SMSTATE(type, smbase, offset) \ ({ \ type __val; \ - int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \ - sizeof(__val), NULL); \ + int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \ + sizeof(__val)); \ if (r != X86EMUL_CONTINUE) \ return X86EMUL_UNHANDLEABLE; \ __val; \ @@ -2418,7 +2418,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) u64 val, cr0, cr4; u32 base3; u16 selector; - int i; + int i, r; for (i = 0; i < 16; i++) *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8); @@ -2460,13 +2460,17 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase) dt.address = GET_SMSTATE(u64, smbase, 0x7e68); ctxt->ops->set_gdt(ctxt, &dt); + r = rsm_enter_protected_mode(ctxt, cr0, cr4); + if (r != X86EMUL_CONTINUE) + return r; + for (i = 0; i < 6; i++) { - int r = rsm_load_seg_64(ctxt, smbase, i); + r = rsm_load_seg_64(ctxt, smbase, i); if (r != X86EMUL_CONTINUE) return r; } - return rsm_enter_protected_mode(ctxt, cr0, cr4); + return X86EMUL_CONTINUE; } static int em_rsm(struct x86_emulate_ctxt *ctxt) @@ -2480,17 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) /* * Get back to real mode, to prepare a safe state in which to load - * CR0/CR3/CR4/EFER. Also this will ensure that addresses passed - * to read_std/write_std are not virtual. - * - * CR4.PCIDE must be zero, because it is a 64-bit mode only feature. + * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU + * supports long mode. */ + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (emulator_has_longmode(ctxt)) { + struct desc_struct cs_desc; + + /* Zero CR4.PCIDE before CR0.PG. */ + if (cr4 & X86_CR4_PCIDE) { + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); + cr4 &= ~X86_CR4_PCIDE; + } + + /* A 32-bit code segment is required to clear EFER.LMA. */ + memset(&cs_desc, 0, sizeof(cs_desc)); + cs_desc.type = 0xb; + cs_desc.s = cs_desc.g = cs_desc.p = 1; + ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS); + } + + /* For the 64-bit case, this will clear EFER.LMA. */ cr0 = ctxt->ops->get_cr(ctxt, 0); if (cr0 & X86_CR0_PE) ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); - cr4 = ctxt->ops->get_cr(ctxt, 4); + + /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */ if (cr4 & X86_CR4_PAE) ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); + + /* And finally go back to 32-bit mode. */ efer = 0; ctxt->ops->set_msr(ctxt, MSR_EFER, efer); @@ -4451,7 +4474,7 @@ static const struct opcode twobyte_table[256] = { F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, /* 0xA8 - 0xAF */ I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), - II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm), + II(EmulateOnUD | ImplicitOps, em_rsm, rsm), F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index a8160d2ae362..62cf8c915e95 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -41,6 +41,7 @@ static bool kvm_hv_msr_partition_wide(u32 msr) case HV_X64_MSR_TIME_REF_COUNT: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + case HV_X64_MSR_RESET: r = true; break; } @@ -163,6 +164,12 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, data); case HV_X64_MSR_CRASH_CTL: return kvm_hv_msr_set_crash_ctl(vcpu, data, host); + case HV_X64_MSR_RESET: + if (data == 1) { + vcpu_debug(vcpu, "hyper-v reset requested\n"); + kvm_make_request(KVM_REQ_HV_RESET, vcpu); + } + break; default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -171,7 +178,16 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, return 0; } -static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +/* Calculate cpu time spent by current task in 100ns units */ +static u64 current_task_runtime_100ns(void) +{ + cputime_t utime, stime; + + task_cputime_adjusted(current, &utime, &stime); + return div_u64(cputime_to_nsecs(utime + stime), 100); +} + +static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; @@ -205,6 +221,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); case HV_X64_MSR_TPR: return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); + case HV_X64_MSR_VP_RUNTIME: + if (!host) + return 1; + hv->runtime_offset = data - current_task_runtime_100ns(); + break; default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -241,6 +262,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) pdata); case HV_X64_MSR_CRASH_CTL: return kvm_hv_msr_get_crash_ctl(vcpu, pdata); + case HV_X64_MSR_RESET: + data = 0; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -277,6 +301,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_APIC_ASSIST_PAGE: data = hv->hv_vapic; break; + case HV_X64_MSR_VP_RUNTIME: + data = current_task_runtime_100ns() + hv->runtime_offset; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -295,7 +322,7 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) mutex_unlock(&vcpu->kvm->lock); return r; } else - return kvm_hv_set_msr(vcpu, msr, data); + return kvm_hv_set_msr(vcpu, msr, data, host); } int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index f90952f64e79..08116ff227cc 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -35,6 +35,7 @@ #include <linux/kvm_host.h> #include <linux/slab.h> +#include "ioapic.h" #include "irq.h" #include "i8254.h" #include "x86.h" @@ -333,7 +334,8 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; s64 interval; - if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) + if (!ioapic_in_kernel(kvm) || + ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) return; interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 856f79105bb5..88d0a92d3f94 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -233,21 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) } -static void update_handled_vectors(struct kvm_ioapic *ioapic) -{ - DECLARE_BITMAP(handled_vectors, 256); - int i; - - memset(handled_vectors, 0, sizeof(handled_vectors)); - for (i = 0; i < IOAPIC_NUM_PINS; ++i) - __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors); - memcpy(ioapic->handled_vectors, handled_vectors, - sizeof(handled_vectors)); - smp_wmb(); -} - -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, - u32 *tmr) +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; union kvm_ioapic_redirect_entry *e; @@ -260,13 +246,11 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || index == RTC_GSI) { if (kvm_apic_match_dest(vcpu, NULL, 0, - e->fields.dest_id, e->fields.dest_mode)) { + e->fields.dest_id, e->fields.dest_mode) || + (e->fields.trig_mode == IOAPIC_EDGE_TRIG && + kvm_apic_pending_eoi(vcpu, e->fields.vector))) __set_bit(e->fields.vector, (unsigned long *)eoi_exit_bitmap); - if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG) - __set_bit(e->fields.vector, - (unsigned long *)tmr); - } } } spin_unlock(&ioapic->lock); @@ -315,7 +299,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) e->bits |= (u32) val; e->fields.remote_irr = 0; } - update_handled_vectors(ioapic); mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); @@ -599,7 +582,6 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->id = 0; memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); rtc_irq_eoi_tracking_reset(ioapic); - update_handled_vectors(ioapic); } static const struct kvm_io_device_ops ioapic_mmio_ops = { @@ -628,8 +610,10 @@ int kvm_ioapic_init(struct kvm *kvm) if (ret < 0) { kvm->arch.vioapic = NULL; kfree(ioapic); + return ret; } + kvm_vcpu_request_scan_ioapic(kvm); return ret; } @@ -666,7 +650,6 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); ioapic->irr = 0; ioapic->irr_delivered = 0; - update_handled_vectors(ioapic); kvm_vcpu_request_scan_ioapic(kvm); kvm_ioapic_inject_all(ioapic, state->irr); spin_unlock(&ioapic->lock); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index ca0b0b4e6256..084617d37c74 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -9,6 +9,7 @@ struct kvm; struct kvm_vcpu; #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS +#define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ #define IOAPIC_EDGE_TRIG 0 #define IOAPIC_LEVEL_TRIG 1 @@ -73,7 +74,6 @@ struct kvm_ioapic { struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); spinlock_t lock; - DECLARE_BITMAP(handled_vectors, 256); struct rtc_status rtc_status; struct delayed_work eoi_inject; u32 irq_eoi[IOAPIC_NUM_PINS]; @@ -98,11 +98,12 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } -static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) +static inline int ioapic_in_kernel(struct kvm *kvm) { - struct kvm_ioapic *ioapic = kvm->arch.vioapic; - smp_rmb(); - return test_bit(vector, ioapic->handled_vectors); + int ret; + + ret = (ioapic_irqchip(kvm) != NULL); + return ret; } void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); @@ -120,7 +121,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, - u32 *tmr); +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); #endif diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index a1ec6a50a05a..097060e33bd6 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -38,14 +38,27 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) EXPORT_SYMBOL(kvm_cpu_has_pending_timer); /* + * check if there is a pending userspace external interrupt + */ +static int pending_userspace_extint(struct kvm_vcpu *v) +{ + return v->arch.pending_external_vector != -1; +} + +/* * check if there is pending interrupt from * non-APIC source without intack. */ static int kvm_cpu_has_extint(struct kvm_vcpu *v) { - if (kvm_apic_accept_pic_intr(v)) - return pic_irqchip(v->kvm)->output; /* PIC */ - else + u8 accept = kvm_apic_accept_pic_intr(v); + + if (accept) { + if (irqchip_split(v->kvm)) + return pending_userspace_extint(v); + else + return pic_irqchip(v->kvm)->output; + } else return 0; } @@ -57,13 +70,13 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) */ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) { - if (!irqchip_in_kernel(v->kvm)) + if (!lapic_in_kernel(v)) return v->arch.interrupt.pending; if (kvm_cpu_has_extint(v)) return 1; - if (kvm_apic_vid_enabled(v->kvm)) + if (kvm_vcpu_apic_vid_enabled(v)) return 0; return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ @@ -75,7 +88,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - if (!irqchip_in_kernel(v->kvm)) + if (!lapic_in_kernel(v)) return v->arch.interrupt.pending; if (kvm_cpu_has_extint(v)) @@ -91,9 +104,16 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); */ static int kvm_cpu_get_extint(struct kvm_vcpu *v) { - if (kvm_cpu_has_extint(v)) - return kvm_pic_read_irq(v->kvm); /* PIC */ - return -1; + if (kvm_cpu_has_extint(v)) { + if (irqchip_split(v->kvm)) { + int vector = v->arch.pending_external_vector; + + v->arch.pending_external_vector = -1; + return vector; + } else + return kvm_pic_read_irq(v->kvm); /* PIC */ + } else + return -1; } /* @@ -103,7 +123,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { int vector; - if (!irqchip_in_kernel(v->kvm)) + if (!lapic_in_kernel(v)) return v->arch.interrupt.nr; vector = kvm_cpu_get_extint(v); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 3d782a2c336a..ae5c78f2337d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -83,13 +83,38 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) return kvm->arch.vpic; } +static inline int pic_in_kernel(struct kvm *kvm) +{ + int ret; + + ret = (pic_irqchip(kvm) != NULL); + return ret; +} + +static inline int irqchip_split(struct kvm *kvm) +{ + return kvm->arch.irqchip_split; +} + static inline int irqchip_in_kernel(struct kvm *kvm) { struct kvm_pic *vpic = pic_irqchip(kvm); + bool ret; + + ret = (vpic != NULL); + ret |= irqchip_split(kvm); /* Read vpic before kvm->irq_routing. */ smp_rmb(); - return vpic != NULL; + return ret; +} + +static inline int lapic_in_kernel(struct kvm_vcpu *vcpu) +{ + /* Same as irqchip_in_kernel(vcpu->kvm), but with less + * pointer chasing and no unnecessary memory barriers. + */ + return vcpu->arch.apic != NULL; } void kvm_pic_reset(struct kvm_kpic_state *s); diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 9efff9e5b58c..84b96d319909 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -91,8 +91,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, return r; } -static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm_lapic_irq *irq) +void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm_lapic_irq *irq) { trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); @@ -108,6 +108,7 @@ static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, irq->level = 1; irq->shorthand = 0; } +EXPORT_SYMBOL_GPL(kvm_set_msi_irq); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -123,12 +124,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, } -static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm) +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) { struct kvm_lapic_irq irq; int r; + if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) + return -EWOULDBLOCK; + kvm_set_msi_irq(e, &irq); if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) @@ -137,42 +142,6 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, return -EWOULDBLOCK; } -/* - * Deliver an IRQ in an atomic context if we can, or return a failure, - * user can retry in a process context. - * Return value: - * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. - * Other values - No need to retry. - */ -int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) -{ - struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; - struct kvm_kernel_irq_routing_entry *e; - int ret = -EINVAL; - int idx; - - trace_kvm_set_irq(irq, level, irq_source_id); - - /* - * Injection into either PIC or IOAPIC might need to scan all CPUs, - * which would need to be retried from thread context; when same GSI - * is connected to both PIC and IOAPIC, we'd have to report a - * partial failure here. - * Since there's no easy way to do this, we only support injecting MSI - * which is limited to 1:1 GSI mapping. - */ - idx = srcu_read_lock(&kvm->irq_srcu); - if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { - e = &entries[0]; - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; - } - srcu_read_unlock(&kvm->irq_srcu, idx); - return ret; -} - int kvm_request_irq_source_id(struct kvm *kvm) { unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; @@ -208,7 +177,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) goto unlock; } clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); - if (!irqchip_in_kernel(kvm)) + if (!ioapic_in_kernel(kvm)) goto unlock; kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); @@ -297,6 +266,33 @@ out: return r; } +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu) +{ + int i, r = 0; + struct kvm_vcpu *vcpu; + + if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu)) + return true; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_apic_present(vcpu)) + continue; + + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, + irq->dest_id, irq->dest_mode)) + continue; + + if (++r == 2) + return false; + + *dest_vcpu = vcpu; + } + + return r == 1; +} +EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu); + #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } @@ -328,3 +324,54 @@ int kvm_setup_default_irq_routing(struct kvm *kvm) return kvm_set_irq_routing(kvm, default_routing, ARRAY_SIZE(default_routing), 0); } + +static const struct kvm_irq_routing_entry empty_routing[] = {}; + +int kvm_setup_empty_irq_routing(struct kvm *kvm) +{ + return kvm_set_irq_routing(kvm, empty_routing, 0, 0); +} + +void kvm_arch_irq_routing_update(struct kvm *kvm) +{ + if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) + return; + kvm_make_scan_ioapic_request(kvm); +} + +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_kernel_irq_routing_entry *entry; + struct kvm_irq_routing_table *table; + u32 i, nr_ioapic_pins; + int idx; + + /* kvm->irq_routing must be read after clearing + * KVM_SCAN_IOAPIC. */ + smp_mb(); + idx = srcu_read_lock(&kvm->irq_srcu); + table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + nr_ioapic_pins = min_t(u32, table->nr_rt_entries, + kvm->arch.nr_reserved_ioapic_pins); + for (i = 0; i < nr_ioapic_pins; ++i) { + hlist_for_each_entry(entry, &table->map[i], link) { + u32 dest_id, dest_mode; + bool level; + + if (entry->type != KVM_IRQ_ROUTING_MSI) + continue; + dest_id = (entry->msi.address_lo >> 12) & 0xff; + dest_mode = (entry->msi.address_lo >> 2) & 0x1; + level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL; + if (level && kvm_apic_match_dest(vcpu, NULL, 0, + dest_id, dest_mode)) { + u32 vector = entry->msi.data & 0xff; + + __set_bit(vector, + (unsigned long *) eoi_exit_bitmap); + } + } + } + srcu_read_unlock(&kvm->irq_srcu, idx); +} diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8d9013c5e1ee..ecd4ea1d28a8 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -209,7 +209,7 @@ out: if (old) kfree_rcu(old, rcu); - kvm_vcpu_request_scan_ioapic(kvm); + kvm_make_scan_ioapic_request(kvm); } static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) @@ -348,6 +348,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) struct kvm_lapic *apic = vcpu->arch.apic; __kvm_apic_update_irr(pir, apic->regs); + + kvm_make_request(KVM_REQ_EVENT, vcpu); } EXPORT_SYMBOL_GPL(kvm_apic_update_irr); @@ -390,7 +392,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) vcpu = apic->vcpu; - if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) { + if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) { /* try to update RVI */ apic_clear_vector(vec, apic->regs + APIC_IRR); kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -551,15 +553,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); } -void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) -{ - struct kvm_lapic *apic = vcpu->arch.apic; - int i; - - for (i = 0; i < 8; i++) - apic_set_reg(apic, APIC_TMR + 0x10 * i, tmr[i]); -} - static void apic_update_ppr(struct kvm_lapic *apic) { u32 tpr, isrv, ppr, old_ppr; @@ -764,6 +757,65 @@ out: return ret; } +bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu) +{ + struct kvm_apic_map *map; + bool ret = false; + struct kvm_lapic *dst = NULL; + + if (irq->shorthand) + return false; + + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); + + if (!map) + goto out; + + if (irq->dest_mode == APIC_DEST_PHYSICAL) { + if (irq->dest_id == 0xFF) + goto out; + + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) + goto out; + + dst = map->phys_map[irq->dest_id]; + if (dst && kvm_apic_present(dst->vcpu)) + *dest_vcpu = dst->vcpu; + else + goto out; + } else { + u16 cid; + unsigned long bitmap = 1; + int i, r = 0; + + if (!kvm_apic_logical_map_valid(map)) + goto out; + + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); + + if (cid >= ARRAY_SIZE(map->logical_map)) + goto out; + + for_each_set_bit(i, &bitmap, 16) { + dst = map->logical_map[cid][i]; + if (++r == 2) + goto out; + } + + if (dst && kvm_apic_present(dst->vcpu)) + *dest_vcpu = dst->vcpu; + else + goto out; + } + + ret = true; +out: + rcu_read_unlock(); + return ret; +} + /* * Add a pending IRQ into lapic. * Return 1 if successfully added and 0 if discarded. @@ -781,6 +833,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_LOWEST: vcpu->arch.apic_arb_prio++; case APIC_DM_FIXED: + if (unlikely(trig_mode && !level)) + break; + /* FIXME add logic for vcpu on reset */ if (unlikely(!apic_enabled(apic))) break; @@ -790,6 +845,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (dest_map) __set_bit(vcpu->vcpu_id, dest_map); + if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { + if (trig_mode) + apic_set_vector(vector, apic->regs + APIC_TMR); + else + apic_clear_vector(vector, apic->regs + APIC_TMR); + } + if (kvm_x86_ops->deliver_posted_interrupt) kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); else { @@ -868,16 +930,32 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; } +static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) +{ + return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap); +} + static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) { - if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { - int trigger_mode; - if (apic_test_vector(vector, apic->regs + APIC_TMR)) - trigger_mode = IOAPIC_LEVEL_TRIG; - else - trigger_mode = IOAPIC_EDGE_TRIG; - kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); + int trigger_mode; + + /* Eoi the ioapic only if the ioapic doesn't own the vector. */ + if (!kvm_ioapic_handles_vector(apic, vector)) + return; + + /* Request a KVM exit to inform the userspace IOAPIC. */ + if (irqchip_split(apic->vcpu->kvm)) { + apic->vcpu->arch.pending_ioapic_eoi = vector; + kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu); + return; } + + if (apic_test_vector(vector, apic->regs + APIC_TMR)) + trigger_mode = IOAPIC_LEVEL_TRIG; + else + trigger_mode = IOAPIC_EDGE_TRIG; + + kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode); } static int apic_set_eoi(struct kvm_lapic *apic) @@ -1615,7 +1693,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } - apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); + apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu); apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; apic->highest_isr_cache = -1; update_divide_count(apic); @@ -1838,7 +1916,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_rtc_eoi_tracking_restore_one(vcpu); + if (ioapic_in_kernel(vcpu->kvm)) + kvm_rtc_eoi_tracking_restore_one(vcpu); + + vcpu->arch.apic_arb_prio = 0; } void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) @@ -1922,7 +2003,7 @@ static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu, /* Cache not set: could be safe but we don't bother. */ apic->highest_isr_cache == -1 || /* Need EOI to update ioapic. */ - kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) { + kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) { /* * PV EOI was disabled by apic_sync_pv_eoi_from_guest * so we need not do anything here. @@ -1978,7 +2059,7 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) struct kvm_lapic *apic = vcpu->arch.apic; u32 reg = (msr - APIC_BASE_MSR) << 4; - if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) + if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) return 1; if (reg == APIC_ICR2) @@ -1995,7 +2076,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) struct kvm_lapic *apic = vcpu->arch.apic; u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0; - if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic)) + if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic)) return 1; if (reg == APIC_DFR || reg == APIC_ICR2) { diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 764037991d26..fde8e35d5850 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -57,7 +57,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); void kvm_apic_set_version(struct kvm_vcpu *vcpu); -void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr); void __kvm_apic_update_irr(u32 *pir, void *regs); void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, @@ -144,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) return apic->vcpu->arch.apic_base & X2APIC_ENABLE; } -static inline bool kvm_apic_vid_enabled(struct kvm *kvm) +static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu) { - return kvm_x86_ops->vm_has_apicv(kvm); + return kvm_x86_ops->cpu_uses_apicv(vcpu); } static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) @@ -169,4 +168,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void wait_lapic_expire(struct kvm_vcpu *vcpu); +bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu); #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ff606f507913..7d85bcae3332 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -818,14 +818,11 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm->arch.indirect_shadow_pages--; } -static int has_wrprotected_page(struct kvm_vcpu *vcpu, - gfn_t gfn, - int level) +static int __has_wrprotected_page(gfn_t gfn, int level, + struct kvm_memory_slot *slot) { - struct kvm_memory_slot *slot; struct kvm_lpage_info *linfo; - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); if (slot) { linfo = lpage_info_slot(gfn, slot, level); return linfo->write_count; @@ -834,6 +831,14 @@ static int has_wrprotected_page(struct kvm_vcpu *vcpu, return 1; } +static int has_wrprotected_page(struct kvm_vcpu *vcpu, gfn_t gfn, int level) +{ + struct kvm_memory_slot *slot; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + return __has_wrprotected_page(gfn, level, slot); +} + static int host_mapping_level(struct kvm *kvm, gfn_t gfn) { unsigned long page_size; @@ -851,6 +856,17 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) return ret; } +static inline bool memslot_valid_for_gpte(struct kvm_memory_slot *slot, + bool no_dirty_log) +{ + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) + return false; + if (no_dirty_log && slot->dirty_bitmap) + return false; + + return true; +} + static struct kvm_memory_slot * gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) @@ -858,21 +874,25 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_memory_slot *slot; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); - if (!slot || slot->flags & KVM_MEMSLOT_INVALID || - (no_dirty_log && slot->dirty_bitmap)) + if (!memslot_valid_for_gpte(slot, no_dirty_log)) slot = NULL; return slot; } -static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) -{ - return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); -} - -static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) +static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, + bool *force_pt_level) { int host_level, level, max_level; + struct kvm_memory_slot *slot; + + if (unlikely(*force_pt_level)) + return PT_PAGE_TABLE_LEVEL; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, large_gfn); + *force_pt_level = !memslot_valid_for_gpte(slot, true); + if (unlikely(*force_pt_level)) + return PT_PAGE_TABLE_LEVEL; host_level = host_mapping_level(vcpu->kvm, large_gfn); @@ -882,7 +902,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) max_level = min(kvm_x86_ops->get_lpage_level(), host_level); for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) - if (has_wrprotected_page(vcpu, large_gfn, level)) + if (__has_wrprotected_page(large_gfn, level, slot)) break; return level - 1; @@ -2962,14 +2982,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, { int r; int level; - int force_pt_level; + bool force_pt_level = false; pfn_t pfn; unsigned long mmu_seq; bool map_writable, write = error_code & PFERR_WRITE_MASK; - force_pt_level = mapping_level_dirty_bitmap(vcpu, gfn); + level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { - level = mapping_level(vcpu, gfn); /* * This path builds a PAE pagetable - so we can map * 2mb pages at maximum. Therefore check if the level @@ -2979,8 +2998,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, level = PT_DIRECTORY_LEVEL; gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); - } else - level = PT_PAGE_TABLE_LEVEL; + } if (fast_page_fault(vcpu, v, level, error_code)) return 0; @@ -3427,7 +3445,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) static bool can_do_async_pf(struct kvm_vcpu *vcpu) { - if (unlikely(!irqchip_in_kernel(vcpu->kvm) || + if (unlikely(!lapic_in_kernel(vcpu) || kvm_event_needs_reinjection(vcpu))) return false; @@ -3476,7 +3494,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, pfn_t pfn; int r; int level; - int force_pt_level; + bool force_pt_level; gfn_t gfn = gpa >> PAGE_SHIFT; unsigned long mmu_seq; int write = error_code & PFERR_WRITE_MASK; @@ -3495,20 +3513,15 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (r) return r; - if (mapping_level_dirty_bitmap(vcpu, gfn) || - !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL)) - force_pt_level = 1; - else - force_pt_level = 0; - + force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, + PT_DIRECTORY_LEVEL); + level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { - level = mapping_level(vcpu, gfn); if (level > PT_DIRECTORY_LEVEL && !check_hugepage_cache_consistency(vcpu, gfn, level)) level = PT_DIRECTORY_LEVEL; gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); - } else - level = PT_PAGE_TABLE_LEVEL; + } if (fast_page_fault(vcpu, gpa, level, error_code)) return 0; @@ -3706,7 +3719,7 @@ static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, int maxphyaddr, bool execonly) { - int pte; + u64 bad_mt_xwr; rsvd_check->rsvd_bits_mask[0][3] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); @@ -3724,14 +3737,16 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; - for (pte = 0; pte < 64; pte++) { - int rwx_bits = pte & 7; - int mt = pte >> 3; - if (mt == 0x2 || mt == 0x3 || mt == 0x7 || - rwx_bits == 0x2 || rwx_bits == 0x6 || - (rwx_bits == 0x4 && !execonly)) - rsvd_check->bad_mt_xwr |= (1ull << pte); + bad_mt_xwr = 0xFFull << (2 * 8); /* bits 3..5 must not be 2 */ + bad_mt_xwr |= 0xFFull << (3 * 8); /* bits 3..5 must not be 3 */ + bad_mt_xwr |= 0xFFull << (7 * 8); /* bits 3..5 must not be 7 */ + bad_mt_xwr |= REPEAT_BYTE(1ull << 2); /* bits 0..2 must not be 010 */ + bad_mt_xwr |= REPEAT_BYTE(1ull << 6); /* bits 0..2 must not be 110 */ + if (!execonly) { + /* bits 0..2 must not be 100 unless VMX capabilities allow it */ + bad_mt_xwr |= REPEAT_BYTE(1ull << 4); } + rsvd_check->bad_mt_xwr = bad_mt_xwr; } static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 736e6ab8784d..b41faa91a6f9 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -698,7 +698,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, int r; pfn_t pfn; int level = PT_PAGE_TABLE_LEVEL; - int force_pt_level; + bool force_pt_level = false; unsigned long mmu_seq; bool map_writable, is_self_change_mapping; @@ -743,15 +743,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); - if (walker.level >= PT_DIRECTORY_LEVEL) - force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) - || is_self_change_mapping; - else - force_pt_level = 1; - if (!force_pt_level) { - level = min(walker.level, mapping_level(vcpu, walker.gfn)); - walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); - } + if (walker.level >= PT_DIRECTORY_LEVEL && !is_self_change_mapping) { + level = mapping_level(vcpu, walker.gfn, &force_pt_level); + if (likely(!force_pt_level)) { + level = min(walker.level, level); + walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1); + } + } else + force_pt_level = true; mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 94b7d15db3fc..f2c8e4917688 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -159,6 +159,9 @@ struct vcpu_svm { u32 apf_reason; u64 tsc_ratio; + + /* cached guest cpuid flags for faster access */ + bool nrips_enabled : 1; }; static DEFINE_PER_CPU(u64, current_tsc_ratio); @@ -514,7 +517,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (svm->vmcb->control.next_rip != 0) { - WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); + WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; } @@ -866,64 +869,6 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } -#define MTRR_TYPE_UC_MINUS 7 -#define MTRR2PROTVAL_INVALID 0xff - -static u8 mtrr2protval[8]; - -static u8 fallback_mtrr_type(int mtrr) -{ - /* - * WT and WP aren't always available in the host PAT. Treat - * them as UC and UC- respectively. Everything else should be - * there. - */ - switch (mtrr) - { - case MTRR_TYPE_WRTHROUGH: - return MTRR_TYPE_UNCACHABLE; - case MTRR_TYPE_WRPROT: - return MTRR_TYPE_UC_MINUS; - default: - BUG(); - } -} - -static void build_mtrr2protval(void) -{ - int i; - u64 pat; - - for (i = 0; i < 8; i++) - mtrr2protval[i] = MTRR2PROTVAL_INVALID; - - /* Ignore the invalid MTRR types. */ - mtrr2protval[2] = 0; - mtrr2protval[3] = 0; - - /* - * Use host PAT value to figure out the mapping from guest MTRR - * values to nested page table PAT/PCD/PWT values. We do not - * want to change the host PAT value every time we enter the - * guest. - */ - rdmsrl(MSR_IA32_CR_PAT, pat); - for (i = 0; i < 8; i++) { - u8 mtrr = pat >> (8 * i); - - if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID) - mtrr2protval[mtrr] = __cm_idx2pte(i); - } - - for (i = 0; i < 8; i++) { - if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) { - u8 fallback = fallback_mtrr_type(i); - mtrr2protval[i] = mtrr2protval[fallback]; - BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID); - } - } -} - static __init int svm_hardware_setup(void) { int cpu; @@ -990,7 +935,6 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); - build_mtrr2protval(); return 0; err: @@ -1145,44 +1089,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) return target_tsc - tsc; } -static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - - /* Unlike Intel, AMD takes the guest's CR0.CD into account. - * - * AMD does not have IPAT. To emulate it for the case of guests - * with no assigned devices, just set everything to WB. If guests - * have assigned devices, however, we cannot force WB for RAM - * pages only, so use the guest PAT directly. - */ - if (!kvm_arch_has_assigned_device(vcpu->kvm)) - *g_pat = 0x0606060606060606; - else - *g_pat = vcpu->arch.pat; -} - -static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - u8 mtrr; - - /* - * 1. MMIO: trust guest MTRR, so same as item 3. - * 2. No passthrough: always map as WB, and force guest PAT to WB as well - * 3. Passthrough: can't guarantee the result, try to trust guest. - */ - if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm)) - return 0; - - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) && - kvm_read_cr0(vcpu) & X86_CR0_CD) - return _PAGE_NOCACHE; - - mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); - return mtrr2protval[mtrr]; -} - -static void init_vmcb(struct vcpu_svm *svm, bool init_event) +static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; @@ -1253,8 +1160,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); - if (!init_event) - svm_set_efer(&svm->vcpu, 0); + svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; @@ -1278,7 +1184,6 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = svm->vcpu.arch.pat; - svm_set_guest_pat(svm, &save->g_pat); save->cr3 = 0; save->cr4 = 0; } @@ -1309,7 +1214,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; } - init_vmcb(svm, init_event); + init_vmcb(svm); kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); @@ -1365,7 +1270,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) clear_page(svm->vmcb); svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; - init_vmcb(svm, false); + init_vmcb(svm); svm_init_osvw(&svm->vcpu); @@ -1673,10 +1578,13 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (!vcpu->fpu_active) cr0 |= X86_CR0_TS; - - /* These are emulated via page tables. */ - cr0 &= ~(X86_CR0_CD | X86_CR0_NW); - + /* + * re-enable caching here because the QEMU bios + * does not do it - this results in some delay at + * reboot + */ + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + cr0 &= ~(X86_CR0_CD | X86_CR0_NW); svm->vmcb->save.cr0 = cr0; mark_dirty(svm->vmcb, VMCB_CR); update_cr0_intercept(svm); @@ -1984,7 +1892,7 @@ static int shutdown_interception(struct vcpu_svm *svm) * so reinitialize it. */ clear_page(svm->vmcb); - init_vmcb(svm, false); + init_vmcb(svm); kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; @@ -2459,7 +2367,9 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; - nested_vmcb->control.next_rip = vmcb->control.next_rip; + + if (svm->nrips_enabled) + nested_vmcb->control.next_rip = vmcb->control.next_rip; /* * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have @@ -3154,7 +3064,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ r = cr_interception(svm); - if (irqchip_in_kernel(svm->vcpu.kvm)) + if (lapic_in_kernel(&svm->vcpu)) return r; if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) return r; @@ -3351,16 +3261,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; - case MSR_IA32_CR_PAT: - if (npt_enabled) { - if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) - return 1; - vcpu->arch.pat = data; - svm_set_guest_pat(svm, &svm->vmcb->save.g_pat); - mark_dirty(svm->vmcb, VMCB_NPT); - break; - } - /* fall through */ default: return kvm_set_msr_common(vcpu, msr); } @@ -3398,24 +3298,11 @@ static int msr_interception(struct vcpu_svm *svm) static int interrupt_window_interception(struct vcpu_svm *svm) { - struct kvm_run *kvm_run = svm->vcpu.run; - kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; mark_dirty(svm->vmcb, VMCB_INTR); ++svm->vcpu.stat.irq_window_exits; - /* - * If the user space waits to inject interrupts, exit as soon as - * possible - */ - if (!irqchip_in_kernel(svm->vcpu.kvm) && - kvm_run->request_interrupt_window && - !kvm_cpu_has_interrupt(&svm->vcpu)) { - kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; - return 0; - } - return 1; } @@ -3763,12 +3650,12 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) return; } -static int svm_vm_has_apicv(struct kvm *kvm) +static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu) { return 0; } -static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu) { return; } @@ -4195,8 +4082,17 @@ static bool svm_has_high_real_mode_segbase(void) return true; } +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +{ + return 0; +} + static void svm_cpuid_update(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + + /* Update nrips enabled cache */ + svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); } static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -4524,7 +4420,7 @@ static struct kvm_x86_ops svm_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, - .vm_has_apicv = svm_vm_has_apicv, + .cpu_uses_apicv = svm_cpu_uses_apicv, .load_eoi_exitmap = svm_load_eoi_exitmap, .sync_pir_to_irr = svm_sync_pir_to_irr, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4eae7c35ddf5..120302511802 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -129,6 +129,24 @@ TRACE_EVENT(kvm_pio, ); /* + * Tracepoint for fast mmio. + */ +TRACE_EVENT(kvm_fast_mmio, + TP_PROTO(u64 gpa), + TP_ARGS(gpa), + + TP_STRUCT__entry( + __field(u64, gpa) + ), + + TP_fast_assign( + __entry->gpa = gpa; + ), + + TP_printk("fast mmio at gpa 0x%llx", __entry->gpa) +); + +/* * Tracepoint for cpuid. */ TRACE_EVENT(kvm_cpuid, @@ -974,6 +992,39 @@ TRACE_EVENT(kvm_enter_smm, __entry->smbase) ); +/* + * Tracepoint for VT-d posted-interrupts. + */ +TRACE_EVENT(kvm_pi_irte_update, + TP_PROTO(unsigned int vcpu_id, unsigned int gsi, + unsigned int gvec, u64 pi_desc_addr, bool set), + TP_ARGS(vcpu_id, gsi, gvec, pi_desc_addr, set), + + TP_STRUCT__entry( + __field( unsigned int, vcpu_id ) + __field( unsigned int, gsi ) + __field( unsigned int, gvec ) + __field( u64, pi_desc_addr ) + __field( bool, set ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->gsi = gsi; + __entry->gvec = gvec; + __entry->pi_desc_addr = pi_desc_addr; + __entry->set = set; + ), + + TP_printk("VT-d PI is %s for this irq, vcpu %u, gsi: 0x%x, " + "gvec: 0x%x, pi_desc_addr: 0x%llx", + __entry->set ? "enabled and being updated" : "disabled", + __entry->vcpu_id, + __entry->gsi, + __entry->gvec, + __entry->pi_desc_addr) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 64076740251e..5eb56ed77c1f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -35,6 +35,7 @@ #include "kvm_cache_regs.h" #include "x86.h" +#include <asm/cpu.h> #include <asm/io.h> #include <asm/desc.h> #include <asm/vmx.h> @@ -45,6 +46,7 @@ #include <asm/debugreg.h> #include <asm/kexec.h> #include <asm/apic.h> +#include <asm/irq_remapping.h> #include "trace.h" #include "pmu.h" @@ -424,6 +426,9 @@ struct nested_vmx { /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ u64 vmcs01_debugctl; + u16 vpid02; + u16 last_vpid; + u32 nested_vmx_procbased_ctls_low; u32 nested_vmx_procbased_ctls_high; u32 nested_vmx_true_procbased_ctls_low; @@ -440,14 +445,33 @@ struct nested_vmx { u32 nested_vmx_misc_low; u32 nested_vmx_misc_high; u32 nested_vmx_ept_caps; + u32 nested_vmx_vpid_caps; }; #define POSTED_INTR_ON 0 +#define POSTED_INTR_SN 1 + /* Posted-Interrupt Descriptor */ struct pi_desc { u32 pir[8]; /* Posted interrupt requested */ - u32 control; /* bit 0 of control is outstanding notification bit */ - u32 rsvd[7]; + union { + struct { + /* bit 256 - Outstanding Notification */ + u16 on : 1, + /* bit 257 - Suppress Notification */ + sn : 1, + /* bit 271:258 - Reserved */ + rsvd_1 : 14; + /* bit 279:272 - Notification Vector */ + u8 nv; + /* bit 287:280 - Reserved */ + u8 rsvd_2; + /* bit 319:288 - Notification Destination */ + u32 ndst; + }; + u64 control; + }; + u32 rsvd[6]; } __aligned(64); static bool pi_test_and_set_on(struct pi_desc *pi_desc) @@ -467,6 +491,30 @@ static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); } +static inline void pi_clear_sn(struct pi_desc *pi_desc) +{ + return clear_bit(POSTED_INTR_SN, + (unsigned long *)&pi_desc->control); +} + +static inline void pi_set_sn(struct pi_desc *pi_desc) +{ + return set_bit(POSTED_INTR_SN, + (unsigned long *)&pi_desc->control); +} + +static inline int pi_test_on(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_ON, + (unsigned long *)&pi_desc->control); +} + +static inline int pi_test_sn(struct pi_desc *pi_desc) +{ + return test_bit(POSTED_INTR_SN, + (unsigned long *)&pi_desc->control); +} + struct vcpu_vmx { struct kvm_vcpu vcpu; unsigned long host_rsp; @@ -532,8 +580,6 @@ struct vcpu_vmx { s64 vnmi_blocked_time; u32 exit_reason; - bool rdtscp_enabled; - /* Posted interrupt descriptor */ struct pi_desc pi_desc; @@ -563,6 +609,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) return container_of(vcpu, struct vcpu_vmx, vcpu); } +static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) +{ + return &(to_vmx(vcpu)->pi_desc); +} + #define VMCS12_OFFSET(x) offsetof(struct vmcs12, x) #define FIELD(number, name) [number] = VMCS12_OFFSET(name) #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ @@ -809,7 +860,7 @@ static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); static bool vmx_mpx_supported(void); static bool vmx_xsaves_supported(void); -static int vmx_vm_has_apicv(struct kvm *kvm); +static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -831,6 +882,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); static DEFINE_PER_CPU(struct desc_ptr, host_gdt); +/* + * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we + * can find which vCPU should be waken up. + */ +static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); +static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); + static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; static unsigned long *vmx_msr_bitmap_legacy; @@ -946,9 +1004,9 @@ static inline bool cpu_has_vmx_tpr_shadow(void) return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; } -static inline bool vm_need_tpr_shadow(struct kvm *kvm) +static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu) { - return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); + return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu); } static inline bool cpu_has_secondary_exec_ctrls(void) @@ -983,7 +1041,8 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void) static inline bool cpu_has_vmx_posted_intr(void) { - return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; + return IS_ENABLED(CONFIG_X86_LOCAL_APIC) && + vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR; } static inline bool cpu_has_vmx_apicv(void) @@ -1062,9 +1121,9 @@ static inline bool cpu_has_vmx_ple(void) SECONDARY_EXEC_PAUSE_LOOP_EXITING; } -static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) +static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu) { - return flexpriority_enabled && irqchip_in_kernel(kvm); + return flexpriority_enabled && lapic_in_kernel(vcpu); } static inline bool cpu_has_vmx_vpid(void) @@ -1157,6 +1216,11 @@ static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12) return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); } +static inline bool nested_cpu_has_vpid(struct vmcs12 *vmcs12) +{ + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID); +} + static inline bool nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12) { return nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT); @@ -1337,13 +1401,13 @@ static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs) __loaded_vmcs_clear, loaded_vmcs, 1); } -static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx) +static inline void vpid_sync_vcpu_single(int vpid) { - if (vmx->vpid == 0) + if (vpid == 0) return; if (cpu_has_vmx_invvpid_single()) - __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0); + __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0); } static inline void vpid_sync_vcpu_global(void) @@ -1352,10 +1416,10 @@ static inline void vpid_sync_vcpu_global(void) __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); } -static inline void vpid_sync_context(struct vcpu_vmx *vmx) +static inline void vpid_sync_context(int vpid) { if (cpu_has_vmx_invvpid_single()) - vpid_sync_vcpu_single(vmx); + vpid_sync_vcpu_single(vpid); else vpid_sync_vcpu_global(); } @@ -1895,6 +1959,52 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) preempt_enable(); } +static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return; + + do { + old.control = new.control = pi_desc->control; + + /* + * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there + * are two possible cases: + * 1. After running 'pre_block', context switch + * happened. For this case, 'sn' was set in + * vmx_vcpu_put(), so we need to clear it here. + * 2. After running 'pre_block', we were blocked, + * and woken up by some other guy. For this case, + * we don't need to do anything, 'pi_post_block' + * will do everything for us. However, we cannot + * check whether it is case #1 or case #2 here + * (maybe, not needed), so we also clear sn here, + * I think it is not a big deal. + */ + if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) { + if (vcpu->cpu != cpu) { + dest = cpu_physical_id(cpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + } + + /* set 'NV' to 'notification vector' */ + new.nv = POSTED_INTR_VECTOR; + } + + /* Allow posting non-urgent interrupts */ + new.sn = 0; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); +} /* * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. @@ -1945,10 +2055,27 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ vmx->loaded_vmcs->cpu = cpu; } + + vmx_vcpu_pi_load(vcpu, cpu); +} + +static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return; + + /* Set SN when the vCPU is preempted */ + if (vcpu->preempted) + pi_set_sn(pi_desc); } static void vmx_vcpu_put(struct kvm_vcpu *vcpu) { + vmx_vcpu_pi_put(vcpu); + __vmx_load_host_state(to_vmx(vcpu)); if (!vmm_exclusive) { __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs); @@ -2207,7 +2334,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) if (index >= 0) move_msr_up(vmx, index, save_nmsrs++); index = __find_msr_index(vmx, MSR_TSC_AUX); - if (index >= 0 && vmx->rdtscp_enabled) + if (index >= 0 && guest_cpuid_has_rdtscp(&vmx->vcpu)) move_msr_up(vmx, index, save_nmsrs++); /* * MSR_STAR is only needed on long mode guests, and only @@ -2377,7 +2504,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; - if (vmx_vm_has_apicv(vmx->vcpu.kvm)) + if (vmx_cpu_uses_apicv(&vmx->vcpu)) vmx->nested.nested_vmx_pinbased_ctls_high |= PIN_BASED_POSTED_INTR; @@ -2471,10 +2598,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING | - SECONDARY_EXEC_XSAVES; + SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_PCOMMIT; if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ @@ -2493,6 +2622,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) } else vmx->nested.nested_vmx_ept_caps = 0; + if (enable_vpid) + vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | + VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; + else + vmx->nested.nested_vmx_vpid_caps = 0; + if (enable_unrestricted_guest) vmx->nested.nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_UNRESTRICTED_GUEST; @@ -2608,7 +2743,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) break; case MSR_IA32_VMX_EPT_VPID_CAP: /* Currently, no nested vpid support */ - *pdata = vmx->nested.nested_vmx_ept_caps; + *pdata = vmx->nested.nested_vmx_ept_caps | + ((u64)vmx->nested.nested_vmx_vpid_caps << 32); break; default: return 1; @@ -2673,7 +2809,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.ia32_xss; break; case MSR_TSC_AUX: - if (!to_vmx(vcpu)->rdtscp_enabled) + if (!guest_cpuid_has_rdtscp(vcpu)) return 1; /* Otherwise falls through */ default: @@ -2779,7 +2915,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) clear_atomic_switch_msr(vmx, MSR_IA32_XSS); break; case MSR_TSC_AUX: - if (!vmx->rdtscp_enabled) + if (!guest_cpuid_has_rdtscp(vcpu)) return 1; /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) @@ -2874,6 +3010,8 @@ static int hardware_enable(void) return -EBUSY; INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); + INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); + spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); /* * Now we can enable the vmclear operation in kdump @@ -3015,7 +3153,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_SHADOW_VMCS | SECONDARY_EXEC_XSAVES | - SECONDARY_EXEC_ENABLE_PML; + SECONDARY_EXEC_ENABLE_PML | + SECONDARY_EXEC_PCOMMIT; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -3441,9 +3580,9 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif -static void vmx_flush_tlb(struct kvm_vcpu *vcpu) +static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) { - vpid_sync_context(to_vmx(vcpu)); + vpid_sync_context(vpid); if (enable_ept) { if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; @@ -3451,6 +3590,11 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) } } +static void vmx_flush_tlb(struct kvm_vcpu *vcpu) +{ + __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); +} + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; @@ -3644,20 +3788,21 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (!is_paging(vcpu)) { hw_cr4 &= ~X86_CR4_PAE; hw_cr4 |= X86_CR4_PSE; - /* - * SMEP/SMAP is disabled if CPU is in non-paging mode - * in hardware. However KVM always uses paging mode to - * emulate guest non-paging mode with TDP. - * To emulate this behavior, SMEP/SMAP needs to be - * manually disabled when guest switches to non-paging - * mode. - */ - hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); } else if (!(cr4 & X86_CR4_PAE)) { hw_cr4 &= ~X86_CR4_PAE; } } + if (!enable_unrestricted_guest && !is_paging(vcpu)) + /* + * SMEP/SMAP is disabled if CPU is in non-paging mode in + * hardware. However KVM always uses paging mode without + * unrestricted guest. + * To emulate this behavior, SMEP/SMAP needs to be manually + * disabled when guest switches to non-paging mode. + */ + hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP); + vmcs_writel(CR4_READ_SHADOW, cr4); vmcs_writel(GUEST_CR4, hw_cr4); return 0; @@ -4105,17 +4250,13 @@ static void seg_setup(int seg) static int alloc_apic_access_page(struct kvm *kvm) { struct page *page; - struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; mutex_lock(&kvm->slots_lock); if (kvm->arch.apic_access_page_done) goto out; - kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; - kvm_userspace_mem.flags = 0; - kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE; - kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __x86_set_memory_region(kvm, &kvm_userspace_mem); + r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, + APIC_DEFAULT_PHYS_BASE, PAGE_SIZE); if (r) goto out; @@ -4140,44 +4281,38 @@ static int alloc_identity_pagetable(struct kvm *kvm) { /* Called with kvm->slots_lock held. */ - struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; BUG_ON(kvm->arch.ept_identity_pagetable_done); - kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; - kvm_userspace_mem.flags = 0; - kvm_userspace_mem.guest_phys_addr = - kvm->arch.ept_identity_map_addr; - kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __x86_set_memory_region(kvm, &kvm_userspace_mem); + r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, + kvm->arch.ept_identity_map_addr, PAGE_SIZE); return r; } -static void allocate_vpid(struct vcpu_vmx *vmx) +static int allocate_vpid(void) { int vpid; - vmx->vpid = 0; if (!enable_vpid) - return; + return 0; spin_lock(&vmx_vpid_lock); vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); - if (vpid < VMX_NR_VPIDS) { - vmx->vpid = vpid; + if (vpid < VMX_NR_VPIDS) __set_bit(vpid, vmx_vpid_bitmap); - } + else + vpid = 0; spin_unlock(&vmx_vpid_lock); + return vpid; } -static void free_vpid(struct vcpu_vmx *vmx) +static void free_vpid(int vpid) { - if (!enable_vpid) + if (!enable_vpid || vpid == 0) return; spin_lock(&vmx_vpid_lock); - if (vmx->vpid != 0) - __clear_bit(vmx->vpid, vmx_vpid_bitmap); + __clear_bit(vpid, vmx_vpid_bitmap); spin_unlock(&vmx_vpid_lock); } @@ -4332,9 +4467,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) msr, MSR_TYPE_W); } -static int vmx_vm_has_apicv(struct kvm *kvm) +static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) { - return enable_apicv && irqchip_in_kernel(kvm); + return enable_apicv && lapic_in_kernel(vcpu); } static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) @@ -4378,6 +4513,22 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) { #ifdef CONFIG_SMP if (vcpu->mode == IN_GUEST_MODE) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* + * Currently, we don't support urgent interrupt, + * all interrupts are recognized as non-urgent + * interrupt, so we cannot post interrupts when + * 'SN' is set. + * + * If the vcpu is in guest mode, it means it is + * running instead of being scheduled out and + * waiting in the run queue, and that's the only + * case when 'SN' is set currently, warning if + * 'SN' is set. + */ + WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)); + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), POSTED_INTR_VECTOR); return true; @@ -4514,7 +4665,7 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) { u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; - if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) + if (!vmx_cpu_uses_apicv(&vmx->vcpu)) pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; return pin_based_exec_ctrl; } @@ -4526,7 +4677,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) exec_control &= ~CPU_BASED_MOV_DR_EXITING; - if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) { + if (!cpu_need_tpr_shadow(&vmx->vcpu)) { exec_control &= ~CPU_BASED_TPR_SHADOW; #ifdef CONFIG_X86_64 exec_control |= CPU_BASED_CR8_STORE_EXITING | @@ -4543,7 +4694,7 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; - if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) + if (!cpu_need_virtualize_apic_accesses(&vmx->vcpu)) exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; if (vmx->vpid == 0) exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; @@ -4557,7 +4708,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (!ple_gap) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) + if (!vmx_cpu_uses_apicv(&vmx->vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; @@ -4567,8 +4718,12 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) a current VMCS12 */ exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; - /* PML is enabled/disabled in creating/destorying vcpu */ - exec_control &= ~SECONDARY_EXEC_ENABLE_PML; + + if (!enable_pml) + exec_control &= ~SECONDARY_EXEC_ENABLE_PML; + + /* Currently, we allow L1 guest to directly run pcommit instruction. */ + exec_control &= ~SECONDARY_EXEC_PCOMMIT; return exec_control; } @@ -4613,12 +4768,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); - if (cpu_has_secondary_exec_ctrls()) { + if (cpu_has_secondary_exec_ctrls()) vmcs_write32(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control(vmx)); - } - if (vmx_vm_has_apicv(vmx->vcpu.kvm)) { + if (vmx_cpu_uses_apicv(&vmx->vcpu)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); @@ -4762,7 +4916,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (cpu_has_vmx_tpr_shadow() && !init_event) { vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); - if (vm_need_tpr_shadow(vcpu->kvm)) + if (cpu_need_tpr_shadow(vcpu)) vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, __pa(vcpu->arch.apic->regs)); vmcs_write32(TPR_THRESHOLD, 0); @@ -4770,7 +4924,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (vmx_vm_has_apicv(vcpu->kvm)) + if (vmx_cpu_uses_apicv(vcpu)) memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); if (vmx->vpid != 0) @@ -4780,12 +4934,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx_set_cr0(vcpu, cr0); /* enter rmode */ vmx->vcpu.arch.cr0 = cr0; vmx_set_cr4(vcpu, 0); - if (!init_event) - vmx_set_efer(vcpu, 0); + vmx_set_efer(vcpu, 0); vmx_fpu_activate(vcpu); update_exception_bitmap(vcpu); - vpid_sync_context(vmx); + vpid_sync_context(vmx->vpid); } /* @@ -4949,14 +5102,9 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { int ret; - struct kvm_userspace_memory_region tss_mem = { - .slot = TSS_PRIVATE_MEMSLOT, - .guest_phys_addr = addr, - .memory_size = PAGE_SIZE * 3, - .flags = 0, - }; - ret = x86_set_memory_region(kvm, &tss_mem); + ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, + PAGE_SIZE * 3); if (ret) return ret; kvm->arch.tss_addr = addr; @@ -5310,7 +5458,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) u8 cr8 = (u8)val; err = kvm_set_cr8(vcpu, cr8); kvm_complete_insn_gp(vcpu, err); - if (irqchip_in_kernel(vcpu->kvm)) + if (lapic_in_kernel(vcpu)) return 1; if (cr8_prev <= cr8) return 1; @@ -5524,17 +5672,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); ++vcpu->stat.irq_window_exits; - - /* - * If the user space waits to inject interrupts, exit as soon as - * possible - */ - if (!irqchip_in_kernel(vcpu->kvm) && - vcpu->run->request_interrupt_window && - !kvm_cpu_has_interrupt(vcpu)) { - vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; - return 0; - } return 1; } @@ -5767,6 +5904,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { skip_emulated_instruction(vcpu); + trace_kvm_fast_mmio(gpa); return 1; } @@ -5924,6 +6062,25 @@ static void update_ple_window_actual_max(void) ple_window_grow, INT_MIN); } +/* + * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. + */ +static void wakeup_handler(void) +{ + struct kvm_vcpu *vcpu; + int cpu = smp_processor_id(); + + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), + blocked_vcpu_list) { + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (pi_test_on(pi_desc) == 1) + kvm_vcpu_kick(vcpu); + } + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); +} + static __init int hardware_setup(void) { int r = -ENOMEM, i, msr; @@ -6110,6 +6267,8 @@ static __init int hardware_setup(void) kvm_x86_ops->enable_log_dirty_pt_masked = NULL; } + kvm_set_posted_intr_wakeup_handler(wakeup_handler); + return alloc_kvm_area(); out8: @@ -6641,7 +6800,6 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) { - u32 exec_control; if (vmx->nested.current_vmptr == -1ull) return; @@ -6654,9 +6812,8 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) they were modified */ copy_shadow_to_vmcs12(vmx); vmx->nested.sync_shadow_vmcs = false; - exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, + SECONDARY_EXEC_SHADOW_VMCS); vmcs_write64(VMCS_LINK_POINTER, -1ull); } vmx->nested.posted_intr_nv = -1; @@ -6676,6 +6833,7 @@ static void free_nested(struct vcpu_vmx *vmx) return; vmx->nested.vmxon = false; + free_vpid(vmx->nested.vpid02); nested_release_vmcs12(vmx); if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); @@ -7052,7 +7210,6 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); gpa_t vmptr; - u32 exec_control; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -7084,9 +7241,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) vmx->nested.current_vmcs12 = new_vmcs12; vmx->nested.current_vmcs12_page = page; if (enable_shadow_vmcs) { - exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - exec_control |= SECONDARY_EXEC_SHADOW_VMCS; - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, + SECONDARY_EXEC_SHADOW_VMCS); vmcs_write64(VMCS_LINK_POINTER, __pa(vmx->nested.current_shadow_vmcs)); vmx->nested.sync_shadow_vmcs = true; @@ -7192,7 +7348,63 @@ static int handle_invept(struct kvm_vcpu *vcpu) static int handle_invvpid(struct kvm_vcpu *vcpu) { - kvm_queue_exception(vcpu, UD_VECTOR); + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 vmx_instruction_info; + unsigned long type, types; + gva_t gva; + struct x86_exception e; + int vpid; + + if (!(vmx->nested.nested_vmx_secondary_ctls_high & + SECONDARY_EXEC_ENABLE_VPID) || + !(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + if (!nested_vmx_check_permission(vcpu)) + return 1; + + vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); + + types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7; + + if (!(types & (1UL << type))) { + nested_vmx_failValid(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + return 1; + } + + /* according to the intel vmx instruction reference, the memory + * operand is read even if it isn't needed (e.g., for type==global) + */ + if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + vmx_instruction_info, false, &gva)) + return 1; + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vpid, + sizeof(u32), &e)) { + kvm_inject_page_fault(vcpu, &e); + return 1; + } + + switch (type) { + case VMX_VPID_EXTENT_ALL_CONTEXT: + if (get_vmcs12(vcpu)->virtual_processor_id == 0) { + nested_vmx_failValid(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + return 1; + } + __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); + nested_vmx_succeed(vcpu); + break; + default: + /* Trap single context invalidation invvpid calls */ + BUG_ON(1); + break; + } + + skip_emulated_instruction(vcpu); return 1; } @@ -7221,6 +7433,13 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) return 1; } +static int handle_pcommit(struct kvm_vcpu *vcpu) +{ + /* we never catch pcommit instruct for L1 guest. */ + WARN_ON(1); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -7271,6 +7490,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_XSAVES] = handle_xsaves, [EXIT_REASON_XRSTORS] = handle_xrstors, [EXIT_REASON_PML_FULL] = handle_pml_full, + [EXIT_REASON_PCOMMIT] = handle_pcommit, }; static const int kvm_vmx_max_exit_handlers = @@ -7572,6 +7792,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * the XSS exit bitmap in vmcs12. */ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); + case EXIT_REASON_PCOMMIT: + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT); default: return true; } @@ -7583,10 +7805,9 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) *info2 = vmcs_read32(VM_EXIT_INTR_INFO); } -static int vmx_enable_pml(struct vcpu_vmx *vmx) +static int vmx_create_pml_buffer(struct vcpu_vmx *vmx) { struct page *pml_pg; - u32 exec_control; pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!pml_pg) @@ -7597,24 +7818,15 @@ static int vmx_enable_pml(struct vcpu_vmx *vmx) vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); - exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - exec_control |= SECONDARY_EXEC_ENABLE_PML; - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); - return 0; } -static void vmx_disable_pml(struct vcpu_vmx *vmx) +static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) { - u32 exec_control; - - ASSERT(vmx->pml_pg); - __free_page(vmx->pml_pg); - vmx->pml_pg = NULL; - - exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - exec_control &= ~SECONDARY_EXEC_ENABLE_PML; - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + if (vmx->pml_pg) { + __free_page(vmx->pml_pg); + vmx->pml_pg = NULL; + } } static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) @@ -7938,10 +8150,10 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) * apicv */ if (!cpu_has_vmx_virtualize_x2apic_mode() || - !vmx_vm_has_apicv(vcpu->kvm)) + !vmx_cpu_uses_apicv(vcpu)) return; - if (!vm_need_tpr_shadow(vcpu->kvm)) + if (!cpu_need_tpr_shadow(vcpu)) return; sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); @@ -8043,9 +8255,10 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) } } -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu) { - if (!vmx_vm_has_apicv(vcpu->kvm)) + u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap; + if (!vmx_cpu_uses_apicv(vcpu)) return; vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); @@ -8491,8 +8704,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); if (enable_pml) - vmx_disable_pml(vmx); - free_vpid(vmx); + vmx_destroy_pml_buffer(vmx); + free_vpid(vmx->vpid); leave_guest_mode(vcpu); vmx_load_vmcs01(vcpu); free_nested(vmx); @@ -8511,7 +8724,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!vmx) return ERR_PTR(-ENOMEM); - allocate_vpid(vmx); + vmx->vpid = allocate_vpid(); err = kvm_vcpu_init(&vmx->vcpu, kvm, id); if (err) @@ -8544,7 +8757,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) put_cpu(); if (err) goto free_vmcs; - if (vm_need_virtualize_apic_accesses(kvm)) { + if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { err = alloc_apic_access_page(kvm); if (err) goto free_vmcs; @@ -8559,8 +8772,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vmcs; } - if (nested) + if (nested) { nested_vmx_setup_ctls_msrs(vmx); + vmx->nested.vpid02 = allocate_vpid(); + } vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; @@ -8573,7 +8788,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) * for the guest, etc. */ if (enable_pml) { - err = vmx_enable_pml(vmx); + err = vmx_create_pml_buffer(vmx); if (err) goto free_vmcs; } @@ -8581,13 +8796,14 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) return &vmx->vcpu; free_vmcs: + free_vpid(vmx->nested.vpid02); free_loaded_vmcs(vmx->loaded_vmcs); free_msrs: kfree(vmx->guest_msrs); uninit_vcpu: kvm_vcpu_uninit(&vmx->vcpu); free_vcpu: - free_vpid(vmx); + free_vpid(vmx->vpid); kmem_cache_free(kvm_vcpu_cache, vmx); return ERR_PTR(err); } @@ -8617,17 +8833,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u64 ipat = 0; /* For VT-d and EPT combination - * 1. MMIO: guest may want to apply WC, trust it. + * 1. MMIO: always map as UC * 2. EPT with VT-d: * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. So the same as item 1. + * result, try to trust guest. * b. VT-d with snooping control feature: snooping control feature of * VT-d engine can guarantee the cache correctness. Just set it * to WB to keep consistent with host. So the same as item 3. * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep * consistent with host MTRR */ - if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) { + if (is_mmio) { + cache = MTRR_TYPE_UNCACHABLE; + goto exit; + } + + if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { ipat = VMX_EPT_IPAT_BIT; cache = MTRR_TYPE_WRBACK; goto exit; @@ -8657,49 +8878,67 @@ static int vmx_get_lpage_level(void) return PT_PDPE_LEVEL; } +static void vmcs_set_secondary_exec_control(u32 new_ctl) +{ + /* + * These bits in the secondary execution controls field + * are dynamic, the others are mostly based on the hypervisor + * architecture and the guest's CPUID. Do not touch the + * dynamic bits. + */ + u32 mask = + SECONDARY_EXEC_SHADOW_VMCS | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + + u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, + (new_ctl & ~mask) | (cur_ctl & mask)); +} + static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 exec_control; + u32 secondary_exec_ctl = vmx_secondary_exec_control(vmx); - vmx->rdtscp_enabled = false; if (vmx_rdtscp_supported()) { - exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - if (exec_control & SECONDARY_EXEC_RDTSCP) { - best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); - if (best && (best->edx & bit(X86_FEATURE_RDTSCP))) - vmx->rdtscp_enabled = true; - else { - exec_control &= ~SECONDARY_EXEC_RDTSCP; - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, - exec_control); - } + bool rdtscp_enabled = guest_cpuid_has_rdtscp(vcpu); + if (!rdtscp_enabled) + secondary_exec_ctl &= ~SECONDARY_EXEC_RDTSCP; + + if (nested) { + if (rdtscp_enabled) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_RDTSCP; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_RDTSCP; } - if (nested && !vmx->rdtscp_enabled) - vmx->nested.nested_vmx_secondary_ctls_high &= - ~SECONDARY_EXEC_RDTSCP; } /* Exposing INVPCID only when PCID is exposed */ best = kvm_find_cpuid_entry(vcpu, 0x7, 0); if (vmx_invpcid_supported() && - best && (best->ebx & bit(X86_FEATURE_INVPCID)) && - guest_cpuid_has_pcid(vcpu)) { - exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - exec_control |= SECONDARY_EXEC_ENABLE_INVPCID; - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, - exec_control); - } else { - if (cpu_has_secondary_exec_ctrls()) { - exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); - exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID; - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, - exec_control); - } + (!best || !(best->ebx & bit(X86_FEATURE_INVPCID)) || + !guest_cpuid_has_pcid(vcpu))) { + secondary_exec_ctl &= ~SECONDARY_EXEC_ENABLE_INVPCID; + if (best) best->ebx &= ~bit(X86_FEATURE_INVPCID); } + + vmcs_set_secondary_exec_control(secondary_exec_ctl); + + if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { + if (guest_cpuid_has_pcommit(vcpu)) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_PCOMMIT; + else + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_PCOMMIT; + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -9307,13 +9546,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (cpu_has_secondary_exec_ctrls()) { exec_control = vmx_secondary_exec_control(vmx); - if (!vmx->rdtscp_enabled) - exec_control &= ~SECONDARY_EXEC_RDTSCP; + /* Take the following fields only from vmcs12 */ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_PCOMMIT); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12->secondary_vm_exec_control; @@ -9332,7 +9571,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(vmx->nested.apic_access_page)); } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && - (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))) { + cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; kvm_vcpu_reload_apic_access_page(vcpu); @@ -9442,12 +9681,24 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (enable_vpid) { /* - * Trivially support vpid by letting L2s share their parent - * L1's vpid. TODO: move to a more elaborate solution, giving - * each L2 its own vpid and exposing the vpid feature to L1. + * There is no direct mapping between vpid02 and vpid12, the + * vpid02 is per-vCPU for L0 and reused while the value of + * vpid12 is changed w/ one invvpid during nested vmentry. + * The vpid12 is allocated by L1 for L2, so it will not + * influence global bitmap(for vpid01 and vpid02 allocation) + * even if spawn a lot of nested vCPUs. */ - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - vmx_flush_tlb(vcpu); + if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) { + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); + if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { + vmx->nested.last_vpid = vmcs12->virtual_processor_id; + __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); + } + } else { + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); + vmx_flush_tlb(vcpu); + } + } if (nested_cpu_has_ept(vmcs12)) { @@ -10287,6 +10538,201 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); } +/* + * This routine does the following things for vCPU which is going + * to be blocked if VT-d PI is enabled. + * - Store the vCPU to the wakeup list, so when interrupts happen + * we can find the right vCPU to wake up. + * - Change the Posted-interrupt descriptor as below: + * 'NDST' <-- vcpu->pre_pcpu + * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR + * - If 'ON' is set during this process, which means at least one + * interrupt is posted for this vCPU, we cannot block it, in + * this case, return 1, otherwise, return 0. + * + */ +static int vmx_pre_block(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + unsigned int dest; + struct pi_desc old, new; + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return 0; + + vcpu->pre_pcpu = vcpu->cpu; + spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, + vcpu->pre_pcpu)); + spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + + do { + old.control = new.control = pi_desc->control; + + /* + * We should not block the vCPU if + * an interrupt is posted for it. + */ + if (pi_test_on(pi_desc) == 1) { + spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock_irqrestore( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + vcpu->pre_pcpu = -1; + + return 1; + } + + WARN((pi_desc->sn == 1), + "Warning: SN field of posted-interrupts " + "is set before blocking\n"); + + /* + * Since vCPU can be preempted during this process, + * vcpu->cpu could be different with pre_pcpu, we + * need to set pre_pcpu as the destination of wakeup + * notification event, then we can find the right vCPU + * to wakeup in wakeup handler if interrupts happen + * when the vCPU is in blocked state. + */ + dest = cpu_physical_id(vcpu->pre_pcpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* set 'NV' to 'wakeup vector' */ + new.nv = POSTED_INTR_WAKEUP_VECTOR; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + + return 0; +} + +static void vmx_post_block(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; + unsigned long flags; + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return; + + do { + old.control = new.control = pi_desc->control; + + dest = cpu_physical_id(vcpu->cpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* Allow posting non-urgent interrupts */ + new.sn = 0; + + /* set 'NV' to 'notification vector' */ + new.nv = POSTED_INTR_VECTOR; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + + if(vcpu->pre_pcpu != -1) { + spin_lock_irqsave( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock_irqrestore( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + vcpu->pre_pcpu = -1; + } +} + +/* + * vmx_update_pi_irte - set IRTE for Posted-Interrupts + * + * @kvm: kvm + * @host_irq: host irq of the interrupt + * @guest_irq: gsi of the interrupt + * @set: set or unset PI + * returns 0 on success, < 0 on failure + */ +static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + struct kvm_kernel_irq_routing_entry *e; + struct kvm_irq_routing_table *irq_rt; + struct kvm_lapic_irq irq; + struct kvm_vcpu *vcpu; + struct vcpu_data vcpu_info; + int idx, ret = -EINVAL; + + if (!kvm_arch_has_assigned_device(kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return 0; + + idx = srcu_read_lock(&kvm->irq_srcu); + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); + BUG_ON(guest_irq >= irq_rt->nr_rt_entries); + + hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { + if (e->type != KVM_IRQ_ROUTING_MSI) + continue; + /* + * VT-d PI cannot support posting multicast/broadcast + * interrupts to a vCPU, we still use interrupt remapping + * for these kind of interrupts. + * + * For lowest-priority interrupts, we only support + * those with single CPU as the destination, e.g. user + * configures the interrupts via /proc/irq or uses + * irqbalance to make the interrupts single-CPU. + * + * We will support full lowest-priority interrupt later. + */ + + kvm_set_msi_irq(e, &irq); + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) + continue; + + vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); + vcpu_info.vector = irq.vector; + + trace_kvm_pi_irte_update(vcpu->vcpu_id, e->gsi, + vcpu_info.vector, vcpu_info.pi_desc_addr, set); + + if (set) + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); + else { + /* suppress notification event before unposting */ + pi_set_sn(vcpu_to_pi_desc(vcpu)); + ret = irq_set_vcpu_affinity(host_irq, NULL); + pi_clear_sn(vcpu_to_pi_desc(vcpu)); + } + + if (ret < 0) { + printk(KERN_INFO "%s: failed to update PI IRTE\n", + __func__); + goto out; + } + } + + ret = 0; +out: + srcu_read_unlock(&kvm->irq_srcu, idx); + return ret; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -10356,7 +10802,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .vm_has_apicv = vmx_vm_has_apicv, + .cpu_uses_apicv = vmx_cpu_uses_apicv, .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, @@ -10403,7 +10849,12 @@ static struct kvm_x86_ops vmx_x86_ops = { .flush_log_dirty = vmx_flush_log_dirty, .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, + .pre_block = vmx_pre_block, + .post_block = vmx_post_block, + .pmu_ops = &intel_pmu_ops, + + .update_pi_irte = vmx_update_pi_irte, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 991466bf8dee..4a6eff166fc6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -51,6 +51,8 @@ #include <linux/pci.h> #include <linux/timekeeper_internal.h> #include <linux/pvclock_gtod.h> +#include <linux/kvm_irqfd.h> +#include <linux/irqbypass.h> #include <trace/events/kvm.h> #define CREATE_TRACE_POINTS @@ -64,6 +66,7 @@ #include <asm/fpu/internal.h> /* Ugh! */ #include <asm/pvclock.h> #include <asm/div64.h> +#include <asm/irq_remapping.h> #define MAX_IO_MSRS 256 #define KVM_MAX_MCE_BANKS 32 @@ -622,7 +625,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if ((cr0 ^ old_cr0) & update_bits) kvm_mmu_reset_context(vcpu); - if ((cr0 ^ old_cr0) & X86_CR0_CD) + if (((cr0 ^ old_cr0) & X86_CR0_CD) && + kvm_arch_has_noncoherent_dma(vcpu->kvm) && + !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); return 0; @@ -663,9 +668,9 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ if (index != XCR_XFEATURE_ENABLED_MASK) return 1; - if (!(xcr0 & XSTATE_FP)) + if (!(xcr0 & XFEATURE_MASK_FP)) return 1; - if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) + if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE)) return 1; /* @@ -673,23 +678,24 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) * saving. However, xcr0 bit 0 is always set, even if the * emulated CPU does not support XSAVE (see fx_init). */ - valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP; + valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP; if (xcr0 & ~valid_bits) return 1; - if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR))) + if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) != + (!(xcr0 & XFEATURE_MASK_BNDCSR))) return 1; - if (xcr0 & XSTATE_AVX512) { - if (!(xcr0 & XSTATE_YMM)) + if (xcr0 & XFEATURE_MASK_AVX512) { + if (!(xcr0 & XFEATURE_MASK_YMM)) return 1; - if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512) + if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) return 1; } kvm_put_guest_xcr0(vcpu); vcpu->arch.xcr0 = xcr0; - if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK) + if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) kvm_update_cpuid(vcpu); return 0; } @@ -788,7 +794,7 @@ int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { if (cr8 & CR8_RESERVED_BITS) return 1; - if (irqchip_in_kernel(vcpu->kvm)) + if (lapic_in_kernel(vcpu)) kvm_lapic_set_tpr(vcpu, cr8); else vcpu->arch.cr8 = cr8; @@ -798,7 +804,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr8); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) { - if (irqchip_in_kernel(vcpu->kvm)) + if (lapic_in_kernel(vcpu)) return kvm_lapic_get_cr8(vcpu); else return vcpu->arch.cr8; @@ -952,6 +958,9 @@ static u32 emulated_msrs[] = { HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, + HV_X64_MSR_RESET, + HV_X64_MSR_VP_INDEX, + HV_X64_MSR_VP_RUNTIME, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -1708,8 +1717,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->pvclock_set_guest_stopped_request = false; } - pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO; - /* If the host uses TSC clocksource, then it is stable */ if (use_master_clock) pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; @@ -1899,6 +1906,8 @@ static void accumulate_steal_time(struct kvm_vcpu *vcpu) static void record_steal_time(struct kvm_vcpu *vcpu) { + accumulate_steal_time(vcpu); + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -2007,8 +2016,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) &vcpu->requests); ka->boot_vcpu_runs_old_kvmclock = tmp; - - ka->kvmclock_offset = -get_kernel_ns(); } vcpu->arch.time = data; @@ -2051,12 +2058,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!(data & KVM_MSR_ENABLED)) break; - vcpu->arch.st.last_steal = current->sched_info.run_delay; - - preempt_disable(); - accumulate_steal_time(vcpu); - preempt_enable(); - kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); break; @@ -2452,6 +2453,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_DISABLE_QUIRKS: case KVM_CAP_SET_BOOT_CPU_ID: + case KVM_CAP_SPLIT_IRQCHIP: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -2631,7 +2633,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->cpu = cpu; } - accumulate_steal_time(vcpu); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); } @@ -2665,12 +2666,24 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, { if (irq->irq >= KVM_NR_INTERRUPTS) return -EINVAL; - if (irqchip_in_kernel(vcpu->kvm)) + + if (!irqchip_in_kernel(vcpu->kvm)) { + kvm_queue_interrupt(vcpu, irq->irq, false); + kvm_make_request(KVM_REQ_EVENT, vcpu); + return 0; + } + + /* + * With in-kernel LAPIC, we only use this to inject EXTINT, so + * fail for in-kernel 8259. + */ + if (pic_in_kernel(vcpu->kvm)) return -ENXIO; - kvm_queue_interrupt(vcpu, irq->irq, false); - kvm_make_request(KVM_REQ_EVENT, vcpu); + if (vcpu->arch.pending_external_vector != -1) + return -EEXIST; + vcpu->arch.pending_external_vector = irq->irq; return 0; } @@ -2909,7 +2922,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) * Copy each region from the possibly compacted offset to the * non-compacted offset. */ - valid = xstate_bv & ~XSTATE_FPSSE; + valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { u64 feature = valid & -valid; int index = fls64(feature) - 1; @@ -2947,7 +2960,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) * Copy each region from the non-compacted offset to the * possibly compacted offset. */ - valid = xstate_bv & ~XSTATE_FPSSE; + valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { u64 feature = valid & -valid; int index = fls64(feature) - 1; @@ -2975,7 +2988,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, &vcpu->arch.guest_fpu.state.fxsave, sizeof(struct fxregs_state)); *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = - XSTATE_FPSSE; + XFEATURE_MASK_FPSSE; } } @@ -2995,7 +3008,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, return -EINVAL; load_xsave(vcpu, (u8 *)guest_xsave->region); } else { - if (xstate_bv & ~XSTATE_FPSSE) + if (xstate_bv & ~XFEATURE_MASK_FPSSE) return -EINVAL; memcpy(&vcpu->arch.guest_fpu.state.fxsave, guest_xsave->region, sizeof(struct fxregs_state)); @@ -3179,7 +3192,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_vapic_addr va; r = -EINVAL; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!lapic_in_kernel(vcpu)) goto out; r = -EFAULT; if (copy_from_user(&va, argp, sizeof va)) @@ -3428,41 +3441,35 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps) { - int r = 0; - mutex_lock(&kvm->arch.vpit->pit_state.lock); memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state)); mutex_unlock(&kvm->arch.vpit->pit_state.lock); - return r; + return 0; } static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps) { - int r = 0; - mutex_lock(&kvm->arch.vpit->pit_state.lock); memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state)); kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0); mutex_unlock(&kvm->arch.vpit->pit_state.lock); - return r; + return 0; } static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) { - int r = 0; - mutex_lock(&kvm->arch.vpit->pit_state.lock); memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels, sizeof(ps->channels)); ps->flags = kvm->arch.vpit->pit_state.flags; mutex_unlock(&kvm->arch.vpit->pit_state.lock); memset(&ps->reserved, 0, sizeof(ps->reserved)); - return r; + return 0; } static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) { - int r = 0, start = 0; + int start = 0; u32 prev_legacy, cur_legacy; mutex_lock(&kvm->arch.vpit->pit_state.lock); prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY; @@ -3474,7 +3481,7 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps) kvm->arch.vpit->pit_state.flags = ps->flags; kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start); mutex_unlock(&kvm->arch.vpit->pit_state.lock); - return r; + return 0; } static int kvm_vm_ioctl_reinject(struct kvm *kvm, @@ -3559,6 +3566,28 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.disabled_quirks = cap->args[0]; r = 0; break; + case KVM_CAP_SPLIT_IRQCHIP: { + mutex_lock(&kvm->lock); + r = -EINVAL; + if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS) + goto split_irqchip_unlock; + r = -EEXIST; + if (irqchip_in_kernel(kvm)) + goto split_irqchip_unlock; + if (atomic_read(&kvm->online_vcpus)) + goto split_irqchip_unlock; + r = kvm_setup_empty_irq_routing(kvm); + if (r) + goto split_irqchip_unlock; + /* Pairs with irqchip_in_kernel. */ + smp_wmb(); + kvm->arch.irqchip_split = true; + kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; + r = 0; +split_irqchip_unlock: + mutex_unlock(&kvm->lock); + break; + } default: r = -EINVAL; break; @@ -3672,7 +3701,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } r = -ENXIO; - if (!irqchip_in_kernel(kvm)) + if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) goto get_irqchip_out; r = kvm_vm_ioctl_get_irqchip(kvm, chip); if (r) @@ -3696,7 +3725,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } r = -ENXIO; - if (!irqchip_in_kernel(kvm)) + if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) goto set_irqchip_out; r = kvm_vm_ioctl_set_irqchip(kvm, chip); if (r) @@ -4063,6 +4092,15 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); } +static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes) +{ + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes); + + return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE; +} + int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, @@ -4798,6 +4836,7 @@ static const struct x86_emulate_ops emulate_ops = { .write_gpr = emulator_write_gpr, .read_std = kvm_read_guest_virt_system, .write_std = kvm_write_guest_virt_system, + .read_phys = kvm_read_guest_phys_system, .fetch = kvm_fetch_guest_virt, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, @@ -5670,7 +5709,7 @@ void kvm_arch_exit(void) int kvm_vcpu_halt(struct kvm_vcpu *vcpu) { ++vcpu->stat.halt_exits; - if (irqchip_in_kernel(vcpu->kvm)) { + if (lapic_in_kernel(vcpu)) { vcpu->arch.mp_state = KVM_MP_STATE_HALTED; return 1; } else { @@ -5777,9 +5816,15 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) */ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) { - return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && - vcpu->run->request_interrupt_window && - kvm_arch_interrupt_allowed(vcpu)); + if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm)) + return false; + + if (kvm_cpu_has_interrupt(vcpu)) + return false; + + return (irqchip_split(vcpu->kvm) + ? kvm_apic_accept_pic_intr(vcpu) + : kvm_arch_interrupt_allowed(vcpu)); } static void post_kvm_run_save(struct kvm_vcpu *vcpu) @@ -5790,13 +5835,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); - if (irqchip_in_kernel(vcpu->kvm)) - kvm_run->ready_for_interrupt_injection = 1; - else + if (!irqchip_in_kernel(vcpu->kvm)) kvm_run->ready_for_interrupt_injection = kvm_arch_interrupt_allowed(vcpu) && !kvm_cpu_has_interrupt(vcpu) && !kvm_event_needs_reinjection(vcpu); + else if (!pic_in_kernel(vcpu->kvm)) + kvm_run->ready_for_interrupt_injection = + kvm_apic_accept_pic_intr(vcpu) && + !kvm_cpu_has_interrupt(vcpu); + else + kvm_run->ready_for_interrupt_injection = 1; } static void update_cr8_intercept(struct kvm_vcpu *vcpu) @@ -6147,18 +6196,18 @@ static void process_smi(struct kvm_vcpu *vcpu) static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { - u64 eoi_exit_bitmap[4]; - u32 tmr[8]; - if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; - memset(eoi_exit_bitmap, 0, 32); - memset(tmr, 0, 32); + memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); - kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr); - kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); - kvm_apic_update_tmr(vcpu, tmr); + if (irqchip_split(vcpu->kvm)) + kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); + else { + kvm_x86_ops->sync_pir_to_irr(vcpu); + kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); + } + kvm_x86_ops->load_eoi_exitmap(vcpu); } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) @@ -6171,7 +6220,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { struct page *page = NULL; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!lapic_in_kernel(vcpu)) return; if (!kvm_x86_ops->set_apic_access_page_addr) @@ -6209,7 +6258,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; - bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && + bool req_int_win = !lapic_in_kernel(vcpu) && vcpu->run->request_interrupt_window; bool req_immediate_exit = false; @@ -6261,6 +6310,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_pmu_handle_event(vcpu); if (kvm_check_request(KVM_REQ_PMI, vcpu)) kvm_pmu_deliver_pmi(vcpu); + if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { + BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); + if (test_bit(vcpu->arch.pending_ioapic_eoi, + (void *) vcpu->arch.eoi_exit_bitmap)) { + vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; + vcpu->run->eoi.vector = + vcpu->arch.pending_ioapic_eoi; + r = 0; + goto out; + } + } if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) vcpu_scan_ioapic(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) @@ -6271,6 +6331,26 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET; + r = 0; + goto out; + } + } + + /* + * KVM_REQ_EVENT is not set when posted interrupts are set by + * VT-d hardware, so we have to update RVI unconditionally. + */ + if (kvm_lapic_enabled(vcpu)) { + /* + * Update architecture specific hints for APIC + * virtual interrupt delivery. + */ + if (kvm_x86_ops->hwapic_irr_update) + kvm_x86_ops->hwapic_irr_update(vcpu, + kvm_lapic_find_highest_irr(vcpu)); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { @@ -6289,13 +6369,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->enable_irq_window(vcpu); if (kvm_lapic_enabled(vcpu)) { - /* - * Update architecture specific hints for APIC - * virtual interrupt delivery. - */ - if (kvm_x86_ops->hwapic_irr_update) - kvm_x86_ops->hwapic_irr_update(vcpu, - kvm_lapic_find_highest_irr(vcpu)); update_cr8_intercept(vcpu); kvm_lapic_sync_to_vapic(vcpu); } @@ -6431,10 +6504,15 @@ out: static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) { - if (!kvm_arch_vcpu_runnable(vcpu)) { + if (!kvm_arch_vcpu_runnable(vcpu) && + (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + + if (kvm_x86_ops->post_block) + kvm_x86_ops->post_block(vcpu); + if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) return 1; } @@ -6457,6 +6535,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) return 1; } +static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && + !vcpu->arch.apf.halted); +} + static int vcpu_run(struct kvm_vcpu *vcpu) { int r; @@ -6465,11 +6549,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu) vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); for (;;) { - if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && - !vcpu->arch.apf.halted) + if (kvm_vcpu_running(vcpu)) { r = vcpu_enter_guest(vcpu); - else + } else { r = vcpu_block(kvm, vcpu); + } + if (r <= 0) break; @@ -6478,8 +6563,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu) kvm_inject_pending_timer_irqs(vcpu); if (dm_request_for_irq_injection(vcpu)) { - r = -EINTR; - vcpu->run->exit_reason = KVM_EXIT_INTR; + r = 0; + vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; ++vcpu->stat.request_irq_exits; break; } @@ -6606,7 +6691,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } /* re-sync apic's tpr */ - if (!irqchip_in_kernel(vcpu->kvm)) { + if (!lapic_in_kernel(vcpu)) { if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { r = -EINVAL; goto out; @@ -7004,7 +7089,7 @@ static void fx_init(struct kvm_vcpu *vcpu) /* * Ensure guest xcr0 is valid for loading */ - vcpu->arch.xcr0 = XSTATE_FP; + vcpu->arch.xcr0 = XFEATURE_MASK_FP; vcpu->arch.cr0 |= X86_CR0_ET; } @@ -7306,7 +7391,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { - return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); + return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu); } struct static_key kvm_no_apic_vcpu __read_mostly; @@ -7375,6 +7460,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_async_pf_hash_reset(vcpu); kvm_pmu_init(vcpu); + vcpu->arch.pending_external_vector = -1; + return 0; fail_free_mce_banks: @@ -7400,7 +7487,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvm_mmu_destroy(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, idx); free_page((unsigned long)vcpu->arch.pio_data); - if (!irqchip_in_kernel(vcpu->kvm)) + if (!lapic_in_kernel(vcpu)) static_key_slow_dec(&kvm_no_apic_vcpu); } @@ -7478,34 +7565,66 @@ void kvm_arch_sync_events(struct kvm *kvm) kvm_free_pit(kvm); } -int __x86_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem) +int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) { int i, r; + unsigned long hva; + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *slot, old; /* Called with kvm->slots_lock held. */ - BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM); + if (WARN_ON(id >= KVM_MEM_SLOTS_NUM)) + return -EINVAL; + + slot = id_to_memslot(slots, id); + if (size) { + if (WARN_ON(slot->npages)) + return -EEXIST; + /* + * MAP_SHARED to prevent internal slot pages from being moved + * by fork()/COW. + */ + hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, 0); + if (IS_ERR((void *)hva)) + return PTR_ERR((void *)hva); + } else { + if (!slot->npages) + return 0; + + hva = 0; + } + + old = *slot; for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { - struct kvm_userspace_memory_region m = *mem; + struct kvm_userspace_memory_region m; - m.slot |= i << 16; + m.slot = id | (i << 16); + m.flags = 0; + m.guest_phys_addr = gpa; + m.userspace_addr = hva; + m.memory_size = size; r = __kvm_set_memory_region(kvm, &m); if (r < 0) return r; } + if (!size) { + r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); + WARN_ON(r < 0); + } + return 0; } EXPORT_SYMBOL_GPL(__x86_set_memory_region); -int x86_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region *mem) +int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) { int r; mutex_lock(&kvm->slots_lock); - r = __x86_set_memory_region(kvm, mem); + r = __x86_set_memory_region(kvm, id, gpa, size); mutex_unlock(&kvm->slots_lock); return r; @@ -7520,16 +7639,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) * unless the the memory map has changed due to process exit * or fd copying. */ - struct kvm_userspace_memory_region mem; - memset(&mem, 0, sizeof(mem)); - mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; - x86_set_memory_region(kvm, &mem); - - mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; - x86_set_memory_region(kvm, &mem); - - mem.slot = TSS_PRIVATE_MEMSLOT; - x86_set_memory_region(kvm, &mem); + x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0); + x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0); + x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); } kvm_iommu_unmap_guest(kvm); kfree(kvm->arch.vpic); @@ -7632,27 +7744,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { - /* - * Only private memory slots need to be mapped here since - * KVM_SET_MEMORY_REGION ioctl is no longer supported. - */ - if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) { - unsigned long userspace_addr; - - /* - * MAP_SHARED to prevent internal slot pages from being moved - * by fork()/COW. - */ - userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, 0); - - if (IS_ERR((void *)userspace_addr)) - return PTR_ERR((void *)userspace_addr); - - memslot->userspace_addr = userspace_addr; - } - return 0; } @@ -7714,17 +7805,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, { int nr_mmu_pages = 0; - if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) { - int ret; - - ret = vm_munmap(old->userspace_addr, - old->npages * PAGE_SIZE); - if (ret < 0) - printk(KERN_WARNING - "kvm_vm_ioctl_set_memory_region: " - "failed to munmap memory\n"); - } - if (!kvm->arch.n_requested_mmu_pages) nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); @@ -7773,19 +7853,36 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, kvm_mmu_invalidate_zap_all_pages(kvm); } +static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) +{ + if (!list_empty_careful(&vcpu->async_pf.done)) + return true; + + if (kvm_apic_has_events(vcpu)) + return true; + + if (vcpu->arch.pv.pv_unhalted) + return true; + + if (atomic_read(&vcpu->arch.nmi_queued)) + return true; + + if (test_bit(KVM_REQ_SMI, &vcpu->requests)) + return true; + + if (kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_has_interrupt(vcpu)) + return true; + + return false; +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) kvm_x86_ops->check_nested_events(vcpu, false); - return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && - !vcpu->arch.apf.halted) - || !list_empty_careful(&vcpu->async_pf.done) - || kvm_apic_has_events(vcpu) - || vcpu->arch.pv.pv_unhalted - || atomic_read(&vcpu->arch.nmi_queued) || - (kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_has_interrupt(vcpu)); + return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) @@ -8017,7 +8114,59 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + if (kvm_x86_ops->update_pi_irte) { + irqfd->producer = prod; + return kvm_x86_ops->update_pi_irte(irqfd->kvm, + prod->irq, irqfd->gsi, 1); + } + + return -EINVAL; +} + +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + int ret; + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + if (!kvm_x86_ops->update_pi_irte) { + WARN_ON(irqfd->producer != NULL); + return; + } + + WARN_ON(irqfd->producer != prod); + irqfd->producer = NULL; + + /* + * When producer of consumer is unregistered, we change back to + * remapped mode, so we can re-use the current implementation + * when the irq is masked/disabed or the consumer side (KVM + * int this case doesn't want to receive the interrupts. + */ + ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0); + if (ret) + printk(KERN_INFO "irq bypass consumer (token %p) unregistration" + " fails: %d\n", irqfd->consumer.token, ret); +} + +int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + if (!kvm_x86_ops->update_pi_irte) + return -EINVAL; + + return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set); +} + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); @@ -8032,3 +8181,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2f822cd886c2..f2afa5fe48a6 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -180,9 +180,9 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); -#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ - | XSTATE_BNDREGS | XSTATE_BNDCSR \ - | XSTATE_AVX512) +#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ + | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ + | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512) extern u64 host_xcr0; extern u64 kvm_supported_xcr0(void); diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 816488c0b97e..d388de72eaca 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -353,8 +353,12 @@ AVXcode: 1 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) 18: Grp16 (1A) 19: -1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv -1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv +# Intel SDM opcode map does not list MPX instructions. For now using Gv for +# bnd registers and Ev for everything else is OK because the instruction +# decoder does not use the information except as an indication that there is +# a ModR/M byte. +1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev +1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv 1c: 1d: 1e: @@ -732,6 +736,12 @@ bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) # 0x0f 0x38 0xc0-0xff +c8: sha1nexte Vdq,Wdq +c9: sha1msg1 Vdq,Wdq +ca: sha1msg2 Vdq,Wdq +cb: sha256rnds2 Vdq,Wdq +cc: sha256msg1 Vdq,Wdq +cd: sha256msg2 Vdq,Wdq db: VAESIMC Vdq,Wdq (66),(v1) dc: VAESENC Vdq,Hdq,Wdq (66),(v1) dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) @@ -790,6 +800,7 @@ AVXcode: 3 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) +cc: sha1rnds4 Vdq,Wdq,Ib df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) f0: RORX Gy,Ey,Ib (F2),(v) EndTable @@ -874,7 +885,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: +5: rdpkru (110),(11B) | wrpkru (111),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -888,6 +899,9 @@ EndTable GrpTable: Grp9 1: CMPXCHG8B/16B Mq/Mdq +3: xrstors +4: xsavec +5: xsaves 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable @@ -932,8 +946,8 @@ GrpTable: Grp15 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE 5: XRSTOR | lfence (11B) -6: XSAVEOPT | mfence (11B) -7: clflush | sfence (11B) +6: XSAVEOPT | clwb (66) | mfence (11B) +7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B) EndTable GrpTable: Grp16 diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index dd76a05729b0..024f6e971174 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c @@ -169,6 +169,76 @@ void fxch_i(void) fpu_tag_word = tag_word; } +static void fcmovCC(void) +{ + /* fcmovCC st(i) */ + int i = FPU_rm; + FPU_REG *st0_ptr = &st(0); + FPU_REG *sti_ptr = &st(i); + long tag_word = fpu_tag_word; + int regnr = top & 7; + int regnri = (top + i) & 7; + u_char sti_tag = (tag_word >> (regnri * 2)) & 3; + + if (sti_tag == TAG_Empty) { + FPU_stack_underflow(); + clear_C1(); + return; + } + reg_copy(sti_ptr, st0_ptr); + tag_word &= ~(3 << (regnr * 2)); + tag_word |= (sti_tag << (regnr * 2)); + fpu_tag_word = tag_word; +} + +void fcmovb(void) +{ + if (FPU_EFLAGS & X86_EFLAGS_CF) + fcmovCC(); +} + +void fcmove(void) +{ + if (FPU_EFLAGS & X86_EFLAGS_ZF) + fcmovCC(); +} + +void fcmovbe(void) +{ + if (FPU_EFLAGS & (X86_EFLAGS_CF|X86_EFLAGS_ZF)) + fcmovCC(); +} + +void fcmovu(void) +{ + if (FPU_EFLAGS & X86_EFLAGS_PF) + fcmovCC(); +} + +void fcmovnb(void) +{ + if (!(FPU_EFLAGS & X86_EFLAGS_CF)) + fcmovCC(); +} + +void fcmovne(void) +{ + if (!(FPU_EFLAGS & X86_EFLAGS_ZF)) + fcmovCC(); +} + +void fcmovnbe(void) +{ + if (!(FPU_EFLAGS & (X86_EFLAGS_CF|X86_EFLAGS_ZF))) + fcmovCC(); +} + +void fcmovnu(void) +{ + if (!(FPU_EFLAGS & X86_EFLAGS_PF)) + fcmovCC(); +} + void ffree_(void) { /* ffree st(i) */ diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h index 4dae511c85ad..afbc4d805d66 100644 --- a/arch/x86/math-emu/fpu_emu.h +++ b/arch/x86/math-emu/fpu_emu.h @@ -71,7 +71,7 @@ #include "fpu_system.h" -#include <asm/sigcontext.h> /* for struct _fpstate */ +#include <uapi/asm/sigcontext.h> /* for struct _fpstate */ #include <asm/math_emu.h> #include <linux/linkage.h> diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 3d8f2e421466..e945fedf1de2 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -40,49 +40,33 @@ #define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ -#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */ +/* fcmovCC and f(u)comi(p) are enabled if CPUID(1).EDX(15) "cmov" is set */ -/* WARNING: These codes are not documented by Intel in their 80486 manual - and may not work on FPU clones or later Intel FPUs. */ - -/* Changes to support the un-doc codes provided by Linus Torvalds. */ - -#define _d9_d8_ fstp_i /* unofficial code (19) */ -#define _dc_d0_ fcom_st /* unofficial code (14) */ -#define _dc_d8_ fcompst /* unofficial code (1c) */ -#define _dd_c8_ fxch_i /* unofficial code (0d) */ -#define _de_d0_ fcompst /* unofficial code (16) */ -#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */ -#define _df_c8_ fxch_i /* unofficial code (0f) */ -#define _df_d0_ fstp_i /* unofficial code (17) */ -#define _df_d8_ fstp_i /* unofficial code (1f) */ +/* WARNING: "u" entries are not documented by Intel in their 80486 manual + and may not work on FPU clones or later Intel FPUs. + Changes to support them provided by Linus Torvalds. */ static FUNC const st_instr_table[64] = { - fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_, - fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_, - fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_, - fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_, - fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, - fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, - fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, - fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, +/* Opcode: d8 d9 da db */ +/* dc dd de df */ +/* c0..7 */ fadd__, fld_i_, fcmovb, fcmovnb, +/* c0..7 */ fadd_i, ffree_, faddp_, ffreep,/*u*/ +/* c8..f */ fmul__, fxch_i, fcmove, fcmovne, +/* c8..f */ fmul_i, fxch_i,/*u*/ fmulp_, fxch_i,/*u*/ +/* d0..7 */ fcom_st, fp_nop, fcmovbe, fcmovnbe, +/* d0..7 */ fcom_st,/*u*/ fst_i_, fcompst,/*u*/ fstp_i,/*u*/ +/* d8..f */ fcompst, fstp_i,/*u*/ fcmovu, fcmovnu, +/* d8..f */ fcompst,/*u*/ fstp_i, fcompp, fstp_i,/*u*/ +/* e0..7 */ fsub__, FPU_etc, __BAD__, finit_, +/* e0..7 */ fsubri, fucom_, fsubrp, fstsw_, +/* e8..f */ fsubr_, fconst, fucompp, fucomi_, +/* e8..f */ fsub_i, fucomp, fsubp_, fucomip, +/* f0..7 */ fdiv__, FPU_triga, __BAD__, fcomi_, +/* f0..7 */ fdivri, __BAD__, fdivrp, fcomip, +/* f8..f */ fdivr_, FPU_trigb, __BAD__, __BAD__, +/* f8..f */ fdiv_i, __BAD__, fdivp_, __BAD__, }; -#else /* Support only documented FPU op-codes */ - -static FUNC const st_instr_table[64] = { - fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__, - fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__, - fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__, - fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__, - fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, - fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, - fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, - fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, -}; - -#endif /* NO_UNDOC_CODE */ - #define _NONE_ 0 /* Take no special action */ #define _REG0_ 1 /* Need to check for not empty st(0) */ #define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */ @@ -94,36 +78,18 @@ static FUNC const st_instr_table[64] = { #define _REGIc 0 /* Compare st(0) and st(rm) */ #define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */ -#ifndef NO_UNDOC_CODE - -/* Un-documented FPU op-codes supported by default. (see above) */ - static u_char const type_table[64] = { - _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_, - _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_, - _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, - _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, - _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, - _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ +/* Opcode: d8 d9 da db dc dd de df */ +/* c0..7 */ _REGI_, _NONE_, _REGIn, _REGIn, _REGIi, _REGi_, _REGIp, _REGi_, +/* c8..f */ _REGI_, _REGIn, _REGIn, _REGIn, _REGIi, _REGI_, _REGIp, _REGI_, +/* d0..7 */ _REGIc, _NONE_, _REGIn, _REGIn, _REGIc, _REG0_, _REGIc, _REG0_, +/* d8..f */ _REGIc, _REG0_, _REGIn, _REGIn, _REGIc, _REG0_, _REGIc, _REG0_, +/* e0..7 */ _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, +/* e8..f */ _REGI_, _NONE_, _REGIc, _REGIc, _REGIi, _REGIc, _REGIp, _REGIc, +/* f0..7 */ _REGI_, _NONE_, _null_, _REGIc, _REGIi, _null_, _REGIp, _REGIc, +/* f8..f */ _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, }; -#else /* Support only documented FPU op-codes */ - -static u_char const type_table[64] = { - _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_, - _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_, - _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_, - _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_, - _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, - _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ -}; - -#endif /* NO_UNDOC_CODE */ - #ifdef RE_ENTRANT_CHECKING u_char emulating = 0; #endif /* RE_ENTRANT_CHECKING */ diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h index 9779df436b7d..caff438b9c1d 100644 --- a/arch/x86/math-emu/fpu_proto.h +++ b/arch/x86/math-emu/fpu_proto.h @@ -46,6 +46,14 @@ extern void fstsw_(void); extern void fp_nop(void); extern void fld_i_(void); extern void fxch_i(void); +extern void fcmovb(void); +extern void fcmove(void); +extern void fcmovbe(void); +extern void fcmovu(void); +extern void fcmovnb(void); +extern void fcmovne(void); +extern void fcmovnbe(void); +extern void fcmovnu(void); extern void ffree_(void); extern void ffreep(void); extern void fst_i_(void); @@ -108,6 +116,10 @@ extern void fcompp(void); extern void fucom_(void); extern void fucomp(void); extern void fucompp(void); +extern void fcomi_(void); +extern void fcomip(void); +extern void fucomi_(void); +extern void fucomip(void); /* reg_constant.c */ extern void fconst(void); /* reg_ld_str.c */ diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c index 2931ff355218..95228ff042c0 100644 --- a/arch/x86/math-emu/load_store.c +++ b/arch/x86/math-emu/load_store.c @@ -33,11 +33,12 @@ #define pop_0() { FPU_settag0(TAG_Empty); top++; } +/* index is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */ static u_char const type_table[32] = { - _PUSH_, _PUSH_, _PUSH_, _PUSH_, - _null_, _null_, _null_, _null_, - _REG0_, _REG0_, _REG0_, _REG0_, - _REG0_, _REG0_, _REG0_, _REG0_, + _PUSH_, _PUSH_, _PUSH_, _PUSH_, /* /0: d9:fld f32, db:fild m32, dd:fld f64, df:fild m16 */ + _null_, _REG0_, _REG0_, _REG0_, /* /1: d9:undef, db,dd,df:fisttp m32/64/16 */ + _REG0_, _REG0_, _REG0_, _REG0_, /* /2: d9:fst f32, db:fist m32, dd:fst f64, df:fist m16 */ + _REG0_, _REG0_, _REG0_, _REG0_, /* /3: d9:fstp f32, db:fistp m32, dd:fstp f64, df:fistp m16 */ _NONE_, _null_, _NONE_, _PUSH_, _NONE_, _PUSH_, _null_, _PUSH_, _NONE_, _null_, _NONE_, _REG0_, @@ -45,15 +46,19 @@ static u_char const type_table[32] = { }; u_char const data_sizes_16[32] = { - 4, 4, 8, 2, 0, 0, 0, 0, - 4, 4, 8, 2, 4, 4, 8, 2, + 4, 4, 8, 2, + 0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp */ + 4, 4, 8, 2, + 4, 4, 8, 2, 14, 0, 94, 10, 2, 10, 0, 8, 14, 0, 94, 10, 2, 10, 2, 8 }; static u_char const data_sizes_32[32] = { - 4, 4, 8, 2, 0, 0, 0, 0, - 4, 4, 8, 2, 4, 4, 8, 2, + 4, 4, 8, 2, + 0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp */ + 4, 4, 8, 2, + 4, 4, 8, 2, 28, 0, 108, 10, 2, 10, 0, 8, 28, 0, 108, 10, 2, 10, 2, 8 }; @@ -65,6 +70,7 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes, FPU_REG *st0_ptr; u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */ u_char loaded_tag; + int sv_cw; st0_ptr = NULL; /* Initialized just to stop compiler warnings. */ @@ -111,7 +117,8 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes, } switch (type) { - case 000: /* fld m32real */ + /* type is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */ + case 000: /* fld m32real (d9 /0) */ clear_C1(); loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data); @@ -123,13 +130,13 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes, } FPU_copy_to_reg0(&loaded_data, loaded_tag); break; - case 001: /* fild m32int */ + case 001: /* fild m32int (db /0) */ clear_C1(); loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data); FPU_copy_to_reg0(&loaded_data, loaded_tag); break; - case 002: /* fld m64real */ + case 002: /* fld m64real (dd /0) */ clear_C1(); loaded_tag = FPU_load_double((double __user *)data_address, @@ -142,12 +149,44 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes, } FPU_copy_to_reg0(&loaded_data, loaded_tag); break; - case 003: /* fild m16int */ + case 003: /* fild m16int (df /0) */ clear_C1(); loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data); FPU_copy_to_reg0(&loaded_data, loaded_tag); break; + /* case 004: undefined (d9 /1) */ + /* fisttp are enabled if CPUID(1).ECX(0) "sse3" is set */ + case 005: /* fisttp m32int (db /1) */ + clear_C1(); + sv_cw = control_word; + control_word |= RC_CHOP; + if (FPU_store_int32 + (st0_ptr, st0_tag, (long __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + control_word = sv_cw; + break; + case 006: /* fisttp m64int (dd /1) */ + clear_C1(); + sv_cw = control_word; + control_word |= RC_CHOP; + if (FPU_store_int64 + (st0_ptr, st0_tag, (long long __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + control_word = sv_cw; + break; + case 007: /* fisttp m16int (df /1) */ + clear_C1(); + sv_cw = control_word; + control_word |= RC_CHOP; + if (FPU_store_int16 + (st0_ptr, st0_tag, (short __user *)data_address)) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + control_word = sv_cw; + break; case 010: /* fst m32real */ clear_C1(); FPU_store_single(st0_ptr, st0_tag, diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c index ecce55fc2e2e..b77360fdbf4a 100644 --- a/arch/x86/math-emu/reg_compare.c +++ b/arch/x86/math-emu/reg_compare.c @@ -249,6 +249,54 @@ static int compare_st_st(int nr) return 0; } +static int compare_i_st_st(int nr) +{ + int f, c; + FPU_REG *st_ptr; + + if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) { + FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + partial_status &= ~SW_C0; + st_ptr = &st(nr); + c = compare(st_ptr, FPU_gettagi(nr)); + if (c & COMP_NaN) { + FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + + switch (c & 7) { + case COMP_A_lt_B: + f = X86_EFLAGS_CF; + break; + case COMP_A_eq_B: + f = X86_EFLAGS_ZF; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL | 0x122); + f = 0; + break; +#endif /* PARANOID */ + } + FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f; + if (c & COMP_Denormal) { + return denormal_operand() < 0; + } + return 0; +} + static int compare_u_st_st(int nr) { int f = 0, c; @@ -299,6 +347,58 @@ static int compare_u_st_st(int nr) return 0; } +static int compare_ui_st_st(int nr) +{ + int f = 0, c; + FPU_REG *st_ptr; + + if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) { + FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + partial_status &= ~SW_C0; + st_ptr = &st(nr); + c = compare(st_ptr, FPU_gettagi(nr)); + if (c & COMP_NaN) { + FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); + if (c & COMP_SNaN) { /* This is the only difference between + un-ordered and ordinary comparisons */ + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + return 0; + } + + switch (c & 7) { + case COMP_A_lt_B: + f = X86_EFLAGS_CF; + break; + case COMP_A_eq_B: + f = X86_EFLAGS_ZF; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL | 0x123); + f = 0; + break; +#endif /* PARANOID */ + } + FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f; + if (c & COMP_Denormal) { + return denormal_operand() < 0; + } + return 0; +} + /*---------------------------------------------------------------------------*/ void fcom_st(void) @@ -348,3 +448,31 @@ void fucompp(void) } else FPU_illegal(); } + +/* P6+ compare-to-EFLAGS ops */ + +void fcomi_(void) +{ + /* fcomi st(i) */ + compare_i_st_st(FPU_rm); +} + +void fcomip(void) +{ + /* fcomip st(i) */ + if (!compare_i_st_st(FPU_rm)) + FPU_pop(); +} + +void fucomi_(void) +{ + /* fucomi st(i) */ + compare_ui_st_st(FPU_rm); +} + +void fucomip(void) +{ + /* fucomip st(i) */ + if (!compare_ui_st_st(FPU_rm)) + FPU_pop(); +} diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index a482d105172b..65c47fda26fc 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_SMP) += tlb.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o +obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index f0cedf3395af..1bf417e9cc13 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -32,6 +32,8 @@ struct pg_state { const struct addr_marker *marker; unsigned long lines; bool to_dmesg; + bool check_wx; + unsigned long wx_pages; }; struct addr_marker { @@ -155,7 +157,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) pt_dump_cont_printf(m, dmsg, " "); if ((level == 4 && pr & _PAGE_PAT) || ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE)) - pt_dump_cont_printf(m, dmsg, "pat "); + pt_dump_cont_printf(m, dmsg, "PAT "); else pt_dump_cont_printf(m, dmsg, " "); if (pr & _PAGE_GLOBAL) @@ -198,8 +200,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, * we have now. "break" is either changing perms, levels or * address space marker. */ - prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; - cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; + prot = pgprot_val(new_prot); + cur = pgprot_val(st->current_prot); if (!st->level) { /* First entry */ @@ -214,6 +216,16 @@ static void note_page(struct seq_file *m, struct pg_state *st, const char *unit = units; unsigned long delta; int width = sizeof(unsigned long) * 2; + pgprotval_t pr = pgprot_val(st->current_prot); + + if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) { + WARN_ONCE(1, + "x86/mm: Found insecure W+X mapping at address %p/%pS\n", + (void *)st->start_address, + (void *)st->start_address); + st->wx_pages += (st->current_address - + st->start_address) / PAGE_SIZE; + } /* * Now print the actual finished series @@ -269,13 +281,13 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, { int i; pte_t *start; + pgprotval_t prot; start = (pte_t *) pmd_page_vaddr(addr); for (i = 0; i < PTRS_PER_PTE; i++) { - pgprot_t prot = pte_pgprot(*start); - + prot = pte_flags(*start); st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); - note_page(m, st, prot, 4); + note_page(m, st, __pgprot(prot), 4); start++; } } @@ -287,18 +299,19 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, { int i; pmd_t *start; + pgprotval_t prot; start = (pmd_t *) pud_page_vaddr(addr); for (i = 0; i < PTRS_PER_PMD; i++) { st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); if (!pmd_none(*start)) { - pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK; - - if (pmd_large(*start) || !pmd_present(*start)) + if (pmd_large(*start) || !pmd_present(*start)) { + prot = pmd_flags(*start); note_page(m, st, __pgprot(prot), 3); - else + } else { walk_pte_level(m, st, *start, P + i * PMD_LEVEL_MULT); + } } else note_page(m, st, __pgprot(0), 3); start++; @@ -318,19 +331,20 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, { int i; pud_t *start; + pgprotval_t prot; start = (pud_t *) pgd_page_vaddr(addr); for (i = 0; i < PTRS_PER_PUD; i++) { st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); if (!pud_none(*start)) { - pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK; - - if (pud_large(*start) || !pud_present(*start)) + if (pud_large(*start) || !pud_present(*start)) { + prot = pud_flags(*start); note_page(m, st, __pgprot(prot), 2); - else + } else { walk_pmd_level(m, st, *start, P + i * PUD_LEVEL_MULT); + } } else note_page(m, st, __pgprot(0), 2); @@ -344,13 +358,15 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, #define pgd_none(a) pud_none(__pud(pgd_val(a))) #endif -void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) +static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, + bool checkwx) { #ifdef CONFIG_X86_64 pgd_t *start = (pgd_t *) &init_level4_pgt; #else pgd_t *start = swapper_pg_dir; #endif + pgprotval_t prot; int i; struct pg_state st = {}; @@ -359,16 +375,20 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) st.to_dmesg = true; } + st.check_wx = checkwx; + if (checkwx) + st.wx_pages = 0; + for (i = 0; i < PTRS_PER_PGD; i++) { st.current_address = normalize_addr(i * PGD_LEVEL_MULT); if (!pgd_none(*start)) { - pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; - - if (pgd_large(*start) || !pgd_present(*start)) + if (pgd_large(*start) || !pgd_present(*start)) { + prot = pgd_flags(*start); note_page(m, &st, __pgprot(prot), 1); - else + } else { walk_pud_level(m, &st, *start, i * PGD_LEVEL_MULT); + } } else note_page(m, &st, __pgprot(0), 1); @@ -378,8 +398,26 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) /* Flush out the last page */ st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); note_page(m, &st, __pgprot(0), 0); + if (!checkwx) + return; + if (st.wx_pages) + pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n", + st.wx_pages); + else + pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n"); } +void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) +{ + ptdump_walk_pgd_level_core(m, pgd, false); +} + +void ptdump_walk_pgd_level_checkwx(void) +{ + ptdump_walk_pgd_level_core(NULL, NULL, true); +} + +#ifdef CONFIG_X86_PTDUMP static int ptdump_show(struct seq_file *m, void *v) { ptdump_walk_pgd_level(m, NULL); @@ -397,10 +435,13 @@ static const struct file_operations ptdump_fops = { .llseek = seq_lseek, .release = single_release, }; +#endif static int pt_dump_init(void) { +#ifdef CONFIG_X86_PTDUMP struct dentry *pe; +#endif #ifdef CONFIG_X86_32 /* Not a compile-time constant on x86-32 */ @@ -412,10 +453,12 @@ static int pt_dump_init(void) address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; #endif +#ifdef CONFIG_X86_PTDUMP pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, &ptdump_fops); if (!pe) return -ENOMEM; +#endif return 0; } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 81bf3d2af3eb..ae9a37bf1371 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -118,21 +118,20 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask; - pte_t pte = *(pte_t *)&pmd; struct page *head, *page; int refs; mask = _PAGE_PRESENT|_PAGE_USER; if (write) mask |= _PAGE_RW; - if ((pte_flags(pte) & mask) != mask) + if ((pmd_flags(pmd) & mask) != mask) return 0; /* hugepages are never "special" */ - VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + VM_BUG_ON(pmd_flags(pmd) & _PAGE_SPECIAL); + VM_BUG_ON(!pfn_valid(pmd_pfn(pmd))); refs = 0; - head = pte_page(pte); + head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page); @@ -195,21 +194,20 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask; - pte_t pte = *(pte_t *)&pud; struct page *head, *page; int refs; mask = _PAGE_PRESENT|_PAGE_USER; if (write) mask |= _PAGE_RW; - if ((pte_flags(pte) & mask) != mask) + if ((pud_flags(pud) & mask) != mask) return 0; /* hugepages are never "special" */ - VM_BUG_ON(pte_flags(pte) & _PAGE_SPECIAL); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL); + VM_BUG_ON(!pfn_valid(pud_pfn(pud))); refs = 0; - head = pte_page(pte); + head = pud_page(pud); page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); do { VM_BUG_ON_PAGE(compound_head(page) != head, page); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1d8a83df153a..1f37cb2b56a9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -693,14 +693,12 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { -#ifdef CONFIG_MICROCODE_EARLY /* * Remember, initrd memory may contain microcode or other useful things. * Before we lose initrd mem, we need to find a place to hold them * now that normal virtual memory is enabled. */ save_microcode_in_initrd(); -#endif /* * end could be not aligned, and We can not align that, diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 7562f42914b4..cb4ef3de61f9 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -957,6 +957,8 @@ void mark_rodata_ro(void) set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); #endif mark_nxdata_nx(); + if (__supported_pte_mask & _PAGE_NX) + debug_checkwx(); } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 30564e2752d3..5ed62eff31bd 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1132,7 +1132,7 @@ void mark_rodata_ro(void) * has been zapped already via cleanup_highmem(). */ all_end = roundup((unsigned long)_brk_end, PMD_SIZE); - set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); + set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); rodata_test(); @@ -1150,6 +1150,8 @@ void mark_rodata_ro(void) free_init_pages("unused kernel", (unsigned long) __va(__pa_symbol(rodata_end)), (unsigned long) __va(__pa_symbol(_sdata))); + + debug_checkwx(); } #endif diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 9ce5da27b136..d470cf219a2d 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -126,5 +126,5 @@ void __init kasan_init(void) __flush_tlb_all(); init_task.kasan_depth = 0; - pr_info("Kernel address sanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized\n"); } diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 134948b0926f..b0ae85f90f10 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -237,7 +237,8 @@ bad_opcode: */ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) { - const struct bndreg *bndregs, *bndreg; + const struct mpx_bndreg_state *bndregs; + const struct mpx_bndreg *bndreg; siginfo_t *info = NULL; struct insn insn; uint8_t bndregno; @@ -258,13 +259,13 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) goto err_out; } /* get bndregs field from current task's xsave area */ - bndregs = get_xsave_field_ptr(XSTATE_BNDREGS); + bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS); if (!bndregs) { err = -EINVAL; goto err_out; } /* now go select the individual register in the set of 4 */ - bndreg = &bndregs[bndregno]; + bndreg = &bndregs->bndreg[bndregno]; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { @@ -306,7 +307,7 @@ err_out: static __user void *mpx_get_bounds_dir(void) { - const struct bndcsr *bndcsr; + const struct mpx_bndcsr *bndcsr; if (!cpu_feature_enabled(X86_FEATURE_MPX)) return MPX_INVALID_BOUNDS_DIR; @@ -315,7 +316,7 @@ static __user void *mpx_get_bounds_dir(void) * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. */ - bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); if (!bndcsr) return MPX_INVALID_BOUNDS_DIR; @@ -489,10 +490,10 @@ out_unmap: static int do_mpx_bt_fault(void) { unsigned long bd_entry, bd_base; - const struct bndcsr *bndcsr; + const struct mpx_bndcsr *bndcsr; struct mm_struct *mm = current->mm; - bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); if (!bndcsr) return -EINVAL; /* diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2c44c0792301..a3137a4feed1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -414,18 +414,28 @@ pmd_t *lookup_pmd_address(unsigned long address) phys_addr_t slow_virt_to_phys(void *__virt_addr) { unsigned long virt_addr = (unsigned long)__virt_addr; - phys_addr_t phys_addr; - unsigned long offset; + unsigned long phys_addr, offset; enum pg_level level; - unsigned long pmask; pte_t *pte; pte = lookup_address(virt_addr, &level); BUG_ON(!pte); - pmask = page_level_mask(level); - offset = virt_addr & ~pmask; - phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; - return (phys_addr | offset); + + switch (level) { + case PG_LEVEL_1G: + phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; + offset = virt_addr & ~PUD_PAGE_MASK; + break; + case PG_LEVEL_2M: + phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; + offset = virt_addr & ~PMD_PAGE_MASK; + break; + default: + phys_addr = pte_pfn(*pte) << PAGE_SHIFT; + offset = virt_addr & ~PAGE_MASK; + } + + return (phys_addr_t)(phys_addr | offset); } EXPORT_SYMBOL_GPL(slow_virt_to_phys); @@ -458,7 +468,7 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, struct cpa_data *cpa) { - unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn; + unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn, old_pfn; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot, req_prot; int i, do_split = 1; @@ -478,17 +488,21 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, switch (level) { case PG_LEVEL_2M: -#ifdef CONFIG_X86_64 + old_prot = pmd_pgprot(*(pmd_t *)kpte); + old_pfn = pmd_pfn(*(pmd_t *)kpte); + break; case PG_LEVEL_1G: -#endif - psize = page_level_size(level); - pmask = page_level_mask(level); + old_prot = pud_pgprot(*(pud_t *)kpte); + old_pfn = pud_pfn(*(pud_t *)kpte); break; default: do_split = -EINVAL; goto out_unlock; } + psize = page_level_size(level); + pmask = page_level_mask(level); + /* * Calculate the number of pages, which fit into this large * page starting at address: @@ -504,7 +518,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * up accordingly. */ old_pte = *kpte; - old_prot = req_prot = pgprot_large_2_4k(pte_pgprot(old_pte)); + req_prot = pgprot_large_2_4k(old_prot); pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); @@ -530,10 +544,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, req_prot = canon_pgprot(req_prot); /* - * old_pte points to the large page base address. So we need + * old_pfn points to the large page base pfn. So we need * to add the offset of the virtual address: */ - pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); + pfn = old_pfn + ((address & (psize - 1)) >> PAGE_SHIFT); cpa->pfn = pfn; new_prot = static_protections(req_prot, address, pfn); @@ -544,7 +558,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * the pages in the range we try to preserve: */ addr = address & pmask; - pfn = pte_pfn(old_pte); + pfn = old_pfn; for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { pgprot_t chk_prot = static_protections(req_prot, addr, pfn); @@ -574,7 +588,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * The address is aligned and the number of pages * covers the full page. */ - new_pte = pfn_pte(pte_pfn(old_pte), new_prot); + new_pte = pfn_pte(old_pfn, new_prot); __set_pmd_pte(kpte, address, new_pte); cpa->flags |= CPA_FLUSHTLB; do_split = 0; @@ -591,7 +605,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, struct page *base) { pte_t *pbase = (pte_t *)page_address(base); - unsigned long pfn, pfninc = 1; + unsigned long ref_pfn, pfn, pfninc = 1; unsigned int i, level; pte_t *tmp; pgprot_t ref_prot; @@ -608,26 +622,33 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, } paravirt_alloc_pte(&init_mm, page_to_pfn(base)); - ref_prot = pte_pgprot(pte_clrhuge(*kpte)); - /* promote PAT bit to correct position */ - if (level == PG_LEVEL_2M) + switch (level) { + case PG_LEVEL_2M: + ref_prot = pmd_pgprot(*(pmd_t *)kpte); + /* clear PSE and promote PAT bit to correct position */ ref_prot = pgprot_large_2_4k(ref_prot); + ref_pfn = pmd_pfn(*(pmd_t *)kpte); + break; -#ifdef CONFIG_X86_64 - if (level == PG_LEVEL_1G) { + case PG_LEVEL_1G: + ref_prot = pud_pgprot(*(pud_t *)kpte); + ref_pfn = pud_pfn(*(pud_t *)kpte); pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; + /* - * Set the PSE flags only if the PRESENT flag is set + * Clear the PSE flags if the PRESENT flag is not set * otherwise pmd_present/pmd_huge will return true * even on a non present pmd. */ - if (pgprot_val(ref_prot) & _PAGE_PRESENT) - pgprot_val(ref_prot) |= _PAGE_PSE; - else + if (!(pgprot_val(ref_prot) & _PAGE_PRESENT)) pgprot_val(ref_prot) &= ~_PAGE_PSE; + break; + + default: + spin_unlock(&pgd_lock); + return 1; } -#endif /* * Set the GLOBAL flags only if the PRESENT flag is set @@ -643,13 +664,16 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, /* * Get the target pfn from the original entry: */ - pfn = pte_pfn(*kpte); + pfn = ref_pfn; for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot))); - if (pfn_range_is_mapped(PFN_DOWN(__pa(address)), - PFN_DOWN(__pa(address)) + 1)) - split_page_count(level); + if (virt_addr_valid(address)) { + unsigned long pfn = PFN_DOWN(__pa(address)); + + if (pfn_range_is_mapped(pfn, pfn + 1)) + split_page_count(level); + } /* * Install the new, split up pagetable. diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 70efcd0940f9..75991979f667 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1109,7 +1109,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) bpf_flush_icache(header, image + proglen); set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)image; - prog->jited = true; + prog->jited = 1; } out: kfree(addrs); diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index ff9911707160..3cd69832d7f4 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -4,16 +4,15 @@ #include <linux/irq.h> #include <linux/dmi.h> #include <linux/slab.h> +#include <linux/pci-acpi.h> #include <asm/numa.h> #include <asm/pci_x86.h> struct pci_root_info { - struct acpi_device *bridge; - char name[16]; + struct acpi_pci_root_info common; struct pci_sysdata sd; #ifdef CONFIG_PCI_MMCONFIG bool mcfg_added; - u16 segment; u8 start_bus; u8 end_bus; #endif @@ -178,15 +177,18 @@ static int check_segment(u16 seg, struct device *dev, char *estr) return 0; } -static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start, - u8 end, phys_addr_t addr) +static int setup_mcfg_map(struct acpi_pci_root_info *ci) { - int result; - struct device *dev = &info->bridge->dev; + int result, seg; + struct pci_root_info *info; + struct acpi_pci_root *root = ci->root; + struct device *dev = &ci->bridge->dev; - info->start_bus = start; - info->end_bus = end; + info = container_of(ci, struct pci_root_info, common); + info->start_bus = (u8)root->secondary.start; + info->end_bus = (u8)root->secondary.end; info->mcfg_added = false; + seg = info->sd.domain; /* return success if MMCFG is not in use */ if (raw_pci_ext_ops && raw_pci_ext_ops != &pci_mmcfg) @@ -195,7 +197,8 @@ static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start, if (!(pci_probe & PCI_PROBE_MMCONF)) return check_segment(seg, dev, "MMCONFIG is disabled,"); - result = pci_mmconfig_insert(dev, seg, start, end, addr); + result = pci_mmconfig_insert(dev, seg, info->start_bus, info->end_bus, + root->mcfg_addr); if (result == 0) { /* enable MMCFG if it hasn't been enabled yet */ if (raw_pci_ext_ops == NULL) @@ -208,134 +211,55 @@ static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start, return 0; } -static void teardown_mcfg_map(struct pci_root_info *info) +static void teardown_mcfg_map(struct acpi_pci_root_info *ci) { + struct pci_root_info *info; + + info = container_of(ci, struct pci_root_info, common); if (info->mcfg_added) { - pci_mmconfig_delete(info->segment, info->start_bus, - info->end_bus); + pci_mmconfig_delete(info->sd.domain, + info->start_bus, info->end_bus); info->mcfg_added = false; } } #else -static int setup_mcfg_map(struct pci_root_info *info, - u16 seg, u8 start, u8 end, - phys_addr_t addr) +static int setup_mcfg_map(struct acpi_pci_root_info *ci) { return 0; } -static void teardown_mcfg_map(struct pci_root_info *info) + +static void teardown_mcfg_map(struct acpi_pci_root_info *ci) { } #endif -static void validate_resources(struct device *dev, struct list_head *crs_res, - unsigned long type) +static int pci_acpi_root_get_node(struct acpi_pci_root *root) { - LIST_HEAD(list); - struct resource *res1, *res2, *root = NULL; - struct resource_entry *tmp, *entry, *entry2; - - BUG_ON((type & (IORESOURCE_MEM | IORESOURCE_IO)) == 0); - root = (type & IORESOURCE_MEM) ? &iomem_resource : &ioport_resource; - - list_splice_init(crs_res, &list); - resource_list_for_each_entry_safe(entry, tmp, &list) { - bool free = false; - resource_size_t end; - - res1 = entry->res; - if (!(res1->flags & type)) - goto next; - - /* Exclude non-addressable range or non-addressable portion */ - end = min(res1->end, root->end); - if (end <= res1->start) { - dev_info(dev, "host bridge window %pR (ignored, not CPU addressable)\n", - res1); - free = true; - goto next; - } else if (res1->end != end) { - dev_info(dev, "host bridge window %pR ([%#llx-%#llx] ignored, not CPU addressable)\n", - res1, (unsigned long long)end + 1, - (unsigned long long)res1->end); - res1->end = end; - } - - resource_list_for_each_entry(entry2, crs_res) { - res2 = entry2->res; - if (!(res2->flags & type)) - continue; - - /* - * I don't like throwing away windows because then - * our resources no longer match the ACPI _CRS, but - * the kernel resource tree doesn't allow overlaps. - */ - if (resource_overlaps(res1, res2)) { - res2->start = min(res1->start, res2->start); - res2->end = max(res1->end, res2->end); - dev_info(dev, "host bridge window expanded to %pR; %pR ignored\n", - res2, res1); - free = true; - goto next; - } - } + int busnum = root->secondary.start; + struct acpi_device *device = root->device; + int node = acpi_get_node(device->handle); -next: - resource_list_del(entry); - if (free) - resource_list_free_entry(entry); - else - resource_list_add_tail(entry, crs_res); + if (node == NUMA_NO_NODE) { + node = x86_pci_root_bus_node(busnum); + if (node != 0 && node != NUMA_NO_NODE) + dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n", + node); } + if (node != NUMA_NO_NODE && !node_online(node)) + node = NUMA_NO_NODE; + + return node; } -static void add_resources(struct pci_root_info *info, - struct list_head *resources, - struct list_head *crs_res) +static int pci_acpi_root_init_info(struct acpi_pci_root_info *ci) { - struct resource_entry *entry, *tmp; - struct resource *res, *conflict, *root = NULL; - - validate_resources(&info->bridge->dev, crs_res, IORESOURCE_MEM); - validate_resources(&info->bridge->dev, crs_res, IORESOURCE_IO); - - resource_list_for_each_entry_safe(entry, tmp, crs_res) { - res = entry->res; - if (res->flags & IORESOURCE_MEM) - root = &iomem_resource; - else if (res->flags & IORESOURCE_IO) - root = &ioport_resource; - else - BUG_ON(res); - - conflict = insert_resource_conflict(root, res); - if (conflict) { - dev_info(&info->bridge->dev, - "ignoring host bridge window %pR (conflicts with %s %pR)\n", - res, conflict->name, conflict); - resource_list_destroy_entry(entry); - } - } - - list_splice_tail(crs_res, resources); + return setup_mcfg_map(ci); } -static void release_pci_root_info(struct pci_host_bridge *bridge) +static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci) { - struct resource *res; - struct resource_entry *entry; - struct pci_root_info *info = bridge->release_data; - - resource_list_for_each_entry(entry, &bridge->windows) { - res = entry->res; - if (res->parent && - (res->flags & (IORESOURCE_MEM | IORESOURCE_IO))) - release_resource(res); - } - - teardown_mcfg_map(info); - kfree(info); + teardown_mcfg_map(ci); + kfree(container_of(ci, struct pci_root_info, common)); } /* @@ -358,50 +282,47 @@ static bool resource_is_pcicfg_ioport(struct resource *res) res->start == 0xCF8 && res->end == 0xCFF; } -static void probe_pci_root_info(struct pci_root_info *info, - struct acpi_device *device, - int busnum, int domain, - struct list_head *list) +static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci) { - int ret; + struct acpi_device *device = ci->bridge; + int busnum = ci->root->secondary.start; struct resource_entry *entry, *tmp; + int status; - sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); - info->bridge = device; - ret = acpi_dev_get_resources(device, list, - acpi_dev_filter_resource_type_cb, - (void *)(IORESOURCE_IO | IORESOURCE_MEM)); - if (ret < 0) - dev_warn(&device->dev, - "failed to parse _CRS method, error code %d\n", ret); - else if (ret == 0) - dev_dbg(&device->dev, - "no IO and memory resources present in _CRS\n"); - else - resource_list_for_each_entry_safe(entry, tmp, list) { - if ((entry->res->flags & IORESOURCE_DISABLED) || - resource_is_pcicfg_ioport(entry->res)) + status = acpi_pci_probe_root_resources(ci); + if (pci_use_crs) { + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) + if (resource_is_pcicfg_ioport(entry->res)) resource_list_destroy_entry(entry); - else - entry->res->name = info->name; - } + return status; + } + + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { + dev_printk(KERN_DEBUG, &device->dev, + "host bridge window %pR (ignored)\n", entry->res); + resource_list_destroy_entry(entry); + } + x86_pci_root_bus_resources(busnum, &ci->resources); + + return 0; } +static struct acpi_pci_root_ops acpi_pci_root_ops = { + .pci_ops = &pci_root_ops, + .init_info = pci_acpi_root_init_info, + .release_info = pci_acpi_root_release_info, + .prepare_resources = pci_acpi_root_prepare_resources, +}; + struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) { - struct acpi_device *device = root->device; - struct pci_root_info *info; int domain = root->segment; int busnum = root->secondary.start; - struct resource_entry *res_entry; - LIST_HEAD(crs_res); - LIST_HEAD(resources); + int node = pci_acpi_root_get_node(root); struct pci_bus *bus; - struct pci_sysdata *sd; - int node; if (pci_ignore_seg) - domain = 0; + root->segment = domain = 0; if (domain && !pci_domains_supported) { printk(KERN_WARNING "pci_bus %04x:%02x: " @@ -410,71 +331,33 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; } - node = acpi_get_node(device->handle); - if (node == NUMA_NO_NODE) { - node = x86_pci_root_bus_node(busnum); - if (node != 0 && node != NUMA_NO_NODE) - dev_info(&device->dev, FW_BUG "no _PXM; falling back to node %d from hardware (may be inconsistent with ACPI node numbers)\n", - node); - } - - if (node != NUMA_NO_NODE && !node_online(node)) - node = NUMA_NO_NODE; - - info = kzalloc_node(sizeof(*info), GFP_KERNEL, node); - if (!info) { - printk(KERN_WARNING "pci_bus %04x:%02x: " - "ignored (out of memory)\n", domain, busnum); - return NULL; - } - - sd = &info->sd; - sd->domain = domain; - sd->node = node; - sd->companion = device; - bus = pci_find_bus(domain, busnum); if (bus) { /* * If the desired bus has been scanned already, replace * its bus->sysdata. */ - memcpy(bus->sysdata, sd, sizeof(*sd)); - kfree(info); - } else { - /* insert busn res at first */ - pci_add_resource(&resources, &root->secondary); + struct pci_sysdata sd = { + .domain = domain, + .node = node, + .companion = root->device + }; - /* - * _CRS with no apertures is normal, so only fall back to - * defaults or native bridge info if we're ignoring _CRS. - */ - probe_pci_root_info(info, device, busnum, domain, &crs_res); - if (pci_use_crs) { - add_resources(info, &resources, &crs_res); - } else { - resource_list_for_each_entry(res_entry, &crs_res) - dev_printk(KERN_DEBUG, &device->dev, - "host bridge window %pR (ignored)\n", - res_entry->res); - resource_list_free(&crs_res); - x86_pci_root_bus_resources(busnum, &resources); - } - - if (!setup_mcfg_map(info, domain, (u8)root->secondary.start, - (u8)root->secondary.end, root->mcfg_addr)) - bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, - sd, &resources); - - if (bus) { - pci_scan_child_bus(bus); - pci_set_host_bridge_release( - to_pci_host_bridge(bus->bridge), - release_pci_root_info, info); - } else { - resource_list_free(&resources); - teardown_mcfg_map(info); - kfree(info); + memcpy(bus->sysdata, &sd, sizeof(sd)); + } else { + struct pci_root_info *info; + + info = kzalloc_node(sizeof(*info), GFP_KERNEL, node); + if (!info) + dev_err(&root->device->dev, + "pci_bus %04x:%02x: ignored (out of memory)\n", + domain, busnum); + else { + info->sd.domain = domain; + info->sd.node = node; + info->sd.companion = root->device; + bus = acpi_pci_root_create(root, &acpi_pci_root_ops, + &info->common, &info->sd); } } @@ -487,9 +370,6 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) pcie_bus_configure_settings(child); } - if (bus && node != NUMA_NO_NODE) - dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node); - return bus; } diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index d7f997f7c26d..ea48449b2e63 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -50,11 +50,16 @@ void __init efi_bgrt_init(void) bgrt_tab->version); return; } - if (bgrt_tab->status != 1) { - pr_err("Ignoring BGRT: invalid status %u (expected 1)\n", + if (bgrt_tab->status & 0xfe) { + pr_err("Ignoring BGRT: reserved status bits are non-zero %u\n", bgrt_tab->status); return; } + if (bgrt_tab->status != 1) { + pr_debug("Ignoring BGRT: invalid status %u (expected 1)\n", + bgrt_tab->status); + return; + } if (bgrt_tab->image_type != 0) { pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n", bgrt_tab->image_type); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1db84c0758b7..ad285404ea7f 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -194,7 +194,7 @@ static void __init do_add_efi_memmap(void) int __init efi_memblock_x86_reserve_range(void) { struct efi_info *e = &boot_params.efi_info; - unsigned long pmap; + phys_addr_t pmap; if (efi_enabled(EFI_PARAVIRT)) return 0; @@ -209,7 +209,7 @@ int __init efi_memblock_x86_reserve_range(void) #else pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32)); #endif - memmap.phys_map = (void *)pmap; + memmap.phys_map = pmap; memmap.nr_map = e->efi_memmap_size / e->efi_memdesc_size; memmap.desc_size = e->efi_memdesc_size; @@ -222,7 +222,7 @@ int __init efi_memblock_x86_reserve_range(void) return 0; } -static void __init print_efi_memmap(void) +void __init efi_print_memmap(void) { #ifdef EFI_DEBUG efi_memory_desc_t *md; @@ -524,7 +524,7 @@ void __init efi_init(void) return; if (efi_enabled(EFI_DBG)) - print_efi_memmap(); + efi_print_memmap(); efi_esrt_init(); } @@ -705,6 +705,70 @@ out: } /* + * Iterate the EFI memory map in reverse order because the regions + * will be mapped top-down. The end result is the same as if we had + * mapped things forward, but doesn't require us to change the + * existing implementation of efi_map_region(). + */ +static inline void *efi_map_next_entry_reverse(void *entry) +{ + /* Initial call */ + if (!entry) + return memmap.map_end - memmap.desc_size; + + entry -= memmap.desc_size; + if (entry < memmap.map) + return NULL; + + return entry; +} + +/* + * efi_map_next_entry - Return the next EFI memory map descriptor + * @entry: Previous EFI memory map descriptor + * + * This is a helper function to iterate over the EFI memory map, which + * we do in different orders depending on the current configuration. + * + * To begin traversing the memory map @entry must be %NULL. + * + * Returns %NULL when we reach the end of the memory map. + */ +static void *efi_map_next_entry(void *entry) +{ + if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + /* + * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE + * config table feature requires us to map all entries + * in the same order as they appear in the EFI memory + * map. That is to say, entry N must have a lower + * virtual address than entry N+1. This is because the + * firmware toolchain leaves relative references in + * the code/data sections, which are split and become + * separate EFI memory regions. Mapping things + * out-of-order leads to the firmware accessing + * unmapped addresses. + * + * Since we need to map things this way whether or not + * the kernel actually makes use of + * EFI_PROPERTIES_TABLE, let's just switch to this + * scheme by default for 64-bit. + */ + return efi_map_next_entry_reverse(entry); + } + + /* Initial call */ + if (!entry) + return memmap.map; + + entry += memmap.desc_size; + if (entry >= memmap.map_end) + return NULL; + + return entry; +} + +/* * Map the efi memory ranges of the runtime services and update new_mmap with * virtual addresses. */ @@ -714,7 +778,8 @@ static void * __init efi_map_regions(int *count, int *pg_shift) unsigned long left = 0; efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + p = NULL; + while ((p = efi_map_next_entry(p))) { md = p; if (!(md->attribute & EFI_MEMORY_RUNTIME)) { #ifdef CONFIG_X86_64 @@ -952,24 +1017,6 @@ u32 efi_mem_type(unsigned long phys_addr) return 0; } -u64 efi_mem_attributes(unsigned long phys_addr) -{ - efi_memory_desc_t *md; - void *p; - - if (!efi_enabled(EFI_MEMMAP)) - return 0; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if ((md->phys_addr <= phys_addr) && - (phys_addr < (md->phys_addr + - (md->num_pages << EFI_PAGE_SHIFT)))) - return md->attribute; - } - return 0; -} - static int __init arch_parse_efi_cmdline(char *str) { if (!str) { @@ -979,8 +1026,6 @@ static int __init arch_parse_efi_cmdline(char *str) if (parse_option_str(str, "old_map")) set_bit(EFI_OLD_MEMMAP, &efi.flags); - if (parse_option_str(str, "debug")) - set_bit(EFI_DBG, &efi.flags); return 0; } diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 01d54ea766c1..1bbc21e2e4ae 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -61,7 +61,7 @@ enum intel_mid_timer_options intel_mid_timer_options; /* intel_mid_ops to store sub arch ops */ -struct intel_mid_ops *intel_mid_ops; +static struct intel_mid_ops *intel_mid_ops; /* getter function for sub arch ops*/ static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT; enum intel_mid_cpu_type __intel_mid_cpu_chip; diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index ce992e8cc065..5ee360a951ce 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -197,10 +197,9 @@ static int __init sfi_parse_gpio(struct sfi_table_header *table) num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry); pentry = (struct sfi_gpio_table_entry *)sb->pentry; - gpio_table = kmalloc(num * sizeof(*pentry), GFP_KERNEL); + gpio_table = kmemdup(pentry, num * sizeof(*pentry), GFP_KERNEL); if (!gpio_table) return -1; - memcpy(gpio_table, pentry, num * sizeof(*pentry)); gpio_num_entry = num; pr_debug("GPIO pin info:\n"); diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 5c9f63fa6abf..327f21c3bde1 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -376,38 +376,42 @@ static void uv_nmi_wait(int master) atomic_read(&uv_nmi_cpus_in_nmi), num_online_cpus()); } +/* Dump Instruction Pointer header */ static void uv_nmi_dump_cpu_ip_hdr(void) { - printk(KERN_DEFAULT - "\nUV: %4s %6s %-32s %s (Note: PID 0 not listed)\n", + pr_info("\nUV: %4s %6s %-32s %s (Note: PID 0 not listed)\n", "CPU", "PID", "COMMAND", "IP"); } +/* Dump Instruction Pointer info */ static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs) { - printk(KERN_DEFAULT "UV: %4d %6d %-32.32s ", - cpu, current->pid, current->comm); - + pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm); printk_address(regs->ip); } -/* Dump this cpu's state */ +/* + * Dump this CPU's state. If action was set to "kdump" and the crash_kexec + * failed, then we provide "dump" as an alternate action. Action "dump" now + * also includes the show "ips" (instruction pointers) action whereas the + * action "ips" only displays instruction pointers for the non-idle CPU's. + * This is an abbreviated form of the "ps" command. + */ static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs) { const char *dots = " ................................. "; - if (uv_nmi_action_is("ips")) { - if (cpu == 0) - uv_nmi_dump_cpu_ip_hdr(); + if (cpu == 0) + uv_nmi_dump_cpu_ip_hdr(); - if (current->pid != 0) - uv_nmi_dump_cpu_ip(cpu, regs); + if (current->pid != 0 || !uv_nmi_action_is("ips")) + uv_nmi_dump_cpu_ip(cpu, regs); - } else if (uv_nmi_action_is("dump")) { - printk(KERN_DEFAULT - "UV:%sNMI process trace for CPU %d\n", dots, cpu); + if (uv_nmi_action_is("dump")) { + pr_info("UV:%sNMI process trace for CPU %d\n", dots, cpu); show_regs(regs); } + this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE); } @@ -469,8 +473,7 @@ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master) uv_nmi_trigger_dump(tcpu); } if (ignored) - printk(KERN_DEFAULT "UV: %d CPUs ignored NMI\n", - ignored); + pr_alert("UV: %d CPUs ignored NMI\n", ignored); console_loglevel = saved_console_loglevel; pr_alert("UV: process trace complete\n"); @@ -492,8 +495,9 @@ static void uv_nmi_touch_watchdogs(void) touch_nmi_watchdog(); } -#if defined(CONFIG_KEXEC_CORE) static atomic_t uv_nmi_kexec_failed; + +#if defined(CONFIG_KEXEC_CORE) static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) { /* Call crash to dump system state */ @@ -502,10 +506,9 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) crash_kexec(regs); pr_emerg("UV: crash_kexec unexpectedly returned, "); + atomic_set(&uv_nmi_kexec_failed, 1); if (!kexec_crash_image) { pr_cont("crash kernel not loaded\n"); - atomic_set(&uv_nmi_kexec_failed, 1); - uv_nmi_sync_exit(1); return; } pr_cont("kexec busy, stalling cpus while waiting\n"); @@ -514,9 +517,6 @@ static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) /* If crash exec fails the slaves should return, otherwise stall */ while (atomic_read(&uv_nmi_kexec_failed) == 0) mdelay(10); - - /* Crash kernel most likely not loaded, return in an orderly fashion */ - uv_nmi_sync_exit(0); } #else /* !CONFIG_KEXEC_CORE */ @@ -524,6 +524,7 @@ static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) { if (master) pr_err("UV: NMI kdump: KEXEC not supported in this kernel\n"); + atomic_set(&uv_nmi_kexec_failed, 1); } #endif /* !CONFIG_KEXEC_CORE */ @@ -613,9 +614,14 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) master = (atomic_read(&uv_nmi_cpu) == cpu); /* If NMI action is "kdump", then attempt to do it */ - if (uv_nmi_action_is("kdump")) + if (uv_nmi_action_is("kdump")) { uv_nmi_kdump(cpu, master, regs); + /* Unexpected return, revert action to "dump" */ + if (master) + strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action)); + } + /* Pause as all cpus enter the NMI handler */ uv_nmi_wait(master); @@ -640,6 +646,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) atomic_set(&uv_nmi_cpus_in_nmi, -1); atomic_set(&uv_nmi_cpu, -1); atomic_set(&uv_in_nmi, 0); + atomic_set(&uv_nmi_kexec_failed, 0); } uv_nmi_touch_watchdogs(); diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig index 10fea5fc821e..df280da34825 100644 --- a/arch/x86/ras/Kconfig +++ b/arch/x86/ras/Kconfig @@ -1,11 +1,9 @@ config AMD_MCE_INJ tristate "Simple MCE injection interface for AMD processors" - depends on RAS && EDAC_DECODE_MCE && DEBUG_FS + depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB default n help This is a simple debugfs interface to inject MCEs and test different aspects of the MCE handling code. WARNING: Do not even assume this interface is staying stable! - - diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 17e35b5bf779..55d38cfa46c2 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -17,7 +17,11 @@ #include <linux/cpu.h> #include <linux/string.h> #include <linux/uaccess.h> +#include <linux/pci.h> + #include <asm/mce.h> +#include <asm/amd_nb.h> +#include <asm/irq_vectors.h> #include "../kernel/cpu/mcheck/mce-internal.h" @@ -30,16 +34,21 @@ static struct dentry *dfs_inj; static u8 n_banks; #define MAX_FLAG_OPT_SIZE 3 +#define NBCFG 0x44 enum injection_type { SW_INJ = 0, /* SW injection, simply decode the error */ HW_INJ, /* Trigger a #MC */ + DFR_INT_INJ, /* Trigger Deferred error interrupt */ + THR_INT_INJ, /* Trigger threshold interrupt */ N_INJ_TYPES, }; static const char * const flags_options[] = { [SW_INJ] = "sw", [HW_INJ] = "hw", + [DFR_INT_INJ] = "df", + [THR_INT_INJ] = "th", NULL }; @@ -129,12 +138,9 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf, { char buf[MAX_FLAG_OPT_SIZE], *__buf; int err; - size_t ret; if (cnt > MAX_FLAG_OPT_SIZE) - cnt = MAX_FLAG_OPT_SIZE; - - ret = cnt; + return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; @@ -150,9 +156,9 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf, return err; } - *ppos += ret; + *ppos += cnt; - return ret; + return cnt; } static const struct file_operations flags_fops = { @@ -185,6 +191,55 @@ static void trigger_mce(void *info) asm volatile("int $18"); } +static void trigger_dfr_int(void *info) +{ + asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR)); +} + +static void trigger_thr_int(void *info) +{ + asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR)); +} + +static u32 get_nbc_for_node(int node_id) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + u32 cores_per_node; + + cores_per_node = c->x86_max_cores / amd_get_nodes_per_socket(); + + return cores_per_node * node_id; +} + +static void toggle_nb_mca_mst_cpu(u16 nid) +{ + struct pci_dev *F3 = node_to_amd_nb(nid)->misc; + u32 val; + int err; + + if (!F3) + return; + + err = pci_read_config_dword(F3, NBCFG, &val); + if (err) { + pr_err("%s: Error reading F%dx%03x.\n", + __func__, PCI_FUNC(F3->devfn), NBCFG); + return; + } + + if (val & BIT(27)) + return; + + pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n", + __func__); + + val |= BIT(27); + err = pci_write_config_dword(F3, NBCFG, val); + if (err) + pr_err("%s: Error writing F%dx%03x.\n", + __func__, PCI_FUNC(F3->devfn), NBCFG); +} + static void do_inject(void) { u64 mcg_status = 0; @@ -205,6 +260,26 @@ static void do_inject(void) if (!(i_mce.status & MCI_STATUS_PCC)) mcg_status |= MCG_STATUS_RIPV; + /* + * Ensure necessary status bits for deferred errors: + * - MCx_STATUS[Deferred]: make sure it is a deferred error + * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC + */ + if (inj_type == DFR_INT_INJ) { + i_mce.status |= MCI_STATUS_DEFERRED; + i_mce.status |= (i_mce.status & ~MCI_STATUS_UC); + } + + /* + * For multi node CPUs, logging and reporting of bank 4 errors happens + * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for + * Fam10h and later BKDGs. + */ + if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) { + toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu)); + cpu = get_nbc_for_node(amd_get_nb_id(cpu)); + } + get_online_cpus(); if (!cpu_online(cpu)) goto err; @@ -225,7 +300,16 @@ static void do_inject(void) toggle_hw_mce_inject(cpu, false); - smp_call_function_single(cpu, trigger_mce, NULL, 0); + switch (inj_type) { + case DFR_INT_INJ: + smp_call_function_single(cpu, trigger_dfr_int, NULL, 0); + break; + case THR_INT_INJ: + smp_call_function_single(cpu, trigger_thr_int, NULL, 0); + break; + default: + smp_call_function_single(cpu, trigger_mce, NULL, 0); + } err: put_online_cpus(); @@ -290,6 +374,11 @@ static const char readme_msg[] = "\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n" "\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n" "\t before injecting.\n" +"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n" +"\t error APIC interrupt handler to handle the error if the feature is \n" +"\t is present in hardware. \n" +"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n" +"\t APIC interrupt handler to handle the error. \n" "\n"; static ssize_t diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h index 9fe77b7b5a0e..81d6562ce01d 100644 --- a/arch/x86/um/asm/syscall.h +++ b/arch/x86/um/asm/syscall.h @@ -3,6 +3,10 @@ #include <uapi/linux/audit.h> +typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, + unsigned long, unsigned long, + unsigned long, unsigned long); + static inline int syscall_get_arch(void) { #ifdef CONFIG_X86_32 diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 9701a4fd7bf2..836a1eb5df43 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c @@ -12,7 +12,10 @@ #include <skas.h> #include <sysdep/tls.h> -extern int modify_ldt(int func, void *ptr, unsigned long bytecount); +static inline int modify_ldt (int func, void *ptr, unsigned long bytecount) +{ + return syscall(__NR_modify_ldt, func, ptr, bytecount); +} static long write_ldt_entry(struct mm_id *mm_idp, int func, struct user_desc *desc, void **addr, int done) diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index bd16d6c370ec..439c0994b696 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -7,6 +7,7 @@ #include <linux/sys.h> #include <linux/cache.h> #include <generated/user_constants.h> +#include <asm/syscall.h> #define __NO_STUBS @@ -24,15 +25,13 @@ #define old_mmap sys_old_mmap -#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; +#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; #include <asm/syscalls_32.h> #undef __SYSCALL_I386 #define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym, -typedef asmlinkage void (*sys_call_ptr_t)(void); - -extern asmlinkage void sys_ni_syscall(void); +extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { /* diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index a75d8700472a..b74ea6c2c0e7 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c @@ -7,6 +7,7 @@ #include <linux/sys.h> #include <linux/cache.h> #include <generated/user_constants.h> +#include <asm/syscall.h> #define __NO_STUBS @@ -37,15 +38,13 @@ #define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat) #define __SYSCALL_X32(nr, sym, compat) /* Not supported */ -#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ; +#define __SYSCALL_64(nr, sym, compat) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ; #include <asm/syscalls_64.h> #undef __SYSCALL_64 #define __SYSCALL_64(nr, sym, compat) [ nr ] = sym, -typedef void (*sys_call_ptr_t)(void); - -extern void sys_ni_syscall(void); +extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { /* diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 30d12afe52ed..5774800ff583 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,10 @@ #include <linux/memblock.h> #include <linux/edd.h> +#ifdef CONFIG_KEXEC_CORE +#include <linux/kexec.h> +#endif + #include <xen/xen.h> #include <xen/events.h> #include <xen/interface/xen.h> @@ -71,6 +75,7 @@ #include <asm/mwait.h> #include <asm/pci_x86.h> #include <asm/pat.h> +#include <asm/cpu.h> #ifdef CONFIG_ACPI #include <linux/acpi.h> @@ -1077,6 +1082,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) /* Fast syscall setup is all done in hypercalls, so these are all ignored. Stub them out here to stop Xen console noise. */ + break; default: if (!pmu_msr_write(msr, low, high, &ret)) @@ -1807,6 +1813,21 @@ static struct notifier_block xen_hvm_cpu_notifier = { .notifier_call = xen_hvm_cpu_notify, }; +#ifdef CONFIG_KEXEC_CORE +static void xen_hvm_shutdown(void) +{ + native_machine_shutdown(); + if (kexec_in_progress) + xen_reboot(SHUTDOWN_soft_reset); +} + +static void xen_hvm_crash_shutdown(struct pt_regs *regs) +{ + native_machine_crash_shutdown(regs); + xen_reboot(SHUTDOWN_soft_reset); +} +#endif + static void __init xen_hvm_guest_init(void) { if (xen_pv_domain()) @@ -1826,6 +1847,10 @@ static void __init xen_hvm_guest_init(void) x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); +#ifdef CONFIG_KEXEC_CORE + machine_ops.shutdown = xen_hvm_shutdown; + machine_ops.crash_shutdown = xen_hvm_crash_shutdown; +#endif } #endif @@ -1875,3 +1900,17 @@ const struct hypervisor_x86 x86_hyper_xen = { .set_cpu_features = xen_set_cpu_features, }; EXPORT_SYMBOL(x86_hyper_xen); + +#ifdef CONFIG_HOTPLUG_CPU +void xen_arch_register_cpu(int num) +{ + arch_register_cpu(num); +} +EXPORT_SYMBOL(xen_arch_register_cpu); + +void xen_arch_unregister_cpu(int num) +{ + arch_unregister_cpu(num); +} +EXPORT_SYMBOL(xen_arch_unregister_cpu); +#endif diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 1580e7a5a4cf..e079500b17f3 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -133,7 +133,7 @@ static int __init xlated_setup_gnttab_pages(void) kfree(pages); return -ENOMEM; } - rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */); + rc = alloc_xenballooned_pages(nr_grant_frames, pages); if (rc) { pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, nr_grant_frames, rc); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 9c479fe40459..ac161db63388 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2888,6 +2888,7 @@ static int do_remap_gfn(struct vm_area_struct *vma, addr += range; if (err_ptr) err_ptr += batch; + cond_resched(); } out: diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index bfc08b13044b..cab9f766bb06 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -112,6 +112,15 @@ static unsigned long *p2m_identity; static pte_t *p2m_missing_pte; static pte_t *p2m_identity_pte; +/* + * Hint at last populated PFN. + * + * Used to set HYPERVISOR_shared_info->arch.max_pfn so the toolstack + * can avoid scanning the whole P2M (which may be sized to account for + * hotplugged memory). + */ +static unsigned long xen_p2m_last_pfn; + static inline unsigned p2m_top_index(unsigned long pfn) { BUG_ON(pfn >= MAX_P2M_PFN); @@ -270,7 +279,7 @@ void xen_setup_mfn_list_list(void) else HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = virt_to_mfn(p2m_top_mfn); - HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn; + HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; HYPERVISOR_shared_info->arch.p2m_generation = 0; HYPERVISOR_shared_info->arch.p2m_vaddr = (unsigned long)xen_p2m_addr; HYPERVISOR_shared_info->arch.p2m_cr3 = @@ -406,6 +415,8 @@ void __init xen_vmalloc_p2m_tree(void) static struct vm_struct vm; unsigned long p2m_limit; + xen_p2m_last_pfn = xen_max_p2m_pfn; + p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE; vm.flags = VM_ALLOC; vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit), @@ -519,7 +530,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) * the new pages are installed with cmpxchg; if we lose the race then * simply free the page we allocated and use the one that's there. */ -static bool alloc_p2m(unsigned long pfn) +int xen_alloc_p2m_entry(unsigned long pfn) { unsigned topidx; unsigned long *top_mfn_p, *mid_mfn; @@ -529,6 +540,9 @@ static bool alloc_p2m(unsigned long pfn) unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); unsigned long p2m_pfn; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + ptep = lookup_address(addr, &level); BUG_ON(!ptep || level != PG_LEVEL_4K); pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); @@ -537,7 +551,7 @@ static bool alloc_p2m(unsigned long pfn) /* PMD level is missing, allocate a new one */ ptep = alloc_p2m_pmd(addr, pte_pg); if (!ptep) - return false; + return -ENOMEM; } if (p2m_top_mfn && pfn < MAX_P2M_PFN) { @@ -555,7 +569,7 @@ static bool alloc_p2m(unsigned long pfn) mid_mfn = alloc_p2m_page(); if (!mid_mfn) - return false; + return -ENOMEM; p2m_mid_mfn_init(mid_mfn, p2m_missing); @@ -581,7 +595,7 @@ static bool alloc_p2m(unsigned long pfn) p2m = alloc_p2m_page(); if (!p2m) - return false; + return -ENOMEM; if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) p2m_init(p2m); @@ -608,8 +622,15 @@ static bool alloc_p2m(unsigned long pfn) free_p2m_page(p2m); } - return true; + /* Expanded the p2m? */ + if (pfn > xen_p2m_last_pfn) { + xen_p2m_last_pfn = pfn; + HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; + } + + return 0; } +EXPORT_SYMBOL(xen_alloc_p2m_entry); unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e) @@ -671,7 +692,10 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) { if (unlikely(!__set_phys_to_machine(pfn, mfn))) { - if (!alloc_p2m(pfn)) + int ret; + + ret = xen_alloc_p2m_entry(pfn); + if (ret < 0) return false; return __set_phys_to_machine(pfn, mfn); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index f5ef6746d47a..7ab29518a3b9 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -212,7 +212,7 @@ static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn) e_pfn = PFN_DOWN(entry->addr + entry->size); /* We only care about E820 after this */ - if (e_pfn < *min_pfn) + if (e_pfn <= *min_pfn) continue; s_pfn = PFN_UP(entry->addr); @@ -548,7 +548,7 @@ static unsigned long __init xen_get_max_pages(void) { unsigned long max_pages, limit; domid_t domid = DOMID_SELF; - int ret; + long ret; limit = xen_get_pages_limit(); max_pages = limit; @@ -798,7 +798,7 @@ char * __init xen_memory_setup(void) xen_ignore_unusable(); /* Make sure the Xen-supplied memory map is well-ordered. */ - sanitize_e820_map(xen_e820_map, xen_e820_map_entries, + sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map), &xen_e820_map_entries); max_pages = xen_get_max_pages(); @@ -829,6 +829,8 @@ char * __init xen_memory_setup(void) addr = xen_e820_map[0].addr; size = xen_e820_map[0].size; while (i < xen_e820_map_entries) { + bool discard = false; + chunk_size = size; type = xen_e820_map[i].type; @@ -843,10 +845,11 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - type = E820_UNUSABLE; + discard = true; } - xen_align_and_add_e820_region(addr, chunk_size, type); + if (!discard) + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; @@ -965,17 +968,8 @@ char * __init xen_auto_xlated_memory_setup(void) static void __init fiddle_vdso(void) { #ifdef CONFIG_X86_32 - /* - * This could be called before selected_vdso32 is initialized, so - * just fiddle with both possible images. vdso_image_32_syscall - * can't be selected, since it only exists on 64-bit systems. - */ - u32 *mask; - mask = vdso_image_32_int80.data + - vdso_image_32_int80.sym_VDSO32_NOTE_MASK; - *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; - mask = vdso_image_32_sysenter.data + - vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK; + u32 *mask = vdso_image_32.data + + vdso_image_32.sym_VDSO32_NOTE_MASK; *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; #endif } diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 63c223dff5f1..b56855a1382a 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -28,4 +28,5 @@ generic-y += statfs.h generic-y += termios.h generic-y += topology.h generic-y += trace_clock.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 93795d047303..fd8017ce298a 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -47,7 +47,7 @@ * * Atomically reads the value of @v. */ -#define atomic_read(v) ACCESS_ONCE((v)->counter) +#define atomic_read(v) READ_ONCE((v)->counter) /** * atomic_set - set atomic variable @@ -56,7 +56,7 @@ * * Atomically sets the value of @v to @i. */ -#define atomic_set(v,i) ((v)->counter = (i)) +#define atomic_set(v,i) WRITE_ONCE((v)->counter, (i)) #if XCHAL_HAVE_S32C1I #define ATOMIC_OP(op) \ diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 201aec0e0446..360944e1da52 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -74,6 +74,12 @@ */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ + +/* + * Flags for mlock + */ +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ #define MADV_NORMAL 0 /* no further special treatment */ #define MADV_RANDOM 1 /* expect random page references */ diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index b97767dbc7c8..b9ad9feadc2d 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -148,7 +148,7 @@ void __init time_init(void) local_timer_setup(0); setup_irq(this_cpu_ptr(&ccount_timer)->evt.irq, &timer_irqaction); sched_clock_register(ccount_sched_clock_read, 32, ccount_freq); - clocksource_of_init(); + clocksource_probe(); } /* |