diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2015-08-22 23:57:59 +0200 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2015-08-22 23:57:59 +0200 |
commit | e3dbc572fe11a5231568e106fa3dcedd1d1bec0f (patch) | |
tree | 5e15ee35c77e75d51cb550cb3cb0f6ecafa7b508 /arch | |
parent | Merge tag 'kvm-arm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/... (diff) | |
parent | KVM: PPC: Book3S: correct width in XER handling (diff) | |
download | linux-e3dbc572fe11a5231568e106fa3dcedd1d1bec0f.tar.xz linux-e3dbc572fe11a5231568e106fa3dcedd1d1bec0f.zip |
Merge tag 'signed-kvm-ppc-next' of git://github.com/agraf/linux-2.6 into kvm-queue
Patch queue for ppc - 2015-08-22
Highlights for KVM PPC this time around:
- Book3S: A few bug fixes
- Book3S: Allow micro-threading on POWER8
Diffstat (limited to 'arch')
247 files changed, 3114 insertions, 1926 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index bec6666a3cc4..8a8ea7110de8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -221,6 +221,10 @@ config ARCH_TASK_STRUCT_ALLOCATOR config ARCH_THREAD_INFO_ALLOCATOR bool +# Select if arch wants to size task_struct dynamically via arch_task_struct_size: +config ARCH_WANTS_DYNAMIC_TASK_STRUCT + bool + config HAVE_REGS_AND_STACK_ACCESS_API bool help diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index cde23cd03609..ffd9cf5ec8c4 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h diff --git a/arch/alpha/include/asm/mm-arch-hooks.h b/arch/alpha/include/asm/mm-arch-hooks.h deleted file mode 100644 index b07fd862fec3..000000000000 --- a/arch/alpha/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_ALPHA_MM_ARCH_HOOKS_H -#define _ASM_ALPHA_MM_ARCH_HOOKS_H - -#endif /* _ASM_ALPHA_MM_ARCH_HOOKS_H */ diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index e7cee0a5c56d..91cf4055acab 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -115,6 +115,7 @@ if ISA_ARCOMPACT config ARC_CPU_750D bool "ARC750D" + select ARC_CANT_LLSC help Support for ARC750 core @@ -362,7 +363,7 @@ config ARC_CANT_LLSC config ARC_HAS_LLSC bool "Insn: LLOCK/SCOND (efficient atomic ops)" default y - depends on !ARC_CPU_750D && !ARC_CANT_LLSC + depends on !ARC_CANT_LLSC config ARC_HAS_SWAPE bool "Insn: SWAPE (endian-swap)" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 6107062c0111..46d87310220d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -49,7 +49,8 @@ endif ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE # Generic build system uses -O2, we want -O3 -cflags-y += -O3 +# Note: No need to add to cflags-y as that happens anyways +ARCH_CFLAGS += -O3 endif # small data is default for elf32 tool-chain. If not usable, disable it diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index 15c8d6226c9d..1cd5e82f5dc2 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -12,7 +12,7 @@ / { compatible = "snps,arc"; - clock-frequency = <75000000>; + clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 199d42820eca..2f0b33257db2 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -12,7 +12,7 @@ / { compatible = "snps,arc"; - clock-frequency = <75000000>; + clock-frequency = <90000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 1a80cc91a03b..7611b10a2d23 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h generic-y += param.h diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index 99fe118d3730..57c1f33844d4 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -50,8 +50,7 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ * done for const @nr, but no code is generated due to gcc \ * const prop. \ */ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ + nr &= 0x1f; \ \ __asm__ __volatile__( \ "1: llock %0, [%1] \n" \ @@ -82,8 +81,7 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long * \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ + nr &= 0x1f; \ \ /* \ * Explicit full memory barrier needed before/after as \ @@ -129,16 +127,13 @@ static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ unsigned long temp, flags; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ /* \ * spin lock/unlock provide the needed smp_mb() before/after \ */ \ bitops_lock(flags); \ \ temp = *m; \ - *m = temp c_op (1UL << nr); \ + *m = temp c_op (1UL << (nr & 0x1f)); \ \ bitops_unlock(flags); \ } @@ -149,17 +144,14 @@ static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long * unsigned long old, flags; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ bitops_lock(flags); \ \ old = *m; \ - *m = old c_op (1 << nr); \ + *m = old c_op (1UL << (nr & 0x1f)); \ \ bitops_unlock(flags); \ \ - return (old & (1 << nr)) != 0; \ + return (old & (1UL << (nr & 0x1f))) != 0; \ } #endif /* CONFIG_ARC_HAS_LLSC */ @@ -174,11 +166,8 @@ static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ unsigned long temp; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ temp = *m; \ - *m = temp c_op (1UL << nr); \ + *m = temp c_op (1UL << (nr & 0x1f)); \ } #define __TEST_N_BIT_OP(op, c_op, asm_op) \ @@ -187,13 +176,10 @@ static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long unsigned long old; \ m += nr >> 5; \ \ - if (__builtin_constant_p(nr)) \ - nr &= 0x1f; \ - \ old = *m; \ - *m = old c_op (1 << nr); \ + *m = old c_op (1UL << (nr & 0x1f)); \ \ - return (old & (1 << nr)) != 0; \ + return (old & (1UL << (nr & 0x1f))) != 0; \ } #define BIT_OPS(op, c_op, asm_op) \ @@ -224,10 +210,7 @@ test_bit(unsigned int nr, const volatile unsigned long *addr) addr += nr >> 5; - if (__builtin_constant_p(nr)) - nr &= 0x1f; - - mask = 1 << nr; + mask = 1UL << (nr & 0x1f); return ((mask & *addr) != 0); } diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h index 05b5aaf5b0f9..70cfe16b742d 100644 --- a/arch/arc/include/asm/futex.h +++ b/arch/arc/include/asm/futex.h @@ -16,12 +16,40 @@ #include <linux/uaccess.h> #include <asm/errno.h> +#ifdef CONFIG_ARC_HAS_LLSC + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\ + \ + __asm__ __volatile__( \ + "1: llock %1, [%2] \n" \ + insn "\n" \ + "2: scond %0, [%2] \n" \ + " bnz 1b \n" \ + " mov %0, 0 \n" \ + "3: \n" \ + " .section .fixup,\"ax\" \n" \ + " .align 4 \n" \ + "4: mov %0, %4 \n" \ + " b 3b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " .align 4 \n" \ + " .word 1b, 4b \n" \ + " .word 2b, 4b \n" \ + " .previous \n" \ + \ + : "=&r" (ret), "=&r" (oldval) \ + : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \ + : "cc", "memory") + +#else /* !CONFIG_ARC_HAS_LLSC */ + #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\ \ __asm__ __volatile__( \ - "1: ld %1, [%2] \n" \ + "1: ld %1, [%2] \n" \ insn "\n" \ - "2: st %0, [%2] \n" \ + "2: st %0, [%2] \n" \ " mov %0, 0 \n" \ "3: \n" \ " .section .fixup,\"ax\" \n" \ @@ -39,6 +67,8 @@ : "r" (uaddr), "r" (oparg), "ir" (-EFAULT) \ : "cc", "memory") +#endif + static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; @@ -123,11 +153,17 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, pagefault_disable(); - /* TBD : can use llock/scond */ __asm__ __volatile__( - "1: ld %0, [%3] \n" - " brne %0, %1, 3f \n" - "2: st %2, [%3] \n" +#ifdef CONFIG_ARC_HAS_LLSC + "1: llock %0, [%3] \n" + " brne %0, %1, 3f \n" + "2: scond %2, [%3] \n" + " bnz 1b \n" +#else + "1: ld %0, [%3] \n" + " brne %0, %1, 3f \n" + "2: st %2, [%3] \n" +#endif "3: \n" " .section .fixup,\"ax\" \n" "4: mov %0, %4 \n" diff --git a/arch/arc/include/asm/mm-arch-hooks.h b/arch/arc/include/asm/mm-arch-hooks.h deleted file mode 100644 index c37541c5f8ba..000000000000 --- a/arch/arc/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_ARC_MM_ARCH_HOOKS_H -#define _ASM_ARC_MM_ARCH_HOOKS_H - -#endif /* _ASM_ARC_MM_ARCH_HOOKS_H */ diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 91755972b9a2..91694ec1ce95 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -106,7 +106,7 @@ struct callee_regs { long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; }; -#define instruction_pointer(regs) ((regs)->ret) +#define instruction_pointer(regs) (unsigned long)((regs)->ret) #define profile_pc(regs) instruction_pointer(regs) /* return 1 if user mode or 0 if kernel mode */ diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index 6208c630abed..26c156827479 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -12,7 +12,6 @@ #include <linux/of.h> #include <linux/irqdomain.h> #include <linux/irqchip.h> -#include "../../drivers/irqchip/irqchip.h" #include <asm/irq.h> /* diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index fcdddb631766..039fac30b5c1 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -12,7 +12,6 @@ #include <linux/of.h> #include <linux/irqdomain.h> #include <linux/irqchip.h> -#include "../../drivers/irqchip/irqchip.h" #include <asm/irq.h> /* diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 30284e8de6ff..2fb86589054d 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -175,7 +175,6 @@ void mcip_init_early_smp(void) #include <linux/irqchip.h> #include <linux/of.h> #include <linux/of_irq.h> -#include "../../drivers/irqchip/irqchip.h" /* * Set the DEST for @cmn_irq to @cpu_mask (1 bit per core) @@ -218,11 +217,28 @@ static void idu_irq_unmask(struct irq_data *data) raw_spin_unlock_irqrestore(&mcip_lock, flags); } +#ifdef CONFIG_SMP static int -idu_irq_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool f) +idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask, + bool force) { + unsigned long flags; + cpumask_t online; + + /* errout if no online cpu per @cpumask */ + if (!cpumask_and(&online, cpumask, cpu_online_mask)) + return -EINVAL; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + idu_set_dest(data->hwirq, cpumask_bits(&online)[0]); + idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); + return IRQ_SET_MASK_OK; } +#endif static struct irq_chip idu_irq_chip = { .name = "MCIP IDU Intc", @@ -330,8 +346,7 @@ idu_of_init(struct device_node *intc, struct device_node *parent) if (!i) idu_first_irq = irq; - irq_set_handler_data(irq, domain); - irq_set_chained_handler(irq, idu_cascade_isr); + irq_set_chained_handler_and_data(irq, idu_cascade_isr, domain); } __mcip_cmd(CMD_IDU_ENABLE, 0); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index a3d186211ed3..18cc01591c96 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -142,17 +142,22 @@ static void read_arc_build_cfg_regs(void) } static const struct cpuinfo_data arc_cpu_tbl[] = { +#ifdef CONFIG_ISA_ARCOMPACT { {0x20, "ARC 600" }, 0x2F}, { {0x30, "ARC 700" }, 0x33}, { {0x34, "ARC 700 R4.10"}, 0x34}, { {0x35, "ARC 700 R4.11"}, 0x35}, - { {0x50, "ARC HS38" }, 0x51}, +#else + { {0x50, "ARC HS38 R2.0"}, 0x51}, + { {0x52, "ARC HS38 R2.1"}, 0x52}, +#endif { {0x00, NULL } } }; -#define IS_AVAIL1(v, str) ((v) ? str : "") -#define IS_USED(cfg) (IS_ENABLED(cfg) ? "" : "(not used) ") -#define IS_AVAIL2(v, str, cfg) IS_AVAIL1(v, str), IS_AVAIL1(v, IS_USED(cfg)) +#define IS_AVAIL1(v, s) ((v) ? s : "") +#define IS_USED_RUN(v) ((v) ? "" : "(not used) ") +#define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) +#define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { @@ -226,7 +231,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt); } n += scnprintf(buf + n, len - n, "%s", - IS_USED(CONFIG_ARC_HAS_HW_MPY)); + IS_USED_CFG(CONFIG_ARC_HAS_HW_MPY)); } n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 807f7d61d7a7..a6f91e88ce36 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -58,7 +58,6 @@ static void show_callee_regs(struct callee_regs *cregs) static void print_task_path_n_nm(struct task_struct *tsk, char *buf) { - struct path path; char *path_nm = NULL; struct mm_struct *mm; struct file *exe_file; diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index b29d62ed4f7e..1cd6695b6ab5 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -468,10 +468,18 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) { #ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned int ctrl; - local_irq_save(flags); + spin_lock_irqsave(&lock, flags); /* * The Region Flush operation is specified by CTRL.RGN_OP[11..9] @@ -504,7 +512,7 @@ noinline void slc_op(unsigned long paddr, unsigned long sz, const int op) while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); - local_irq_restore(flags); + spin_unlock_irqrestore(&lock, flags); #endif } diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 74a637a1cfc4..57706a9c6948 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -60,8 +60,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size, /* This is kernel Virtual address (0x7000_0000 based) */ kvaddr = ioremap_nocache((unsigned long)paddr, size); - if (kvaddr != NULL) - memset(kvaddr, 0, size); + if (kvaddr == NULL) + return NULL; /* This is bus address, platform dependent */ *dma_handle = (dma_addr_t)paddr; diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts index 0d35ab64641c..7106114c7464 100644 --- a/arch/arm/boot/dts/am335x-pepper.dts +++ b/arch/arm/boot/dts/am335x-pepper.dts @@ -74,6 +74,7 @@ audio_codec: tlv320aic3106@1b { compatible = "ti,tlv320aic3106"; reg = <0x1b>; + ai3x-micbias-vg = <0x2>; }; accel: lis331dlh@1d { @@ -153,7 +154,7 @@ ti,audio-routing = "Headphone Jack", "HPLOUT", "Headphone Jack", "HPROUT", - "LINE1L", "Line In"; + "MIC3L", "Mic3L Switch"; }; &mcasp0 { @@ -438,41 +439,50 @@ regulators { dcdc1_reg: regulator@0 { /* VDD_1V8 system supply */ + regulator-always-on; }; dcdc2_reg: regulator@1 { /* VDD_CORE voltage limits 0.95V - 1.26V with +/-4% tolerance */ regulator-name = "vdd_core"; regulator-min-microvolt = <925000>; - regulator-max-microvolt = <1325000>; + regulator-max-microvolt = <1150000>; regulator-boot-on; + regulator-always-on; }; dcdc3_reg: regulator@2 { /* VDD_MPU voltage limits 0.95V - 1.1V with +/-4% tolerance */ regulator-name = "vdd_mpu"; regulator-min-microvolt = <925000>; - regulator-max-microvolt = <1150000>; + regulator-max-microvolt = <1325000>; regulator-boot-on; + regulator-always-on; }; ldo1_reg: regulator@3 { /* VRTC 1.8V always-on supply */ + regulator-name = "vrtc,vdds"; regulator-always-on; }; ldo2_reg: regulator@4 { /* 3.3V rail */ + regulator-name = "vdd_3v3aux"; + regulator-always-on; }; ldo3_reg: regulator@5 { /* VDD_3V3A 3.3V rail */ + regulator-name = "vdd_3v3a"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; }; ldo4_reg: regulator@6 { /* VDD_3V3B 3.3V rail */ + regulator-name = "vdd_3v3b"; + regulator-always-on; }; }; }; diff --git a/arch/arm/boot/dts/cros-ec-keyboard.dtsi b/arch/arm/boot/dts/cros-ec-keyboard.dtsi index 9c7fb0acae79..4e42f30cb318 100644 --- a/arch/arm/boot/dts/cros-ec-keyboard.dtsi +++ b/arch/arm/boot/dts/cros-ec-keyboard.dtsi @@ -22,6 +22,7 @@ MATRIX_KEY(0x00, 0x02, KEY_F1) MATRIX_KEY(0x00, 0x03, KEY_B) MATRIX_KEY(0x00, 0x04, KEY_F10) + MATRIX_KEY(0x00, 0x05, KEY_RO) MATRIX_KEY(0x00, 0x06, KEY_N) MATRIX_KEY(0x00, 0x08, KEY_EQUAL) MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT) @@ -34,6 +35,7 @@ MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE) MATRIX_KEY(0x01, 0x09, KEY_F9) MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE) + MATRIX_KEY(0x01, 0x0c, KEY_HENKAN) MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL) MATRIX_KEY(0x02, 0x01, KEY_TAB) @@ -45,6 +47,7 @@ MATRIX_KEY(0x02, 0x07, KEY_102ND) MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE) MATRIX_KEY(0x02, 0x09, KEY_F8) + MATRIX_KEY(0x02, 0x0a, KEY_YEN) MATRIX_KEY(0x03, 0x01, KEY_GRAVE) MATRIX_KEY(0x03, 0x02, KEY_F2) @@ -53,6 +56,7 @@ MATRIX_KEY(0x03, 0x06, KEY_6) MATRIX_KEY(0x03, 0x08, KEY_MINUS) MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH) + MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN) MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL) MATRIX_KEY(0x04, 0x01, KEY_A) diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index aa465904f6cc..096f68be99e2 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -686,7 +686,8 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts index 4e1b60581782..803738414086 100644 --- a/arch/arm/boot/dts/dra72-evm.dts +++ b/arch/arm/boot/dts/dra72-evm.dts @@ -587,9 +587,10 @@ &dcan1 { status = "ok"; - pinctrl-names = "default", "sleep"; - pinctrl-0 = <&dcan1_pins_default>; + pinctrl-names = "default", "sleep", "active"; + pinctrl-0 = <&dcan1_pins_sleep>; pinctrl-1 = <&dcan1_pins_sleep>; + pinctrl-2 = <&dcan1_pins_default>; }; &qspi { diff --git a/arch/arm/boot/dts/imx23.dtsi b/arch/arm/boot/dts/imx23.dtsi index c892d58e8dad..b995333ea22b 100644 --- a/arch/arm/boot/dts/imx23.dtsi +++ b/arch/arm/boot/dts/imx23.dtsi @@ -468,6 +468,7 @@ interrupts = <36 37 38 39 40 41 42 43 44>; status = "disabled"; clocks = <&clks 26>; + #io-channel-cells = <1>; }; spdif@80054000 { diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index bc215e4b75fd..b69be5c499cf 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -108,7 +108,7 @@ }; gpt1: timer@10003000 { - compatible = "fsl,imx27-gpt", "fsl,imx1-gpt"; + compatible = "fsl,imx27-gpt", "fsl,imx21-gpt"; reg = <0x10003000 0x1000>; interrupts = <26>; clocks = <&clks IMX27_CLK_GPT1_IPG_GATE>, @@ -117,7 +117,7 @@ }; gpt2: timer@10004000 { - compatible = "fsl,imx27-gpt", "fsl,imx1-gpt"; + compatible = "fsl,imx27-gpt", "fsl,imx21-gpt"; reg = <0x10004000 0x1000>; interrupts = <25>; clocks = <&clks IMX27_CLK_GPT2_IPG_GATE>, @@ -126,7 +126,7 @@ }; gpt3: timer@10005000 { - compatible = "fsl,imx27-gpt", "fsl,imx1-gpt"; + compatible = "fsl,imx27-gpt", "fsl,imx21-gpt"; reg = <0x10005000 0x1000>; interrupts = <24>; clocks = <&clks IMX27_CLK_GPT3_IPG_GATE>, @@ -376,7 +376,7 @@ }; gpt4: timer@10019000 { - compatible = "fsl,imx27-gpt", "fsl,imx1-gpt"; + compatible = "fsl,imx27-gpt", "fsl,imx21-gpt"; reg = <0x10019000 0x1000>; interrupts = <4>; clocks = <&clks IMX27_CLK_GPT4_IPG_GATE>, @@ -385,7 +385,7 @@ }; gpt5: timer@1001a000 { - compatible = "fsl,imx27-gpt", "fsl,imx1-gpt"; + compatible = "fsl,imx27-gpt", "fsl,imx21-gpt"; reg = <0x1001a000 0x1000>; interrupts = <3>; clocks = <&clks IMX27_CLK_GPT5_IPG_GATE>, @@ -436,7 +436,7 @@ }; gpt6: timer@1001f000 { - compatible = "fsl,imx27-gpt", "fsl,imx1-gpt"; + compatible = "fsl,imx27-gpt", "fsl,imx21-gpt"; reg = <0x1001f000 0x1000>; interrupts = <2>; clocks = <&clks IMX27_CLK_GPT6_IPG_GATE>, diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 181ae5ebf23f..ab4ba39f2ed9 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -295,9 +295,10 @@ &tve { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_vga_sync>; + ddc-i2c-bus = <&i2c2>; fsl,tve-mode = "vga"; - fsl,hsync-pin = <4>; - fsl,vsync-pin = <6>; + fsl,hsync-pin = <7>; /* IPU DI1 PIN7 via EIM_OE */ + fsl,vsync-pin = <8>; /* IPU DI1 PIN8 via EIM_RW */ status = "okay"; }; diff --git a/arch/arm/boot/dts/k2e.dtsi b/arch/arm/boot/dts/k2e.dtsi index 50e555eab50d..1b6494fbdb91 100644 --- a/arch/arm/boot/dts/k2e.dtsi +++ b/arch/arm/boot/dts/k2e.dtsi @@ -86,7 +86,7 @@ gpio,syscon-dev = <&devctrl 0x240>; }; - pcie@21020000 { + pcie1: pcie@21020000 { compatible = "ti,keystone-pcie","snps,dw-pcie"; clocks = <&clkpcie1>; clock-names = "pcie"; @@ -96,6 +96,7 @@ ranges = <0x81000000 0 0 0x23260000 0x4000 0x4000 0x82000000 0 0x60000000 0x60000000 0 0x10000000>; + status = "disabled"; device_type = "pci"; num-lanes = <2>; diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi index c06542b2c954..e7a6f6deabb6 100644 --- a/arch/arm/boot/dts/keystone.dtsi +++ b/arch/arm/boot/dts/keystone.dtsi @@ -286,7 +286,7 @@ ti,syscon-dev = <&devctrl 0x2a0>; }; - pcie@21800000 { + pcie0: pcie@21800000 { compatible = "ti,keystone-pcie", "snps,dw-pcie"; clocks = <&clkpcie>; clock-names = "pcie"; @@ -296,6 +296,7 @@ ranges = <0x81000000 0 0 0x23250000 0 0x4000 0x82000000 0 0x50000000 0x50000000 0 0x10000000>; + status = "disabled"; device_type = "pci"; num-lanes = <2>; diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi index 233c69e50ae3..df8908adb0cb 100644 --- a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi +++ b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi @@ -120,7 +120,7 @@ lcd0: display@0 { compatible = "lgphilips,lb035q02"; - label = "lcd"; + label = "lcd35"; reg = <1>; /* CS1 */ spi-max-frequency = <10000000>; diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi index f5395b7da912..048fd216970a 100644 --- a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi +++ b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi @@ -98,7 +98,7 @@ lcd0: display@0 { compatible = "samsung,lte430wq-f0c", "panel-dpi"; - label = "lcd"; + label = "lcd43"; pinctrl-names = "default"; pinctrl-0 = <<e430_pins>; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index f884d6adb71e..7d31c6ff246f 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -551,6 +551,7 @@ reg = <0x4a066000 0x100>; interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "mmu_dsp"; + #iommu-cells = <0>; }; mmu_ipu: mmu@55082000 { @@ -558,6 +559,7 @@ reg = <0x55082000 0x100>; interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "mmu_ipu"; + #iommu-cells = <0>; ti,iommu-bus-err-back; }; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 7d24ae0306b5..c8fd648a7108 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -612,6 +612,7 @@ reg = <0x4a066000 0x100>; interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "mmu_dsp"; + #iommu-cells = <0>; }; mmu_ipu: mmu@55082000 { @@ -619,6 +620,7 @@ reg = <0x55082000 0x100>; interrupts = <GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; ti,hwmods = "mmu_ipu"; + #iommu-cells = <0>; ti,iommu-bus-err-back; }; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts index 71468a7eb28f..5e17fd147728 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts @@ -60,27 +60,27 @@ rxc-skew-ps = <2000>; }; -&mmc0 { - vmmc-supply = <®ulator_3_3v>; - vqmmc-supply = <®ulator_3_3v>; -}; - -&usb1 { - status = "okay"; -}; - &gpio2 { status = "okay"; }; -&i2c1{ +&i2c1 { status = "okay"; - accel1: accel1@53{ - compatible = "adxl34x"; + accel1: accelerometer@53 { + compatible = "adi,adxl345"; reg = <0x53>; - interrupt-parent = < &portc >; + interrupt-parent = <&portc>; interrupts = <3 2>; }; }; + +&mmc0 { + vmmc-supply = <®ulator_3_3v>; + vqmmc-supply = <®ulator_3_3v>; +}; + +&usb1 { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/spear1310-evb.dts b/arch/arm/boot/dts/spear1310-evb.dts index d42c84b1df8d..e48857249ce7 100644 --- a/arch/arm/boot/dts/spear1310-evb.dts +++ b/arch/arm/boot/dts/spear1310-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr1310 Evaluation Baord * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear1310.dtsi b/arch/arm/boot/dts/spear1310.dtsi index 9d342920695a..54bc6d3cf290 100644 --- a/arch/arm/boot/dts/spear1310.dtsi +++ b/arch/arm/boot/dts/spear1310.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr1310 SoCs * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear1340-evb.dts b/arch/arm/boot/dts/spear1340-evb.dts index b23e05ed1d60..c611f5606dfe 100644 --- a/arch/arm/boot/dts/spear1340-evb.dts +++ b/arch/arm/boot/dts/spear1340-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr1340 Evaluation Baord * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index 13e1aa33daa2..df2232d767ed 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr1340 SoCs * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index 40accc87e3a2..14594ce8c18a 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr13xx SoCs * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear300-evb.dts b/arch/arm/boot/dts/spear300-evb.dts index 5de1431653e4..e859e8288bcd 100644 --- a/arch/arm/boot/dts/spear300-evb.dts +++ b/arch/arm/boot/dts/spear300-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr300 Evaluation Baord * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi index f79b3dfaabe6..f4e92e599729 100644 --- a/arch/arm/boot/dts/spear300.dtsi +++ b/arch/arm/boot/dts/spear300.dtsi @@ -1,7 +1,7 @@ /* * DTS file for SPEAr300 SoC * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear310-evb.dts b/arch/arm/boot/dts/spear310-evb.dts index b09632963d15..070f2c1b7851 100644 --- a/arch/arm/boot/dts/spear310-evb.dts +++ b/arch/arm/boot/dts/spear310-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr310 Evaluation Baord * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi index 95372080eea6..da210b454753 100644 --- a/arch/arm/boot/dts/spear310.dtsi +++ b/arch/arm/boot/dts/spear310.dtsi @@ -1,7 +1,7 @@ /* * DTS file for SPEAr310 SoC * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts index fdedbb514102..1b1034477923 100644 --- a/arch/arm/boot/dts/spear320-evb.dts +++ b/arch/arm/boot/dts/spear320-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr320 Evaluation Baord * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi index ffea342aeec9..22be6e5edaac 100644 --- a/arch/arm/boot/dts/spear320.dtsi +++ b/arch/arm/boot/dts/spear320.dtsi @@ -1,7 +1,7 @@ /* * DTS file for SPEAr320 SoC * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi index f0e3fcf8e323..118135d75899 100644 --- a/arch/arm/boot/dts/spear3xx.dtsi +++ b/arch/arm/boot/dts/spear3xx.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr3xx SoCs * - * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com> + * Copyright 2012 Viresh Kumar <vireshk@kernel.org> * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/ste-ccu8540.dts b/arch/arm/boot/dts/ste-ccu8540.dts index 32dd55e5f4e6..6eaaf638e52e 100644 --- a/arch/arm/boot/dts/ste-ccu8540.dts +++ b/arch/arm/boot/dts/ste-ccu8540.dts @@ -17,6 +17,13 @@ model = "ST-Ericsson U8540 platform with Device Tree"; compatible = "st-ericsson,ccu8540", "st-ericsson,u8540"; + /* This stablilizes the serial port enumeration */ + aliases { + serial0 = &ux500_serial0; + serial1 = &ux500_serial1; + serial2 = &ux500_serial2; + }; + memory@0 { device_type = "memory"; reg = <0x20000000 0x1f000000>, <0xc0000000 0x3f000000>; diff --git a/arch/arm/boot/dts/ste-ccu9540.dts b/arch/arm/boot/dts/ste-ccu9540.dts index 651c56d400a4..c8b815819cfe 100644 --- a/arch/arm/boot/dts/ste-ccu9540.dts +++ b/arch/arm/boot/dts/ste-ccu9540.dts @@ -16,6 +16,13 @@ model = "ST-Ericsson CCU9540 platform with Device Tree"; compatible = "st-ericsson,ccu9540", "st-ericsson,u9540"; + /* This stablilizes the serial port enumeration */ + aliases { + serial0 = &ux500_serial0; + serial1 = &ux500_serial1; + serial2 = &ux500_serial2; + }; + memory { reg = <0x00000000 0x20000000>; }; diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index 853684ad7773..a75f3289e653 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -971,7 +971,7 @@ power-domains = <&pm_domains DOMAIN_VAPE>; }; - uart@80120000 { + ux500_serial0: uart@80120000 { compatible = "arm,pl011", "arm,primecell"; reg = <0x80120000 0x1000>; interrupts = <0 11 IRQ_TYPE_LEVEL_HIGH>; @@ -986,7 +986,7 @@ status = "disabled"; }; - uart@80121000 { + ux500_serial1: uart@80121000 { compatible = "arm,pl011", "arm,primecell"; reg = <0x80121000 0x1000>; interrupts = <0 19 IRQ_TYPE_LEVEL_HIGH>; @@ -1001,7 +1001,7 @@ status = "disabled"; }; - uart@80007000 { + ux500_serial2: uart@80007000 { compatible = "arm,pl011", "arm,primecell"; reg = <0x80007000 0x1000>; interrupts = <0 26 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi index 744c1e3a744d..6d8ce154347e 100644 --- a/arch/arm/boot/dts/ste-href.dtsi +++ b/arch/arm/boot/dts/ste-href.dtsi @@ -32,11 +32,11 @@ status = "okay"; }; + /* This UART is unused and thus left disabled */ uart@80121000 { pinctrl-names = "default", "sleep"; pinctrl-0 = <&uart1_default_mode>; pinctrl-1 = <&uart1_sleep_mode>; - status = "okay"; }; uart@80007000 { diff --git a/arch/arm/boot/dts/ste-hrefprev60-stuib.dts b/arch/arm/boot/dts/ste-hrefprev60-stuib.dts index 2b1cb5b584b6..18e9795a94f9 100644 --- a/arch/arm/boot/dts/ste-hrefprev60-stuib.dts +++ b/arch/arm/boot/dts/ste-hrefprev60-stuib.dts @@ -17,6 +17,13 @@ model = "ST-Ericsson HREF (pre-v60) and ST UIB"; compatible = "st-ericsson,mop500", "st-ericsson,u8500"; + /* This stablilizes the serial port enumeration */ + aliases { + serial0 = &ux500_serial0; + serial1 = &ux500_serial1; + serial2 = &ux500_serial2; + }; + soc { /* Reset line for the BU21013 touchscreen */ i2c@80110000 { diff --git a/arch/arm/boot/dts/ste-hrefprev60-tvk.dts b/arch/arm/boot/dts/ste-hrefprev60-tvk.dts index 59523f866812..24739914e689 100644 --- a/arch/arm/boot/dts/ste-hrefprev60-tvk.dts +++ b/arch/arm/boot/dts/ste-hrefprev60-tvk.dts @@ -16,4 +16,11 @@ / { model = "ST-Ericsson HREF (pre-v60) and TVK1281618 UIB"; compatible = "st-ericsson,mop500", "st-ericsson,u8500"; + + /* This stablilizes the serial port enumeration */ + aliases { + serial0 = &ux500_serial0; + serial1 = &ux500_serial1; + serial2 = &ux500_serial2; + }; }; diff --git a/arch/arm/boot/dts/ste-hrefprev60.dtsi b/arch/arm/boot/dts/ste-hrefprev60.dtsi index 7f3975b58d16..b0278f4c486c 100644 --- a/arch/arm/boot/dts/ste-hrefprev60.dtsi +++ b/arch/arm/boot/dts/ste-hrefprev60.dtsi @@ -23,6 +23,11 @@ }; soc { + /* Enable UART1 on this board */ + uart@80121000 { + status = "okay"; + }; + i2c@80004000 { tps61052@33 { compatible = "tps61052"; diff --git a/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts b/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts index 8c6a2de56cf1..c2e1ba019a2f 100644 --- a/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts +++ b/arch/arm/boot/dts/ste-hrefv60plus-stuib.dts @@ -19,6 +19,13 @@ model = "ST-Ericsson HREF (v60+) and ST UIB"; compatible = "st-ericsson,hrefv60+", "st-ericsson,u8500"; + /* This stablilizes the serial port enumeration */ + aliases { + serial0 = &ux500_serial0; + serial1 = &ux500_serial1; + serial2 = &ux500_serial2; + }; + soc { /* Reset line for the BU21013 touchscreen */ i2c@80110000 { diff --git a/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts b/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts index d53cccdce776..ebd8547e98f1 100644 --- a/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts +++ b/arch/arm/boot/dts/ste-hrefv60plus-tvk.dts @@ -18,4 +18,11 @@ / { model = "ST-Ericsson HREF (v60+) and TVK1281618 UIB"; compatible = "st-ericsson,hrefv60+", "st-ericsson,u8500"; + + /* This stablilizes the serial port enumeration */ + aliases { + serial0 = &ux500_serial0; + serial1 = &ux500_serial1; + serial2 = &ux500_serial2; + }; }; diff --git a/arch/arm/boot/dts/ste-hrefv60plus.dtsi b/arch/arm/boot/dts/ste-hrefv60plus.dtsi index a4bc9e77d640..810cda743b6d 100644 --- a/arch/arm/boot/dts/ste-hrefv60plus.dtsi +++ b/arch/arm/boot/dts/ste-hrefv60plus.dtsi @@ -43,15 +43,26 @@ <&vaudio_hf_hrefv60_mode>, <&gbf_hrefv60_mode>, <&hdtv_hrefv60_mode>, - <&touch_hrefv60_mode>; + <&touch_hrefv60_mode>, + <&gpios_hrefv60_mode>; sdi0 { - /* SD card detect GPIO pin, extend default state */ sdi0_default_mode: sdi0_default { + /* SD card detect GPIO pin, extend default state */ default_hrefv60_cfg1 { pins = "GPIO95_E8"; ste,config = <&gpio_in_pu>; }; + /* VMMCI level-shifter enable */ + default_hrefv60_cfg2 { + pins = "GPIO169_D22"; + ste,config = <&gpio_out_lo>; + }; + /* VMMCI level-shifter voltage select */ + default_hrefv60_cfg3 { + pins = "GPIO5_AG6"; + ste,config = <&gpio_out_hi>; + }; }; }; ipgpio { @@ -213,6 +224,16 @@ }; }; }; + gpios { + /* Dangling GPIO pins */ + gpios_hrefv60_mode: gpios_hrefv60 { + default_cfg1 { + /* Normally UART1 RXD, now dangling */ + pins = "GPIO4_AH6"; + ste,config = <&in_pu>; + }; + }; + }; }; }; }; diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index 9edadc37719f..32a5ccb14e7e 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -18,6 +18,13 @@ model = "Calao Systems Snowball platform with device tree"; compatible = "calaosystems,snowball-a9500", "st-ericsson,u9500"; + /* This stablilizes the serial port enumeration */ + aliases { + serial0 = &ux500_serial0; + serial1 = &ux500_serial1; + serial2 = &ux500_serial2; + }; + memory { reg = <0x00000000 0x20000000>; }; @@ -223,11 +230,11 @@ status = "okay"; }; + /* This UART is unused and thus left disabled */ uart@80121000 { pinctrl-names = "default", "sleep"; pinctrl-0 = <&uart1_default_mode>; pinctrl-1 = <&uart1_sleep_mode>; - status = "okay"; }; uart@80007000 { @@ -452,7 +459,21 @@ pins = "GPIO21_AB3"; /* DAT31DIR */ ste,config = <&out_hi>; }; - + /* SD card detect GPIO pin, extend default state */ + snowball_cfg2 { + pins = "GPIO218_AH11"; + ste,config = <&gpio_in_pu>; + }; + /* VMMCI level-shifter enable */ + snowball_cfg3 { + pins = "GPIO217_AH12"; + ste,config = <&gpio_out_lo>; + }; + /* VMMCI level-shifter voltage select */ + snowball_cfg4 { + pins = "GPIO228_AJ6"; + ste,config = <&gpio_out_hi>; + }; }; }; ssp0 { diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 83c50193626c..30b3bc1666d2 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -13,6 +13,7 @@ generic-y += kdebug.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += msgbuf.h generic-y += param.h generic-y += parport.h diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 6f225acc07c5..b7f6fb462ea0 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -286,7 +286,7 @@ extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x); */ static inline phys_addr_t __virt_to_idmap(unsigned long x) { - if (arch_virt_to_idmap) + if (IS_ENABLED(CONFIG_MMU) && arch_virt_to_idmap) return arch_virt_to_idmap(x); else return __virt_to_phys(x); diff --git a/arch/arm/include/asm/mm-arch-hooks.h b/arch/arm/include/asm/mm-arch-hooks.h deleted file mode 100644 index 7056660c7cc4..000000000000 --- a/arch/arm/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_ARM_MM_ARCH_HOOKS_H -#define _ASM_ARM_MM_ARCH_HOOKS_H - -#endif /* _ASM_ARM_MM_ARCH_HOOKS_H */ diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 357f57ea83f4..54272e0be713 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -818,12 +818,13 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) break; - of_node_put(dn); if (cpu >= nr_cpu_ids) { pr_warn("Failed to find logical CPU for %s\n", dn->name); + of_node_put(dn); break; } + of_node_put(dn); irqs[i] = cpu; cpumask_set_cpu(cpu, &pmu->supported_cpus); diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c index 1a4d232796be..38269358fd25 100644 --- a/arch/arm/kernel/reboot.c +++ b/arch/arm/kernel/reboot.c @@ -50,7 +50,7 @@ static void __soft_restart(void *addr) flush_cache_all(); /* Switch to the identity mapping. */ - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset = (phys_reset_t)(unsigned long)virt_to_idmap(cpu_reset); phys_reset((unsigned long)addr); /* Should never get here. */ diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 80bad29d609a..8c4467fad837 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -291,8 +291,6 @@ void __init imx_gpc_check_dt(void) } } -#ifdef CONFIG_PM_GENERIC_DOMAINS - static void _imx6q_pm_pu_power_off(struct generic_pm_domain *genpd) { int iso, iso2sw; @@ -399,7 +397,6 @@ static struct genpd_onecell_data imx_gpc_onecell_data = { static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) { struct clk *clk; - bool is_off; int i; imx6q_pu_domain.reg = pu_reg; @@ -416,18 +413,13 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) } imx6q_pu_domain.num_clks = i; - is_off = IS_ENABLED(CONFIG_PM); - if (is_off) { - _imx6q_pm_pu_power_off(&imx6q_pu_domain.base); - } else { - /* - * Enable power if compiled without CONFIG_PM in case the - * bootloader disabled it. - */ - imx6q_pm_pu_power_on(&imx6q_pu_domain.base); - } + /* Enable power always in case bootloader disabled it. */ + imx6q_pm_pu_power_on(&imx6q_pu_domain.base); + + if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) + return 0; - pm_genpd_init(&imx6q_pu_domain.base, NULL, is_off); + pm_genpd_init(&imx6q_pu_domain.base, NULL, false); return of_genpd_add_provider_onecell(dev->of_node, &imx_gpc_onecell_data); @@ -437,13 +429,6 @@ clk_err: return -EINVAL; } -#else -static inline int imx_gpc_genpd_init(struct device *dev, struct regulator *reg) -{ - return 0; -} -#endif /* CONFIG_PM_GENERIC_DOMAINS */ - static int imx_gpc_probe(struct platform_device *pdev) { struct regulator *pu_reg; diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index ecc04ff13e95..4a023e8d1bdb 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -60,6 +60,7 @@ config SOC_AM43XX select ARM_GIC select MACH_OMAP_GENERIC select MIGHT_HAVE_CACHE_L2X0 + select HAVE_ARM_SCU config SOC_DRA7XX bool "TI DRA7XX" diff --git a/arch/arm/mach-pxa/capc7117.c b/arch/arm/mach-pxa/capc7117.c index c092730749b9..bf366b39fa61 100644 --- a/arch/arm/mach-pxa/capc7117.c +++ b/arch/arm/mach-pxa/capc7117.c @@ -24,6 +24,7 @@ #include <linux/ata_platform.h> #include <linux/serial_8250.h> #include <linux/gpio.h> +#include <linux/regulator/machine.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -144,6 +145,8 @@ static void __init capc7117_init(void) capc7117_uarts_init(); capc7117_ide_init(); + + regulator_has_full_constraints(); } MACHINE_START(CAPC7117, diff --git a/arch/arm/mach-pxa/cm-x2xx.c b/arch/arm/mach-pxa/cm-x2xx.c index bb99f59a36d8..a17a91eb8e9a 100644 --- a/arch/arm/mach-pxa/cm-x2xx.c +++ b/arch/arm/mach-pxa/cm-x2xx.c @@ -13,6 +13,7 @@ #include <linux/syscore_ops.h> #include <linux/irq.h> #include <linux/gpio.h> +#include <linux/regulator/machine.h> #include <linux/dm9000.h> #include <linux/leds.h> @@ -466,6 +467,8 @@ static void __init cmx2xx_init(void) cmx2xx_init_ac97(); cmx2xx_init_touchscreen(); cmx2xx_init_leds(); + + regulator_has_full_constraints(); } static void __init cmx2xx_init_irq(void) diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 4d3588d26c2a..5851f4c254c1 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -835,6 +835,8 @@ static void __init cm_x300_init(void) cm_x300_init_ac97(); cm_x300_init_wi2wi(); cm_x300_init_bl(); + + regulator_has_full_constraints(); } static void __init cm_x300_fixup(struct tag *tags, char **cmdline) diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c index 5f9d9303b346..3503826333c7 100644 --- a/arch/arm/mach-pxa/colibri-pxa270.c +++ b/arch/arm/mach-pxa/colibri-pxa270.c @@ -18,6 +18,7 @@ #include <linux/mtd/partitions.h> #include <linux/mtd/physmap.h> #include <linux/platform_device.h> +#include <linux/regulator/machine.h> #include <linux/ucb1400.h> #include <asm/mach/arch.h> @@ -294,6 +295,8 @@ static void __init colibri_pxa270_init(void) printk(KERN_ERR "Illegal colibri_pxa270_baseboard type %d\n", colibri_pxa270_baseboard); } + + regulator_has_full_constraints(); } /* The "Income s.r.o. SH-Dmaster PXA270 SBC" board can be booted either diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c index 51531ecffca8..9d7072b04045 100644 --- a/arch/arm/mach-pxa/em-x270.c +++ b/arch/arm/mach-pxa/em-x270.c @@ -1306,6 +1306,8 @@ static void __init em_x270_init(void) em_x270_init_i2c(); em_x270_init_camera(); em_x270_userspace_consumers_init(); + + regulator_has_full_constraints(); } MACHINE_START(EM_X270, "Compulab EM-X270") diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index c98511c5abd1..9b0eb0252af6 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -26,6 +26,7 @@ #include <linux/spi/spi.h> #include <linux/spi/pxa2xx_spi.h> #include <linux/can/platform/mcp251x.h> +#include <linux/regulator/machine.h> #include "generic.h" @@ -185,6 +186,8 @@ static void __init icontrol_init(void) mxm_8x10_mmc_init(); icontrol_can_init(); + + regulator_has_full_constraints(); } MACHINE_START(ICONTROL, "iControl/SafeTcam boards using Embedian MXM-8x10 CoM") diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c index 872dcb20e757..066e3a250ee0 100644 --- a/arch/arm/mach-pxa/trizeps4.c +++ b/arch/arm/mach-pxa/trizeps4.c @@ -26,6 +26,7 @@ #include <linux/dm9000.h> #include <linux/mtd/physmap.h> #include <linux/mtd/partitions.h> +#include <linux/regulator/machine.h> #include <linux/i2c/pxa-i2c.h> #include <asm/types.h> @@ -534,6 +535,8 @@ static void __init trizeps4_init(void) BCR_writew(trizeps_conxs_bcr); board_backlight_power(1); + + regulator_has_full_constraints(); } static void __init trizeps4_map_io(void) diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c index aa89488f961e..54122a983ae3 100644 --- a/arch/arm/mach-pxa/vpac270.c +++ b/arch/arm/mach-pxa/vpac270.c @@ -24,6 +24,7 @@ #include <linux/dm9000.h> #include <linux/ucb1400.h> #include <linux/ata_platform.h> +#include <linux/regulator/machine.h> #include <linux/regulator/max1586.h> #include <linux/i2c/pxa-i2c.h> @@ -711,6 +712,8 @@ static void __init vpac270_init(void) vpac270_ts_init(); vpac270_rtc_init(); vpac270_ide_init(); + + regulator_has_full_constraints(); } MACHINE_START(VPAC270, "Voipac PXA270") diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index ac2ae5c71ab4..6158566fa0f7 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -868,6 +868,8 @@ static void __init zeus_init(void) i2c_register_board_info(0, ARRAY_AND_SIZE(zeus_i2c_devices)); pxa2xx_set_spi_info(3, &pxa2xx_spi_ssp3_master_info); spi_register_board_info(zeus_spi_board_info, ARRAY_SIZE(zeus_spi_board_info)); + + regulator_has_full_constraints(); } static struct map_desc zeus_io_desc[] __initdata = { diff --git a/arch/arm/mach-spear/generic.h b/arch/arm/mach-spear/generic.h index a99d90a4d09c..06640914d9a0 100644 --- a/arch/arm/mach-spear/generic.h +++ b/arch/arm/mach-spear/generic.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009-2012 ST Microelectronics * Rajeev Kumar <rajeev-dlh.kumar@st.com> - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/include/mach/irqs.h b/arch/arm/mach-spear/include/mach/irqs.h index 92da0a8c6bce..7058720c5278 100644 --- a/arch/arm/mach-spear/include/mach/irqs.h +++ b/arch/arm/mach-spear/include/mach/irqs.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009-2012 ST Microelectronics * Rajeev Kumar <rajeev-dlh.kumar@st.com> - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/include/mach/misc_regs.h b/arch/arm/mach-spear/include/mach/misc_regs.h index 935639ce59ba..cfaf7c665b58 100644 --- a/arch/arm/mach-spear/include/mach/misc_regs.h +++ b/arch/arm/mach-spear/include/mach/misc_regs.h @@ -4,7 +4,7 @@ * Miscellaneous registers definitions for SPEAr3xx machine family * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/include/mach/spear.h b/arch/arm/mach-spear/include/mach/spear.h index f2d6a0176575..5ed841ccf8a3 100644 --- a/arch/arm/mach-spear/include/mach/spear.h +++ b/arch/arm/mach-spear/include/mach/spear.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009,2012 ST Microelectronics * Rajeev Kumar<rajeev-dlh.kumar@st.com> - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/include/mach/uncompress.h b/arch/arm/mach-spear/include/mach/uncompress.h index 51b2dc93e4da..8439b9c12edb 100644 --- a/arch/arm/mach-spear/include/mach/uncompress.h +++ b/arch/arm/mach-spear/include/mach/uncompress.h @@ -4,7 +4,7 @@ * Serial port stubs for kernel decompress status messages * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/pl080.c b/arch/arm/mach-spear/pl080.c index cfa1199d0f4a..b4529f3e0ee9 100644 --- a/arch/arm/mach-spear/pl080.c +++ b/arch/arm/mach-spear/pl080.c @@ -4,7 +4,7 @@ * DMAC pl080 definitions for SPEAr platform * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/pl080.h b/arch/arm/mach-spear/pl080.h index eb6590ded40d..608dec6725ae 100644 --- a/arch/arm/mach-spear/pl080.h +++ b/arch/arm/mach-spear/pl080.h @@ -4,7 +4,7 @@ * DMAC pl080 definitions for SPEAr platform * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/restart.c b/arch/arm/mach-spear/restart.c index ce5e098c4888..b4342155a783 100644 --- a/arch/arm/mach-spear/restart.c +++ b/arch/arm/mach-spear/restart.c @@ -4,7 +4,7 @@ * SPEAr platform specific restart functions * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear1310.c b/arch/arm/mach-spear/spear1310.c index d9ce4d8000f0..cd5d375d91f0 100644 --- a/arch/arm/mach-spear/spear1310.c +++ b/arch/arm/mach-spear/spear1310.c @@ -4,7 +4,7 @@ * SPEAr1310 machine source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear1340.c b/arch/arm/mach-spear/spear1340.c index 3f3c0f124bd3..94594d5a446c 100644 --- a/arch/arm/mach-spear/spear1340.c +++ b/arch/arm/mach-spear/spear1340.c @@ -4,7 +4,7 @@ * SPEAr1340 machine source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c index 2e463a93468d..b7afce6795f4 100644 --- a/arch/arm/mach-spear/spear13xx.c +++ b/arch/arm/mach-spear/spear13xx.c @@ -4,7 +4,7 @@ * SPEAr13XX machines common source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear300.c b/arch/arm/mach-spear/spear300.c index b52e48f342f4..5b32edda2276 100644 --- a/arch/arm/mach-spear/spear300.c +++ b/arch/arm/mach-spear/spear300.c @@ -4,7 +4,7 @@ * SPEAr300 machine source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear310.c b/arch/arm/mach-spear/spear310.c index ed2029db391f..86a44ac7ff67 100644 --- a/arch/arm/mach-spear/spear310.c +++ b/arch/arm/mach-spear/spear310.c @@ -4,7 +4,7 @@ * SPEAr310 machine source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear320.c b/arch/arm/mach-spear/spear320.c index bf634b32a930..d45d751926c5 100644 --- a/arch/arm/mach-spear/spear320.c +++ b/arch/arm/mach-spear/spear320.c @@ -4,7 +4,7 @@ * SPEAr320 machine source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear3xx.c b/arch/arm/mach-spear/spear3xx.c index bf3b1fd8cb23..23394ac76cf2 100644 --- a/arch/arm/mach-spear/spear3xx.c +++ b/arch/arm/mach-spear/spear3xx.c @@ -4,7 +4,7 @@ * SPEAr3XX machines common source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar <viresh.linux@gmail.com> + * Viresh Kumar <vireshk@kernel.org> * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1ced8a0f7a52..cba12f34ff77 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1971,7 +1971,7 @@ static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) { int next_bitmap; - if (mapping->nr_bitmaps > mapping->extensions) + if (mapping->nr_bitmaps >= mapping->extensions) return -EINVAL; next_bitmap = mapping->nr_bitmaps; diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 0716bbe19872..de2b246fed38 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -274,7 +274,10 @@ __v7_ca15mp_setup: __v7_b15mp_setup: __v7_ca17mp_setup: mov r10, #0 -1: +1: adr r12, __v7_setup_stack @ the local stack + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 + bl v7_invalidate_l1 + ldmia r12, {r0-r5, lr} #ifdef CONFIG_SMP ALT_SMP(mrc p15, 0, r0, c1, c0, 1) ALT_UP(mov r0, #(1 << 6)) @ fake it for UP @@ -283,7 +286,7 @@ __v7_ca17mp_setup: orreq r0, r0, r10 @ Enable CPU-specific SMP bits mcreq p15, 0, r0, c1, c0, 1 #endif - b __v7_setup + b __v7_setup_cont /* * Errata: @@ -413,10 +416,11 @@ __v7_pj4b_setup: __v7_setup: adr r12, __v7_setup_stack @ the local stack - stmia r12, {r0-r5, r7, r9, r11, lr} + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 bl v7_invalidate_l1 - ldmia r12, {r0-r5, r7, r9, r11, lr} + ldmia r12, {r0-r5, lr} +__v7_setup_cont: and r0, r9, #0xff000000 @ ARM? teq r0, #0x41000000 bne __errata_finish @@ -480,7 +484,7 @@ ENDPROC(__v7_setup) .align 2 __v7_setup_stack: - .space 4 * 11 @ 11 registers + .space 4 * 7 @ 12 registers __INITDATA diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 4550d247e308..c011e2296cb1 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -74,32 +74,52 @@ struct jit_ctx { int bpf_jit_enable __read_mostly; -static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset) +static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret, + unsigned int size) +{ + void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size); + + if (!ptr) + return -EFAULT; + memcpy(ret, ptr, size); + return 0; +} + +static u64 jit_get_skb_b(struct sk_buff *skb, int offset) { u8 ret; int err; - err = skb_copy_bits(skb, offset, &ret, 1); + if (offset < 0) + err = call_neg_helper(skb, offset, &ret, 1); + else + err = skb_copy_bits(skb, offset, &ret, 1); return (u64)err << 32 | ret; } -static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset) +static u64 jit_get_skb_h(struct sk_buff *skb, int offset) { u16 ret; int err; - err = skb_copy_bits(skb, offset, &ret, 2); + if (offset < 0) + err = call_neg_helper(skb, offset, &ret, 2); + else + err = skb_copy_bits(skb, offset, &ret, 2); return (u64)err << 32 | ntohs(ret); } -static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) +static u64 jit_get_skb_w(struct sk_buff *skb, int offset) { u32 ret; int err; - err = skb_copy_bits(skb, offset, &ret, 4); + if (offset < 0) + err = call_neg_helper(skb, offset, &ret, 4); + else + err = skb_copy_bits(skb, offset, &ret, 4); return (u64)err << 32 | ntohl(ret); } @@ -536,9 +556,6 @@ static int build_body(struct jit_ctx *ctx) case BPF_LD | BPF_B | BPF_ABS: load_order = 0; load: - /* the interpreter will deal with the negative K */ - if ((int)k < 0) - return -ENOTSUPP; emit_mov_i(r_off, k, ctx); load_common: ctx->seen |= SEEN_DATA | SEEN_CALL; @@ -547,12 +564,24 @@ load_common: emit(ARM_SUB_I(r_scratch, r_skb_hl, 1 << load_order), ctx); emit(ARM_CMP_R(r_scratch, r_off), ctx); - condt = ARM_COND_HS; + condt = ARM_COND_GE; } else { emit(ARM_CMP_R(r_skb_hl, r_off), ctx); condt = ARM_COND_HI; } + /* + * test for negative offset, only if we are + * currently scheduled to take the fast + * path. this will update the flags so that + * the slowpath instruction are ignored if the + * offset is negative. + * + * for loard_order == 0 the HI condition will + * make loads at offset 0 take the slow path too. + */ + _emit(condt, ARM_CMP_I(r_off, 0), ctx); + _emit(condt, ARM_ADD_R(r_scratch, r_off, r_skb_data), ctx); @@ -860,9 +889,11 @@ b_epilogue: off = offsetof(struct sk_buff, vlan_tci); emit(ARM_LDRH_I(r_A, r_skb, off), ctx); if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) - OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx); - else - OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx); + OP_IMM3(ARM_AND, r_A, r_A, ~VLAN_TAG_PRESENT, ctx); + else { + OP_IMM3(ARM_LSR, r_A, r_A, 12, ctx); + OP_IMM3(ARM_AND, r_A, r_A, 0x1, ctx); + } break; case BPF_ANC | SKF_AD_QUEUE: ctx->seen |= SEEN_SKB; diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index b112a39834d0..70fd9ffb58cf 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -25,6 +25,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h generic-y += msi.h diff --git a/arch/arm64/include/asm/mm-arch-hooks.h b/arch/arm64/include/asm/mm-arch-hooks.h deleted file mode 100644 index 562b655f5ba9..000000000000 --- a/arch/arm64/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_ARM64_MM_ARCH_HOOKS_H -#define _ASM_ARM64_MM_ARCH_HOOKS_H - -#endif /* _ASM_ARM64_MM_ARCH_HOOKS_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f860bfda454a..e16351819fed 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -585,7 +585,8 @@ ENDPROC(el0_irq) * */ ENTRY(cpu_switch_to) - add x8, x0, #THREAD_CPU_CONTEXT + mov x10, #THREAD_CPU_CONTEXT + add x8, x0, x10 mov x9, sp stp x19, x20, [x8], #16 // store callee-saved registers stp x21, x22, [x8], #16 @@ -594,7 +595,7 @@ ENTRY(cpu_switch_to) stp x27, x28, [x8], #16 stp x29, x9, [x8], #16 str lr, [x8] - add x8, x1, #THREAD_CPU_CONTEXT + add x8, x1, x10 ldp x19, x20, [x8], #16 // restore callee-saved registers ldp x21, x22, [x8], #16 ldp x23, x24, [x8], #16 diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 240b75c0e94f..463fa2e7e34c 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -61,7 +61,7 @@ void __init init_IRQ(void) static bool migrate_one_irq(struct irq_desc *desc) { struct irq_data *d = irq_desc_get_irq_data(desc); - const struct cpumask *affinity = d->affinity; + const struct cpumask *affinity = irq_data_get_affinity_mask(d); struct irq_chip *c; bool ret = false; @@ -81,7 +81,7 @@ static bool migrate_one_irq(struct irq_desc *desc) if (!c->irq_set_affinity) pr_debug("IRQ%u: unable to set affinity\n", d->irq); else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) - cpumask_copy(d->affinity, affinity); + cpumask_copy(irq_data_get_affinity_mask(d), affinity); return ret; } diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 1d66afdfac07..f61f2dd67464 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += irq_work.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += param.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/avr32/include/asm/mm-arch-hooks.h b/arch/avr32/include/asm/mm-arch-hooks.h deleted file mode 100644 index 145452ffbdad..000000000000 --- a/arch/avr32/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_AVR32_MM_ARCH_HOOKS_H -#define _ASM_AVR32_MM_ARCH_HOOKS_H - -#endif /* _ASM_AVR32_MM_ARCH_HOOKS_H */ diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index d0f771be9e96..a124c55733db 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -18,6 +18,7 @@ #include <mach/pm.h> +static bool disable_cpu_idle_poll; static cycle_t read_cycle_count(struct clocksource *cs) { @@ -80,45 +81,45 @@ static int comparator_next_event(unsigned long delta, return 0; } -static void comparator_mode(enum clock_event_mode mode, - struct clock_event_device *evdev) +static int comparator_shutdown(struct clock_event_device *evdev) { - switch (mode) { - case CLOCK_EVT_MODE_ONESHOT: - pr_debug("%s: start\n", evdev->name); - /* FALLTHROUGH */ - case CLOCK_EVT_MODE_RESUME: + pr_debug("%s: %s\n", __func__, evdev->name); + sysreg_write(COMPARE, 0); + + if (disable_cpu_idle_poll) { + disable_cpu_idle_poll = false; /* - * If we're using the COUNT and COMPARE registers we - * need to force idle poll. + * Only disable idle poll if we have forced that + * in a previous call. */ - cpu_idle_poll_ctrl(true); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - sysreg_write(COMPARE, 0); - pr_debug("%s: stop\n", evdev->name); - if (evdev->mode == CLOCK_EVT_MODE_ONESHOT || - evdev->mode == CLOCK_EVT_MODE_RESUME) { - /* - * Only disable idle poll if we have forced that - * in a previous call. - */ - cpu_idle_poll_ctrl(false); - } - break; - default: - BUG(); + cpu_idle_poll_ctrl(false); } + return 0; +} + +static int comparator_set_oneshot(struct clock_event_device *evdev) +{ + pr_debug("%s: %s\n", __func__, evdev->name); + + disable_cpu_idle_poll = true; + /* + * If we're using the COUNT and COMPARE registers we + * need to force idle poll. + */ + cpu_idle_poll_ctrl(true); + + return 0; } static struct clock_event_device comparator = { - .name = "avr32_comparator", - .features = CLOCK_EVT_FEAT_ONESHOT, - .shift = 16, - .rating = 50, - .set_next_event = comparator_next_event, - .set_mode = comparator_mode, + .name = "avr32_comparator", + .features = CLOCK_EVT_FEAT_ONESHOT, + .shift = 16, + .rating = 50, + .set_next_event = comparator_next_event, + .set_state_shutdown = comparator_shutdown, + .set_state_oneshot = comparator_set_oneshot, + .tick_resume = comparator_set_oneshot, }; void read_persistent_clock(struct timespec *ts) diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 07051a63415d..61cd1e786a14 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -21,6 +21,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h generic-y += mutex.h diff --git a/arch/blackfin/include/asm/mm-arch-hooks.h b/arch/blackfin/include/asm/mm-arch-hooks.h deleted file mode 100644 index 1c5211ec338f..000000000000 --- a/arch/blackfin/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_BLACKFIN_MM_ARCH_HOOKS_H -#define _ASM_BLACKFIN_MM_ARCH_HOOKS_H - -#endif /* _ASM_BLACKFIN_MM_ARCH_HOOKS_H */ diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 7aeb32272975..f17c4dc6050c 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -26,6 +26,7 @@ generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += mmu.h generic-y += mmu_context.h diff --git a/arch/c6x/include/asm/mm-arch-hooks.h b/arch/c6x/include/asm/mm-arch-hooks.h deleted file mode 100644 index bb3c4a6ce8e9..000000000000 --- a/arch/c6x/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_C6X_MM_ARCH_HOOKS_H -#define _ASM_C6X_MM_ARCH_HOOKS_H - -#endif /* _ASM_C6X_MM_ARCH_HOOKS_H */ diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index d294f6aaff1d..ad2244f35bca 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += linkage.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += module.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/cris/include/asm/mm-arch-hooks.h b/arch/cris/include/asm/mm-arch-hooks.h deleted file mode 100644 index 314f774db2b0..000000000000 --- a/arch/cris/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_CRIS_MM_ARCH_HOOKS_H -#define _ASM_CRIS_MM_ARCH_HOOKS_H - -#endif /* _ASM_CRIS_MM_ARCH_HOOKS_H */ diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 30edce31e5c2..8e47b832cc76 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -4,5 +4,6 @@ generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/frv/include/asm/mm-arch-hooks.h b/arch/frv/include/asm/mm-arch-hooks.h deleted file mode 100644 index 51d13a870404..000000000000 --- a/arch/frv/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_FRV_MM_ARCH_HOOKS_H -#define _ASM_FRV_MM_ARCH_HOOKS_H - -#endif /* _ASM_FRV_MM_ARCH_HOOKS_H */ diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 00379d64f707..70e6ae1e7006 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -33,6 +33,7 @@ generic-y += linkage.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += mmu.h generic-y += mmu_context.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 5ade4a163558..daee37bd0999 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -28,6 +28,7 @@ generic-y += kmap_types.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h generic-y += pci.h diff --git a/arch/hexagon/include/asm/mm-arch-hooks.h b/arch/hexagon/include/asm/mm-arch-hooks.h deleted file mode 100644 index 05e8b939e416..000000000000 --- a/arch/hexagon/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_HEXAGON_MM_ARCH_HOOKS_H -#define _ASM_HEXAGON_MM_ARCH_HOOKS_H - -#endif /* _ASM_HEXAGON_MM_ARCH_HOOKS_H */ diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index ccff13d33fa2..9de3ba12f6b9 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += exec.h generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/ia64/include/asm/mm-arch-hooks.h b/arch/ia64/include/asm/mm-arch-hooks.h deleted file mode 100644 index ab4b5c698322..000000000000 --- a/arch/ia64/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_IA64_MM_ARCH_HOOKS_H -#define _ASM_IA64_MM_ARCH_HOOKS_H - -#endif /* _ASM_IA64_MM_ARCH_HOOKS_H */ diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index ba1cdc018731..e0eb704ca1fa 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += module.h generic-y += preempt.h generic-y += sections.h diff --git a/arch/m32r/include/asm/mm-arch-hooks.h b/arch/m32r/include/asm/mm-arch-hooks.h deleted file mode 100644 index 6d60b4750f41..000000000000 --- a/arch/m32r/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_M32R_MM_ARCH_HOOKS_H -#define _ASM_M32R_MM_ARCH_HOOKS_H - -#endif /* _ASM_M32R_MM_ARCH_HOOKS_H */ diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 33013dfcd3e1..c496d48a8c8d 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -125,6 +125,13 @@ endif # M68KCLASSIC if COLDFIRE +choice + prompt "ColdFire SoC type" + default M520x + help + Select the type of ColdFire System-on-Chip (SoC) that you want + to build for. + config M5206 bool "MCF5206" depends on !MMU @@ -174,9 +181,6 @@ config M525x help Freescale (Motorola) Coldfire 5251/5253 processor support. -config M527x - bool - config M5271 bool "MCF5271" depends on !MMU @@ -223,9 +227,6 @@ config M5307 help Motorola ColdFire 5307 processor support. -config M53xx - bool - config M532x bool "MCF532x" depends on !MMU @@ -251,9 +252,6 @@ config M5407 help Motorola ColdFire 5407 processor support. -config M54xx - bool - config M547x bool "MCF547x" select M54xx @@ -280,6 +278,17 @@ config M5441x help Freescale Coldfire 54410/54415/54416/54417/54418 processor support. +endchoice + +config M527x + bool + +config M53xx + bool + +config M54xx + bool + endif # COLDFIRE @@ -416,22 +425,18 @@ config HAVE_MBAR config HAVE_IPSBAR bool -config CLOCK_SET - bool "Enable setting the CPU clock frequency" - depends on COLDFIRE - default n - help - On some CPU's you do not need to know what the core CPU clock - frequency is. On these you can disable clock setting. On some - traditional 68K parts, and on all ColdFire parts you need to set - the appropriate CPU clock frequency. On these devices many of the - onboard peripherals derive their timing from the master CPU clock - frequency. - config CLOCK_FREQ int "Set the core clock frequency" + default "25000000" if M5206 + default "54000000" if M5206e + default "166666666" if M520x + default "140000000" if M5249 + default "150000000" if M527x || M523x + default "90000000" if M5307 + default "50000000" if M5407 + default "266000000" if M54xx default "66666666" - depends on CLOCK_SET + depends on COLDFIRE help Define the CPU clock frequency in use. This is the core clock frequency, it may or may not be the same as the external clock diff --git a/arch/m68k/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig index e7292f460af4..4c7b7938d53a 100644 --- a/arch/m68k/configs/m5208evb_defconfig +++ b/arch/m68k/configs/m5208evb_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,17 +12,12 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set -CONFIG_M520x=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=166666666 -CONFIG_CLOCK_DIV=2 -CONFIG_M5208EVB=y +# CONFIG_MMU is not set # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x40000000 CONFIG_RAMSIZE=0x2000000 CONFIG_VECTORBASE=0x40000000 CONFIG_KERNELBASE=0x40020000 -CONFIG_RAM16BIT=y CONFIG_BINFMT_FLAT=y CONFIG_NET=y CONFIG_PACKET=y @@ -40,24 +31,19 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_FEC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_BAUDRATE=115200 CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set @@ -68,8 +54,6 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_FULLDEBUG=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" +CONFIG_FULLDEBUG=y diff --git a/arch/m68k/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig index 0cd4b39f325b..a782f368650f 100644 --- a/arch/m68k/configs/m5249evb_defconfig +++ b/arch/m68k/configs/m5249evb_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,10 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5249=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=140000000 -CONFIG_CLOCK_DIV=2 CONFIG_M5249C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00800000 @@ -38,23 +32,18 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y @@ -62,7 +51,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set diff --git a/arch/m68k/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig index a60cb3509135..6f5fb92f5cbf 100644 --- a/arch/m68k/configs/m5272c3_defconfig +++ b/arch/m68k/configs/m5272c3_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,8 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5272=y -CONFIG_CLOCK_SET=y CONFIG_M5272C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00800000 @@ -36,23 +32,18 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_FEC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y @@ -61,6 +52,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" diff --git a/arch/m68k/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig index e6502ab7cb2f..b5d7cd1ce856 100644 --- a/arch/m68k/configs/m5275evb_defconfig +++ b/arch/m68k/configs/m5275evb_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,11 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5275=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=150000000 -CONFIG_CLOCK_DIV=2 -CONFIG_M5275EVB=y # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00000000 @@ -39,24 +32,19 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_FEC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y @@ -65,8 +53,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set diff --git a/arch/m68k/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig index 023812abd2e6..1b4c09461c40 100644 --- a/arch/m68k/configs/m5307c3_defconfig +++ b/arch/m68k/configs/m5307c3_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -16,10 +12,8 @@ CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5307=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=90000000 -CONFIG_CLOCK_DIV=2 CONFIG_M5307C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00800000 @@ -38,16 +32,11 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y CONFIG_SLIP=y CONFIG_SLIP_COMPRESSED=y @@ -56,21 +45,17 @@ CONFIG_SLIP_COMPRESSED=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y # CONFIG_DNOTIFY is not set CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_FULLDEBUG=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set +CONFIG_FULLDEBUG=y diff --git a/arch/m68k/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig index 557b39f3be90..275ad543d4bc 100644 --- a/arch/m68k/configs/m5407c3_defconfig +++ b/arch/m68k/configs/m5407c3_defconfig @@ -1,10 +1,6 @@ -# CONFIG_MMU is not set -CONFIG_EXPERIMENTAL=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -17,9 +13,8 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set CONFIG_M5407=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=50000000 CONFIG_M5407C3=y CONFIG_RAMBASE=0x00000000 CONFIG_RAMSIZE=0x00000000 @@ -38,22 +33,17 @@ CONFIG_INET=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_UCLINUX=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=y # CONFIG_INPUT is not set # CONFIG_VT is not set +# CONFIG_UNIX98_PTYS is not set CONFIG_SERIAL_MCF=y CONFIG_SERIAL_MCF_CONSOLE=y -# CONFIG_UNIX98_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set @@ -63,8 +53,5 @@ CONFIG_EXT2_FS=y CONFIG_ROMFS_FS=y CONFIG_ROMFS_BACKED_BY_MTD=y # CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BOOTPARAM=y CONFIG_BOOTPARAM_STRING="root=/dev/mtdblock0" -# CONFIG_CRC32 is not set diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig index c5018a68819b..4f4ccd13c11b 100644 --- a/arch/m68k/configs/m5475evb_defconfig +++ b/arch/m68k/configs/m5475evb_defconfig @@ -1,11 +1,7 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_SYSCTL_SYSCALL=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -20,19 +16,16 @@ CONFIG_MODULES=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set CONFIG_COLDFIRE=y -CONFIG_M547x=y -CONFIG_CLOCK_SET=y -CONFIG_CLOCK_FREQ=266000000 # CONFIG_4KSTACKS is not set CONFIG_RAMBASE=0x0 CONFIG_RAMSIZE=0x2000000 CONFIG_VECTORBASE=0x0 CONFIG_MBAR=0xff000000 CONFIG_KERNELBASE=0x20000 +CONFIG_PCI=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 1555bc189c7d..eb85bd9c6180 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += mutex.h generic-y += percpu.h diff --git a/arch/m68k/include/asm/coldfire.h b/arch/m68k/include/asm/coldfire.h index c94557b91448..50aa4dac9ca2 100644 --- a/arch/m68k/include/asm/coldfire.h +++ b/arch/m68k/include/asm/coldfire.h @@ -19,7 +19,7 @@ * in any case new boards come along from time to time that have yet * another different clocking frequency. */ -#ifdef CONFIG_CLOCK_SET +#ifdef CONFIG_CLOCK_FREQ #define MCF_CLK CONFIG_CLOCK_FREQ #else #error "Don't know what your ColdFire CPU clock frequency is??" diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h index 618c85d3c786..f55cad529400 100644 --- a/arch/m68k/include/asm/io_mm.h +++ b/arch/m68k/include/asm/io_mm.h @@ -413,7 +413,8 @@ static inline void isa_delay(void) #define writew(val, addr) out_le16((addr), (val)) #endif /* CONFIG_ATARI_ROM_ISA */ -#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) +#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) && \ + !(defined(CONFIG_PCI) && defined(CONFIG_COLDFIRE)) /* * We need to define dummy functions for GENERIC_IOMAP support. */ diff --git a/arch/m68k/include/asm/mm-arch-hooks.h b/arch/m68k/include/asm/mm-arch-hooks.h deleted file mode 100644 index 7e8709bc90ae..000000000000 --- a/arch/m68k/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_M68K_MM_ARCH_HOOKS_H -#define _ASM_M68K_MM_ARCH_HOOKS_H - -#endif /* _ASM_M68K_MM_ARCH_HOOKS_H */ diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 199320f3c345..df31353fd200 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -25,6 +25,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += msgbuf.h generic-y += mutex.h generic-y += param.h diff --git a/arch/metag/include/asm/mm-arch-hooks.h b/arch/metag/include/asm/mm-arch-hooks.h deleted file mode 100644 index b0072b2eb0de..000000000000 --- a/arch/metag/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_METAG_MM_ARCH_HOOKS_H -#define _ASM_METAG_MM_ARCH_HOOKS_H - -#endif /* _ASM_METAG_MM_ARCH_HOOKS_H */ diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 9989ddb169ca..2f222f355c4b 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += device.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += syscalls.h generic-y += trace_clock.h diff --git a/arch/microblaze/include/asm/mm-arch-hooks.h b/arch/microblaze/include/asm/mm-arch-hooks.h deleted file mode 100644 index 5c4065911bda..000000000000 --- a/arch/microblaze/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_MICROBLAZE_MM_ARCH_HOOKS_H -#define _ASM_MICROBLAZE_MM_ARCH_HOOKS_H - -#endif /* _ASM_MICROBLAZE_MM_ARCH_HOOKS_H */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index aab7e46cadd5..cee5f93e5712 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1427,6 +1427,7 @@ config CPU_MIPS64_R6 select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA select GENERIC_CSUM + select MIPS_O32_FP64_SUPPORT if MIPS32_O32 help Choose this option to build a kernel for release 6 or later of the MIPS64 architecture. New MIPS processors, starting with the Warrior @@ -2262,11 +2263,6 @@ config MIPS_CM config MIPS_CPC bool -config SB1_PASS_1_WORKAROUNDS - bool - depends on CPU_SB1_PASS_1 - default y - config SB1_PASS_2_WORKAROUNDS bool depends on CPU_SB1 && (CPU_SB1_PASS_2_2 || CPU_SB1_PASS_2) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index ae2dd59050f7..252e347958f3 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -181,13 +181,6 @@ cflags-$(CONFIG_CPU_R4000_WORKAROUNDS) += $(call cc-option,-mfix-r4000,) cflags-$(CONFIG_CPU_R4400_WORKAROUNDS) += $(call cc-option,-mfix-r4400,) cflags-$(CONFIG_CPU_DADDI_WORKAROUNDS) += $(call cc-option,-mno-daddi,) -ifdef CONFIG_CPU_SB1 -ifdef CONFIG_SB1_PASS_1_WORKAROUNDS -KBUILD_AFLAGS_MODULE += -msb1-pass1-workarounds -KBUILD_CFLAGS_MODULE += -msb1-pass1-workarounds -endif -endif - # For smartmips configurations, there are hundreds of warnings due to ISA overrides # in assembly and header files. smartmips is only supported for MIPS32r1 onwards # and there is no support for 64-bit. Various '.set mips2' or '.set mips3' or diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 7fe5c61a3cb8..1f8546081d20 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -7,6 +7,7 @@ generic-y += emergency-restart.h generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mutex.h generic-y += parport.h generic-y += percpu.h diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 084780b355aa..1b0625189835 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -74,7 +74,7 @@ static inline int __enable_fpu(enum fpu_mode mode) goto fr_common; case FPU_64BIT: -#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_CPU_MIPS32_R6) \ +#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) \ || defined(CONFIG_64BIT)) /* we only have a 32-bit FPU */ return SIGFPE; diff --git a/arch/mips/include/asm/mach-sibyte/war.h b/arch/mips/include/asm/mach-sibyte/war.h index 0a227d426b9c..520f8fc2c806 100644 --- a/arch/mips/include/asm/mach-sibyte/war.h +++ b/arch/mips/include/asm/mach-sibyte/war.h @@ -13,8 +13,7 @@ #define R4600_V2_HIT_CACHEOP_WAR 0 #define R5432_CP0_INTERRUPT_WAR 0 -#if defined(CONFIG_SB1_PASS_1_WORKAROUNDS) || \ - defined(CONFIG_SB1_PASS_2_WORKAROUNDS) +#if defined(CONFIG_SB1_PASS_2_WORKAROUNDS) #ifndef __ASSEMBLY__ extern int sb1250_m3_workaround_needed(void); diff --git a/arch/mips/include/asm/mm-arch-hooks.h b/arch/mips/include/asm/mm-arch-hooks.h deleted file mode 100644 index b5609fe8e475..000000000000 --- a/arch/mips/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_MIPS_MM_ARCH_HOOKS_H -#define _ASM_MIPS_MM_ARCH_HOOKS_H - -#endif /* _ASM_MIPS_MM_ARCH_HOOKS_H */ diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h index 6c9906f59c6e..9081d88ae44f 100644 --- a/arch/mips/include/uapi/asm/sigcontext.h +++ b/arch/mips/include/uapi/asm/sigcontext.h @@ -16,7 +16,7 @@ /* * Keep this struct definition in sync with the sigcontext fragment - * in arch/mips/tools/offset.c + * in arch/mips/kernel/asm-offsets.c */ struct sigcontext { unsigned int sc_regmask; /* Unused */ @@ -46,7 +46,7 @@ struct sigcontext { #include <linux/posix_types.h> /* * Keep this struct definition in sync with the sigcontext fragment - * in arch/mips/tools/offset.c + * in arch/mips/kernel/asm-offsets.c * * Warning: this structure illdefined with sc_badvaddr being just an unsigned * int so it was changed to unsigned long in 2.6.0-test1. This may break diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index beabe19ff8e5..072fab13645d 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -1,5 +1,5 @@ /* - * offset.c: Calculate pt_regs and task_struct offsets. + * asm-offsets.c: Calculate pt_regs and task_struct offsets. * * Copyright (C) 1996 David S. Miller * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003 Ralf Baechle diff --git a/arch/mips/sibyte/Kconfig b/arch/mips/sibyte/Kconfig index a8bb972fd9fd..cb9a095f5c5e 100644 --- a/arch/mips/sibyte/Kconfig +++ b/arch/mips/sibyte/Kconfig @@ -81,11 +81,6 @@ choice prompt "SiByte SOC Stepping" depends on SIBYTE_SB1xxx_SOC -config CPU_SB1_PASS_1 - bool "1250 Pass1" - depends on SIBYTE_SB1250 - select CPU_HAS_PREFETCH - config CPU_SB1_PASS_2_1250 bool "1250 An" depends on SIBYTE_SB1250 diff --git a/arch/mips/sibyte/common/bus_watcher.c b/arch/mips/sibyte/common/bus_watcher.c index 5581844c9194..41a1d2242211 100644 --- a/arch/mips/sibyte/common/bus_watcher.c +++ b/arch/mips/sibyte/common/bus_watcher.c @@ -81,10 +81,7 @@ void check_bus_watcher(void) { u32 status, l2_err, memio_err; -#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS - /* Destructive read, clears register and interrupt */ - status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS)); -#elif defined(CONFIG_SIBYTE_BCM112X) || defined(CONFIG_SIBYTE_SB1250) +#if defined(CONFIG_SIBYTE_BCM112X) || defined(CONFIG_SIBYTE_SB1250) /* Use non-destructive register */ status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS_DEBUG)); #elif defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80) diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c index 3c02b2a77ae9..9d3c24efdf4a 100644 --- a/arch/mips/sibyte/sb1250/setup.c +++ b/arch/mips/sibyte/sb1250/setup.c @@ -202,12 +202,10 @@ void __init sb1250_setup(void) switch (war_pass) { case K_SYS_REVISION_BCM1250_PASS1: -#ifndef CONFIG_SB1_PASS_1_WORKAROUNDS printk("@@@@ This is a BCM1250 A0-A2 (Pass 1) board, " "and the kernel doesn't have the proper " "workarounds compiled in. @@@@\n"); bad_config = 1; -#endif break; case K_SYS_REVISION_BCM1250_PASS2: /* Pass 2 - easiest as default for now - so many numbers */ diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index de30b0c88796..6edb9ee6128e 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h diff --git a/arch/mn10300/include/asm/mm-arch-hooks.h b/arch/mn10300/include/asm/mm-arch-hooks.h deleted file mode 100644 index e2029a652f4c..000000000000 --- a/arch/mn10300/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_MN10300_MM_ARCH_HOOKS_H -#define _ASM_MN10300_MM_ARCH_HOOKS_H - -#endif /* _ASM_MN10300_MM_ARCH_HOOKS_H */ diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 434639d510b3..914864eb5a25 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -30,6 +30,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += module.h generic-y += msgbuf.h diff --git a/arch/nios2/include/asm/mm-arch-hooks.h b/arch/nios2/include/asm/mm-arch-hooks.h deleted file mode 100644 index d7290dc68558..000000000000 --- a/arch/nios2/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_NIOS2_MM_ARCH_HOOKS_H -#define _ASM_NIOS2_MM_ARCH_HOOKS_H - -#endif /* _ASM_NIOS2_MM_ARCH_HOOKS_H */ diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index e5a693b16da2..443f44de1020 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -17,6 +17,7 @@ config OPENRISC select GENERIC_IRQ_SHOW select GENERIC_IOMAP select GENERIC_CPU_DEVICES + select HAVE_UID16 select GENERIC_ATOMIC64 select GENERIC_CLOCKEVENTS select GENERIC_STRNCPY_FROM_USER @@ -31,9 +32,6 @@ config MMU config HAVE_DMA_ATTRS def_bool y -config UID16 - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 2a2e39b8109a..2832f031fb11 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -36,6 +36,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += module.h generic-y += msgbuf.h diff --git a/arch/openrisc/include/asm/mm-arch-hooks.h b/arch/openrisc/include/asm/mm-arch-hooks.h deleted file mode 100644 index 6d33cb555fe1..000000000000 --- a/arch/openrisc/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_OPENRISC_MM_ARCH_HOOKS_H -#define _ASM_OPENRISC_MM_ARCH_HOOKS_H - -#endif /* _ASM_OPENRISC_MM_ARCH_HOOKS_H */ diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 12b341d04f88..f9b3a81aefcd 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mutex.h generic-y += param.h generic-y += percpu.h diff --git a/arch/parisc/include/asm/mm-arch-hooks.h b/arch/parisc/include/asm/mm-arch-hooks.h deleted file mode 100644 index 654ec63b0ee9..000000000000 --- a/arch/parisc/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_PARISC_MM_ARCH_HOOKS_H -#define _ASM_PARISC_MM_ARCH_HOOKS_H - -#endif /* _ASM_PARISC_MM_ARCH_HOOKS_H */ diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 3a08eae3318f..3edbb9fc91b4 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -72,7 +72,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { - if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED) + if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) { /* * This is the permanent pmd attached to the pgd; * cannot free it. @@ -81,6 +81,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) */ mm_inc_nr_pmds(mm); return; + } free_pages((unsigned long)pmd, PMD_ORDER); } diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index b91e74a817d8..9fac01cb89c1 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -158,6 +158,7 @@ extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); +extern void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize); extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, @@ -225,12 +226,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) return vcpu->arch.cr; } -static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) +static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.xer = val; } -static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) +static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu) { return vcpu->arch.xer; } diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 5bdfb5dd3400..72b6225aca73 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -25,6 +25,12 @@ #define XICS_MFRR 0xc #define XICS_IPI 2 /* interrupt source # for IPIs */ +/* Maximum number of threads per physical core */ +#define MAX_SMT_THREADS 8 + +/* Maximum number of subcores per physical core */ +#define MAX_SUBCORES 4 + #ifdef __ASSEMBLY__ #ifdef CONFIG_KVM_BOOK3S_HANDLER @@ -65,6 +71,19 @@ kvmppc_resume_\intno: #else /*__ASSEMBLY__ */ +struct kvmppc_vcore; + +/* Struct used for coordinating micro-threading (split-core) mode changes */ +struct kvm_split_mode { + unsigned long rpr; + unsigned long pmmar; + unsigned long ldbar; + u8 subcore_size; + u8 do_nap; + u8 napped[MAX_SMT_THREADS]; + struct kvmppc_vcore *master_vcs[MAX_SUBCORES]; +}; + /* * This struct goes in the PACA on 64-bit processors. It is used * to store host state that needs to be saved when we enter a guest @@ -100,6 +119,7 @@ struct kvmppc_host_state { u64 host_spurr; u64 host_dscr; u64 dec_expires; + struct kvm_split_mode *kvm_split_mode; #endif #ifdef CONFIG_PPC_BOOK3S_64 u64 cfar; @@ -112,7 +132,7 @@ struct kvmppc_book3s_shadow_vcpu { bool in_use; ulong gpr[14]; u32 cr; - u32 xer; + ulong xer; ulong ctr; ulong lr; ulong pc; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index 3286f0d6a86c..bc6e29e4dfd4 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -54,12 +54,12 @@ static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) return vcpu->arch.cr; } -static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) +static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.xer = val; } -static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) +static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu) { return vcpu->arch.xer; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d91f65b28e32..e187b6a56e7e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -205,8 +205,10 @@ struct revmap_entry { */ #define KVMPPC_RMAP_LOCK_BIT 63 #define KVMPPC_RMAP_RC_SHIFT 32 +#define KVMPPC_RMAP_CHG_SHIFT 48 #define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT) #define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT) +#define KVMPPC_RMAP_CHG_ORDER (0x3ful << KVMPPC_RMAP_CHG_SHIFT) #define KVMPPC_RMAP_PRESENT 0x100000000ul #define KVMPPC_RMAP_INDEX 0xfffffffful @@ -278,7 +280,9 @@ struct kvmppc_vcore { u16 last_cpu; u8 vcore_state; u8 in_guest; + struct kvmppc_vcore *master_vcore; struct list_head runnable_threads; + struct list_head preempt_list; spinlock_t lock; wait_queue_head_t wq; spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ @@ -300,12 +304,21 @@ struct kvmppc_vcore { #define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8) #define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0) -/* Values for vcore_state */ +/* This bit is used when a vcore exit is triggered from outside the vcore */ +#define VCORE_EXIT_REQ 0x10000 + +/* + * Values for vcore_state. + * Note that these are arranged such that lower values + * (< VCORE_SLEEPING) don't require stolen time accounting + * on load/unload, and higher values do. + */ #define VCORE_INACTIVE 0 -#define VCORE_SLEEPING 1 -#define VCORE_PREEMPT 2 -#define VCORE_RUNNING 3 -#define VCORE_EXITING 4 +#define VCORE_PREEMPT 1 +#define VCORE_PIGGYBACK 2 +#define VCORE_SLEEPING 3 +#define VCORE_RUNNING 4 +#define VCORE_EXITING 5 /* * Struct used to manage memory for a virtual processor area @@ -619,6 +632,7 @@ struct kvm_vcpu_arch { int trap; int state; int ptid; + int thread_cpu; bool timer_running; wait_queue_head_t cpu_run; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 8452335661a5..790f5d1d9a46 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -287,7 +287,7 @@ /* POWER8 Micro Partition Prefetch (MPP) parameters */ /* Address mask is common for LOGMPP instruction and MPPR SPR */ -#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000 +#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000ULL /* Bits 60 and 61 of MPP SPR should be set to one of the following */ /* Aborting the fetch is indeed setting 00 in the table size bits */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 98230579d99c..de62392f093c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -512,6 +512,8 @@ int main(void) DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst)); + DEFINE(VCPU_CPU, offsetof(struct kvm_vcpu, cpu)); + DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); @@ -674,7 +676,14 @@ int main(void) HSTATE_FIELD(HSTATE_DSCR, host_dscr); HSTATE_FIELD(HSTATE_DABR, dabr); HSTATE_FIELD(HSTATE_DECEXP, dec_expires); + HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode); DEFINE(IPI_PRIORITY, IPI_PRIORITY); + DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr)); + DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar)); + DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar)); + DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size)); + DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap)); + DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped)); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 3caec2c42105..c2024ac9d4e8 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -74,14 +74,14 @@ config KVM_BOOK3S_64 If unsure, say N. config KVM_BOOK3S_64_HV - tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" + tristate "KVM for POWER7 and later using hypervisor mode in host" depends on KVM_BOOK3S_64 && PPC_POWERNV select KVM_BOOK3S_HV_POSSIBLE select MMU_NOTIFIER select CMA ---help--- Support running unmodified book3s_64 guest kernels in - virtual machines on POWER7 and PPC970 processors that have + virtual machines on POWER7 and newer processors that have hypervisor mode available to the host. If you say Y here, KVM will use the hardware virtualization @@ -89,8 +89,8 @@ config KVM_BOOK3S_64_HV guest operating systems will run at full hardware speed using supervisor and user modes. However, this also means that KVM is not usable under PowerVM (pHyp), is only usable - on POWER7 (or later) processors and PPC970-family processors, - and cannot emulate a different processor from the host processor. + on POWER7 or later processors, and cannot emulate a + different processor from the host processor. If unsure, say N. diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 05ea8fc7f829..53285d52e381 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -240,7 +240,8 @@ void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); } -int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) +static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, + unsigned int priority) { int deliver = 1; int vec = 0; diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 2035d16a9262..d5c9bfeb0c9c 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -26,6 +26,7 @@ #include <asm/machdep.h> #include <asm/mmu_context.h> #include <asm/hw_irq.h> +#include "book3s.h" /* #define DEBUG_MMU */ /* #define DEBUG_SR */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index b982d925c710..79ad35abd196 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -28,6 +28,7 @@ #include <asm/mmu_context.h> #include <asm/hw_irq.h> #include "trace_pr.h" +#include "book3s.h" #define PTE_SIZE 12 diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index dab68b7af3f2..1f9c0a17f445 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -761,6 +761,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Harvest R and C */ rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; + if (rcbits & HPTE_R_C) + kvmppc_update_rmap_change(rmapp, psize); if (rcbits & ~rev[i].guest_rpte) { rev[i].guest_rpte = ptel | rcbits; note_hpte_modification(kvm, &rev[i]); @@ -927,8 +929,12 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) retry: lock_rmap(rmapp); if (*rmapp & KVMPPC_RMAP_CHANGED) { - *rmapp &= ~KVMPPC_RMAP_CHANGED; + long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER) + >> KVMPPC_RMAP_CHG_SHIFT; + *rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER); npages_dirty = 1; + if (change_order > PAGE_SHIFT) + npages_dirty = 1ul << (change_order - PAGE_SHIFT); } if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { unlock_rmap(rmapp); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 5a2bc4b0dfe5..2afdb9c0937d 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -23,6 +23,7 @@ #include <asm/reg.h> #include <asm/switch_to.h> #include <asm/time.h> +#include "book3s.h" #define OP_19_XOP_RFID 18 #define OP_19_XOP_RFI 50 diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 68d067ad4222..fad52f226c12 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -81,6 +81,12 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); #define MPP_BUFFER_ORDER 3 #endif +static int dynamic_mt_modes = 6; +module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)"); +static int target_smt_mode; +module_param(target_smt_mode, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)"); static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -114,7 +120,7 @@ static bool kvmppc_ipi_thread(int cpu) static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) { - int cpu = vcpu->cpu; + int cpu; wait_queue_head_t *wqp; wqp = kvm_arch_vcpu_wq(vcpu); @@ -123,10 +129,11 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) ++vcpu->stat.halt_wakeup; } - if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid)) + if (kvmppc_ipi_thread(vcpu->arch.thread_cpu)) return; /* CPU points to the first thread of the core */ + cpu = vcpu->cpu; if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu)) smp_send_reschedule(cpu); } @@ -164,6 +171,27 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) * they should never fail.) */ +static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc) +{ + unsigned long flags; + + spin_lock_irqsave(&vc->stoltb_lock, flags); + vc->preempt_tb = mftb(); + spin_unlock_irqrestore(&vc->stoltb_lock, flags); +} + +static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc) +{ + unsigned long flags; + + spin_lock_irqsave(&vc->stoltb_lock, flags); + if (vc->preempt_tb != TB_NIL) { + vc->stolen_tb += mftb() - vc->preempt_tb; + vc->preempt_tb = TB_NIL; + } + spin_unlock_irqrestore(&vc->stoltb_lock, flags); +} + static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -175,14 +203,9 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) * vcpu, and once it is set to this vcpu, only this task * ever sets it to NULL. */ - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { - spin_lock_irqsave(&vc->stoltb_lock, flags); - if (vc->preempt_tb != TB_NIL) { - vc->stolen_tb += mftb() - vc->preempt_tb; - vc->preempt_tb = TB_NIL; - } - spin_unlock_irqrestore(&vc->stoltb_lock, flags); - } + if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) + kvmppc_core_end_stolen(vc); + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && vcpu->arch.busy_preempt != TB_NIL) { @@ -197,11 +220,9 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) { - spin_lock_irqsave(&vc->stoltb_lock, flags); - vc->preempt_tb = mftb(); - spin_unlock_irqrestore(&vc->stoltb_lock, flags); - } + if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) + kvmppc_core_start_stolen(vc); + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) vcpu->arch.busy_preempt = mftb(); @@ -214,12 +235,12 @@ static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr) kvmppc_end_cede(vcpu); } -void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) +static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) { vcpu->arch.pvr = pvr; } -int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) +static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { unsigned long pcr = 0; struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -259,7 +280,7 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) return 0; } -void kvmppc_dump_regs(struct kvm_vcpu *vcpu) +static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) { int r; @@ -292,7 +313,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu) vcpu->arch.last_inst); } -struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) +static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) { int r; struct kvm_vcpu *v, *ret = NULL; @@ -641,7 +662,8 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) spin_lock(&vcore->lock); if (target->arch.state == KVMPPC_VCPU_RUNNABLE && - vcore->vcore_state != VCORE_INACTIVE) + vcore->vcore_state != VCORE_INACTIVE && + vcore->runner) target = vcore->runner; spin_unlock(&vcore->lock); @@ -1431,6 +1453,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) vcore->lpcr = kvm->arch.lpcr; vcore->first_vcpuid = core * threads_per_subcore; vcore->kvm = kvm; + INIT_LIST_HEAD(&vcore->preempt_list); vcore->mpp_buffer_is_valid = false; @@ -1655,6 +1678,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, spin_unlock(&vcore->lock); vcpu->arch.vcore = vcore; vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; + vcpu->arch.thread_cpu = -1; vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); @@ -1749,6 +1773,7 @@ static int kvmppc_grab_hwthread(int cpu) /* Ensure the thread won't go into the kernel if it wakes */ tpaca->kvm_hstate.kvm_vcpu = NULL; + tpaca->kvm_hstate.kvm_vcore = NULL; tpaca->kvm_hstate.napping = 0; smp_wmb(); tpaca->kvm_hstate.hwthread_req = 1; @@ -1780,26 +1805,32 @@ static void kvmppc_release_hwthread(int cpu) tpaca = &paca[cpu]; tpaca->kvm_hstate.hwthread_req = 0; tpaca->kvm_hstate.kvm_vcpu = NULL; + tpaca->kvm_hstate.kvm_vcore = NULL; + tpaca->kvm_hstate.kvm_split_mode = NULL; } -static void kvmppc_start_thread(struct kvm_vcpu *vcpu) +static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) { int cpu; struct paca_struct *tpaca; - struct kvmppc_vcore *vc = vcpu->arch.vcore; + struct kvmppc_vcore *mvc = vc->master_vcore; - if (vcpu->arch.timer_running) { - hrtimer_try_to_cancel(&vcpu->arch.dec_timer); - vcpu->arch.timer_running = 0; + cpu = vc->pcpu; + if (vcpu) { + if (vcpu->arch.timer_running) { + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + vcpu->arch.timer_running = 0; + } + cpu += vcpu->arch.ptid; + vcpu->cpu = mvc->pcpu; + vcpu->arch.thread_cpu = cpu; } - cpu = vc->pcpu + vcpu->arch.ptid; tpaca = &paca[cpu]; - tpaca->kvm_hstate.kvm_vcore = vc; - tpaca->kvm_hstate.ptid = vcpu->arch.ptid; - vcpu->cpu = vc->pcpu; - /* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */ - smp_wmb(); tpaca->kvm_hstate.kvm_vcpu = vcpu; + tpaca->kvm_hstate.ptid = cpu - mvc->pcpu; + /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */ + smp_wmb(); + tpaca->kvm_hstate.kvm_vcore = mvc; if (cpu != smp_processor_id()) kvmppc_ipi_thread(cpu); } @@ -1812,12 +1843,12 @@ static void kvmppc_wait_for_nap(void) for (loops = 0; loops < 1000000; ++loops) { /* * Check if all threads are finished. - * We set the vcpu pointer when starting a thread + * We set the vcore pointer when starting a thread * and the thread clears it when finished, so we look - * for any threads that still have a non-NULL vcpu ptr. + * for any threads that still have a non-NULL vcore ptr. */ for (i = 1; i < threads_per_subcore; ++i) - if (paca[cpu + i].kvm_hstate.kvm_vcpu) + if (paca[cpu + i].kvm_hstate.kvm_vcore) break; if (i == threads_per_subcore) { HMT_medium(); @@ -1827,7 +1858,7 @@ static void kvmppc_wait_for_nap(void) } HMT_medium(); for (i = 1; i < threads_per_subcore; ++i) - if (paca[cpu + i].kvm_hstate.kvm_vcpu) + if (paca[cpu + i].kvm_hstate.kvm_vcore) pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); } @@ -1890,6 +1921,278 @@ static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc) mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE); } +/* + * A list of virtual cores for each physical CPU. + * These are vcores that could run but their runner VCPU tasks are + * (or may be) preempted. + */ +struct preempted_vcore_list { + struct list_head list; + spinlock_t lock; +}; + +static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores); + +static void init_vcore_lists(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu); + spin_lock_init(&lp->lock); + INIT_LIST_HEAD(&lp->list); + } +} + +static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) +{ + struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores); + + vc->vcore_state = VCORE_PREEMPT; + vc->pcpu = smp_processor_id(); + if (vc->num_threads < threads_per_subcore) { + spin_lock(&lp->lock); + list_add_tail(&vc->preempt_list, &lp->list); + spin_unlock(&lp->lock); + } + + /* Start accumulating stolen time */ + kvmppc_core_start_stolen(vc); +} + +static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc) +{ + struct preempted_vcore_list *lp; + + kvmppc_core_end_stolen(vc); + if (!list_empty(&vc->preempt_list)) { + lp = &per_cpu(preempted_vcores, vc->pcpu); + spin_lock(&lp->lock); + list_del_init(&vc->preempt_list); + spin_unlock(&lp->lock); + } + vc->vcore_state = VCORE_INACTIVE; +} + +/* + * This stores information about the virtual cores currently + * assigned to a physical core. + */ +struct core_info { + int n_subcores; + int max_subcore_threads; + int total_threads; + int subcore_threads[MAX_SUBCORES]; + struct kvm *subcore_vm[MAX_SUBCORES]; + struct list_head vcs[MAX_SUBCORES]; +}; + +/* + * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7 + * respectively in 2-way micro-threading (split-core) mode. + */ +static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 }; + +static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc) +{ + int sub; + + memset(cip, 0, sizeof(*cip)); + cip->n_subcores = 1; + cip->max_subcore_threads = vc->num_threads; + cip->total_threads = vc->num_threads; + cip->subcore_threads[0] = vc->num_threads; + cip->subcore_vm[0] = vc->kvm; + for (sub = 0; sub < MAX_SUBCORES; ++sub) + INIT_LIST_HEAD(&cip->vcs[sub]); + list_add_tail(&vc->preempt_list, &cip->vcs[0]); +} + +static bool subcore_config_ok(int n_subcores, int n_threads) +{ + /* Can only dynamically split if unsplit to begin with */ + if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS) + return false; + if (n_subcores > MAX_SUBCORES) + return false; + if (n_subcores > 1) { + if (!(dynamic_mt_modes & 2)) + n_subcores = 4; + if (n_subcores > 2 && !(dynamic_mt_modes & 4)) + return false; + } + + return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS; +} + +static void init_master_vcore(struct kvmppc_vcore *vc) +{ + vc->master_vcore = vc; + vc->entry_exit_map = 0; + vc->in_guest = 0; + vc->napping_threads = 0; + vc->conferring_threads = 0; +} + +/* + * See if the existing subcores can be split into 3 (or fewer) subcores + * of at most two threads each, so we can fit in another vcore. This + * assumes there are at most two subcores and at most 6 threads in total. + */ +static bool can_split_piggybacked_subcores(struct core_info *cip) +{ + int sub, new_sub; + int large_sub = -1; + int thr; + int n_subcores = cip->n_subcores; + struct kvmppc_vcore *vc, *vcnext; + struct kvmppc_vcore *master_vc = NULL; + + for (sub = 0; sub < cip->n_subcores; ++sub) { + if (cip->subcore_threads[sub] <= 2) + continue; + if (large_sub >= 0) + return false; + large_sub = sub; + vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore, + preempt_list); + if (vc->num_threads > 2) + return false; + n_subcores += (cip->subcore_threads[sub] - 1) >> 1; + } + if (n_subcores > 3 || large_sub < 0) + return false; + + /* + * Seems feasible, so go through and move vcores to new subcores. + * Note that when we have two or more vcores in one subcore, + * all those vcores must have only one thread each. + */ + new_sub = cip->n_subcores; + thr = 0; + sub = large_sub; + list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) { + if (thr >= 2) { + list_del(&vc->preempt_list); + list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]); + /* vc->num_threads must be 1 */ + if (++cip->subcore_threads[new_sub] == 1) { + cip->subcore_vm[new_sub] = vc->kvm; + init_master_vcore(vc); + master_vc = vc; + ++cip->n_subcores; + } else { + vc->master_vcore = master_vc; + ++new_sub; + } + } + thr += vc->num_threads; + } + cip->subcore_threads[large_sub] = 2; + cip->max_subcore_threads = 2; + + return true; +} + +static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) +{ + int n_threads = vc->num_threads; + int sub; + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return false; + + if (n_threads < cip->max_subcore_threads) + n_threads = cip->max_subcore_threads; + if (subcore_config_ok(cip->n_subcores + 1, n_threads)) { + cip->max_subcore_threads = n_threads; + } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 && + vc->num_threads <= 2) { + /* + * We may be able to fit another subcore in by + * splitting an existing subcore with 3 or 4 + * threads into two 2-thread subcores, or one + * with 5 or 6 threads into three subcores. + * We can only do this if those subcores have + * piggybacked virtual cores. + */ + if (!can_split_piggybacked_subcores(cip)) + return false; + } else { + return false; + } + + sub = cip->n_subcores; + ++cip->n_subcores; + cip->total_threads += vc->num_threads; + cip->subcore_threads[sub] = vc->num_threads; + cip->subcore_vm[sub] = vc->kvm; + init_master_vcore(vc); + list_del(&vc->preempt_list); + list_add_tail(&vc->preempt_list, &cip->vcs[sub]); + + return true; +} + +static bool can_piggyback_subcore(struct kvmppc_vcore *pvc, + struct core_info *cip, int sub) +{ + struct kvmppc_vcore *vc; + int n_thr; + + vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore, + preempt_list); + + /* require same VM and same per-core reg values */ + if (pvc->kvm != vc->kvm || + pvc->tb_offset != vc->tb_offset || + pvc->pcr != vc->pcr || + pvc->lpcr != vc->lpcr) + return false; + + /* P8 guest with > 1 thread per core would see wrong TIR value */ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + (vc->num_threads > 1 || pvc->num_threads > 1)) + return false; + + n_thr = cip->subcore_threads[sub] + pvc->num_threads; + if (n_thr > cip->max_subcore_threads) { + if (!subcore_config_ok(cip->n_subcores, n_thr)) + return false; + cip->max_subcore_threads = n_thr; + } + + cip->total_threads += pvc->num_threads; + cip->subcore_threads[sub] = n_thr; + pvc->master_vcore = vc; + list_del(&pvc->preempt_list); + list_add_tail(&pvc->preempt_list, &cip->vcs[sub]); + + return true; +} + +/* + * Work out whether it is possible to piggyback the execution of + * vcore *pvc onto the execution of the other vcores described in *cip. + */ +static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip, + int target_threads) +{ + int sub; + + if (cip->total_threads + pvc->num_threads > target_threads) + return false; + for (sub = 0; sub < cip->n_subcores; ++sub) + if (cip->subcore_threads[sub] && + can_piggyback_subcore(pvc, cip, sub)) + return true; + + if (can_dynamic_split(pvc, cip)) + return true; + + return false; +} + static void prepare_threads(struct kvmppc_vcore *vc) { struct kvm_vcpu *vcpu, *vnext; @@ -1909,12 +2212,45 @@ static void prepare_threads(struct kvmppc_vcore *vc) } } -static void post_guest_process(struct kvmppc_vcore *vc) +static void collect_piggybacks(struct core_info *cip, int target_threads) { + struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores); + struct kvmppc_vcore *pvc, *vcnext; + + spin_lock(&lp->lock); + list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) { + if (!spin_trylock(&pvc->lock)) + continue; + prepare_threads(pvc); + if (!pvc->n_runnable) { + list_del_init(&pvc->preempt_list); + if (pvc->runner == NULL) { + pvc->vcore_state = VCORE_INACTIVE; + kvmppc_core_end_stolen(pvc); + } + spin_unlock(&pvc->lock); + continue; + } + if (!can_piggyback(pvc, cip, target_threads)) { + spin_unlock(&pvc->lock); + continue; + } + kvmppc_core_end_stolen(pvc); + pvc->vcore_state = VCORE_PIGGYBACK; + if (cip->total_threads >= target_threads) + break; + } + spin_unlock(&lp->lock); +} + +static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) +{ + int still_running = 0; u64 now; long ret; struct kvm_vcpu *vcpu, *vnext; + spin_lock(&vc->lock); now = get_tb(); list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, arch.run_list) { @@ -1933,17 +2269,36 @@ static void post_guest_process(struct kvmppc_vcore *vc) vcpu->arch.ret = ret; vcpu->arch.trap = 0; - if (vcpu->arch.ceded) { - if (!is_kvmppc_resume_guest(ret)) - kvmppc_end_cede(vcpu); - else + if (is_kvmppc_resume_guest(vcpu->arch.ret)) { + if (vcpu->arch.pending_exceptions) + kvmppc_core_prepare_to_enter(vcpu); + if (vcpu->arch.ceded) kvmppc_set_timer(vcpu); - } - if (!is_kvmppc_resume_guest(vcpu->arch.ret)) { + else + ++still_running; + } else { kvmppc_remove_runnable(vc, vcpu); wake_up(&vcpu->arch.cpu_run); } } + list_del_init(&vc->preempt_list); + if (!is_master) { + if (still_running > 0) { + kvmppc_vcore_preempt(vc); + } else if (vc->runner) { + vc->vcore_state = VCORE_PREEMPT; + kvmppc_core_start_stolen(vc); + } else { + vc->vcore_state = VCORE_INACTIVE; + } + if (vc->n_runnable > 0 && vc->runner == NULL) { + /* make sure there's a candidate runner awake */ + vcpu = list_first_entry(&vc->runnable_threads, + struct kvm_vcpu, arch.run_list); + wake_up(&vcpu->arch.cpu_run); + } + } + spin_unlock(&vc->lock); } /* @@ -1955,6 +2310,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) struct kvm_vcpu *vcpu, *vnext; int i; int srcu_idx; + struct core_info core_info; + struct kvmppc_vcore *pvc, *vcnext; + struct kvm_split_mode split_info, *sip; + int split, subcore_size, active; + int sub; + bool thr0_done; + unsigned long cmd_bit, stat_bit; + int pcpu, thr; + int target_threads; /* * Remove from the list any threads that have a signal pending @@ -1969,11 +2333,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) /* * Initialize *vc. */ - vc->entry_exit_map = 0; + init_master_vcore(vc); vc->preempt_tb = TB_NIL; - vc->in_guest = 0; - vc->napping_threads = 0; - vc->conferring_threads = 0; /* * Make sure we are running on primary threads, and that secondary @@ -1991,24 +2352,112 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) goto out; } - - vc->pcpu = smp_processor_id(); - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { - kvmppc_start_thread(vcpu); - kvmppc_create_dtl_entry(vcpu, vc); - trace_kvm_guest_enter(vcpu); + /* + * See if we could run any other vcores on the physical core + * along with this one. + */ + init_core_info(&core_info, vc); + pcpu = smp_processor_id(); + target_threads = threads_per_subcore; + if (target_smt_mode && target_smt_mode < target_threads) + target_threads = target_smt_mode; + if (vc->num_threads < target_threads) + collect_piggybacks(&core_info, target_threads); + + /* Decide on micro-threading (split-core) mode */ + subcore_size = threads_per_subcore; + cmd_bit = stat_bit = 0; + split = core_info.n_subcores; + sip = NULL; + if (split > 1) { + /* threads_per_subcore must be MAX_SMT_THREADS (8) here */ + if (split == 2 && (dynamic_mt_modes & 2)) { + cmd_bit = HID0_POWER8_1TO2LPAR; + stat_bit = HID0_POWER8_2LPARMODE; + } else { + split = 4; + cmd_bit = HID0_POWER8_1TO4LPAR; + stat_bit = HID0_POWER8_4LPARMODE; + } + subcore_size = MAX_SMT_THREADS / split; + sip = &split_info; + memset(&split_info, 0, sizeof(split_info)); + split_info.rpr = mfspr(SPRN_RPR); + split_info.pmmar = mfspr(SPRN_PMMAR); + split_info.ldbar = mfspr(SPRN_LDBAR); + split_info.subcore_size = subcore_size; + for (sub = 0; sub < core_info.n_subcores; ++sub) + split_info.master_vcs[sub] = + list_first_entry(&core_info.vcs[sub], + struct kvmppc_vcore, preempt_list); + /* order writes to split_info before kvm_split_mode pointer */ + smp_wmb(); + } + pcpu = smp_processor_id(); + for (thr = 0; thr < threads_per_subcore; ++thr) + paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; + + /* Initiate micro-threading (split-core) if required */ + if (cmd_bit) { + unsigned long hid0 = mfspr(SPRN_HID0); + + hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS; + mb(); + mtspr(SPRN_HID0, hid0); + isync(); + for (;;) { + hid0 = mfspr(SPRN_HID0); + if (hid0 & stat_bit) + break; + cpu_relax(); + } + split_info.do_nap = 1; /* ask secondaries to nap when done */ + } + + /* Start all the threads */ + active = 0; + for (sub = 0; sub < core_info.n_subcores; ++sub) { + thr = subcore_thread_map[sub]; + thr0_done = false; + active |= 1 << thr; + list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) { + pvc->pcpu = pcpu + thr; + list_for_each_entry(vcpu, &pvc->runnable_threads, + arch.run_list) { + kvmppc_start_thread(vcpu, pvc); + kvmppc_create_dtl_entry(vcpu, pvc); + trace_kvm_guest_enter(vcpu); + if (!vcpu->arch.ptid) + thr0_done = true; + active |= 1 << (thr + vcpu->arch.ptid); + } + /* + * We need to start the first thread of each subcore + * even if it doesn't have a vcpu. + */ + if (pvc->master_vcore == pvc && !thr0_done) + kvmppc_start_thread(NULL, pvc); + thr += pvc->num_threads; + } } - - /* Set this explicitly in case thread 0 doesn't have a vcpu */ - get_paca()->kvm_hstate.kvm_vcore = vc; - get_paca()->kvm_hstate.ptid = 0; + /* + * When doing micro-threading, poke the inactive threads as well. + * This gets them to the nap instruction after kvm_do_nap, + * which reduces the time taken to unsplit later. + */ + if (split > 1) + for (thr = 1; thr < threads_per_subcore; ++thr) + if (!(active & (1 << thr))) + kvmppc_ipi_thread(pcpu + thr); vc->vcore_state = VCORE_RUNNING; preempt_disable(); trace_kvmppc_run_core(vc, 0); - spin_unlock(&vc->lock); + for (sub = 0; sub < core_info.n_subcores; ++sub) + list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) + spin_unlock(&pvc->lock); kvm_guest_enter(); @@ -2019,32 +2468,58 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) __kvmppc_vcore_entry(); - spin_lock(&vc->lock); - if (vc->mpp_buffer) kvmppc_start_saving_l2_cache(vc); - /* disable sending of IPIs on virtual external irqs */ - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) - vcpu->cpu = -1; - /* wait for secondary threads to finish writing their state to memory */ - kvmppc_wait_for_nap(); - for (i = 0; i < threads_per_subcore; ++i) - kvmppc_release_hwthread(vc->pcpu + i); + srcu_read_unlock(&vc->kvm->srcu, srcu_idx); + + spin_lock(&vc->lock); /* prevent other vcpu threads from doing kvmppc_start_thread() now */ vc->vcore_state = VCORE_EXITING; - spin_unlock(&vc->lock); - srcu_read_unlock(&vc->kvm->srcu, srcu_idx); + /* wait for secondary threads to finish writing their state to memory */ + kvmppc_wait_for_nap(); + + /* Return to whole-core mode if we split the core earlier */ + if (split > 1) { + unsigned long hid0 = mfspr(SPRN_HID0); + unsigned long loops = 0; + + hid0 &= ~HID0_POWER8_DYNLPARDIS; + stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE; + mb(); + mtspr(SPRN_HID0, hid0); + isync(); + for (;;) { + hid0 = mfspr(SPRN_HID0); + if (!(hid0 & stat_bit)) + break; + cpu_relax(); + ++loops; + } + split_info.do_nap = 0; + } + + /* Let secondaries go back to the offline loop */ + for (i = 0; i < threads_per_subcore; ++i) { + kvmppc_release_hwthread(pcpu + i); + if (sip && sip->napped[i]) + kvmppc_ipi_thread(pcpu + i); + } + + spin_unlock(&vc->lock); /* make sure updates to secondary vcpu structs are visible now */ smp_mb(); kvm_guest_exit(); - preempt_enable(); + for (sub = 0; sub < core_info.n_subcores; ++sub) + list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub], + preempt_list) + post_guest_process(pvc, pvc == vc); spin_lock(&vc->lock); - post_guest_process(vc); + preempt_enable(); out: vc->vcore_state = VCORE_INACTIVE; @@ -2055,13 +2530,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) * Wait for some other vcpu thread to execute us, and * wake us up when we need to handle something in the host. */ -static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) +static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, + struct kvm_vcpu *vcpu, int wait_state) { DEFINE_WAIT(wait); prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); - if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) + if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { + spin_unlock(&vc->lock); schedule(); + spin_lock(&vc->lock); + } finish_wait(&vcpu->arch.cpu_run, &wait); } @@ -2137,9 +2616,21 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * this thread straight away and have it join in. */ if (!signal_pending(current)) { - if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) { + if (vc->vcore_state == VCORE_PIGGYBACK) { + struct kvmppc_vcore *mvc = vc->master_vcore; + if (spin_trylock(&mvc->lock)) { + if (mvc->vcore_state == VCORE_RUNNING && + !VCORE_IS_EXITING(mvc)) { + kvmppc_create_dtl_entry(vcpu, vc); + kvmppc_start_thread(vcpu, vc); + trace_kvm_guest_enter(vcpu); + } + spin_unlock(&mvc->lock); + } + } else if (vc->vcore_state == VCORE_RUNNING && + !VCORE_IS_EXITING(vc)) { kvmppc_create_dtl_entry(vcpu, vc); - kvmppc_start_thread(vcpu); + kvmppc_start_thread(vcpu, vc); trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { wake_up(&vc->wq); @@ -2149,10 +2640,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && !signal_pending(current)) { + if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL) + kvmppc_vcore_end_preempt(vc); + if (vc->vcore_state != VCORE_INACTIVE) { - spin_unlock(&vc->lock); - kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE); - spin_lock(&vc->lock); + kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE); continue; } list_for_each_entry_safe(v, vn, &vc->runnable_threads, @@ -2179,10 +2671,11 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (n_ceded == vc->n_runnable) { kvmppc_vcore_blocked(vc); } else if (should_resched()) { - vc->vcore_state = VCORE_PREEMPT; + kvmppc_vcore_preempt(vc); /* Let something else run */ cond_resched_lock(&vc->lock); - vc->vcore_state = VCORE_INACTIVE; + if (vc->vcore_state == VCORE_PREEMPT) + kvmppc_vcore_end_preempt(vc); } else { kvmppc_run_core(vc); } @@ -2191,11 +2684,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && (vc->vcore_state == VCORE_RUNNING || - vc->vcore_state == VCORE_EXITING)) { - spin_unlock(&vc->lock); - kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); - spin_lock(&vc->lock); - } + vc->vcore_state == VCORE_EXITING)) + kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE); if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { kvmppc_remove_runnable(vc, vcpu); @@ -2755,6 +3245,8 @@ static int kvmppc_book3s_init_hv(void) init_default_hcalls(); + init_vcore_lists(); + r = kvmppc_mmu_hv_init(); return r; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index ed2589d4593f..fd7006bf6b1a 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -110,14 +110,15 @@ void __init kvm_cma_reserve(void) long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, unsigned int yield_count) { - struct kvmppc_vcore *vc = vcpu->arch.vcore; + struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore; + int ptid = local_paca->kvm_hstate.ptid; int threads_running; int threads_ceded; int threads_conferring; u64 stop = get_tb() + 10 * tb_ticks_per_usec; int rv = H_SUCCESS; /* => don't yield */ - set_bit(vcpu->arch.ptid, &vc->conferring_threads); + set_bit(ptid, &vc->conferring_threads); while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) { threads_running = VCORE_ENTRY_MAP(vc); threads_ceded = vc->napping_threads; @@ -127,7 +128,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, break; } } - clear_bit(vcpu->arch.ptid, &vc->conferring_threads); + clear_bit(ptid, &vc->conferring_threads); return rv; } @@ -238,7 +239,8 @@ void kvmhv_commence_exit(int trap) { struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore; int ptid = local_paca->kvm_hstate.ptid; - int me, ee; + struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode; + int me, ee, i; /* Set our bit in the threads-exiting-guest map in the 0xff00 bits of vcore->entry_exit_map */ @@ -258,4 +260,26 @@ void kvmhv_commence_exit(int trap) */ if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER) kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid)); + + /* + * If we are doing dynamic micro-threading, interrupt the other + * subcores to pull them out of their guests too. + */ + if (!sip) + return; + + for (i = 0; i < MAX_SUBCORES; ++i) { + vc = sip->master_vcs[i]; + if (!vc) + break; + do { + ee = vc->entry_exit_map; + /* Already asked to exit? */ + if ((ee >> 8) != 0) + break; + } while (cmpxchg(&vc->entry_exit_map, ee, + ee | VCORE_EXIT_REQ) != ee); + if ((ee >> 8) == 0) + kvmhv_interrupt_vcore(vc, ee); + } } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index b027a89737b6..c1df9bb1e413 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -12,6 +12,7 @@ #include <linux/kvm_host.h> #include <linux/hugetlb.h> #include <linux/module.h> +#include <linux/log2.h> #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> @@ -97,25 +98,52 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, } EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); +/* Update the changed page order field of an rmap entry */ +void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize) +{ + unsigned long order; + + if (!psize) + return; + order = ilog2(psize); + order <<= KVMPPC_RMAP_CHG_SHIFT; + if (order > (*rmap & KVMPPC_RMAP_CHG_ORDER)) + *rmap = (*rmap & ~KVMPPC_RMAP_CHG_ORDER) | order; +} +EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change); + +/* Returns a pointer to the revmap entry for the page mapped by a HPTE */ +static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v, + unsigned long hpte_gr) +{ + struct kvm_memory_slot *memslot; + unsigned long *rmap; + unsigned long gfn; + + gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr)); + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); + if (!memslot) + return NULL; + + rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); + return rmap; +} + /* Remove this HPTE from the chain for a real page */ static void remove_revmap_chain(struct kvm *kvm, long pte_index, struct revmap_entry *rev, unsigned long hpte_v, unsigned long hpte_r) { struct revmap_entry *next, *prev; - unsigned long gfn, ptel, head; - struct kvm_memory_slot *memslot; + unsigned long ptel, head; unsigned long *rmap; unsigned long rcbits; rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); ptel = rev->guest_rpte |= rcbits; - gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); - memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); - if (!memslot) + rmap = revmap_for_hpte(kvm, hpte_v, ptel); + if (!rmap) return; - - rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); lock_rmap(rmap); head = *rmap & KVMPPC_RMAP_INDEX; @@ -131,6 +159,8 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; } *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; + if (rcbits & HPTE_R_C) + kvmppc_update_rmap_change(rmap, hpte_page_size(hpte_v, hpte_r)); unlock_rmap(rmap); } @@ -421,14 +451,20 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { - u64 pte1; - - pte1 = be64_to_cpu(hpte[1]); hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); - rb = compute_tlbie_rb(v, pte1, pte_index); + rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); - /* Read PTE low word after tlbie to get final R/C values */ - remove_revmap_chain(kvm, pte_index, rev, v, pte1); + /* + * The reference (R) and change (C) bits in a HPT + * entry can be set by hardware at any time up until + * the HPTE is invalidated and the TLB invalidation + * sequence has completed. This means that when + * removing a HPTE, we need to re-read the HPTE after + * the invalidation sequence has completed in order to + * obtain reliable values of R and C. + */ + remove_revmap_chain(kvm, pte_index, rev, v, + be64_to_cpu(hpte[1])); } r = rev->guest_rpte & ~HPTE_GR_RESERVED; note_hpte_modification(kvm, rev); @@ -655,6 +691,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, return H_SUCCESS; } +long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index) +{ + struct kvm *kvm = vcpu->kvm; + __be64 *hpte; + unsigned long v, r, gr; + struct revmap_entry *rev; + unsigned long *rmap; + long ret = H_NOT_FOUND; + + if (pte_index >= kvm->arch.hpt_npte) + return H_PARAMETER; + + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) + cpu_relax(); + v = be64_to_cpu(hpte[0]); + r = be64_to_cpu(hpte[1]); + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) + goto out; + + gr = rev->guest_rpte; + if (rev->guest_rpte & HPTE_R_R) { + rev->guest_rpte &= ~HPTE_R_R; + note_hpte_modification(kvm, rev); + } + if (v & HPTE_V_VALID) { + gr |= r & (HPTE_R_R | HPTE_R_C); + if (r & HPTE_R_R) { + kvmppc_clear_ref_hpte(kvm, hpte, pte_index); + rmap = revmap_for_hpte(kvm, v, gr); + if (rmap) { + lock_rmap(rmap); + *rmap |= KVMPPC_RMAP_REFERENCED; + unlock_rmap(rmap); + } + } + } + vcpu->arch.gpr[4] = gr; + ret = H_SUCCESS; + out: + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); + return ret; +} + +long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index) +{ + struct kvm *kvm = vcpu->kvm; + __be64 *hpte; + unsigned long v, r, gr; + struct revmap_entry *rev; + unsigned long *rmap; + long ret = H_NOT_FOUND; + + if (pte_index >= kvm->arch.hpt_npte) + return H_PARAMETER; + + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) + cpu_relax(); + v = be64_to_cpu(hpte[0]); + r = be64_to_cpu(hpte[1]); + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) + goto out; + + gr = rev->guest_rpte; + if (gr & HPTE_R_C) { + rev->guest_rpte &= ~HPTE_R_C; + note_hpte_modification(kvm, rev); + } + if (v & HPTE_V_VALID) { + /* need to make it temporarily absent so C is stable */ + hpte[0] |= cpu_to_be64(HPTE_V_ABSENT); + kvmppc_invalidate_hpte(kvm, hpte, pte_index); + r = be64_to_cpu(hpte[1]); + gr |= r & (HPTE_R_R | HPTE_R_C); + if (r & HPTE_R_C) { + unsigned long psize = hpte_page_size(v, r); + hpte[1] = cpu_to_be64(r & ~HPTE_R_C); + eieio(); + rmap = revmap_for_hpte(kvm, v, gr); + if (rmap) { + lock_rmap(rmap); + *rmap |= KVMPPC_RMAP_CHANGED; + kvmppc_update_rmap_change(rmap, psize); + unlock_rmap(rmap); + } + } + } + vcpu->arch.gpr[4] = gr; + ret = H_SUCCESS; + out: + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); + return ret; +} + void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 00e45b6d4f24..24f58076d49e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -67,14 +67,12 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, } /* Check if the core is loaded, if not, too hard */ - cpu = vcpu->cpu; + cpu = vcpu->arch.thread_cpu; if (cpu < 0 || cpu >= nr_cpu_ids) { this_icp->rm_action |= XICS_RM_KICK_VCPU; this_icp->rm_kick_target = vcpu; return; } - /* In SMT cpu will always point to thread 0, we adjust it */ - cpu += vcpu->arch.ptid; smp_mb(); kvmhv_rm_send_ipi(cpu); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index faa86e9c0551..472680f47d09 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -128,6 +128,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) subf r4, r4, r3 mtspr SPRN_DEC, r4 + /* hwthread_req may have got set by cede or no vcpu, so clear it */ + li r0, 0 + stb r0, HSTATE_HWTHREAD_REQ(r13) + /* * For external and machine check interrupts, we need * to call the Linux handler to process the interrupt. @@ -215,7 +219,6 @@ kvm_novcpu_wakeup: ld r5, HSTATE_KVM_VCORE(r13) li r0, 0 stb r0, HSTATE_NAPPING(r13) - stb r0, HSTATE_HWTHREAD_REQ(r13) /* check the wake reason */ bl kvmppc_check_wake_reason @@ -315,10 +318,10 @@ kvm_start_guest: cmpdi r3, 0 bge kvm_no_guest - /* get vcpu pointer, NULL if we have no vcpu to run */ - ld r4,HSTATE_KVM_VCPU(r13) - cmpdi r4,0 - /* if we have no vcpu to run, go back to sleep */ + /* get vcore pointer, NULL if we have nothing to run */ + ld r5,HSTATE_KVM_VCORE(r13) + cmpdi r5,0 + /* if we have no vcore to run, go back to sleep */ beq kvm_no_guest kvm_secondary_got_guest: @@ -327,21 +330,42 @@ kvm_secondary_got_guest: ld r6, PACA_DSCR_DEFAULT(r13) std r6, HSTATE_DSCR(r13) - /* Order load of vcore, ptid etc. after load of vcpu */ + /* On thread 0 of a subcore, set HDEC to max */ + lbz r4, HSTATE_PTID(r13) + cmpwi r4, 0 + bne 63f + lis r6, 0x7fff + ori r6, r6, 0xffff + mtspr SPRN_HDEC, r6 + /* and set per-LPAR registers, if doing dynamic micro-threading */ + ld r6, HSTATE_SPLIT_MODE(r13) + cmpdi r6, 0 + beq 63f + ld r0, KVM_SPLIT_RPR(r6) + mtspr SPRN_RPR, r0 + ld r0, KVM_SPLIT_PMMAR(r6) + mtspr SPRN_PMMAR, r0 + ld r0, KVM_SPLIT_LDBAR(r6) + mtspr SPRN_LDBAR, r0 + isync +63: + /* Order load of vcpu after load of vcore */ lwsync + ld r4, HSTATE_KVM_VCPU(r13) bl kvmppc_hv_entry /* Back from the guest, go back to nap */ - /* Clear our vcpu pointer so we don't come back in early */ + /* Clear our vcpu and vcore pointers so we don't come back in early */ li r0, 0 + std r0, HSTATE_KVM_VCPU(r13) /* - * Once we clear HSTATE_KVM_VCPU(r13), the code in + * Once we clear HSTATE_KVM_VCORE(r13), the code in * kvmppc_run_core() is going to assume that all our vcpu * state is visible in memory. This lwsync makes sure * that that is true. */ lwsync - std r0, HSTATE_KVM_VCPU(r13) + std r0, HSTATE_KVM_VCORE(r13) /* * At this point we have finished executing in the guest. @@ -374,16 +398,63 @@ kvm_no_guest: b power7_wakeup_loss 53: HMT_LOW - ld r4, HSTATE_KVM_VCPU(r13) - cmpdi r4, 0 + ld r5, HSTATE_KVM_VCORE(r13) + cmpdi r5, 0 + bne 60f + ld r3, HSTATE_SPLIT_MODE(r13) + cmpdi r3, 0 + beq kvm_no_guest + lbz r0, KVM_SPLIT_DO_NAP(r3) + cmpwi r0, 0 beq kvm_no_guest HMT_MEDIUM + b kvm_unsplit_nap +60: HMT_MEDIUM b kvm_secondary_got_guest 54: li r0, KVM_HWTHREAD_IN_KVM stb r0, HSTATE_HWTHREAD_STATE(r13) b kvm_no_guest +/* + * Here the primary thread is trying to return the core to + * whole-core mode, so we need to nap. + */ +kvm_unsplit_nap: + /* clear any pending message */ +BEGIN_FTR_SECTION + lis r6, (PPC_DBELL_SERVER << (63-36))@h + PPC_MSGCLR(6) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + /* Set kvm_split_mode.napped[tid] = 1 */ + ld r3, HSTATE_SPLIT_MODE(r13) + li r0, 1 + lhz r4, PACAPACAINDEX(r13) + clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */ + addi r4, r4, KVM_SPLIT_NAPPED + stbx r0, r3, r4 + /* Check the do_nap flag again after setting napped[] */ + sync + lbz r0, KVM_SPLIT_DO_NAP(r3) + cmpwi r0, 0 + beq 57f + li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4 + mfspr r4, SPRN_LPCR + rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) + mtspr SPRN_LPCR, r4 + isync + std r0, HSTATE_SCRATCH0(r13) + ptesync + ld r0, HSTATE_SCRATCH0(r13) +1: cmpd r0, r0 + bne 1b + nap + b . + +57: li r0, 0 + stbx r0, r3, r4 + b kvm_no_guest + /****************************************************************************** * * * Entry code * @@ -854,7 +925,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) cmpwi r0, 0 bne 21f HMT_LOW -20: lbz r0, VCORE_IN_GUEST(r5) +20: lwz r3, VCORE_ENTRY_EXIT(r5) + cmpwi r3, 0x100 + bge no_switch_exit + lbz r0, VCORE_IN_GUEST(r5) cmpwi r0, 0 beq 20b HMT_MEDIUM @@ -870,7 +944,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) blt hdec_soon ld r6, VCPU_CTR(r4) - lwz r7, VCPU_XER(r4) + ld r7, VCPU_XER(r4) mtctr r6 mtxer r7 @@ -985,9 +1059,13 @@ secondary_too_late: #endif 11: b kvmhv_switch_to_host +no_switch_exit: + HMT_MEDIUM + li r12, 0 + b 12f hdec_soon: li r12, BOOK3S_INTERRUPT_HV_DECREMENTER - stw r12, VCPU_TRAP(r4) +12: stw r12, VCPU_TRAP(r4) mr r9, r4 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING addi r3, r4, VCPU_TB_RMEXIT @@ -1103,7 +1181,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mfctr r3 mfxer r4 std r3, VCPU_CTR(r9) - stw r4, VCPU_XER(r9) + std r4, VCPU_XER(r9) /* If this is a page table miss then see if it's theirs or ours */ cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE @@ -1176,6 +1254,11 @@ mc_cont: ld r9, HSTATE_KVM_VCPU(r13) lwz r12, VCPU_TRAP(r9) + /* Stop others sending VCPU interrupts to this physical CPU */ + li r0, -1 + stw r0, VCPU_CPU(r9) + stw r0, VCPU_THREAD_CPU(r9) + /* Save guest CTRL register, set runlatch to 1 */ mfspr r6,SPRN_CTRLF stw r6,VCPU_CTRL(r9) @@ -1540,12 +1623,17 @@ kvmhv_switch_to_host: /* Primary thread waits for all the secondaries to exit guest */ 15: lwz r3,VCORE_ENTRY_EXIT(r5) - srwi r0,r3,8 + rlwinm r0,r3,32-8,0xff clrldi r3,r3,56 cmpw r3,r0 bne 15b isync + /* Did we actually switch to the guest at all? */ + lbz r6, VCORE_IN_GUEST(r5) + cmpwi r6, 0 + beq 19f + /* Primary thread switches back to host partition */ ld r6,KVM_HOST_SDR1(r4) lwz r7,KVM_HOST_LPID(r4) @@ -1589,7 +1677,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 18: /* Signal secondary CPUs to continue */ stb r0,VCORE_IN_GUEST(r5) - lis r8,0x7fff /* MAX_INT@h */ +19: lis r8,0x7fff /* MAX_INT@h */ mtspr SPRN_HDEC,r8 16: ld r8,KVM_HOST_LPCR(r4) @@ -1675,7 +1763,7 @@ kvmppc_hdsi: bl kvmppc_msr_interrupt fast_interrupt_c_return: 6: ld r7, VCPU_CTR(r9) - lwz r8, VCPU_XER(r9) + ld r8, VCPU_XER(r9) mtctr r7 mtxer r8 mr r4, r9 @@ -1816,8 +1904,8 @@ hcall_real_table: .long DOTSYM(kvmppc_h_remove) - hcall_real_table .long DOTSYM(kvmppc_h_enter) - hcall_real_table .long DOTSYM(kvmppc_h_read) - hcall_real_table - .long 0 /* 0x10 - H_CLEAR_MOD */ - .long 0 /* 0x14 - H_CLEAR_REF */ + .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table + .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table .long DOTSYM(kvmppc_h_protect) - hcall_real_table .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index bd6ab1672ae6..a759d9adb0b6 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -352,7 +352,7 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb) return kvmppc_get_field(inst, msb + 32, lsb + 32); } -bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) +static bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) { if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) return false; diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index acee37cde840..ca8f174289bb 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -123,7 +123,7 @@ no_dcbz32_on: PPC_LL r8, SVCPU_CTR(r3) PPC_LL r9, SVCPU_LR(r3) lwz r10, SVCPU_CR(r3) - lwz r11, SVCPU_XER(r3) + PPC_LL r11, SVCPU_XER(r3) mtctr r8 mtlr r9 @@ -237,7 +237,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) mfctr r8 mflr r9 - stw r5, SVCPU_XER(r13) + PPC_STL r5, SVCPU_XER(r13) PPC_STL r6, SVCPU_FAULT_DAR(r13) stw r7, SVCPU_FAULT_DSISR(r13) PPC_STL r8, SVCPU_CTR(r13) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index cc5842657161..ae458f0fd061 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -933,6 +933,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, #endif break; case BOOKE_INTERRUPT_CRITICAL: + kvmppc_fill_pt_regs(®s); unknown_exception(®s); break; case BOOKE_INTERRUPT_DEBUG: diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 50860e919cb8..29911a07bcdb 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -377,7 +377,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu->arch.shared->mas1 = (vcpu->arch.shared->mas6 & MAS6_SPID0) - | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0)) + | ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0) | (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0)); vcpu->arch.shared->mas2 &= MAS2_EPN; vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 & diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index e5dde32fe71f..2e51289610e4 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -660,7 +660,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvmppc_core_pending_dec(vcpu); } -enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) +static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index dc5385ebb071..5ad26dd94d77 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -3,5 +3,6 @@ generic-y += clkdev.h generic-y += irq_work.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index cfad7fca01d6..d7697ab802f6 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -57,7 +57,10 @@ union ctlreg0 { unsigned long lap : 1; /* Low-address-protection control */ unsigned long : 4; unsigned long edat : 1; /* Enhanced-DAT-enablement control */ - unsigned long : 23; + unsigned long : 4; + unsigned long afp : 1; /* AFP-register control */ + unsigned long vx : 1; /* Vector enablement control */ + unsigned long : 17; }; }; diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h index 629b79a93165..f7e5c36688c3 100644 --- a/arch/s390/include/asm/etr.h +++ b/arch/s390/include/asm/etr.h @@ -214,6 +214,9 @@ static inline int etr_ptff(void *ptff_block, unsigned int func) void etr_switch_to_local(void); void etr_sync_check(void); +/* notifier for syncs */ +extern struct atomic_notifier_head s390_epoch_delta_notifier; + /* STP interruption parameter */ struct stp_irq_parm { unsigned int _pad0 : 14; diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 0130d0379edd..d9be7c0c1291 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -14,6 +14,7 @@ #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range free_pgd_range +#define hugepages_supported() (MACHINE_HAS_HPAGE) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3024acbe1f9d..df4db81254d3 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -258,6 +258,9 @@ struct kvm_vcpu_stat { u32 diagnose_10; u32 diagnose_44; u32 diagnose_9c; + u32 diagnose_258; + u32 diagnose_308; + u32 diagnose_500; }; #define PGM_OPERATION 0x01 @@ -630,7 +633,6 @@ extern char sie_exit; static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_check_processor_compat(void *rtn) {} -static inline void kvm_arch_exit(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} diff --git a/arch/s390/include/asm/mm-arch-hooks.h b/arch/s390/include/asm/mm-arch-hooks.h deleted file mode 100644 index 07680b2f3c59..000000000000 --- a/arch/s390/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_S390_MM_ARCH_HOOKS_H -#define _ASM_S390_MM_ARCH_HOOKS_H - -#endif /* _ASM_S390_MM_ARCH_HOOKS_H */ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index dd345238d9a7..53eacbd4f09b 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -17,10 +17,7 @@ #define PAGE_DEFAULT_ACC 0 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) -#include <asm/setup.h> -#ifndef __ASSEMBLY__ - -extern int HPAGE_SHIFT; +#define HPAGE_SHIFT 20 #define HPAGE_SIZE (1UL << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) @@ -30,6 +27,9 @@ extern int HPAGE_SHIFT; #define ARCH_HAS_PREPARE_HUGEPAGE #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH +#include <asm/setup.h> +#ifndef __ASSEMBLY__ + static inline void storage_key_init_range(unsigned long start, unsigned long end) { #if PAGE_DEFAULT_KEY diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 4cb19fe76dd9..f897ec73dc8c 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -87,7 +87,15 @@ struct sf_raw_sample { } __packed; /* Perf hardware reserve and release functions */ +#ifdef CONFIG_PERF_EVENTS int perf_reserve_sampling(void); void perf_release_sampling(void); +#else /* CONFIG_PERF_EVENTS */ +static inline int perf_reserve_sampling(void) +{ + return 0; +} +static inline void perf_release_sampling(void) {} +#endif /* CONFIG_PERF_EVENTS */ #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index c7d1b9d09011..a2da259d9327 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -23,15 +23,15 @@ int main(void) { - DEFINE(__THREAD_info, offsetof(struct task_struct, stack)); - DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp)); - DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment)); - BLANK(); + DEFINE(__TASK_thread_info, offsetof(struct task_struct, stack)); + DEFINE(__TASK_thread, offsetof(struct task_struct, thread)); DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); BLANK(); - DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause)); - DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address)); - DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid)); + DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp)); + DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause)); + DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address)); + DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid)); + DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb)); BLANK(); DEFINE(__TI_task, offsetof(struct thread_info, task)); DEFINE(__TI_flags, offsetof(struct thread_info, flags)); @@ -176,7 +176,6 @@ int main(void) DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data)); DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap)); DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb)); - DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb)); DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce)); DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c)); DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3238893c9d4f..84062e7a77da 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -178,17 +178,21 @@ _PIF_WORK = (_PIF_PER_TRAP) */ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task - stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev - lg %r4,__THREAD_info(%r2) # get thread_info of prev - lg %r5,__THREAD_info(%r3) # get thread_info of next + lgr %r1,%r2 + aghi %r1,__TASK_thread # thread_struct of prev task + lg %r4,__TASK_thread_info(%r2) # get thread_info of prev + lg %r5,__TASK_thread_info(%r3) # get thread_info of next + stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev + lgr %r1,%r3 + aghi %r1,__TASK_thread # thread_struct of next task lgr %r15,%r5 aghi %r15,STACK_INIT # end of kernel stack of next stg %r3,__LC_CURRENT # store task struct of next stg %r5,__LC_THREAD_INFO # store thread info of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack + lg %r15,__THREAD_ksp(%r1) # load kernel stack of next lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next - lg %r15,__THREAD_ksp(%r3) # load kernel stack of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task br %r14 @@ -417,6 +421,7 @@ ENTRY(pgm_check_handler) LAST_BREAK %r14 lg %r15,__LC_KERNEL_STACK lg %r14,__TI_task(%r12) + aghi %r14,__TASK_thread # pointer to thread_struct lghi %r13,__LC_PGM_TDB tm __LC_PGM_ILC+2,0x02 # check for transaction abort jz 2f diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 505c17c0ae1a..56b550893593 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -21,6 +21,7 @@ #include <asm/nmi.h> #include <asm/crw.h> #include <asm/switch_to.h> +#include <asm/ctl_reg.h> struct mcck_struct { int kill_task; @@ -129,26 +130,30 @@ static int notrace s390_revalidate_registers(struct mci *mci) } else asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area)); - asm volatile( - " ld 0,0(%0)\n" - " ld 1,8(%0)\n" - " ld 2,16(%0)\n" - " ld 3,24(%0)\n" - " ld 4,32(%0)\n" - " ld 5,40(%0)\n" - " ld 6,48(%0)\n" - " ld 7,56(%0)\n" - " ld 8,64(%0)\n" - " ld 9,72(%0)\n" - " ld 10,80(%0)\n" - " ld 11,88(%0)\n" - " ld 12,96(%0)\n" - " ld 13,104(%0)\n" - " ld 14,112(%0)\n" - " ld 15,120(%0)\n" - : : "a" (fpt_save_area)); - /* Revalidate vector registers */ - if (MACHINE_HAS_VX && current->thread.vxrs) { + if (!MACHINE_HAS_VX) { + /* Revalidate floating point registers */ + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : : "a" (fpt_save_area)); + } else { + /* Revalidate vector registers */ + union ctlreg0 cr0; + if (!mci->vr) { /* * Vector registers can't be restored and therefore @@ -156,8 +161,12 @@ static int notrace s390_revalidate_registers(struct mci *mci) */ kill_task = 1; } + cr0.val = S390_lowcore.cregs_save_area[0]; + cr0.afp = cr0.vx = 1; + __ctl_load(cr0.val, 0, 0); restore_vx_regs((__vector128 *) - S390_lowcore.vector_save_area_addr); + &S390_lowcore.vector_save_area); + __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Revalidate access registers */ asm volatile( diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index dc5edc29b73a..8f587d871b9f 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, asmlinkage void execve_tail(void) { current->thread.fp_regs.fpc = 0; - asm volatile("sfpc %0,%0" : : "d" (0)); + asm volatile("sfpc %0" : : "d" (0)); } /* diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S index 43c3169ea49c..ada0c07fe1a8 100644 --- a/arch/s390/kernel/sclp.S +++ b/arch/s390/kernel/sclp.S @@ -270,6 +270,8 @@ ENTRY(_sclp_print_early) jno .Lesa2 ahi %r15,-80 stmh %r6,%r15,96(%r15) # store upper register halves + basr %r13,0 + lmh %r0,%r15,.Lzeroes-.(%r13) # clear upper register halves .Lesa2: lr %r10,%r2 # save string pointer lhi %r2,0 @@ -291,6 +293,8 @@ ENTRY(_sclp_print_early) .Lesa3: lm %r6,%r15,120(%r15) # restore registers br %r14 +.Lzeroes: + .fill 64,4,0 .LwritedataS4: .long 0x00760005 # SCLP command for write data diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f7f027caaaaa..ca070d260af2 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -885,8 +885,6 @@ void __init setup_arch(char **cmdline_p) */ setup_hwcaps(); - HPAGE_SHIFT = MACHINE_HAS_HPAGE ? 20 : 0; - /* * Create kernel page tables and switch to virtual addressing. */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 9e733d965e08..627887b075a7 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -58,6 +58,9 @@ EXPORT_SYMBOL_GPL(sched_clock_base_cc); static DEFINE_PER_CPU(struct clock_event_device, comparators); +ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier); +EXPORT_SYMBOL(s390_epoch_delta_notifier); + /* * Scheduler clock - returns current time in nanosec units. */ @@ -752,7 +755,7 @@ static void clock_sync_cpu(struct clock_sync_data *sync) static int etr_sync_clock(void *data) { static int first; - unsigned long long clock, old_clock, delay, delta; + unsigned long long clock, old_clock, clock_delta, delay, delta; struct clock_sync_data *etr_sync; struct etr_aib *sync_port, *aib; int port; @@ -789,6 +792,9 @@ static int etr_sync_clock(void *data) delay = (unsigned long long) (aib->edf2.etv - sync_port->edf2.etv) << 32; delta = adjust_time(old_clock, clock, delay); + clock_delta = clock - old_clock; + atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, + &clock_delta); etr_sync->fixup_cc = delta; fixup_clock_comparator(delta); /* Verify that the clock is properly set. */ @@ -1526,7 +1532,7 @@ void stp_island_check(void) static int stp_sync_clock(void *data) { static int first; - unsigned long long old_clock, delta; + unsigned long long old_clock, delta, new_clock, clock_delta; struct clock_sync_data *stp_sync; int rc; @@ -1551,7 +1557,11 @@ static int stp_sync_clock(void *data) old_clock = get_tod_clock(); rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0); if (rc == 0) { - delta = adjust_time(old_clock, get_tod_clock(), 0); + new_clock = get_tod_clock(); + delta = adjust_time(old_clock, new_clock, 0); + clock_delta = new_clock - old_clock; + atomic_notifier_call_chain(&s390_epoch_delta_notifier, + 0, &clock_delta); fixup_clock_comparator(delta); rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 4d96c9f53455..7bea81d8a363 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -259,7 +259,7 @@ void vector_exception(struct pt_regs *regs) } /* get vector interrupt code from fpc */ - asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); + asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc)); vic = (current->thread.fp_regs.fpc & 0xf00) >> 8; switch (vic) { case 1: /* invalid vector operation */ @@ -297,7 +297,7 @@ void data_exception(struct pt_regs *regs) location = get_trap_ip(regs); - asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); + asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc)); /* Check for vector register enablement */ if (MACHINE_HAS_VX && !current->thread.vxrs && (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) { diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index fc7ec95848c3..5fbfb88f8477 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -27,13 +27,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + vcpu->stat.diagnose_10++; if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end || start < 2 * PAGE_SIZE) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); - vcpu->stat.diagnose_10++; /* * We checked for start >= end above, so lets check for the @@ -75,6 +75,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4; u16 ry = (vcpu->arch.sie_block->ipa & 0x0f); + VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx", + vcpu->run->s.regs.gprs[rx]); + vcpu->stat.diagnose_258++; if (vcpu->run->s.regs.gprs[rx] & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm)); @@ -85,6 +88,9 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) switch (parm.subcode) { case 0: /* TOKEN */ + VCPU_EVENT(vcpu, 3, "pageref token addr 0x%llx " + "select mask 0x%llx compare mask 0x%llx", + parm.token_addr, parm.select_mask, parm.compare_mask); if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) { /* * If the pagefault handshake is already activated, @@ -114,6 +120,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) * the cancel, therefore to reduce code complexity, we assume * all outstanding tokens are already pending. */ + VCPU_EVENT(vcpu, 3, "pageref cancel addr 0x%llx", parm.token_addr); if (parm.token_addr || parm.select_mask || parm.compare_mask || parm.zarch) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -174,7 +181,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; - VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); + VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode); + vcpu->stat.diagnose_308++; switch (subcode) { case 3: vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; @@ -202,6 +210,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) { int ret; + vcpu->stat.diagnose_500++; /* No virtio-ccw notification? Get out quickly. */ if (!vcpu->kvm->arch.css_support || (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index e97b3455d7e6..47518a324d75 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -473,10 +473,45 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->iprcc &= ~PGM_PER; } +#define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH) +#define hssec(vcpu) (vcpu->arch.sie_block->gcr[13] & _ASCE_SPACE_SWITCH) +#define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1) +#define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff) + void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) { + int new_as; + if (debug_exit_required(vcpu)) vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; filter_guest_per_event(vcpu); + + /* + * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger + * a space-switch event. PER events enforce space-switch events + * for these instructions. So if no PER event for the guest is left, + * we might have to filter the space-switch element out, too. + */ + if (vcpu->arch.sie_block->iprcc == PGM_SPACE_SWITCH) { + vcpu->arch.sie_block->iprcc = 0; + new_as = psw_bits(vcpu->arch.sie_block->gpsw).as; + + /* + * If the AS changed from / to home, we had RP, SAC or SACF + * instruction. Check primary and home space-switch-event + * controls. (theoretically home -> home produced no event) + */ + if (((new_as == PSW_AS_HOME) ^ old_as_is_home(vcpu)) && + (pssec(vcpu) || hssec(vcpu))) + vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; + + /* + * PT, PTI, PR, PC instruction operate on primary AS only. Check + * if the primary-space-switch-event control was or got set. + */ + if (new_as == PSW_AS_PRIMARY && !old_as_is_home(vcpu) && + (pssec(vcpu) || old_ssec(vcpu))) + vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; + } } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c98d89708e99..b277d50dcf76 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -30,7 +30,6 @@ #define IOINT_SCHID_MASK 0x0000ffff #define IOINT_SSID_MASK 0x00030000 #define IOINT_CSSID_MASK 0x03fc0000 -#define IOINT_AI_MASK 0x04000000 #define PFAULT_INIT 0x0600 #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 @@ -72,9 +71,13 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) static int ckc_irq_pending(struct kvm_vcpu *vcpu) { + preempt_disable(); if (!(vcpu->arch.sie_block->ckc < - get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) + get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) { + preempt_enable(); return 0; + } + preempt_enable(); return ckc_interrupts_enabled(vcpu); } @@ -311,8 +314,8 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu) li->irq.ext.ext_params2 = 0; spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx", - 0, ext.ext_params2); + VCPU_EVENT(vcpu, 4, "deliver: pfault init token 0x%llx", + ext.ext_params2); trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT, 0, ext.ext_params2); @@ -368,7 +371,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) spin_unlock(&fi->lock); if (deliver) { - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + VCPU_EVENT(vcpu, 3, "deliver: machine check mcic 0x%llx", mchk.mcic); trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, @@ -403,7 +406,7 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; int rc; - VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); + VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart"); vcpu->stat.deliver_restart_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); @@ -427,7 +430,6 @@ static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu) clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address); vcpu->stat.deliver_prefix_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, @@ -450,7 +452,7 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu) clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); + VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp emerg"); vcpu->stat.deliver_emergency_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, cpu_addr, 0); @@ -477,7 +479,7 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu) clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp ext call"); vcpu->stat.deliver_external_call++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, @@ -506,7 +508,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) memset(&li->irq.pgm, 0, sizeof(pgm_info)); spin_unlock(&li->lock); - VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", + VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d", pgm_info.code, ilc); vcpu->stat.deliver_program_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, @@ -622,7 +624,7 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu) clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); spin_unlock(&fi->lock); - VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", + VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x", ext.ext_params); vcpu->stat.deliver_service_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE, @@ -651,9 +653,6 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu) struct kvm_s390_interrupt_info, list); if (inti) { - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, - KVM_S390_INT_PFAULT_DONE, 0, - inti->ext.ext_params2); list_del(&inti->list); fi->counters[FIRQ_CNTR_PFAULT] -= 1; } @@ -662,6 +661,12 @@ static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu) spin_unlock(&fi->lock); if (inti) { + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_DONE, 0, + inti->ext.ext_params2); + VCPU_EVENT(vcpu, 4, "deliver: pfault done token 0x%llx", + inti->ext.ext_params2); + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); rc |= put_guest_lc(vcpu, PFAULT_DONE, @@ -691,7 +696,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu) list); if (inti) { VCPU_EVENT(vcpu, 4, - "interrupt: virtio parm:%x,parm64:%llx", + "deliver: virtio parm: 0x%x,parm64: 0x%llx", inti->ext.ext_params, inti->ext.ext_params2); vcpu->stat.deliver_virtio_interrupt++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, @@ -741,7 +746,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info, list); if (inti) { - VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type); vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, @@ -855,7 +860,9 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) goto no_timer; } + preempt_disable(); now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; + preempt_enable(); sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); /* underflow */ @@ -864,7 +871,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) __set_cpu_idle(vcpu); hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); - VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); + VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime); no_timer: srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); @@ -894,7 +901,9 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) u64 now, sltime; vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); + preempt_disable(); now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch; + preempt_enable(); sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now); /* @@ -968,6 +977,10 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, + irq->u.pgm.code, 0); + li->irq.pgm = irq->u.pgm; set_bit(IRQ_PEND_PROG, &li->pending_irqs); return 0; @@ -978,9 +991,6 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_irq irq; - VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code, - 0, 1); spin_lock(&li->lock); irq.u.pgm.code = code; __inject_prog(vcpu, &irq); @@ -996,10 +1006,6 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, struct kvm_s390_irq irq; int rc; - VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)", - pgm_info->code); - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, - pgm_info->code, 0, 1); spin_lock(&li->lock); irq.u.pgm = *pgm_info; rc = __inject_prog(vcpu, &irq); @@ -1012,11 +1018,11 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx", - irq->u.ext.ext_params, irq->u.ext.ext_params2); + VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx", + irq->u.ext.ext_params2); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT, irq->u.ext.ext_params, - irq->u.ext.ext_params2, 2); + irq->u.ext.ext_params2); li->irq.ext = irq->u.ext; set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs); @@ -1045,10 +1051,10 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) struct kvm_s390_extcall_info *extcall = &li->irq.extcall; uint16_t src_id = irq->u.extcall.code; - VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u", + VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u", src_id); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, - src_id, 0, 2); + src_id, 0); /* sending vcpu invalid */ if (src_id >= KVM_MAX_VCPUS || @@ -1070,10 +1076,10 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_prefix_info *prefix = &li->irq.prefix; - VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)", + VCPU_EVENT(vcpu, 3, "inject: set prefix to %x", irq->u.prefix.address); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX, - irq->u.prefix.address, 0, 2); + irq->u.prefix.address, 0); if (!is_vcpu_stopped(vcpu)) return -EBUSY; @@ -1090,7 +1096,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) struct kvm_s390_stop_info *stop = &li->irq.stop; int rc = 0; - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0); if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS) return -EINVAL; @@ -1114,8 +1120,8 @@ static int __inject_sigp_restart(struct kvm_vcpu *vcpu, { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type); - trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2); + VCPU_EVENT(vcpu, 3, "%s", "inject: restart int"); + trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); set_bit(IRQ_PEND_RESTART, &li->pending_irqs); return 0; @@ -1126,10 +1132,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", + VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u", irq->u.emerg.code); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, - irq->u.emerg.code, 0, 2); + irq->u.emerg.code, 0); set_bit(irq->u.emerg.code, li->sigp_emerg_pending); set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); @@ -1142,10 +1148,10 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_mchk_info *mchk = &li->irq.mchk; - VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx", + VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx", irq->u.mchk.mcic); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0, - irq->u.mchk.mcic, 2); + irq->u.mchk.mcic); /* * Because repressible machine checks can be indicated along with @@ -1172,9 +1178,9 @@ static int __inject_ckc(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP); + VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external"); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, - 0, 0, 2); + 0, 0); set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); @@ -1185,9 +1191,9 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER); + VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external"); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, - 0, 0, 2); + 0, 0); set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); @@ -1435,20 +1441,20 @@ int kvm_s390_inject_vm(struct kvm *kvm, inti->ext.ext_params2 = s390int->parm64; break; case KVM_S390_INT_SERVICE: - VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); + VM_EVENT(kvm, 4, "inject: sclp parm:%x", s390int->parm); inti->ext.ext_params = s390int->parm; break; case KVM_S390_INT_PFAULT_DONE: inti->ext.ext_params2 = s390int->parm64; break; case KVM_S390_MCHK: - VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", + VM_EVENT(kvm, 3, "inject: machine check mcic 0x%llx", s390int->parm64); inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ inti->mchk.mcic = s390int->parm64; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (inti->type & IOINT_AI_MASK) + if (inti->type & KVM_S390_INT_IO_AI_MASK) VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); else VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", @@ -1535,8 +1541,6 @@ static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) switch (irq->type) { case KVM_S390_PROGRAM_INT: - VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", - irq->u.pgm.code); rc = __inject_prog(vcpu, irq); break; case KVM_S390_SIGP_SET_PREFIX: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2078f92d15ac..397b88dc1b2c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -28,6 +28,7 @@ #include <linux/vmalloc.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> +#include <asm/etr.h> #include <asm/pgtable.h> #include <asm/nmi.h> #include <asm/switch_to.h> @@ -108,6 +109,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, { "diagnose_9c", VCPU_STAT(diagnose_9c) }, + { "diagnose_258", VCPU_STAT(diagnose_258) }, + { "diagnose_308", VCPU_STAT(diagnose_308) }, + { "diagnose_500", VCPU_STAT(diagnose_500) }, { NULL } }; @@ -124,6 +128,7 @@ unsigned long kvm_s390_fac_list_mask_size(void) } static struct gmap_notifier gmap_notifier; +debug_info_t *kvm_s390_dbf; /* Section: not file related */ int kvm_arch_hardware_enable(void) @@ -134,24 +139,69 @@ int kvm_arch_hardware_enable(void) static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); +/* + * This callback is executed during stop_machine(). All CPUs are therefore + * temporarily stopped. In order not to change guest behavior, we have to + * disable preemption whenever we touch the epoch of kvm and the VCPUs, + * so a CPU won't be stopped while calculating with the epoch. + */ +static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, + void *v) +{ + struct kvm *kvm; + struct kvm_vcpu *vcpu; + int i; + unsigned long long *delta = v; + + list_for_each_entry(kvm, &vm_list, vm_list) { + kvm->arch.epoch -= *delta; + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu->arch.sie_block->epoch -= *delta; + } + } + return NOTIFY_OK; +} + +static struct notifier_block kvm_clock_notifier = { + .notifier_call = kvm_clock_sync, +}; + int kvm_arch_hardware_setup(void) { gmap_notifier.notifier_call = kvm_gmap_notifier; gmap_register_ipte_notifier(&gmap_notifier); + atomic_notifier_chain_register(&s390_epoch_delta_notifier, + &kvm_clock_notifier); return 0; } void kvm_arch_hardware_unsetup(void) { gmap_unregister_ipte_notifier(&gmap_notifier); + atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, + &kvm_clock_notifier); } int kvm_arch_init(void *opaque) { + kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); + if (!kvm_s390_dbf) + return -ENOMEM; + + if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { + debug_unregister(kvm_s390_dbf); + return -ENOMEM; + } + /* Register floating interrupt controller interface. */ return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); } +void kvm_arch_exit(void) +{ + debug_unregister(kvm_s390_dbf); +} + /* Section: device related */ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -281,10 +331,12 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) switch (cap->cap) { case KVM_CAP_S390_IRQCHIP: + VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP"); kvm->arch.use_irqchip = 1; r = 0; break; case KVM_CAP_S390_USER_SIGP: + VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP"); kvm->arch.user_sigp = 1; r = 0; break; @@ -295,8 +347,11 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = 0; } else r = -EINVAL; + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", + r ? "(not available)" : "(success)"); break; case KVM_CAP_S390_USER_STSI: + VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; r = 0; break; @@ -314,6 +369,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att switch (attr->attr) { case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; + VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", + kvm->arch.gmap->asce_end); if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) ret = -EFAULT; break; @@ -330,7 +387,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att unsigned int idx; switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: + /* enable CMMA only for z10 and later (EDAT_1) */ + ret = -EINVAL; + if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1) + break; + ret = -EBUSY; + VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); mutex_lock(&kvm->lock); if (atomic_read(&kvm->online_vcpus) == 0) { kvm->arch.use_cmma = 1; @@ -339,6 +402,11 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att mutex_unlock(&kvm->lock); break; case KVM_S390_VM_MEM_CLR_CMMA: + ret = -EINVAL; + if (!kvm->arch.use_cmma) + break; + + VM_EVENT(kvm, 3, "%s", "RESET: CMMA states"); mutex_lock(&kvm->lock); idx = srcu_read_lock(&kvm->srcu); s390_reset_cmma(kvm->arch.gmap->mm); @@ -374,6 +442,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } } mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit); break; } default: @@ -400,22 +469,26 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr) kvm->arch.crypto.crycb->aes_wrapping_key_mask, sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); kvm->arch.crypto.aes_kw = 1; + VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support"); break; case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: get_random_bytes( kvm->arch.crypto.crycb->dea_wrapping_key_mask, sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); kvm->arch.crypto.dea_kw = 1; + VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support"); break; case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: kvm->arch.crypto.aes_kw = 0; memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0, sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); + VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support"); break; case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW: kvm->arch.crypto.dea_kw = 0; memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0, sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); + VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support"); break; default: mutex_unlock(&kvm->lock); @@ -440,6 +513,7 @@ static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) if (gtod_high != 0) return -EINVAL; + VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high); return 0; } @@ -459,12 +533,15 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) return r; mutex_lock(&kvm->lock); + preempt_disable(); kvm->arch.epoch = gtod - host_tod; kvm_s390_vcpu_block_all(kvm); kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch; kvm_s390_vcpu_unblock_all(kvm); + preempt_enable(); mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod); return 0; } @@ -496,6 +573,7 @@ static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr) if (copy_to_user((void __user *)attr->addr, >od_high, sizeof(gtod_high))) return -EFAULT; + VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high); return 0; } @@ -509,9 +587,12 @@ static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) if (r) return r; + preempt_disable(); gtod = host_tod + kvm->arch.epoch; + preempt_enable(); if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) return -EFAULT; + VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod); return 0; } @@ -821,7 +902,9 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) } /* Enable storage key handling for the guest */ - s390_enable_skey(); + r = s390_enable_skey(); + if (r) + goto out; for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); @@ -879,8 +962,7 @@ long kvm_arch_vm_ioctl(struct file *filp, if (kvm->arch.use_irqchip) { /* Set up dummy routing. */ memset(&routing, 0, sizeof(routing)); - kvm_set_irq_routing(kvm, &routing, 0, 0); - r = 0; + r = kvm_set_irq_routing(kvm, &routing, 0, 0); } break; } @@ -1043,7 +1125,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) sprintf(debug_name, "kvm-%u", current->pid); - kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); + kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long)); if (!kvm->arch.dbf) goto out_err; @@ -1086,7 +1168,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.ipte_mutex); debug_register_view(kvm->arch.dbf, &debug_sprintf_view); - VM_EVENT(kvm, 3, "%s", "vm created"); + VM_EVENT(kvm, 3, "vm created with type %lu", type); if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; @@ -1103,6 +1185,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); + KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); return 0; out_err: @@ -1110,6 +1193,7 @@ out_err: free_page((unsigned long)kvm->arch.model.fac); debug_unregister(kvm->arch.dbf); free_page((unsigned long)(kvm->arch.sca)); + KVM_EVENT(3, "creation of vm failed: %d", rc); return rc; } @@ -1131,7 +1215,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) if (kvm_is_ucontrol(vcpu->kvm)) gmap_free(vcpu->arch.gmap); - if (kvm_s390_cmma_enabled(vcpu->kvm)) + if (vcpu->kvm->arch.use_cmma) kvm_s390_vcpu_unsetup_cmma(vcpu); free_page((unsigned long)(vcpu->arch.sie_block)); @@ -1166,6 +1250,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); + KVM_EVENT(3, "vm 0x%p destroyed", kvm); } /* Section: vcpu related */ @@ -1264,7 +1349,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { mutex_lock(&vcpu->kvm->lock); + preempt_disable(); vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; + preempt_enable(); mutex_unlock(&vcpu->kvm->lock); if (!kvm_is_ucontrol(vcpu->kvm)) vcpu->arch.gmap = vcpu->kvm->arch.gmap; @@ -1342,7 +1429,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) } vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; - if (kvm_s390_cmma_enabled(vcpu->kvm)) { + if (vcpu->kvm->arch.use_cmma) { rc = kvm_s390_vcpu_setup_cmma(vcpu); if (rc) return rc; @@ -1723,18 +1810,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return rc; } -bool kvm_s390_cmma_enabled(struct kvm *kvm) -{ - if (!MACHINE_IS_LPAR) - return false; - /* only enable for z10 and later */ - if (!MACHINE_HAS_EDAT1) - return false; - if (!kvm->arch.use_cmma) - return false; - return true; -} - static bool ibs_enabled(struct kvm_vcpu *vcpu) { return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS; @@ -2340,6 +2415,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_S390_CSS_SUPPORT: if (!vcpu->kvm->arch.css_support) { vcpu->kvm->arch.css_support = 1; + VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support"); trace_kvm_s390_enable_css(vcpu->kvm); } r = 0; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index c5704786e473..c446aabf60d3 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -27,6 +27,13 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); #define TDB_FORMAT1 1 #define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) +extern debug_info_t *kvm_s390_dbf; +#define KVM_EVENT(d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \ + d_args); \ +} while (0) + #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ do { \ debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ @@ -65,6 +72,8 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu) static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) { + VCPU_EVENT(vcpu, 3, "set prefix of cpu %03u to 0x%x", vcpu->vcpu_id, + prefix); vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); @@ -217,8 +226,6 @@ void exit_sie(struct kvm_vcpu *vcpu); void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu); int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); -/* is cmma enabled */ -bool kvm_s390_cmma_enabled(struct kvm *kvm); unsigned long kvm_s390_fac_list_mask_size(void); extern unsigned long kvm_s390_fac_list_mask[]; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index ad4242245771..4d21dc4d1a84 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -53,11 +53,14 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) kvm_s390_set_psw_cc(vcpu, 3); return 0; } + VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", val); val = (val - hostclk) & ~0x3fUL; mutex_lock(&vcpu->kvm->lock); + preempt_disable(); kvm_for_each_vcpu(i, cpup, vcpu->kvm) cpup->arch.sie_block->epoch = val; + preempt_enable(); mutex_unlock(&vcpu->kvm->lock); kvm_s390_set_psw_cc(vcpu, 0); @@ -98,8 +101,6 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); kvm_s390_set_prefix(vcpu, address); - - VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); trace_kvm_s390_handle_prefix(vcpu, 1, address); return 0; } @@ -129,7 +130,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); + VCPU_EVENT(vcpu, 3, "STPX: storing prefix 0x%x into 0x%llx", address, operand2); trace_kvm_s390_handle_prefix(vcpu, 0, address); return 0; } @@ -155,7 +156,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga); + VCPU_EVENT(vcpu, 3, "STAP: storing cpu address (%u) to 0x%llx", vcpu_id, ga); trace_kvm_s390_handle_stap(vcpu, ga); return 0; } @@ -167,6 +168,7 @@ static int __skey_check_enable(struct kvm_vcpu *vcpu) return rc; rc = s390_enable_skey(); + VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest"); trace_kvm_s390_skey_related_inst(vcpu); vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); return rc; @@ -370,7 +372,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) &fac, sizeof(fac)); if (rc) return rc; - VCPU_EVENT(vcpu, 5, "store facility list value %x", fac); + VCPU_EVENT(vcpu, 3, "STFL: store facility list 0x%x", fac); trace_kvm_s390_handle_stfl(vcpu, fac); return 0; } @@ -468,7 +470,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu) if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); + VCPU_EVENT(vcpu, 3, "STIDP: store cpu id 0x%llx", stidp_data); return 0; } @@ -521,7 +523,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) ar_t ar; vcpu->stat.instruction_stsi++; - VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); + VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2); if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -758,10 +760,10 @@ static int handle_essa(struct kvm_vcpu *vcpu) struct gmap *gmap; int i; - VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries); + VCPU_EVENT(vcpu, 4, "ESSA: release %d pages", entries); gmap = vcpu->arch.gmap; vcpu->stat.instruction_essa++; - if (!kvm_s390_cmma_enabled(vcpu->kvm)) + if (!vcpu->kvm->arch.use_cmma) return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) @@ -829,7 +831,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) if (ga & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + VCPU_EVENT(vcpu, 4, "LCTL: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga); trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); nr_regs = ((reg3 - reg1) & 0xf) + 1; @@ -868,7 +870,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) if (ga & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + VCPU_EVENT(vcpu, 4, "STCTL r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga); trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga); reg = reg1; @@ -902,7 +904,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + VCPU_EVENT(vcpu, 4, "LCTLG: r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga); trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); nr_regs = ((reg3 - reg1) & 0xf) + 1; @@ -940,7 +942,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu) if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga); + VCPU_EVENT(vcpu, 4, "STCTG r1:%d, r3:%d, addr: 0x%llx", reg1, reg3, ga); trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga); reg = reg1; diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 72e58bd2bee7..da690b69f9fe 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -205,9 +205,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu, *reg &= 0xffffffff00000000UL; *reg |= SIGP_STATUS_INCORRECT_STATE; return SIGP_CC_STATUS_STORED; - } else if (rc == 0) { - VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", - dst_vcpu->vcpu_id, irq.u.prefix.address); } return rc; @@ -371,7 +368,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, return rc; } -static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code) +static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code, + u16 cpu_addr) { if (!vcpu->kvm->arch.user_sigp) return 0; @@ -414,9 +412,8 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code) default: vcpu->stat.instruction_sigp_unknown++; } - - VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space", - order_code); + VCPU_EVENT(vcpu, 3, "SIGP: order %u for CPU %d handled in userspace", + order_code, cpu_addr); return 1; } @@ -435,7 +432,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); - if (handle_sigp_order_in_user_space(vcpu, order_code)) + if (handle_sigp_order_in_user_space(vcpu, order_code, cpu_addr)) return -EOPNOTSUPP; if (r1 % 2) diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 3208d33a48cb..cc1d6c68356f 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -105,11 +105,22 @@ TRACE_EVENT(kvm_s390_vcpu_start_stop, {KVM_S390_PROGRAM_INT, "program interrupt"}, \ {KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"}, \ {KVM_S390_RESTART, "sigp restart"}, \ + {KVM_S390_INT_PFAULT_INIT, "pfault init"}, \ + {KVM_S390_INT_PFAULT_DONE, "pfault done"}, \ + {KVM_S390_MCHK, "machine check"}, \ + {KVM_S390_INT_CLOCK_COMP, "clock comparator"}, \ + {KVM_S390_INT_CPU_TIMER, "cpu timer"}, \ {KVM_S390_INT_VIRTIO, "virtio interrupt"}, \ {KVM_S390_INT_SERVICE, "sclp interrupt"}, \ {KVM_S390_INT_EMERGENCY, "sigp emergency"}, \ {KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"} +#define get_irq_name(__type) \ + (__type > KVM_S390_INT_IO_MAX ? \ + __print_symbolic(__type, kvm_s390_int_type) : \ + (__type & KVM_S390_INT_IO_AI_MASK ? \ + "adapter I/O interrupt" : "subchannel I/O interrupt")) + TRACE_EVENT(kvm_s390_inject_vm, TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who), TP_ARGS(type, parm, parm64, who), @@ -131,22 +142,19 @@ TRACE_EVENT(kvm_s390_inject_vm, TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx", (__entry->who == 1) ? " (from kernel)" : (__entry->who == 2) ? " (from user)" : "", - __entry->inttype, - __print_symbolic(__entry->inttype, kvm_s390_int_type), + __entry->inttype, get_irq_name(__entry->inttype), __entry->parm, __entry->parm64) ); TRACE_EVENT(kvm_s390_inject_vcpu, - TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \ - int who), - TP_ARGS(id, type, parm, parm64, who), + TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64), + TP_ARGS(id, type, parm, parm64), TP_STRUCT__entry( __field(int, id) __field(__u32, inttype) __field(__u32, parm) __field(__u64, parm64) - __field(int, who) ), TP_fast_assign( @@ -154,15 +162,12 @@ TRACE_EVENT(kvm_s390_inject_vcpu, __entry->inttype = type & 0x00000000ffffffff; __entry->parm = parm; __entry->parm64 = parm64; - __entry->who = who; ), - TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx", - (__entry->who == 1) ? " (from kernel)" : - (__entry->who == 2) ? " (from user)" : "", + TP_printk("inject (vcpu %d): type:%x (%s) parm:%x parm64:%llx", __entry->id, __entry->inttype, - __print_symbolic(__entry->inttype, kvm_s390_int_type), - __entry->parm, __entry->parm64) + get_irq_name(__entry->inttype), __entry->parm, + __entry->parm64) ); /* @@ -189,8 +194,8 @@ TRACE_EVENT(kvm_s390_deliver_interrupt, TP_printk("deliver interrupt (vcpu %d): type:%x (%s) " \ "data:%08llx %016llx", __entry->id, __entry->inttype, - __print_symbolic(__entry->inttype, kvm_s390_int_type), - __entry->data0, __entry->data1) + get_irq_name(__entry->inttype), __entry->data0, + __entry->data1) ); /* diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 33082d0d101b..b33f66110ca9 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -31,8 +31,6 @@ #define ALLOC_ORDER 2 #define FRAG_MASK 0x03 -int HPAGE_SHIFT; - unsigned long *crst_table_alloc(struct mm_struct *mm) { struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index bc927a09a172..9cfa2ffaa9d6 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -16,6 +16,7 @@ #include <linux/fs.h> #include <linux/module.h> #include <asm/processor.h> +#include <asm/perf_event.h> #include "../../../drivers/oprofile/oprof.h" diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 138fb3db45ba..92ffe397b893 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -7,6 +7,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += irq_work.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h diff --git a/arch/score/include/asm/mm-arch-hooks.h b/arch/score/include/asm/mm-arch-hooks.h deleted file mode 100644 index 5e38689f189a..000000000000 --- a/arch/score/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_SCORE_MM_ARCH_HOOKS_H -#define _ASM_SCORE_MM_ARCH_HOOKS_H - -#endif /* _ASM_SCORE_MM_ARCH_HOOKS_H */ diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 9ac4626e7284..aac452b26aa8 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h generic-y += param.h diff --git a/arch/sh/include/asm/mm-arch-hooks.h b/arch/sh/include/asm/mm-arch-hooks.h deleted file mode 100644 index 18087298b728..000000000000 --- a/arch/sh/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_SH_MM_ARCH_HOOKS_H -#define _ASM_SH_MM_ARCH_HOOKS_H - -#endif /* _ASM_SH_MM_ARCH_HOOKS_H */ diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 2b2a69dcc467..e928618838bc 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += linkage.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += module.h generic-y += mutex.h generic-y += preempt.h diff --git a/arch/sparc/include/asm/mm-arch-hooks.h b/arch/sparc/include/asm/mm-arch-hooks.h deleted file mode 100644 index b89ba44c16f1..000000000000 --- a/arch/sparc/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_SPARC_MM_ARCH_HOOKS_H -#define _ASM_SPARC_MM_ARCH_HOOKS_H - -#endif /* _ASM_SPARC_MM_ARCH_HOOKS_H */ diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index d53654488c2c..d8a843163471 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += irq_regs.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += msgbuf.h generic-y += mutex.h generic-y += param.h diff --git a/arch/tile/include/asm/mm-arch-hooks.h b/arch/tile/include/asm/mm-arch-hooks.h deleted file mode 100644 index d1709ea774f7..000000000000 --- a/arch/tile/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_TILE_MM_ARCH_HOOKS_H -#define _ASM_TILE_MM_ARCH_HOOKS_H - -#endif /* _ASM_TILE_MM_ARCH_HOOKS_H */ diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 3d63ff6f583f..149ec55f9c46 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mutex.h generic-y += param.h generic-y += pci.h diff --git a/arch/um/include/asm/mm-arch-hooks.h b/arch/um/include/asm/mm-arch-hooks.h deleted file mode 100644 index a7c8b0dfdd4e..000000000000 --- a/arch/um/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_UM_MM_ARCH_HOOKS_H -#define _ASM_UM_MM_ARCH_HOOKS_H - -#endif /* _ASM_UM_MM_ARCH_HOOKS_H */ diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index d12b377b5a8b..1fc7a286dc6f 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -26,6 +26,7 @@ generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += module.h generic-y += msgbuf.h diff --git a/arch/unicore32/include/asm/mm-arch-hooks.h b/arch/unicore32/include/asm/mm-arch-hooks.h deleted file mode 100644 index 4d79a850c509..000000000000 --- a/arch/unicore32/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_UNICORE32_MM_ARCH_HOOKS_H -#define _ASM_UNICORE32_MM_ARCH_HOOKS_H - -#endif /* _ASM_UNICORE32_MM_ARCH_HOOKS_H */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3dbb7e7909ca..b3a1a5d77d92 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -41,6 +41,7 @@ config X86 select ARCH_USE_CMPXCHG_LOCKREF if X86_64 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION if X86_32 select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index a15893d17c55..d8c0d3266173 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -297,6 +297,18 @@ config OPTIMIZE_INLINING If unsure, say N. +config DEBUG_ENTRY + bool "Debug low-level entry code" + depends on DEBUG_KERNEL + ---help--- + This option enables sanity checks in x86's low-level entry code. + Some of these sanity checks may slow down kernel entries and + exits or otherwise impact performance. + + This is currently used to help test NMI code. + + If unsure, say N. + config DEBUG_NMI_SELFTEST bool "NMI Selftest" depends on DEBUG_KERNEL && X86_LOCAL_APIC diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3bb2c4302df1..8cb3e438f21e 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1237,11 +1237,12 @@ ENTRY(nmi) * If the variable is not set and the stack is not the NMI * stack then: * o Set the special variable on the stack - * o Copy the interrupt frame into a "saved" location on the stack - * o Copy the interrupt frame into a "copy" location on the stack + * o Copy the interrupt frame into an "outermost" location on the + * stack + * o Copy the interrupt frame into an "iret" location on the stack * o Continue processing the NMI * If the variable is set or the previous stack is the NMI stack: - * o Modify the "copy" location to jump to the repeate_nmi + * o Modify the "iret" location to jump to the repeat_nmi * o return back to the first NMI * * Now on exit of the first NMI, we first clear the stack variable @@ -1250,31 +1251,151 @@ ENTRY(nmi) * a nested NMI that updated the copy interrupt stack frame, a * jump will be made to the repeat_nmi code that will handle the second * NMI. + * + * However, espfix prevents us from directly returning to userspace + * with a single IRET instruction. Similarly, IRET to user mode + * can fault. We therefore handle NMIs from user space like + * other IST entries. */ /* Use %rdx as our temp variable throughout */ pushq %rdx + testb $3, CS-RIP+8(%rsp) + jz .Lnmi_from_kernel + + /* + * NMI from user mode. We need to run on the thread stack, but we + * can't go through the normal entry paths: NMIs are masked, and + * we don't want to enable interrupts, because then we'll end + * up in an awkward situation in which IRQs are on but NMIs + * are off. + */ + + SWAPGS + cld + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ + pushq 2*8(%rdx) /* pt_regs->cs */ + pushq 1*8(%rdx) /* pt_regs->rip */ + pushq $-1 /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ + pushq %rsi /* pt_regs->si */ + pushq (%rdx) /* pt_regs->dx */ + pushq %rcx /* pt_regs->cx */ + pushq %rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ + pushq %r9 /* pt_regs->r9 */ + pushq %r10 /* pt_regs->r10 */ + pushq %r11 /* pt_regs->r11 */ + pushq %rbx /* pt_regs->rbx */ + pushq %rbp /* pt_regs->rbp */ + pushq %r12 /* pt_regs->r12 */ + pushq %r13 /* pt_regs->r13 */ + pushq %r14 /* pt_regs->r14 */ + pushq %r15 /* pt_regs->r15 */ + + /* + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're + * done with the NMI stack. + */ + + movq %rsp, %rdi + movq $-1, %rsi + call do_nmi + + /* + * Return back to user mode. We must *not* do the normal exit + * work, because we don't want to enable interrupts. Fortunately, + * do_nmi doesn't modify pt_regs. + */ + SWAPGS + jmp restore_c_regs_and_iret + +.Lnmi_from_kernel: + /* + * Here's what our stack frame will look like: + * +---------------------------------------------------------+ + * | original SS | + * | original Return RSP | + * | original RFLAGS | + * | original CS | + * | original RIP | + * +---------------------------------------------------------+ + * | temp storage for rdx | + * +---------------------------------------------------------+ + * | "NMI executing" variable | + * +---------------------------------------------------------+ + * | iret SS } Copied from "outermost" frame | + * | iret Return RSP } on each loop iteration; overwritten | + * | iret RFLAGS } by a nested NMI to force another | + * | iret CS } iteration if needed. | + * | iret RIP } | + * +---------------------------------------------------------+ + * | outermost SS } initialized in first_nmi; | + * | outermost Return RSP } will not be changed before | + * | outermost RFLAGS } NMI processing is done. | + * | outermost CS } Copied to "iret" frame on each | + * | outermost RIP } iteration. | + * +---------------------------------------------------------+ + * | pt_regs | + * +---------------------------------------------------------+ + * + * The "original" frame is used by hardware. Before re-enabling + * NMIs, we need to be done with it, and we need to leave enough + * space for the asm code here. + * + * We return by executing IRET while RSP points to the "iret" frame. + * That will either return for real or it will loop back into NMI + * processing. + * + * The "outermost" frame is copied to the "iret" frame on each + * iteration of the loop, so each iteration starts with the "iret" + * frame pointing to the final return target. + */ + /* - * If %cs was not the kernel segment, then the NMI triggered in user - * space, which means it is definitely not nested. + * Determine whether we're a nested NMI. + * + * If we interrupted kernel code between repeat_nmi and + * end_repeat_nmi, then we are a nested NMI. We must not + * modify the "iret" frame because it's being written by + * the outer NMI. That's okay; the outer NMI handler is + * about to about to call do_nmi anyway, so we can just + * resume the outer NMI. */ - cmpl $__KERNEL_CS, 16(%rsp) - jne first_nmi + + movq $repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja 1f + movq $end_repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja nested_nmi_out +1: /* - * Check the special variable on the stack to see if NMIs are - * executing. + * Now check "NMI executing". If it's set, then we're nested. + * This will not detect if we interrupted an outer NMI just + * before IRET. */ cmpl $1, -8(%rsp) je nested_nmi /* - * Now test if the previous stack was an NMI stack. - * We need the double check. We check the NMI stack to satisfy the - * race when the first NMI clears the variable before returning. - * We check the variable because the first NMI could be in a - * breakpoint routine using a breakpoint stack. + * Now test if the previous stack was an NMI stack. This covers + * the case where we interrupt an outer NMI after it clears + * "NMI executing" but before IRET. We need to be careful, though: + * there is one case in which RSP could point to the NMI stack + * despite there being no NMI active: naughty userspace controls + * RSP at the very beginning of the SYSCALL targets. We can + * pull a fast one on naughty userspace, though: we program + * SYSCALL to mask DF, so userspace cannot cause DF to be set + * if it controls the kernel's RSP. We set DF before we clear + * "NMI executing". */ lea 6*8(%rsp), %rdx /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */ @@ -1286,25 +1407,20 @@ ENTRY(nmi) cmpq %rdx, 4*8(%rsp) /* If it is below the NMI stack, it is a normal NMI */ jb first_nmi - /* Ah, it is within the NMI stack, treat it as nested */ + + /* Ah, it is within the NMI stack. */ + + testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp) + jz first_nmi /* RSP was user controlled. */ + + /* This is a nested NMI. */ nested_nmi: /* - * Do nothing if we interrupted the fixup in repeat_nmi. - * It's about to repeat the NMI handler, so we are fine - * with ignoring this one. + * Modify the "iret" frame to point to repeat_nmi, forcing another + * iteration of NMI handling. */ - movq $repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja 1f - movq $end_repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja nested_nmi_out - -1: - /* Set up the interrupted NMIs stack to jump to repeat_nmi */ - leaq -1*8(%rsp), %rdx - movq %rdx, %rsp + subq $8, %rsp leaq -10*8(%rsp), %rdx pushq $__KERNEL_DS pushq %rdx @@ -1318,61 +1434,42 @@ nested_nmi: nested_nmi_out: popq %rdx - /* No need to check faults here */ + /* We are returning to kernel mode, so this cannot result in a fault. */ INTERRUPT_RETURN first_nmi: - /* - * Because nested NMIs will use the pushed location that we - * stored in rdx, we must keep that space available. - * Here's what our stack frame will look like: - * +-------------------------+ - * | original SS | - * | original Return RSP | - * | original RFLAGS | - * | original CS | - * | original RIP | - * +-------------------------+ - * | temp storage for rdx | - * +-------------------------+ - * | NMI executing variable | - * +-------------------------+ - * | copied SS | - * | copied Return RSP | - * | copied RFLAGS | - * | copied CS | - * | copied RIP | - * +-------------------------+ - * | Saved SS | - * | Saved Return RSP | - * | Saved RFLAGS | - * | Saved CS | - * | Saved RIP | - * +-------------------------+ - * | pt_regs | - * +-------------------------+ - * - * The saved stack frame is used to fix up the copied stack frame - * that a nested NMI may change to make the interrupted NMI iret jump - * to the repeat_nmi. The original stack frame and the temp storage - * is also used by nested NMIs and can not be trusted on exit. - */ - /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ + /* Restore rdx. */ movq (%rsp), %rdx - /* Set the NMI executing variable on the stack. */ - pushq $1 + /* Make room for "NMI executing". */ + pushq $0 - /* Leave room for the "copied" frame */ + /* Leave room for the "iret" frame */ subq $(5*8), %rsp - /* Copy the stack frame to the Saved frame */ + /* Copy the "original" frame to the "outermost" frame */ .rept 5 pushq 11*8(%rsp) .endr /* Everything up to here is safe from nested NMIs */ +#ifdef CONFIG_DEBUG_ENTRY + /* + * For ease of testing, unmask NMIs right away. Disabled by + * default because IRET is very expensive. + */ + pushq $0 /* SS */ + pushq %rsp /* RSP (minus 8 because of the previous push) */ + addq $8, (%rsp) /* Fix up RSP */ + pushfq /* RFLAGS */ + pushq $__KERNEL_CS /* CS */ + pushq $1f /* RIP */ + INTERRUPT_RETURN /* continues at repeat_nmi below */ +1: +#endif + +repeat_nmi: /* * If there was a nested NMI, the first NMI's iret will return * here. But NMIs are still enabled and we can take another @@ -1381,16 +1478,20 @@ first_nmi: * it will just return, as we are about to repeat an NMI anyway. * This makes it safe to copy to the stack frame that a nested * NMI will update. + * + * RSP is pointing to "outermost RIP". gsbase is unknown, but, if + * we're repeating an NMI, gsbase has the same value that it had on + * the first iteration. paranoid_entry will load the kernel + * gsbase if needed before we call do_nmi. "NMI executing" + * is zero. */ -repeat_nmi: + movq $1, 10*8(%rsp) /* Set "NMI executing". */ + /* - * Update the stack variable to say we are still in NMI (the update - * is benign for the non-repeat case, where 1 was pushed just above - * to this very stack slot). + * Copy the "outermost" frame to the "iret" frame. NMIs that nest + * here must not modify the "iret" frame while we're writing to + * it or it will end up containing garbage. */ - movq $1, 10*8(%rsp) - - /* Make another copy, this one may be modified by nested NMIs */ addq $(10*8), %rsp .rept 5 pushq -6*8(%rsp) @@ -1399,9 +1500,9 @@ repeat_nmi: end_repeat_nmi: /* - * Everything below this point can be preempted by a nested - * NMI if the first NMI took an exception and reset our iret stack - * so that we repeat another NMI. + * Everything below this point can be preempted by a nested NMI. + * If this happens, then the inner NMI will change the "iret" + * frame to point back to repeat_nmi. */ pushq $-1 /* ORIG_RAX: no syscall to restart */ ALLOC_PT_GPREGS_ON_STACK @@ -1415,28 +1516,11 @@ end_repeat_nmi: */ call paranoid_entry - /* - * Save off the CR2 register. If we take a page fault in the NMI then - * it could corrupt the CR2 value. If the NMI preempts a page fault - * handler before it was able to read the CR2 register, and then the - * NMI itself takes a page fault, the page fault that was preempted - * will read the information from the NMI page fault and not the - * origin fault. Save it off and restore it if it changes. - * Use the r12 callee-saved register. - */ - movq %cr2, %r12 - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp, %rdi movq $-1, %rsi call do_nmi - /* Did the NMI take a page fault? Restore cr2 if it did */ - movq %cr2, %rcx - cmpq %rcx, %r12 - je 1f - movq %r12, %cr2 -1: testl %ebx, %ebx /* swapgs needed? */ jnz nmi_restore nmi_swapgs: @@ -1444,11 +1528,26 @@ nmi_swapgs: nmi_restore: RESTORE_EXTRA_REGS RESTORE_C_REGS - /* Pop the extra iret frame at once */ + + /* Point RSP at the "iret" frame. */ REMOVE_PT_GPREGS_FROM_STACK 6*8 - /* Clear the NMI executing stack variable */ - movq $0, 5*8(%rsp) + /* + * Clear "NMI executing". Set DF first so that we can easily + * distinguish the remaining code between here and IRET from + * the SYSCALL entry and exit paths. On a native kernel, we + * could just inspect RIP, but, on paravirt kernels, + * INTERRUPT_RETURN can translate into a jump into a + * hypercall page. + */ + std + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + + /* + * INTERRUPT_RETURN reads the "iret" frame and exits the NMI + * stack in a single instruction. We are returning to kernel + * mode, so this cannot result in a fault. + */ INTERRUPT_RETURN END(nmi) diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4dd1f2d770af..aeac434c9feb 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -9,3 +9,4 @@ generic-y += cputime.h generic-y += dma-contiguous.h generic-y += early_ioremap.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 0637826292de..c49c5173158e 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -189,6 +189,7 @@ union fpregs_state { struct fxregs_state fxsave; struct swregs_state soft; struct xregs_state xsave; + u8 __padding[PAGE_SIZE]; }; /* @@ -198,40 +199,6 @@ union fpregs_state { */ struct fpu { /* - * @state: - * - * In-memory copy of all FPU registers that we save/restore - * over context switches. If the task is using the FPU then - * the registers in the FPU are more recent than this state - * copy. If the task context-switches away then they get - * saved here and represent the FPU state. - * - * After context switches there may be a (short) time period - * during which the in-FPU hardware registers are unchanged - * and still perfectly match this state, if the tasks - * scheduled afterwards are not using the FPU. - * - * This is the 'lazy restore' window of optimization, which - * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. - * - * We detect whether a subsequent task uses the FPU via setting - * CR0::TS to 1, which causes any FPU use to raise a #NM fault. - * - * During this window, if the task gets scheduled again, we - * might be able to skip having to do a restore from this - * memory buffer to the hardware registers - at the cost of - * incurring the overhead of #NM fault traps. - * - * Note that on modern CPUs that support the XSAVEOPT (or other - * optimized XSAVE instructions), we don't use #NM traps anymore, - * as the hardware can track whether FPU registers need saving - * or not. On such CPUs we activate the non-lazy ('eagerfpu') - * logic, which unconditionally saves/restores all FPU state - * across context switches. (if FPU state exists.) - */ - union fpregs_state state; - - /* * @last_cpu: * * Records the last CPU on which this context was loaded into @@ -288,6 +255,43 @@ struct fpu { * deal with bursty apps that only use the FPU for a short time: */ unsigned char counter; + /* + * @state: + * + * In-memory copy of all FPU registers that we save/restore + * over context switches. If the task is using the FPU then + * the registers in the FPU are more recent than this state + * copy. If the task context-switches away then they get + * saved here and represent the FPU state. + * + * After context switches there may be a (short) time period + * during which the in-FPU hardware registers are unchanged + * and still perfectly match this state, if the tasks + * scheduled afterwards are not using the FPU. + * + * This is the 'lazy restore' window of optimization, which + * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. + * + * We detect whether a subsequent task uses the FPU via setting + * CR0::TS to 1, which causes any FPU use to raise a #NM fault. + * + * During this window, if the task gets scheduled again, we + * might be able to skip having to do a restore from this + * memory buffer to the hardware registers - at the cost of + * incurring the overhead of #NM fault traps. + * + * Note that on modern CPUs that support the XSAVEOPT (or other + * optimized XSAVE instructions), we don't use #NM traps anymore, + * as the hardware can track whether FPU registers need saving + * or not. On such CPUs we activate the non-lazy ('eagerfpu') + * logic, which unconditionally saves/restores all FPU state + * across context switches. (if FPU state exists.) + */ + union fpregs_state state; + /* + * WARNING: 'state' is dynamically-sized. Do not put + * anything after it here. + */ }; #endif /* _ASM_X86_FPU_H */ diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h index 200ec2e7821d..cd0310e186f4 100644 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -25,36 +25,9 @@ #if IS_ENABLED(CONFIG_INTEL_PMC_IPC) -/* - * intel_pmc_ipc_simple_command - * @cmd: command - * @sub: sub type - */ int intel_pmc_ipc_simple_command(int cmd, int sub); - -/* - * intel_pmc_ipc_raw_cmd - * @cmd: command - * @sub: sub type - * @in: input data - * @inlen: input length in bytes - * @out: output data - * @outlen: output length in dwords - * @sptr: data writing to SPTR register - * @dptr: data writing to DPTR register - */ int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen, u32 dptr, u32 sptr); - -/* - * intel_pmc_ipc_command - * @cmd: command - * @sub: sub type - * @in: input data - * @inlen: input length in bytes - * @out: output data - * @outlen: output length in dwords - */ int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2a7f5d782c33..c12e845f59e6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -252,6 +252,11 @@ struct kvm_pio_request { int size; }; +struct rsvd_bits_validate { + u64 rsvd_bits_mask[2][4]; + u64 bad_mt_xwr; +}; + /* * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level * 32-bit). The kvm_mmu structure abstracts the details of the current mmu @@ -289,8 +294,15 @@ struct kvm_mmu { u64 *pae_root; u64 *lm_root; - u64 rsvd_bits_mask[2][4]; - u64 bad_mt_xwr; + + /* + * check zero bits on shadow page table entries, these + * bits include not only hardware reserved bits but also + * the bits spte never used. + */ + struct rsvd_bits_validate shadow_zero_check; + + struct rsvd_bits_validate guest_rsvd_check; /* * Bitmap: bit set = last pte in walk @@ -358,6 +370,11 @@ struct kvm_mtrr { struct list_head head; }; +/* Hyper-V per vcpu emulation context */ +struct kvm_vcpu_hv { + u64 hv_vapic; +}; + struct kvm_vcpu_arch { /* * rip and regs accesses must go through @@ -514,8 +531,7 @@ struct kvm_vcpu_arch { /* used for guest single stepping over the given code position */ unsigned long singlestep_rip; - /* fields used by HYPER-V emulation */ - u64 hv_vapic; + struct kvm_vcpu_hv hyperv; cpumask_var_t wbinvd_dirty_mask; @@ -586,6 +602,17 @@ struct kvm_apic_map { struct kvm_lapic *logical_map[16][16]; }; +/* Hyper-V emulation context */ +struct kvm_hv { + u64 hv_guest_os_id; + u64 hv_hypercall; + u64 hv_tsc_page; + + /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ + u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; + u64 hv_crash_ctl; +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -604,6 +631,8 @@ struct kvm_arch { bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA atomic_t noncoherent_dma_count; +#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE + atomic_t assigned_device_count; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; @@ -643,16 +672,14 @@ struct kvm_arch { /* reads protected by irq_srcu, writes by irq_lock */ struct hlist_head mask_notifier_list; - /* fields used by HYPER-V emulation */ - u64 hv_guest_os_id; - u64 hv_hypercall; - u64 hv_tsc_page; + struct kvm_hv hyperv; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; #endif bool boot_vcpu_runs_old_kvmclock; + u32 bsp_vcpu_id; u64 disabled_quirks; }; @@ -1201,5 +1228,7 @@ int __x86_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); int x86_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); +bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); +bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/mm-arch-hooks.h b/arch/x86/include/asm/mm-arch-hooks.h deleted file mode 100644 index 4e881a342236..000000000000 --- a/arch/x86/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_X86_MM_ARCH_HOOKS_H -#define _ASM_X86_MM_ARCH_HOOKS_H - -#endif /* _ASM_X86_MM_ARCH_HOOKS_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 5e8daee7c5c9..804a3a6030ca 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -23,7 +23,7 @@ extern struct static_key rdpmc_always_available; static inline void load_mm_cr4(struct mm_struct *mm) { - if (static_key_true(&rdpmc_always_available) || + if (static_key_false(&rdpmc_always_available) || atomic_read(&mm->context.perf_rdpmc_allowed)) cr4_set_bits(X86_CR4_PCE); else diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 43e6519df0d5..944f1785ed0d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -390,9 +390,6 @@ struct thread_struct { #endif unsigned long gs; - /* Floating point and extended processor state */ - struct fpu fpu; - /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ @@ -418,6 +415,13 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; + + /* Floating point and extended processor state */ + struct fpu fpu; + /* + * WARNING: 'fpu' is dynamically-sized. It *MUST* be at + * the end. + */ }; /* diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index da772edd19ab..448b7ca61aee 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -47,6 +47,7 @@ #define CPU_BASED_MOV_DR_EXITING 0x00800000 #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 #define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 #define CPU_BASED_USE_MSR_BITMAPS 0x10000000 #define CPU_BASED_MONITOR_EXITING 0x20000000 #define CPU_BASED_PAUSE_EXITING 0x40000000 @@ -367,29 +368,29 @@ enum vmcs_field { #define TYPE_PHYSICAL_APIC_EVENT (10 << 12) #define TYPE_PHYSICAL_APIC_INST (15 << 12) -/* segment AR */ -#define SEGMENT_AR_L_MASK (1 << 13) - -#define AR_TYPE_ACCESSES_MASK 1 -#define AR_TYPE_READABLE_MASK (1 << 1) -#define AR_TYPE_WRITEABLE_MASK (1 << 2) -#define AR_TYPE_CODE_MASK (1 << 3) -#define AR_TYPE_MASK 0x0f -#define AR_TYPE_BUSY_64_TSS 11 -#define AR_TYPE_BUSY_32_TSS 11 -#define AR_TYPE_BUSY_16_TSS 3 -#define AR_TYPE_LDT 2 - -#define AR_UNUSABLE_MASK (1 << 16) -#define AR_S_MASK (1 << 4) -#define AR_P_MASK (1 << 7) -#define AR_L_MASK (1 << 13) -#define AR_DB_MASK (1 << 14) -#define AR_G_MASK (1 << 15) -#define AR_DPL_SHIFT 5 -#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3) - -#define AR_RESERVD_MASK 0xfffe0f00 +/* segment AR in VMCS -- these are different from what LAR reports */ +#define VMX_SEGMENT_AR_L_MASK (1 << 13) + +#define VMX_AR_TYPE_ACCESSES_MASK 1 +#define VMX_AR_TYPE_READABLE_MASK (1 << 1) +#define VMX_AR_TYPE_WRITEABLE_MASK (1 << 2) +#define VMX_AR_TYPE_CODE_MASK (1 << 3) +#define VMX_AR_TYPE_MASK 0x0f +#define VMX_AR_TYPE_BUSY_64_TSS 11 +#define VMX_AR_TYPE_BUSY_32_TSS 11 +#define VMX_AR_TYPE_BUSY_16_TSS 3 +#define VMX_AR_TYPE_LDT 2 + +#define VMX_AR_UNUSABLE_MASK (1 << 16) +#define VMX_AR_S_MASK (1 << 4) +#define VMX_AR_P_MASK (1 << 7) +#define VMX_AR_L_MASK (1 << 13) +#define VMX_AR_DB_MASK (1 << 14) +#define VMX_AR_G_MASK (1 << 15) +#define VMX_AR_DPL_SHIFT 5 +#define VMX_AR_DPL(ar) (((ar) >> VMX_AR_DPL_SHIFT) & 3) + +#define VMX_AR_RESERVD_MASK 0xfffe0f00 #define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0) #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1) diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 8fba544e9cc4..f36d56bd7632 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -108,6 +108,8 @@ #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4) /* Support for a virtual guest idle state is available */ #define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5) +/* Guest crash data handler available */ +#define HV_X64_GUEST_CRASH_MSR_AVAILABLE (1 << 10) /* * Implementation recommendations. Indicates which behaviors the hypervisor diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index a4ae82eb82aa..cd54147cb365 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -354,7 +354,7 @@ struct kvm_xcrs { struct kvm_sync_regs { }; -#define KVM_QUIRK_LINT0_REENABLED (1 << 0) -#define KVM_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) +#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 1fe92181ee9e..37fee272618f 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -58,6 +58,7 @@ #define EXIT_REASON_INVALID_STATE 33 #define EXIT_REASON_MSR_LOAD_FAIL 34 #define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_TRAP_FLAG 37 #define EXIT_REASON_MONITOR_INSTRUCTION 39 #define EXIT_REASON_PAUSE_INSTRUCTION 40 #define EXIT_REASON_MCE_DURING_VMENTRY 41 @@ -106,6 +107,7 @@ { EXIT_REASON_MSR_READ, "MSR_READ" }, \ { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \ { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 32826791e675..0b39173dd971 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -4,6 +4,8 @@ #include <asm/fpu/internal.h> #include <asm/tlbflush.h> +#include <linux/sched.h> + /* * Initialize the TS bit in CR0 according to the style of context-switches * we are using: @@ -136,6 +138,43 @@ static void __init fpu__init_system_generic(void) unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); +/* Enforce that 'MEMBER' is the last field of 'TYPE': */ +#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ + BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER)) + +/* + * We append the 'struct fpu' to the task_struct: + */ +static void __init fpu__init_task_struct_size(void) +{ + int task_size = sizeof(struct task_struct); + + /* + * Subtract off the static size of the register state. + * It potentially has a bunch of padding. + */ + task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state); + + /* + * Add back the dynamically-calculated register state + * size. + */ + task_size += xstate_size; + + /* + * We dynamically size 'struct fpu', so we require that + * it be at the end of 'thread_struct' and that + * 'thread_struct' be at the end of 'task_struct'. If + * you hit a compile error here, check the structure to + * see if something got added to the end. + */ + CHECK_MEMBER_AT_END_OF(struct fpu, state); + CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu); + CHECK_MEMBER_AT_END_OF(struct task_struct, thread); + + arch_task_struct_size = task_size; +} + /* * Set up the xstate_size based on the legacy FPU context size. * @@ -287,6 +326,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_generic(); fpu__init_system_xstate_size_legacy(); fpu__init_system_xstate(); + fpu__init_task_struct_size(); fpu__init_system_ctx_switch(); } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index c3e985d1751c..d05bd2e2ee91 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs) NOKPROBE_SYMBOL(default_do_nmi); /* - * NMIs can hit breakpoints which will cause it to lose its - * NMI context with the CPU when the breakpoint does an iret. - */ -#ifdef CONFIG_X86_32 -/* - * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C (preventing nested - * NMIs if an NMI takes a trap). Simply have 3 states the NMI - * can be in: + * NMIs can page fault or hit breakpoints which will cause it to lose + * its NMI context with the CPU when the breakpoint or page fault does an IRET. + * + * As a result, NMIs can nest if NMIs get unmasked due an IRET during + * NMI processing. On x86_64, the asm glue protects us from nested NMIs + * if the outer NMI came from kernel mode, but we can still nest if the + * outer NMI came from user mode. + * + * To handle these nested NMIs, we have three states: * * 1) not running * 2) executing @@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi); * (Note, the latch is binary, thus multiple NMIs triggering, * when one is running, are ignored. Only one NMI is restarted.) * - * If an NMI hits a breakpoint that executes an iret, another - * NMI can preempt it. We do not want to allow this new NMI - * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the exit of the first NMI will - * perform a dec_return, if the result is zero (NOT_RUNNING), then - * it will simply exit the NMI handler. If not, the dec_return - * would have set the state to NMI_EXECUTING (what we want it to - * be when we are running). In this case, we simply jump back - * to rerun the NMI handler again, and restart the 'latched' NMI. + * If an NMI executes an iret, another NMI can preempt it. We do not + * want to allow this new NMI to run, but we want to execute it when the + * first one finishes. We set the state to "latched", and the exit of + * the first NMI will perform a dec_return, if the result is zero + * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the + * dec_return would have set the state to NMI_EXECUTING (what we want it + * to be when we are running). In this case, we simply jump back to + * rerun the NMI handler again, and restart the 'latched' NMI. * * No trap (breakpoint or page fault) should be hit before nmi_restart, * thus there is no race between the first check of state for NOT_RUNNING @@ -461,49 +460,36 @@ enum nmi_states { static DEFINE_PER_CPU(enum nmi_states, nmi_state); static DEFINE_PER_CPU(unsigned long, nmi_cr2); -#define nmi_nesting_preprocess(regs) \ - do { \ - if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ - this_cpu_write(nmi_state, NMI_LATCHED); \ - return; \ - } \ - this_cpu_write(nmi_state, NMI_EXECUTING); \ - this_cpu_write(nmi_cr2, read_cr2()); \ - } while (0); \ - nmi_restart: - -#define nmi_nesting_postprocess() \ - do { \ - if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ - write_cr2(this_cpu_read(nmi_cr2)); \ - if (this_cpu_dec_return(nmi_state)) \ - goto nmi_restart; \ - } while (0) -#else /* x86_64 */ +#ifdef CONFIG_X86_64 /* - * In x86_64 things are a bit more difficult. This has the same problem - * where an NMI hitting a breakpoint that calls iret will remove the - * NMI context, allowing a nested NMI to enter. What makes this more - * difficult is that both NMIs and breakpoints have their own stack. - * When a new NMI or breakpoint is executed, the stack is set to a fixed - * point. If an NMI is nested, it will have its stack set at that same - * fixed address that the first NMI had, and will start corrupting the - * stack. This is handled in entry_64.S, but the same problem exists with - * the breakpoint stack. + * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without + * some care, the inner breakpoint will clobber the outer breakpoint's + * stack. * - * If a breakpoint is being processed, and the debug stack is being used, - * if an NMI comes in and also hits a breakpoint, the stack pointer - * will be set to the same fixed address as the breakpoint that was - * interrupted, causing that stack to be corrupted. To handle this case, - * check if the stack that was interrupted is the debug stack, and if - * so, change the IDT so that new breakpoints will use the current stack - * and not switch to the fixed address. On return of the NMI, switch back - * to the original IDT. + * If a breakpoint is being processed, and the debug stack is being + * used, if an NMI comes in and also hits a breakpoint, the stack + * pointer will be set to the same fixed address as the breakpoint that + * was interrupted, causing that stack to be corrupted. To handle this + * case, check if the stack that was interrupted is the debug stack, and + * if so, change the IDT so that new breakpoints will use the current + * stack and not switch to the fixed address. On return of the NMI, + * switch back to the original IDT. */ static DEFINE_PER_CPU(int, update_debug_stack); +#endif -static inline void nmi_nesting_preprocess(struct pt_regs *regs) +dotraplinkage notrace void +do_nmi(struct pt_regs *regs, long error_code) { + if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { + this_cpu_write(nmi_state, NMI_LATCHED); + return; + } + this_cpu_write(nmi_state, NMI_EXECUTING); + this_cpu_write(nmi_cr2, read_cr2()); +nmi_restart: + +#ifdef CONFIG_X86_64 /* * If we interrupted a breakpoint, it is possible that * the nmi handler will have breakpoints too. We need to @@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) debug_stack_set_zero(); this_cpu_write(update_debug_stack, 1); } -} - -static inline void nmi_nesting_postprocess(void) -{ - if (unlikely(this_cpu_read(update_debug_stack))) { - debug_stack_reset(); - this_cpu_write(update_debug_stack, 0); - } -} #endif -dotraplinkage notrace void -do_nmi(struct pt_regs *regs, long error_code) -{ - nmi_nesting_preprocess(regs); - nmi_enter(); inc_irq_stat(__nmi_count); @@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code) nmi_exit(); - /* On i386, may loop back to preprocess */ - nmi_nesting_postprocess(); +#ifdef CONFIG_X86_64 + if (unlikely(this_cpu_read(update_debug_stack))) { + debug_stack_reset(); + this_cpu_write(update_debug_stack, 0); + } +#endif + + if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) + write_cr2(this_cpu_read(nmi_cr2)); + if (this_cpu_dec_return(nmi_state)) + goto nmi_restart; } NOKPROBE_SYMBOL(do_nmi); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9cad694ed7c4..397688beed4b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - *dst = *src; + memcpy(dst, src, arch_task_struct_size); return fpu__copy(&dst->thread.fpu, &src->thread.fpu); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d3010aa79daf..b1f3ed9c7a9e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -992,8 +992,17 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) common_cpu_up(cpu, tidle); + /* + * We have to walk the irq descriptors to setup the vector + * space for the cpu which comes online. Prevent irq + * alloc/free across the bringup. + */ + irq_lock_sparse(); + err = do_boot_cpu(apicid, cpu, tidle); + if (err) { + irq_unlock_sparse(); pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); return -EIO; } @@ -1011,6 +1020,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) touch_nmi_watchdog(); } + irq_unlock_sparse(); + return 0; } diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 67d215cb8953..a1ff508bb423 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -12,7 +12,9 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ - i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o + i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ + hyperv.o + kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o kvm-intel-y += vmx.o pmu_intel.o kvm-amd-y += svm.o pmu_amd.o diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 64dd46793099..2fbea2544f24 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -98,6 +98,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu); + if (vcpu->arch.eager_fpu) + kvm_x86_ops->fpu_activate(vcpu); /* * The existing code assumes virtual address is 48-bit in the canonical diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c new file mode 100644 index 000000000000..a8160d2ae362 --- /dev/null +++ b/arch/x86/kvm/hyperv.c @@ -0,0 +1,377 @@ +/* + * KVM Microsoft Hyper-V emulation + * + * derived from arch/x86/kvm/x86.c + * + * Copyright (C) 2006 Qumranet, Inc. + * Copyright (C) 2008 Qumranet, Inc. + * Copyright IBM Corporation, 2008 + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> + * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * Amit Shah <amit.shah@qumranet.com> + * Ben-Ami Yassour <benami@il.ibm.com> + * Andrey Smetanin <asmetanin@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "x86.h" +#include "lapic.h" +#include "hyperv.h" + +#include <linux/kvm_host.h> +#include <trace/events/kvm.h> + +#include "trace.h" + +static bool kvm_hv_msr_partition_wide(u32 msr) +{ + bool r = false; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + case HV_X64_MSR_HYPERCALL: + case HV_X64_MSR_REFERENCE_TSC: + case HV_X64_MSR_TIME_REF_COUNT: + case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + r = true; + break; + } + + return r; +} + +static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, + u32 index, u64 *pdata) +{ + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + + if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + return -EINVAL; + + *pdata = hv->hv_crash_param[index]; + return 0; +} + +static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) +{ + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + + *pdata = hv->hv_crash_ctl; + return 0; +} + +static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host) +{ + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + + if (host) + hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY; + + if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) { + + vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n", + hv->hv_crash_param[0], + hv->hv_crash_param[1], + hv->hv_crash_param[2], + hv->hv_crash_param[3], + hv->hv_crash_param[4]); + + /* Send notification about crash to user space */ + kvm_make_request(KVM_REQ_HV_CRASH, vcpu); + } + + return 0; +} + +static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, + u32 index, u64 data) +{ + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + + if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + return -EINVAL; + + hv->hv_crash_param[index] = data; + return 0; +} + +static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, + bool host) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_hv *hv = &kvm->arch.hyperv; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + hv->hv_guest_os_id = data; + /* setting guest os id to zero disables hypercall page */ + if (!hv->hv_guest_os_id) + hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; + break; + case HV_X64_MSR_HYPERCALL: { + u64 gfn; + unsigned long addr; + u8 instructions[4]; + + /* if guest os id is not set hypercall should remain disabled */ + if (!hv->hv_guest_os_id) + break; + if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { + hv->hv_hypercall = data; + break; + } + gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; + addr = gfn_to_hva(kvm, gfn); + if (kvm_is_error_hva(addr)) + return 1; + kvm_x86_ops->patch_hypercall(vcpu, instructions); + ((unsigned char *)instructions)[3] = 0xc3; /* ret */ + if (__copy_to_user((void __user *)addr, instructions, 4)) + return 1; + hv->hv_hypercall = data; + mark_page_dirty(kvm, gfn); + break; + } + case HV_X64_MSR_REFERENCE_TSC: { + u64 gfn; + HV_REFERENCE_TSC_PAGE tsc_ref; + + memset(&tsc_ref, 0, sizeof(tsc_ref)); + hv->hv_tsc_page = data; + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + break; + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + if (kvm_write_guest( + kvm, + gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT, + &tsc_ref, sizeof(tsc_ref))) + return 1; + mark_page_dirty(kvm, gfn); + break; + } + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + return kvm_hv_msr_set_crash_data(vcpu, + msr - HV_X64_MSR_CRASH_P0, + data); + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_msr_set_crash_ctl(vcpu, data, host); + default: + vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", + msr, data); + return 1; + } + return 0; +} + +static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; + + switch (msr) { + case HV_X64_MSR_APIC_ASSIST_PAGE: { + u64 gfn; + unsigned long addr; + + if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { + hv->hv_vapic = data; + if (kvm_lapic_enable_pv_eoi(vcpu, 0)) + return 1; + break; + } + gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; + addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); + if (kvm_is_error_hva(addr)) + return 1; + if (__clear_user((void __user *)addr, PAGE_SIZE)) + return 1; + hv->hv_vapic = data; + kvm_vcpu_mark_page_dirty(vcpu, gfn); + if (kvm_lapic_enable_pv_eoi(vcpu, + gfn_to_gpa(gfn) | KVM_MSR_ENABLED)) + return 1; + break; + } + case HV_X64_MSR_EOI: + return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); + case HV_X64_MSR_ICR: + return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); + case HV_X64_MSR_TPR: + return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); + default: + vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", + msr, data); + return 1; + } + + return 0; +} + +static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + u64 data = 0; + struct kvm *kvm = vcpu->kvm; + struct kvm_hv *hv = &kvm->arch.hyperv; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + data = hv->hv_guest_os_id; + break; + case HV_X64_MSR_HYPERCALL: + data = hv->hv_hypercall; + break; + case HV_X64_MSR_TIME_REF_COUNT: { + data = + div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); + break; + } + case HV_X64_MSR_REFERENCE_TSC: + data = hv->hv_tsc_page; + break; + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + return kvm_hv_msr_get_crash_data(vcpu, + msr - HV_X64_MSR_CRASH_P0, + pdata); + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_msr_get_crash_ctl(vcpu, pdata); + default: + vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); + return 1; + } + + *pdata = data; + return 0; +} + +static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + u64 data = 0; + struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; + + switch (msr) { + case HV_X64_MSR_VP_INDEX: { + int r; + struct kvm_vcpu *v; + + kvm_for_each_vcpu(r, v, vcpu->kvm) { + if (v == vcpu) { + data = r; + break; + } + } + break; + } + case HV_X64_MSR_EOI: + return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); + case HV_X64_MSR_ICR: + return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); + case HV_X64_MSR_TPR: + return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); + case HV_X64_MSR_APIC_ASSIST_PAGE: + data = hv->hv_vapic; + break; + default: + vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); + return 1; + } + *pdata = data; + return 0; +} + +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) +{ + if (kvm_hv_msr_partition_wide(msr)) { + int r; + + mutex_lock(&vcpu->kvm->lock); + r = kvm_hv_set_msr_pw(vcpu, msr, data, host); + mutex_unlock(&vcpu->kvm->lock); + return r; + } else + return kvm_hv_set_msr(vcpu, msr, data); +} + +int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +{ + if (kvm_hv_msr_partition_wide(msr)) { + int r; + + mutex_lock(&vcpu->kvm->lock); + r = kvm_hv_get_msr_pw(vcpu, msr, pdata); + mutex_unlock(&vcpu->kvm->lock); + return r; + } else + return kvm_hv_get_msr(vcpu, msr, pdata); +} + +bool kvm_hv_hypercall_enabled(struct kvm *kvm) +{ + return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; +} + +int kvm_hv_hypercall(struct kvm_vcpu *vcpu) +{ + u64 param, ingpa, outgpa, ret; + uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; + bool fast, longmode; + + /* + * hypercall generates UD from non zero cpl and real mode + * per HYPER-V spec + */ + if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 0; + } + + longmode = is_64_bit_mode(vcpu); + + if (!longmode) { + param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | + (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); + ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | + (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); + outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | + (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); + } +#ifdef CONFIG_X86_64 + else { + param = kvm_register_read(vcpu, VCPU_REGS_RCX); + ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); + outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); + } +#endif + + code = param & 0xffff; + fast = (param >> 16) & 0x1; + rep_cnt = (param >> 32) & 0xfff; + rep_idx = (param >> 48) & 0xfff; + + trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); + + switch (code) { + case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: + kvm_vcpu_on_spin(vcpu); + break; + default: + res = HV_STATUS_INVALID_HYPERCALL_CODE; + break; + } + + ret = res | (((u64)rep_done & 0xfff) << 32); + if (longmode) { + kvm_register_write(vcpu, VCPU_REGS_RAX, ret); + } else { + kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); + kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); + } + + return 1; +} diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h new file mode 100644 index 000000000000..c7bce559f67b --- /dev/null +++ b/arch/x86/kvm/hyperv.h @@ -0,0 +1,32 @@ +/* + * KVM Microsoft Hyper-V emulation + * + * derived from arch/x86/kvm/x86.c + * + * Copyright (C) 2006 Qumranet, Inc. + * Copyright (C) 2008 Qumranet, Inc. + * Copyright IBM Corporation, 2008 + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> + * + * Authors: + * Avi Kivity <avi@qumranet.com> + * Yaniv Kamay <yaniv@qumranet.com> + * Amit Shah <amit.shah@qumranet.com> + * Ben-Ami Yassour <benami@il.ibm.com> + * Andrey Smetanin <asmetanin@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef __ARCH_X86_KVM_HYPERV_H__ +#define __ARCH_X86_KVM_HYPERV_H__ + +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); +int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); +bool kvm_hv_hypercall_enabled(struct kvm *kvm); +int kvm_hv_hypercall(struct kvm_vcpu *vcpu); + +#endif diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index fef922ff2635..7cc2360f1848 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -651,15 +651,10 @@ fail_unlock: return NULL; } -void kvm_destroy_pic(struct kvm *kvm) +void kvm_destroy_pic(struct kvm_pic *vpic) { - struct kvm_pic *vpic = kvm->arch.vpic; - - if (vpic) { - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr); - kvm->arch.vpic = NULL; - kfree(vpic); - } + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); + kfree(vpic); } diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c index 7dbced309ddb..5c520ebf6343 100644 --- a/arch/x86/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev) goto out_unmap; } + kvm_arch_start_assignment(kvm); pci_set_dev_assigned(pdev); dev_info(&pdev->dev, "kvm assign device\n"); @@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev) iommu_detach_device(domain, &pdev->dev); pci_clear_dev_assigned(pdev); + kvm_arch_end_assignment(kvm); dev_info(&pdev->dev, "kvm deassign device\n"); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ad68c73008c5..3d782a2c336a 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -74,7 +74,7 @@ struct kvm_pic { }; struct kvm_pic *kvm_create_pic(struct kvm *kvm); -void kvm_destroy_pic(struct kvm *kvm); +void kvm_destroy_pic(struct kvm_pic *vpic); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); @@ -85,11 +85,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) static inline int irqchip_in_kernel(struct kvm *kvm) { - int ret; + struct kvm_pic *vpic = pic_irqchip(kvm); - ret = (pic_irqchip(kvm) != NULL); + /* Read vpic before kvm->irq_routing. */ smp_rmb(); - return ret; + return vpic != NULL; } void kvm_pic_reset(struct kvm_kpic_state *s); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 954e98a8c2e3..9a3e342e3cda 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1595,7 +1595,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) for (i = 0; i < APIC_LVT_NUM; i++) apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); apic_update_lvtt(apic); - if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED)) + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) apic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0)); @@ -1900,8 +1900,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; - kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, - sizeof(u32)); + if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32))) + return; apic_set_tpr(vcpu->arch.apic, data & 0xff); } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 71952748222a..764037991d26 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -91,7 +91,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) { - return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; + return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; } int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f807496b62c2..fb16a8ea3dee 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -357,12 +357,6 @@ static u64 __get_spte_lockless(u64 *sptep) { return ACCESS_ONCE(*sptep); } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - /* It is valid if the spte is zapped. */ - return spte == 0ull; -} #else union split_spte { struct { @@ -478,23 +472,6 @@ retry: return spte.spte; } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - union split_spte sspte = (union split_spte)spte; - u32 high_mmio_mask = shadow_mmio_mask >> 32; - - /* It is valid if the spte is zapped. */ - if (spte == 0ull) - return true; - - /* It is valid if the spte is being zapped. */ - if (sspte.spte_low == 0ull && - (sspte.spte_high & high_mmio_mask) == high_mmio_mask) - return true; - - return false; -} #endif static bool spte_is_locklessly_modifiable(u64 spte) @@ -2479,6 +2456,14 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, return 0; } +static bool kvm_is_mmio_pfn(pfn_t pfn) +{ + if (pfn_valid(pfn)) + return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)); + + return true; +} + static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, int level, gfn_t gfn, pfn_t pfn, bool speculative, @@ -2506,7 +2491,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= PT_PAGE_SIZE_MASK; if (tdp_enabled) spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, - kvm_is_reserved_pfn(pfn)); + kvm_is_mmio_pfn(pfn)); if (host_writable) spte |= SPTE_HOST_WRITEABLE; @@ -3283,54 +3268,89 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); } -static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) +static bool +__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level) { - if (direct) - return vcpu_match_mmio_gpa(vcpu, addr); + int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f; - return vcpu_match_mmio_gva(vcpu, addr); + return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) | + ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0); } +static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) +{ + return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level); +} -/* - * On direct hosts, the last spte is only allows two states - * for mmio page fault: - * - It is the mmio spte - * - It is zapped or it is being zapped. - * - * This function completely checks the spte when the last spte - * is not the mmio spte. - */ -static bool check_direct_spte_mmio_pf(u64 spte) +static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) { - return __check_direct_spte_mmio_pf(spte); + return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); } -static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) +static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) +{ + if (direct) + return vcpu_match_mmio_gpa(vcpu, addr); + + return vcpu_match_mmio_gva(vcpu, addr); +} + +/* return true if reserved bit is detected on spte. */ +static bool +walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { struct kvm_shadow_walk_iterator iterator; - u64 spte = 0ull; + u64 sptes[PT64_ROOT_LEVEL], spte = 0ull; + int root, leaf; + bool reserved = false; if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) - return spte; + goto exit; walk_shadow_page_lockless_begin(vcpu); - for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) + + for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level; + shadow_walk_okay(&iterator); + __shadow_walk_next(&iterator, spte)) { + leaf = iterator.level; + spte = mmu_spte_get_lockless(iterator.sptep); + + sptes[leaf - 1] = spte; + if (!is_shadow_present_pte(spte)) break; + + reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte, + leaf); + } + walk_shadow_page_lockless_end(vcpu); - return spte; + if (reserved) { + pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n", + __func__, addr); + while (root >= leaf) { + pr_err("------ spte 0x%llx level %d.\n", + sptes[root - 1], root); + root--; + } + } +exit: + *sptep = spte; + return reserved; } int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) { u64 spte; + bool reserved; if (quickly_check_mmio_pf(vcpu, addr, direct)) return RET_MMIO_PF_EMULATE; - spte = walk_shadow_page_get_mmio_spte(vcpu, addr); + reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); + if (unlikely(reserved)) + return RET_MMIO_PF_BUG; if (is_mmio_spte(spte)) { gfn_t gfn = get_mmio_spte_gfn(spte); @@ -3348,13 +3368,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) } /* - * It's ok if the gva is remapped by other cpus on shadow guest, - * it's a BUG if the gfn is not a mmio page. - */ - if (direct && !check_direct_spte_mmio_pf(spte)) - return RET_MMIO_PF_BUG; - - /* * If the page table is zapped by other cpus, let CPU fault again on * the address. */ @@ -3596,19 +3609,21 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gp #include "paging_tmpl.h" #undef PTTYPE -static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, - struct kvm_mmu *context) +static void +__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, + struct rsvd_bits_validate *rsvd_check, + int maxphyaddr, int level, bool nx, bool gbpages, + bool pse) { - int maxphyaddr = cpuid_maxphyaddr(vcpu); u64 exb_bit_rsvd = 0; u64 gbpages_bit_rsvd = 0; u64 nonleaf_bit8_rsvd = 0; - context->bad_mt_xwr = 0; + rsvd_check->bad_mt_xwr = 0; - if (!context->nx) + if (!nx) exb_bit_rsvd = rsvd_bits(63, 63); - if (!guest_cpuid_has_gbpages(vcpu)) + if (!gbpages) gbpages_bit_rsvd = rsvd_bits(7, 7); /* @@ -3618,80 +3633,95 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, if (guest_cpuid_is_amd(vcpu)) nonleaf_bit8_rsvd = rsvd_bits(8, 8); - switch (context->root_level) { + switch (level) { case PT32_ROOT_LEVEL: /* no rsvd bits for 2 level 4K page table entries */ - context->rsvd_bits_mask[0][1] = 0; - context->rsvd_bits_mask[0][0] = 0; - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + rsvd_check->rsvd_bits_mask[0][1] = 0; + rsvd_check->rsvd_bits_mask[0][0] = 0; + rsvd_check->rsvd_bits_mask[1][0] = + rsvd_check->rsvd_bits_mask[0][0]; - if (!is_pse(vcpu)) { - context->rsvd_bits_mask[1][1] = 0; + if (!pse) { + rsvd_check->rsvd_bits_mask[1][1] = 0; break; } if (is_cpuid_PSE36()) /* 36bits PSE 4MB page */ - context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); + rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); else /* 32 bits PSE 4MB page */ - context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); + rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); break; case PT32E_ROOT_LEVEL: - context->rsvd_bits_mask[0][2] = + rsvd_check->rsvd_bits_mask[0][2] = rsvd_bits(maxphyaddr, 63) | rsvd_bits(5, 8) | rsvd_bits(1, 2); /* PDPTE */ - context->rsvd_bits_mask[0][1] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 62); /* PDE */ - context->rsvd_bits_mask[0][0] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 62); /* PTE */ - context->rsvd_bits_mask[1][1] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 62) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + rsvd_check->rsvd_bits_mask[1][0] = + rsvd_check->rsvd_bits_mask[0][0]; break; case PT64_ROOT_LEVEL: - context->rsvd_bits_mask[0][3] = exb_bit_rsvd | - nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); - context->rsvd_bits_mask[0][2] = exb_bit_rsvd | - nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); - context->rsvd_bits_mask[0][1] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[0][3] = exb_bit_rsvd | + nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); - context->rsvd_bits_mask[0][0] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[0][2] = exb_bit_rsvd | + nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); - context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; - context->rsvd_bits_mask[1][2] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[0][1] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[0][0] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[1][3] = + rsvd_check->rsvd_bits_mask[0][3]; + rsvd_check->rsvd_bits_mask[1][2] = exb_bit_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 29); - context->rsvd_bits_mask[1][1] = exb_bit_rsvd | + rsvd_check->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + rsvd_check->rsvd_bits_mask[1][0] = + rsvd_check->rsvd_bits_mask[0][0]; break; } } -static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, - struct kvm_mmu *context, bool execonly) +static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) +{ + __reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check, + cpuid_maxphyaddr(vcpu), context->root_level, + context->nx, guest_cpuid_has_gbpages(vcpu), + is_pse(vcpu)); +} + +static void +__reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, + int maxphyaddr, bool execonly) { - int maxphyaddr = cpuid_maxphyaddr(vcpu); int pte; - context->rsvd_bits_mask[0][3] = + rsvd_check->rsvd_bits_mask[0][3] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 7); - context->rsvd_bits_mask[0][2] = + rsvd_check->rsvd_bits_mask[0][2] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); - context->rsvd_bits_mask[0][1] = + rsvd_check->rsvd_bits_mask[0][1] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(3, 6); - context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); + rsvd_check->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51); /* large page */ - context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; - context->rsvd_bits_mask[1][2] = + rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3]; + rsvd_check->rsvd_bits_mask[1][2] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 29); - context->rsvd_bits_mask[1][1] = + rsvd_check->rsvd_bits_mask[1][1] = rsvd_bits(maxphyaddr, 51) | rsvd_bits(12, 20); - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0]; for (pte = 0; pte < 64; pte++) { int rwx_bits = pte & 7; @@ -3699,10 +3729,64 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, if (mt == 0x2 || mt == 0x3 || mt == 0x7 || rwx_bits == 0x2 || rwx_bits == 0x6 || (rwx_bits == 0x4 && !execonly)) - context->bad_mt_xwr |= (1ull << pte); + rsvd_check->bad_mt_xwr |= (1ull << pte); } } +static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, bool execonly) +{ + __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check, + cpuid_maxphyaddr(vcpu), execonly); +} + +/* + * the page table on host is the shadow page table for the page + * table in guest or amd nested guest, its mmu features completely + * follow the features in guest. + */ +void +reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) +{ + __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, + boot_cpu_data.x86_phys_bits, + context->shadow_root_level, context->nx, + guest_cpuid_has_gbpages(vcpu), is_pse(vcpu)); +} +EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask); + +/* + * the direct page table on host, use as much mmu features as + * possible, however, kvm currently does not do execution-protection. + */ +static void +reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context) +{ + if (guest_cpuid_is_amd(vcpu)) + __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, + boot_cpu_data.x86_phys_bits, + context->shadow_root_level, false, + cpu_has_gbpages, true); + else + __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, + boot_cpu_data.x86_phys_bits, + false); + +} + +/* + * as the comments in reset_shadow_zero_bits_mask() except it + * is the shadow page table for intel nested guest. + */ +static void +reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, + struct kvm_mmu *context, bool execonly) +{ + __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, + boot_cpu_data.x86_phys_bits, execonly); +} + static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, bool ept) { @@ -3881,6 +3965,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) update_permission_bitmask(vcpu, context, false); update_last_pte_bitmap(vcpu, context); + reset_tdp_shadow_zero_bits_mask(vcpu, context); } void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) @@ -3908,6 +3993,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) context->base_role.smap_andnot_wp = smap && !is_write_protection(vcpu); context->base_role.smm = is_smm(vcpu); + reset_shadow_zero_bits_mask(vcpu, context); } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); @@ -3931,6 +4017,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) update_permission_bitmask(vcpu, context, true); reset_rsvds_bits_mask_ept(vcpu, context, execonly); + reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); } EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); @@ -4852,28 +4939,6 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) return nr_mmu_pages; } -int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) -{ - struct kvm_shadow_walk_iterator iterator; - u64 spte; - int nr_sptes = 0; - - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) - return nr_sptes; - - walk_shadow_page_lockless_begin(vcpu); - for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { - sptes[iterator.level-1] = spte; - nr_sptes++; - if (!is_shadow_present_pte(spte)) - break; - } - walk_shadow_page_lockless_end(vcpu); - - return nr_sptes; -} -EXPORT_SYMBOL_GPL(kvm_mmu_get_spte_hierarchy); - void kvm_mmu_destroy(struct kvm_vcpu *vcpu) { kvm_mmu_unload(vcpu); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 398d21c0f6dd..e4202e41d535 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -50,9 +50,11 @@ static inline u64 rsvd_bits(int s, int e) return ((1ULL << (e - s + 1)) - 1) << s; } -int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); +void +reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); + /* * Return values of handle_mmio_page_fault_common: * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index de1d2d8062e2..9e8bf13572e6 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -120,6 +120,16 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state) return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK; } +static u8 mtrr_disabled_type(void) +{ + /* + * Intel SDM 11.11.2.2: all MTRRs are disabled when + * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC + * memory type is applied to all of physical memory. + */ + return MTRR_TYPE_UNCACHABLE; +} + /* * Three terms are used in the following code: * - segment, it indicates the address segments covered by fixed MTRRs. @@ -434,6 +444,8 @@ struct mtrr_iter { /* output fields. */ int mem_type; + /* mtrr is completely disabled? */ + bool mtrr_disabled; /* [start, end) is not fully covered in MTRRs? */ bool partial_map; @@ -549,7 +561,7 @@ static void mtrr_lookup_var_next(struct mtrr_iter *iter) static void mtrr_lookup_start(struct mtrr_iter *iter) { if (!mtrr_is_enabled(iter->mtrr_state)) { - iter->partial_map = true; + iter->mtrr_disabled = true; return; } @@ -563,6 +575,7 @@ static void mtrr_lookup_init(struct mtrr_iter *iter, iter->mtrr_state = mtrr_state; iter->start = start; iter->end = end; + iter->mtrr_disabled = false; iter->partial_map = false; iter->fixed = false; iter->range = NULL; @@ -656,15 +669,19 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) return MTRR_TYPE_WRBACK; } - /* It is not covered by MTRRs. */ - if (iter.partial_map) { - /* - * We just check one page, partially covered by MTRRs is - * impossible. - */ - WARN_ON(type != -1); - type = mtrr_default_type(mtrr_state); - } + if (iter.mtrr_disabled) + return mtrr_disabled_type(); + + /* not contained in any MTRRs. */ + if (type == -1) + return mtrr_default_type(mtrr_state); + + /* + * We just check one page, partially covered by MTRRs is + * impossible. + */ + WARN_ON(iter.partial_map); + return type; } EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type); @@ -689,6 +706,9 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, return false; } + if (iter.mtrr_disabled) + return true; + if (!iter.partial_map) return true; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 0f67d7e24800..736e6ab8784d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -128,14 +128,6 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) *access &= mask; } -static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) -{ - int bit7 = (gpte >> 7) & 1, low6 = gpte & 0x3f; - - return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) | - ((mmu->bad_mt_xwr & (1ull << low6)) != 0); -} - static inline int FNAME(is_present_gpte)(unsigned long pte) { #if PTTYPE != PTTYPE_EPT @@ -172,7 +164,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, u64 gpte) { - if (FNAME(is_rsvd_bits_set)(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) + if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) goto no_present; if (!FNAME(is_present_gpte)(gpte)) @@ -353,8 +345,7 @@ retry_walk: if (unlikely(!FNAME(is_present_gpte)(pte))) goto error; - if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, - walker->level))) { + if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) { errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK; goto error; } diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 886aa25a7131..39b91127ef07 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -133,8 +133,6 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* MSR_K7_PERFCTRn */ pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0); if (pmc) { - if (!msr_info->host_initiated) - data = (s64)data; pmc->counter += data - pmc_read_counter(pmc); return 0; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 602b974a60a6..74d825716f4f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -865,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } +#define MTRR_TYPE_UC_MINUS 7 +#define MTRR2PROTVAL_INVALID 0xff + +static u8 mtrr2protval[8]; + +static u8 fallback_mtrr_type(int mtrr) +{ + /* + * WT and WP aren't always available in the host PAT. Treat + * them as UC and UC- respectively. Everything else should be + * there. + */ + switch (mtrr) + { + case MTRR_TYPE_WRTHROUGH: + return MTRR_TYPE_UNCACHABLE; + case MTRR_TYPE_WRPROT: + return MTRR_TYPE_UC_MINUS; + default: + BUG(); + } +} + +static void build_mtrr2protval(void) +{ + int i; + u64 pat; + + for (i = 0; i < 8; i++) + mtrr2protval[i] = MTRR2PROTVAL_INVALID; + + /* Ignore the invalid MTRR types. */ + mtrr2protval[2] = 0; + mtrr2protval[3] = 0; + + /* + * Use host PAT value to figure out the mapping from guest MTRR + * values to nested page table PAT/PCD/PWT values. We do not + * want to change the host PAT value every time we enter the + * guest. + */ + rdmsrl(MSR_IA32_CR_PAT, pat); + for (i = 0; i < 8; i++) { + u8 mtrr = pat >> (8 * i); + + if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID) + mtrr2protval[mtrr] = __cm_idx2pte(i); + } + + for (i = 0; i < 8; i++) { + if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) { + u8 fallback = fallback_mtrr_type(i); + mtrr2protval[i] = mtrr2protval[fallback]; + BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID); + } + } +} + static __init int svm_hardware_setup(void) { int cpu; @@ -931,6 +989,7 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); + build_mtrr2protval(); return 0; err: @@ -1085,6 +1144,43 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) return target_tsc - tsc; } +static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + + /* Unlike Intel, AMD takes the guest's CR0.CD into account. + * + * AMD does not have IPAT. To emulate it for the case of guests + * with no assigned devices, just set everything to WB. If guests + * have assigned devices, however, we cannot force WB for RAM + * pages only, so use the guest PAT directly. + */ + if (!kvm_arch_has_assigned_device(vcpu->kvm)) + *g_pat = 0x0606060606060606; + else + *g_pat = vcpu->arch.pat; +} + +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +{ + u8 mtrr; + + /* + * 1. MMIO: trust guest MTRR, so same as item 3. + * 2. No passthrough: always map as WB, and force guest PAT to WB as well + * 3. Passthrough: can't guarantee the result, try to trust guest. + */ + if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm)) + return 0; + + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) && + kvm_read_cr0(vcpu) & X86_CR0_CD) + return _PAGE_NOCACHE; + + mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); + return mtrr2protval[mtrr]; +} + static void init_vmcb(struct vcpu_svm *svm, bool init_event) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1180,6 +1276,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = svm->vcpu.arch.pat; + svm_set_guest_pat(svm, &save->g_pat); save->cr3 = 0; save->cr4 = 0; } @@ -1574,13 +1671,10 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (!vcpu->fpu_active) cr0 |= X86_CR0_TS; - /* - * re-enable caching here because the QEMU bios - * does not do it - this results in some delay at - * reboot - */ - if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED)) - cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + + /* These are emulated via page tables. */ + cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + svm->vmcb->save.cr0 = cr0; mark_dirty(svm->vmcb, VMCB_CR); update_cr0_intercept(svm); @@ -2013,6 +2107,7 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; vcpu->arch.mmu.shadow_root_level = get_npt_level(); + reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; } @@ -3254,6 +3349,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_IA32_CR_PAT: + if (npt_enabled) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) + return 1; + vcpu->arch.pat = data; + svm_set_guest_pat(svm, &svm->vmcb->save.g_pat); + mark_dirty(svm->vmcb, VMCB_NPT); + break; + } + /* fall through */ default: return kvm_set_msr_common(vcpu, msr); } @@ -4088,11 +4193,6 @@ static bool svm_has_high_real_mode_segbase(void) return true; } -static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - return 0; -} - static void svm_cpuid_update(struct kvm_vcpu *vcpu) { } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e856dd566f4c..da1590ea43fc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2443,10 +2443,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING | #endif CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | - CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | - CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | - CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW | - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG | + CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | + CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING | + CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; /* * We can allow some features even when not supported by the * hardware. For example, L1 can specify an MSR bitmap - and we @@ -3423,12 +3423,12 @@ static void enter_lmode(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(to_vmx(vcpu)); guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); - if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { + if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) { pr_debug_ratelimited("%s: tss fixup for long mode. \n", __func__); vmcs_write32(GUEST_TR_AR_BYTES, - (guest_tr_ar & ~AR_TYPE_MASK) - | AR_TYPE_BUSY_64_TSS); + (guest_tr_ar & ~VMX_AR_TYPE_MASK) + | VMX_AR_TYPE_BUSY_64_TSS); } vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA); } @@ -3719,7 +3719,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) return 0; else { int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS); - return AR_DPL(ar); + return VMX_AR_DPL(ar); } } @@ -3847,11 +3847,11 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu) if (cs.unusable) return false; - if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) + if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK)) return false; if (!cs.s) return false; - if (cs.type & AR_TYPE_WRITEABLE_MASK) { + if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) { if (cs.dpl > cs_rpl) return false; } else { @@ -3901,7 +3901,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) return false; if (!var.present) return false; - if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) { + if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) { if (var.dpl < rpl) /* DPL < RPL */ return false; } @@ -5759,73 +5759,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); } -static u64 ept_rsvd_mask(u64 spte, int level) -{ - int i; - u64 mask = 0; - - for (i = 51; i > boot_cpu_data.x86_phys_bits; i--) - mask |= (1ULL << i); - - if (level == 4) - /* bits 7:3 reserved */ - mask |= 0xf8; - else if (spte & (1ULL << 7)) - /* - * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively, - * level == 1 if the hypervisor is using the ignored bit 7. - */ - mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE; - else if (level > 1) - /* bits 6:3 reserved */ - mask |= 0x78; - - return mask; -} - -static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, - int level) -{ - printk(KERN_ERR "%s: spte 0x%llx level %d\n", __func__, spte, level); - - /* 010b (write-only) */ - WARN_ON((spte & 0x7) == 0x2); - - /* 110b (write/execute) */ - WARN_ON((spte & 0x7) == 0x6); - - /* 100b (execute-only) and value not supported by logical processor */ - if (!cpu_has_vmx_ept_execute_only()) - WARN_ON((spte & 0x7) == 0x4); - - /* not 000b */ - if ((spte & 0x7)) { - u64 rsvd_bits = spte & ept_rsvd_mask(spte, level); - - if (rsvd_bits != 0) { - printk(KERN_ERR "%s: rsvd_bits = 0x%llx\n", - __func__, rsvd_bits); - WARN_ON(1); - } - - /* bits 5:3 are _not_ reserved for large page or leaf page */ - if ((rsvd_bits & 0x38) == 0) { - u64 ept_mem_type = (spte & 0x38) >> 3; - - if (ept_mem_type == 2 || ept_mem_type == 3 || - ept_mem_type == 7) { - printk(KERN_ERR "%s: ept_mem_type=0x%llx\n", - __func__, ept_mem_type); - WARN_ON(1); - } - } - } -} - static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { - u64 sptes[4]; - int nr_sptes, i, ret; + int ret; gpa_t gpa; gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); @@ -5846,13 +5782,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) return 1; /* It is the real ept misconfig */ - printk(KERN_ERR "EPT: Misconfiguration.\n"); - printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa); - - nr_sptes = kvm_mmu_get_spte_hierarchy(vcpu, gpa, sptes); - - for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) - ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); + WARN_ON(1); vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; @@ -6246,6 +6176,11 @@ static int handle_mwait(struct kvm_vcpu *vcpu) return handle_nop(vcpu); } +static int handle_monitor_trap(struct kvm_vcpu *vcpu) +{ + return 1; +} + static int handle_monitor(struct kvm_vcpu *vcpu) { printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); @@ -6408,8 +6343,12 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) */ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, - u32 vmx_instruction_info, gva_t *ret) + u32 vmx_instruction_info, bool wr, gva_t *ret) { + gva_t off; + bool exn; + struct kvm_segment s; + /* * According to Vol. 3B, "Information for VM Exits Due to Instruction * Execution", on an exit, vmx_instruction_info holds most of the @@ -6434,22 +6373,63 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Addr = segment_base + offset */ /* offset = base + [index * scale] + displacement */ - *ret = vmx_get_segment_base(vcpu, seg_reg); + off = exit_qualification; /* holds the displacement */ if (base_is_valid) - *ret += kvm_register_read(vcpu, base_reg); + off += kvm_register_read(vcpu, base_reg); if (index_is_valid) - *ret += kvm_register_read(vcpu, index_reg)<<scaling; - *ret += exit_qualification; /* holds the displacement */ + off += kvm_register_read(vcpu, index_reg)<<scaling; + vmx_get_segment(vcpu, &s, seg_reg); + *ret = s.base + off; if (addr_size == 1) /* 32 bit */ *ret &= 0xffffffff; - /* - * TODO: throw #GP (and return 1) in various cases that the VM* - * instructions require it - e.g., offset beyond segment limit, - * unusable or unreadable/unwritable segment, non-canonical 64-bit - * address, and so on. Currently these are not checked. - */ + /* Checks for #GP/#SS exceptions. */ + exn = false; + if (is_protmode(vcpu)) { + /* Protected mode: apply checks for segment validity in the + * following order: + * - segment type check (#GP(0) may be thrown) + * - usability check (#GP(0)/#SS(0)) + * - limit check (#GP(0)/#SS(0)) + */ + if (wr) + /* #GP(0) if the destination operand is located in a + * read-only data segment or any code segment. + */ + exn = ((s.type & 0xa) == 0 || (s.type & 8)); + else + /* #GP(0) if the source operand is located in an + * execute-only code segment + */ + exn = ((s.type & 0xa) == 8); + } + if (exn) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } + if (is_long_mode(vcpu)) { + /* Long mode: #GP(0)/#SS(0) if the memory address is in a + * non-canonical form. This is an only check for long mode. + */ + exn = is_noncanonical_address(*ret); + } else if (is_protmode(vcpu)) { + /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. + */ + exn = (s.unusable != 0); + /* Protected mode: #GP(0)/#SS(0) if the memory + * operand is outside the segment limit. + */ + exn = exn || (off + sizeof(u64) > s.limit); + } + if (exn) { + kvm_queue_exception_e(vcpu, + seg_reg == VCPU_SREG_SS ? + SS_VECTOR : GP_VECTOR, + 0); + return 1; + } + return 0; } @@ -6471,7 +6451,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, int maxphyaddr = cpuid_maxphyaddr(vcpu); if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmcs_read32(VMX_INSTRUCTION_INFO), &gva)) + vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) return 1; if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, @@ -6999,7 +6979,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) field_value); } else { if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, &gva)) + vmx_instruction_info, true, &gva)) return 1; /* _system ok, as nested_vmx_check_permission verified cpl=0 */ kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva, @@ -7036,7 +7016,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) (((vmx_instruction_info) >> 3) & 0xf)); else { if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, &gva)) + vmx_instruction_info, false, &gva)) return 1; if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { @@ -7128,7 +7108,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) return 1; if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, &vmcs_gva)) + vmx_instruction_info, true, &vmcs_gva)) return 1; /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */ if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva, @@ -7184,7 +7164,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) * operand is read even if it isn't needed (e.g., for type==global) */ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmx_instruction_info, &gva)) + vmx_instruction_info, false, &gva)) return 1; if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand, sizeof(operand), &e)) { @@ -7282,6 +7262,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, + [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap, [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, [EXIT_REASON_INVEPT] = handle_invept, [EXIT_REASON_INVVPID] = handle_invvpid, @@ -7542,6 +7523,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return true; case EXIT_REASON_MWAIT_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); + case EXIT_REASON_MONITOR_TRAP_FLAG: + return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_TRAP_FLAG); case EXIT_REASON_MONITOR_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING); case EXIT_REASON_PAUSE_INSTRUCTION: @@ -8632,22 +8615,17 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u64 ipat = 0; /* For VT-d and EPT combination - * 1. MMIO: always map as UC + * 1. MMIO: guest may want to apply WC, trust it. * 2. EPT with VT-d: * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. + * result, try to trust guest. So the same as item 1. * b. VT-d with snooping control feature: snooping control feature of * VT-d engine can guarantee the cache correctness. Just set it * to WB to keep consistent with host. So the same as item 3. * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep * consistent with host MTRR */ - if (is_mmio) { - cache = MTRR_TYPE_UNCACHABLE; - goto exit; - } - - if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { + if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) { ipat = VMX_EPT_IPAT_BIT; cache = MTRR_TYPE_WRBACK; goto exit; @@ -8655,7 +8633,10 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) if (kvm_read_cr0(vcpu) & X86_CR0_CD) { ipat = VMX_EPT_IPAT_BIT; - cache = MTRR_TYPE_UNCACHABLE; + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + cache = MTRR_TYPE_WRBACK; + else + cache = MTRR_TYPE_UNCACHABLE; goto exit; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bbaf44e8f0d3..c7b6aed998e9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -29,6 +29,7 @@ #include "cpuid.h" #include "assigned-dev.h" #include "pmu.h" +#include "hyperv.h" #include <linux/clocksource.h> #include <linux/interrupt.h> @@ -221,11 +222,9 @@ static void shared_msr_update(unsigned slot, u32 msr) void kvm_define_shared_msr(unsigned slot, u32 msr) { BUG_ON(slot >= KVM_NR_SHARED_MSRS); + shared_msrs_global.msrs[slot] = msr; if (slot >= shared_msrs_global.nr) shared_msrs_global.nr = slot + 1; - shared_msrs_global.msrs[slot] = msr; - /* we need ensured the shared_msr_global have been updated */ - smp_wmb(); } EXPORT_SYMBOL_GPL(kvm_define_shared_msr); @@ -526,7 +525,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) } for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { if (is_present_gpte(pdpte[i]) && - (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) { + (pdpte[i] & + vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) { ret = 0; goto out; } @@ -949,6 +949,8 @@ static u32 emulated_msrs[] = { MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, + HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2, + HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -1217,11 +1219,6 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, __func__, base_khz, scaled_khz, shift, *pmultiplier); } -static inline u64 get_kernel_ns(void) -{ - return ktime_get_boot_ns(); -} - #ifdef CONFIG_X86_64 static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); #endif @@ -1869,123 +1866,6 @@ out: return r; } -static bool kvm_hv_hypercall_enabled(struct kvm *kvm) -{ - return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; -} - -static bool kvm_hv_msr_partition_wide(u32 msr) -{ - bool r = false; - switch (msr) { - case HV_X64_MSR_GUEST_OS_ID: - case HV_X64_MSR_HYPERCALL: - case HV_X64_MSR_REFERENCE_TSC: - case HV_X64_MSR_TIME_REF_COUNT: - r = true; - break; - } - - return r; -} - -static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) -{ - struct kvm *kvm = vcpu->kvm; - - switch (msr) { - case HV_X64_MSR_GUEST_OS_ID: - kvm->arch.hv_guest_os_id = data; - /* setting guest os id to zero disables hypercall page */ - if (!kvm->arch.hv_guest_os_id) - kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; - break; - case HV_X64_MSR_HYPERCALL: { - u64 gfn; - unsigned long addr; - u8 instructions[4]; - - /* if guest os id is not set hypercall should remain disabled */ - if (!kvm->arch.hv_guest_os_id) - break; - if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { - kvm->arch.hv_hypercall = data; - break; - } - gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) - return 1; - kvm_x86_ops->patch_hypercall(vcpu, instructions); - ((unsigned char *)instructions)[3] = 0xc3; /* ret */ - if (__copy_to_user((void __user *)addr, instructions, 4)) - return 1; - kvm->arch.hv_hypercall = data; - mark_page_dirty(kvm, gfn); - break; - } - case HV_X64_MSR_REFERENCE_TSC: { - u64 gfn; - HV_REFERENCE_TSC_PAGE tsc_ref; - memset(&tsc_ref, 0, sizeof(tsc_ref)); - kvm->arch.hv_tsc_page = data; - if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) - break; - gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; - if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT, - &tsc_ref, sizeof(tsc_ref))) - return 1; - mark_page_dirty(kvm, gfn); - break; - } - default: - vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " - "data 0x%llx\n", msr, data); - return 1; - } - return 0; -} - -static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) -{ - switch (msr) { - case HV_X64_MSR_APIC_ASSIST_PAGE: { - u64 gfn; - unsigned long addr; - - if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { - vcpu->arch.hv_vapic = data; - if (kvm_lapic_enable_pv_eoi(vcpu, 0)) - return 1; - break; - } - gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; - addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); - if (kvm_is_error_hva(addr)) - return 1; - if (__clear_user((void __user *)addr, PAGE_SIZE)) - return 1; - vcpu->arch.hv_vapic = data; - kvm_vcpu_mark_page_dirty(vcpu, gfn); - if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED)) - return 1; - break; - } - case HV_X64_MSR_EOI: - return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); - case HV_X64_MSR_ICR: - return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); - case HV_X64_MSR_TPR: - return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); - default: - vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " - "data 0x%llx\n", msr, data); - return 1; - } - - return 0; -} - static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) { gpa_t gpa = data & ~0x3f; @@ -2224,15 +2104,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) */ break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: - if (kvm_hv_msr_partition_wide(msr)) { - int r; - mutex_lock(&vcpu->kvm->lock); - r = set_msr_hyperv_pw(vcpu, msr, data); - mutex_unlock(&vcpu->kvm->lock); - return r; - } else - return set_msr_hyperv(vcpu, msr, data); - break; + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_set_msr_common(vcpu, msr, data, + msr_info->host_initiated); case MSR_IA32_BBL_CR_CTL3: /* Drop writes to this legacy MSR -- see rdmsr * counterpart for further detail. @@ -2315,68 +2190,6 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return 0; } -static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) -{ - u64 data = 0; - struct kvm *kvm = vcpu->kvm; - - switch (msr) { - case HV_X64_MSR_GUEST_OS_ID: - data = kvm->arch.hv_guest_os_id; - break; - case HV_X64_MSR_HYPERCALL: - data = kvm->arch.hv_hypercall; - break; - case HV_X64_MSR_TIME_REF_COUNT: { - data = - div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); - break; - } - case HV_X64_MSR_REFERENCE_TSC: - data = kvm->arch.hv_tsc_page; - break; - default: - vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); - return 1; - } - - *pdata = data; - return 0; -} - -static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) -{ - u64 data = 0; - - switch (msr) { - case HV_X64_MSR_VP_INDEX: { - int r; - struct kvm_vcpu *v; - kvm_for_each_vcpu(r, v, vcpu->kvm) { - if (v == vcpu) { - data = r; - break; - } - } - break; - } - case HV_X64_MSR_EOI: - return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); - case HV_X64_MSR_ICR: - return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); - case HV_X64_MSR_TPR: - return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); - case HV_X64_MSR_APIC_ASSIST_PAGE: - data = vcpu->arch.hv_vapic; - break; - default: - vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); - return 1; - } - *pdata = data; - return 0; -} - int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { switch (msr_info->index) { @@ -2493,14 +2306,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x20000000; break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: - if (kvm_hv_msr_partition_wide(msr_info->index)) { - int r; - mutex_lock(&vcpu->kvm->lock); - r = get_msr_hyperv_pw(vcpu, msr_info->index, &msr_info->data); - mutex_unlock(&vcpu->kvm->lock); - return r; - } else - return get_msr_hyperv(vcpu, msr_info->index, &msr_info->data); + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_get_msr_common(vcpu, + msr_info->index, &msr_info->data); break; case MSR_IA32_BBL_CR_CTL3: /* This legacy MSR exists but isn't fully documented in current @@ -2651,6 +2460,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_TSC_DEADLINE_TIMER: case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_DISABLE_QUIRKS: + case KVM_CAP_SET_BOOT_CPU_ID: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -3157,8 +2967,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); memcpy(dest, src + offset, size); - } else - WARN_ON_ONCE(1); + } valid -= feature; } @@ -3818,30 +3627,25 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_ioapic_init(kvm); if (r) { mutex_lock(&kvm->slots_lock); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, - &vpic->dev_master); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, - &vpic->dev_slave); - kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, - &vpic->dev_eclr); + kvm_destroy_pic(vpic); mutex_unlock(&kvm->slots_lock); - kfree(vpic); goto create_irqchip_unlock; } } else goto create_irqchip_unlock; - smp_wmb(); - kvm->arch.vpic = vpic; - smp_wmb(); r = kvm_setup_default_irq_routing(kvm); if (r) { mutex_lock(&kvm->slots_lock); mutex_lock(&kvm->irq_lock); kvm_ioapic_destroy(kvm); - kvm_destroy_pic(kvm); + kvm_destroy_pic(vpic); mutex_unlock(&kvm->irq_lock); mutex_unlock(&kvm->slots_lock); + goto create_irqchip_unlock; } + /* Write kvm->irq_routing before kvm->arch.vpic. */ + smp_wmb(); + kvm->arch.vpic = vpic; create_irqchip_unlock: mutex_unlock(&kvm->lock); break; @@ -3968,6 +3772,15 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_reinject(kvm, &control); break; } + case KVM_SET_BOOT_CPU_ID: + r = 0; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus) != 0) + r = -EBUSY; + else + kvm->arch.bsp_vcpu_id = arg; + mutex_unlock(&kvm->lock); + break; case KVM_XEN_HVM_CONFIG: { r = -EFAULT; if (copy_from_user(&kvm->arch.xen_hvm_config, argp, @@ -5883,66 +5696,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_halt); -int kvm_hv_hypercall(struct kvm_vcpu *vcpu) -{ - u64 param, ingpa, outgpa, ret; - uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; - bool fast, longmode; - - /* - * hypercall generates UD from non zero cpl and real mode - * per HYPER-V spec - */ - if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 0; - } - - longmode = is_64_bit_mode(vcpu); - - if (!longmode) { - param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | - (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); - ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | - (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); - outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | - (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); - } -#ifdef CONFIG_X86_64 - else { - param = kvm_register_read(vcpu, VCPU_REGS_RCX); - ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); - outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); - } -#endif - - code = param & 0xffff; - fast = (param >> 16) & 0x1; - rep_cnt = (param >> 32) & 0xfff; - rep_idx = (param >> 48) & 0xfff; - - trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); - - switch (code) { - case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: - kvm_vcpu_on_spin(vcpu); - break; - default: - res = HV_STATUS_INVALID_HYPERCALL_CODE; - break; - } - - ret = res | (((u64)rep_done & 0xfff) << 32); - if (longmode) { - kvm_register_write(vcpu, VCPU_REGS_RAX, ret); - } else { - kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); - kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); - } - - return 1; -} - /* * kvm_pv_kick_cpu_op: Kick a vcpu. * @@ -6514,6 +6267,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu_scan_ioapic(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) kvm_vcpu_reload_apic_access_page(vcpu); + if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH; + r = 0; + goto out; + } } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { @@ -7315,11 +7074,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu = kvm_x86_ops->vcpu_create(kvm, id); - /* - * Activate fpu unconditionally in case the guest needs eager FPU. It will be - * deactivated soon if it doesn't. - */ - kvm_x86_ops->fpu_activate(vcpu); return vcpu; } @@ -7541,6 +7295,17 @@ void kvm_arch_check_processor_compat(void *rtn) kvm_x86_ops->check_processor_compatibility(rtn); } +bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); + +bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; +} + bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); @@ -8218,6 +7983,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) kvm_x86_ops->interrupt_allowed(vcpu); } +void kvm_arch_start_assignment(struct kvm *kvm) +{ + atomic_inc(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); + +void kvm_arch_end_assignment(struct kvm *kvm) +{ + atomic_dec(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); + +bool kvm_arch_has_assigned_device(struct kvm *kvm) +{ + return atomic_read(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); + void kvm_arch_register_noncoherent_dma(struct kvm *kvm) { atomic_inc(&kvm->arch.noncoherent_dma_count); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index edc8cdcd786b..2f822cd886c2 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -147,6 +147,16 @@ static inline void kvm_register_writel(struct kvm_vcpu *vcpu, return kvm_register_write(vcpu, reg, val); } +static inline u64 get_kernel_ns(void) +{ + return ktime_get_boot_ns(); +} + +static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) +{ + return !(kvm->arch.disabled_quirks & quirk); +} + void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); void kvm_set_pending_timer(struct kvm_vcpu *vcpu); diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 14d15bf1a95b..5b478accd5fc 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += linkage.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += percpu.h generic-y += preempt.h generic-y += resource.h diff --git a/arch/xtensa/include/asm/mm-arch-hooks.h b/arch/xtensa/include/asm/mm-arch-hooks.h deleted file mode 100644 index d2e5cfd3dd02..000000000000 --- a/arch/xtensa/include/asm/mm-arch-hooks.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Architecture specific mm hooks - * - * Copyright (C) 2015, IBM Corporation - * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _ASM_XTENSA_MM_ARCH_HOOKS_H -#define _ASM_XTENSA_MM_ARCH_HOOKS_H - -#endif /* _ASM_XTENSA_MM_ARCH_HOOKS_H */ |