Diffstat (limited to 'arch')
153 files changed, 1683 insertions, 1240 deletions
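A change that repeats across the per-architecture fault handlers in this series (alpha, arc, avr32, cris, frv, m32r, metag, microblaze; m68k and ia64 use their own labels) is a new VM_FAULT_SIGSEGV branch routed to the segfault path. A condensed view of that hunk, excerpted from the fault.c diffs below:

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGSEGV)
			goto bad_area;		/* new: report a segfault */
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}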
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 98838a05ba6d..9d0ac091a52a 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -156,6 +156,8 @@ retry: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 6f7e3a68803a..563cb27e37f5 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -161,6 +161,8 @@ good_area: if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 68be9017593d..132c70e2d2f1 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -263,16 +263,37 @@ restart: adr r0, LC0 * OK... Let's do some funky business here. * If we do have a DTB appended to zImage, and we do have * an ATAG list around, we want the later to be translated - * and folded into the former here. To be on the safe side, - * let's temporarily move the stack away into the malloc - * area. No GOT fixup has occurred yet, but none of the - * code we're about to call uses any global variable. + * and folded into the former here. No GOT fixup has occurred + * yet, but none of the code we're about to call uses any + * global variable. */ - add sp, sp, #0x10000 + + /* Get the initial DTB size */ + ldr r5, [r6, #4] +#ifndef __ARMEB__ + /* convert to little endian */ + eor r1, r5, r5, ror #16 + bic r1, r1, #0x00ff0000 + mov r5, r5, ror #8 + eor r5, r5, r1, lsr #8 +#endif + /* 50% DTB growth should be good enough */ + add r5, r5, r5, lsr #1 + /* preserve 64-bit alignment */ + add r5, r5, #7 + bic r5, r5, #7 + /* clamp to 32KB min and 1MB max */ + cmp r5, #(1 << 15) + movlo r5, #(1 << 15) + cmp r5, #(1 << 20) + movhi r5, #(1 << 20) + /* temporarily relocate the stack past the DTB work space */ + add sp, sp, r5 + stmfd sp!, {r0-r3, ip, lr} mov r0, r8 mov r1, r6 - sub r2, sp, r6 + mov r2, r5 bl atags_to_fdt /* @@ -285,11 +306,11 @@ restart: adr r0, LC0 bic r0, r0, #1 add r0, r0, #0x100 mov r1, r6 - sub r2, sp, r6 + mov r2, r5 bleq atags_to_fdt ldmfd sp!, {r0-r3, ip, lr} - sub sp, sp, #0x10000 + sub sp, sp, r5 #endif mov r8, r6 @ use the appended device tree @@ -306,7 +327,7 @@ restart: adr r0, LC0 subs r1, r5, r1 addhi r9, r9, r1 - /* Get the dtb's size */ + /* Get the current DTB size */ ldr r5, [r6, #4] #ifndef __ARMEB__ /* convert r5 (dtb size) to little endian */ diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index b8168f1f8139..24ff27049ce0 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -368,7 +368,7 @@ }; i2s1: i2s@13960000 { - compatible = "samsung,s5pv210-i2s"; + compatible = "samsung,s3c6410-i2s"; reg = <0x13960000 0x100>; clocks = <&clock CLK_I2S1>; clock-names = "iis"; @@ -378,7 +378,7 @@ }; i2s2: i2s@13970000 { - compatible = "samsung,s5pv210-i2s"; + compatible = "samsung,s3c6410-i2s"; reg = <0x13970000 0x100>; clocks = <&clock CLK_I2S2>; clock-names = "iis"; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 7b4099fcf817..d5c4669224b1 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -17,14 +17,6 @@ aliases { ethernet0 = &emac; - serial0 = &uart0; - serial1 = &uart1; - serial2 = &uart2; - serial3 = &uart3; - serial4 = &uart4; - 
serial5 = &uart5; - serial6 = &uart6; - serial7 = &uart7; }; chosen { @@ -39,6 +31,14 @@ <&ahb_gates 44>; status = "disabled"; }; + + framebuffer@1 { + compatible = "allwinner,simple-framebuffer", "simple-framebuffer"; + allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi"; + clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>, + <&ahb_gates 44>, <&ahb_gates 46>; + status = "disabled"; + }; }; cpus { @@ -438,8 +438,8 @@ reg-names = "phy_ctrl", "pmu1", "pmu2"; clocks = <&usb_clk 8>; clock-names = "usb_phy"; - resets = <&usb_clk 1>, <&usb_clk 2>; - reset-names = "usb1_reset", "usb2_reset"; + resets = <&usb_clk 0>, <&usb_clk 1>, <&usb_clk 2>; + reset-names = "usb0_reset", "usb1_reset", "usb2_reset"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts b/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts index fe3c559ca6a8..bfa742817690 100644 --- a/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts +++ b/arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts @@ -55,6 +55,12 @@ model = "Olimex A10s-Olinuxino Micro"; compatible = "olimex,a10s-olinuxino-micro", "allwinner,sun5i-a10s"; + aliases { + serial0 = &uart0; + serial1 = &uart2; + serial2 = &uart3; + }; + soc@01c00000 { emac: ethernet@01c0b000 { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 1b76667f3182..2e7d8263799d 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -18,10 +18,6 @@ aliases { ethernet0 = &emac; - serial0 = &uart0; - serial1 = &uart1; - serial2 = &uart2; - serial3 = &uart3; }; chosen { @@ -390,8 +386,8 @@ reg-names = "phy_ctrl", "pmu1"; clocks = <&usb_clk 8>; clock-names = "usb_phy"; - resets = <&usb_clk 1>; - reset-names = "usb1_reset"; + resets = <&usb_clk 0>, <&usb_clk 1>; + reset-names = "usb0_reset", "usb1_reset"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun5i-a13-hsg-h702.dts b/arch/arm/boot/dts/sun5i-a13-hsg-h702.dts index eeed1f236ee8..c7be3abd9fcc 100644 --- a/arch/arm/boot/dts/sun5i-a13-hsg-h702.dts +++ b/arch/arm/boot/dts/sun5i-a13-hsg-h702.dts @@ -53,6 +53,10 @@ model = "HSG H702"; compatible = "hsg,h702", "allwinner,sun5i-a13"; + aliases { + serial0 = &uart1; + }; + soc@01c00000 { mmc0: mmc@01c0f000 { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/sun5i-a13-olinuxino-micro.dts b/arch/arm/boot/dts/sun5i-a13-olinuxino-micro.dts index 916ee8bb826f..3decefb3c37a 100644 --- a/arch/arm/boot/dts/sun5i-a13-olinuxino-micro.dts +++ b/arch/arm/boot/dts/sun5i-a13-olinuxino-micro.dts @@ -54,6 +54,10 @@ model = "Olimex A13-Olinuxino Micro"; compatible = "olimex,a13-olinuxino-micro", "allwinner,sun5i-a13"; + aliases { + serial0 = &uart1; + }; + soc@01c00000 { mmc0: mmc@01c0f000 { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/sun5i-a13-olinuxino.dts b/arch/arm/boot/dts/sun5i-a13-olinuxino.dts index e31d291d14cb..b421f7fa197b 100644 --- a/arch/arm/boot/dts/sun5i-a13-olinuxino.dts +++ b/arch/arm/boot/dts/sun5i-a13-olinuxino.dts @@ -55,6 +55,10 @@ model = "Olimex A13-Olinuxino"; compatible = "olimex,a13-olinuxino", "allwinner,sun5i-a13"; + aliases { + serial0 = &uart1; + }; + soc@01c00000 { mmc0: mmc@01c0f000 { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index c35217ea1f64..c556688f8b8b 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -16,11 +16,6 @@ / { interrupt-parent = <&intc>; - aliases { - serial0 = &uart1; - serial1 = &uart3; - }; - cpus { #address-cells = 
<1>; #size-cells = <0>; @@ -349,8 +344,8 @@ reg-names = "phy_ctrl", "pmu1"; clocks = <&usb_clk 8>; clock-names = "usb_phy"; - resets = <&usb_clk 1>; - reset-names = "usb1_reset"; + resets = <&usb_clk 0>, <&usb_clk 1>; + reset-names = "usb0_reset", "usb1_reset"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index f47156b6572b..1e7e7bcf8307 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -53,12 +53,6 @@ interrupt-parent = <&gic>; aliases { - serial0 = &uart0; - serial1 = &uart1; - serial2 = &uart2; - serial3 = &uart3; - serial4 = &uart4; - serial5 = &uart5; ethernet0 = &gmac; }; diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi.dts b/arch/arm/boot/dts/sun7i-a20-bananapi.dts index 1cf1214cc068..bd7b15add697 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi.dts @@ -55,6 +55,12 @@ model = "LeMaker Banana Pi"; compatible = "lemaker,bananapi", "allwinner,sun7i-a20"; + aliases { + serial0 = &uart0; + serial1 = &uart3; + serial2 = &uart7; + }; + soc@01c00000 { spi0: spi@01c05000 { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/sun7i-a20-hummingbird.dts b/arch/arm/boot/dts/sun7i-a20-hummingbird.dts index 0e4bfa3b2b85..0bcefcbbb756 100644 --- a/arch/arm/boot/dts/sun7i-a20-hummingbird.dts +++ b/arch/arm/boot/dts/sun7i-a20-hummingbird.dts @@ -19,6 +19,14 @@ model = "Merrii A20 Hummingbird"; compatible = "merrii,a20-hummingbird", "allwinner,sun7i-a20"; + aliases { + serial0 = &uart0; + serial1 = &uart2; + serial2 = &uart3; + serial3 = &uart4; + serial4 = &uart5; + }; + soc@01c00000 { mmc0: mmc@01c0f000 { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts index 9d669cdf031d..66cc77707198 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts @@ -20,6 +20,9 @@ compatible = "olimex,a20-olinuxino-micro", "allwinner,sun7i-a20"; aliases { + serial0 = &uart0; + serial1 = &uart6; + serial2 = &uart7; spi0 = &spi1; spi1 = &spi2; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index e21ce5992d56..89749ce34a84 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -54,14 +54,6 @@ aliases { ethernet0 = &gmac; - serial0 = &uart0; - serial1 = &uart1; - serial2 = &uart2; - serial3 = &uart3; - serial4 = &uart4; - serial5 = &uart5; - serial6 = &uart6; - serial7 = &uart7; }; chosen { diff --git a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts index 7f2117ce6985..32ad80804dbb 100644 --- a/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts +++ b/arch/arm/boot/dts/sun8i-a23-ippo-q8h-v5.dts @@ -55,6 +55,10 @@ model = "Ippo Q8H Dual Core Tablet (v5)"; compatible = "ippo,q8h-v5", "allwinner,sun8i-a23"; + aliases { + serial0 = &r_uart; + }; + chosen { bootargs = "earlyprintk console=ttyS0,115200"; }; diff --git a/arch/arm/boot/dts/sun8i-a23.dtsi b/arch/arm/boot/dts/sun8i-a23.dtsi index 0746cd1024d7..86584fcf5e32 100644 --- a/arch/arm/boot/dts/sun8i-a23.dtsi +++ b/arch/arm/boot/dts/sun8i-a23.dtsi @@ -52,15 +52,6 @@ / { interrupt-parent = <&gic>; - aliases { - serial0 = &uart0; - serial1 = &uart1; - serial2 = &uart2; - serial3 = &uart3; - serial4 = &uart4; - serial5 = &r_uart; - }; - cpus { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/sun9i-a80-optimus.dts b/arch/arm/boot/dts/sun9i-a80-optimus.dts index 
506948f582ee..11ec71072e81 100644 --- a/arch/arm/boot/dts/sun9i-a80-optimus.dts +++ b/arch/arm/boot/dts/sun9i-a80-optimus.dts @@ -54,6 +54,11 @@ model = "Merrii A80 Optimus Board"; compatible = "merrii,a80-optimus", "allwinner,sun9i-a80"; + aliases { + serial0 = &uart0; + serial1 = &uart4; + }; + chosen { bootargs = "earlyprintk console=ttyS0,115200"; }; diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi index 494714f67b57..9ef4438206a9 100644 --- a/arch/arm/boot/dts/sun9i-a80.dtsi +++ b/arch/arm/boot/dts/sun9i-a80.dtsi @@ -52,16 +52,6 @@ / { interrupt-parent = <&gic>; - aliases { - serial0 = &uart0; - serial1 = &uart1; - serial2 = &uart2; - serial3 = &uart3; - serial4 = &uart4; - serial5 = &uart5; - serial6 = &r_uart; - }; - cpus { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 66ce17655bb9..7b0152321b20 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -38,6 +38,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr = HCR_GUEST_MASK; } +static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.hcr; +} + +static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) +{ + vcpu->arch.hcr = hcr; +} + static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) { return 1; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 254e0650e48b..04b4ea0b550a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -125,9 +125,6 @@ struct kvm_vcpu_arch { * Anything that is not used directly from assembly code goes * here. */ - /* dcache set/way operation pending */ - int last_pcpu; - cpumask_t require_dcache_flush; /* Don't run the guest on this vcpu */ bool pause; diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 63e0ecc04901..1bca8f8af442 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -44,6 +44,7 @@ #ifndef __ASSEMBLY__ +#include <linux/highmem.h> #include <asm/cacheflush.h> #include <asm/pgalloc.h> @@ -161,13 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101; } -static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size, - bool ipa_uncached) +static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, + unsigned long size, + bool ipa_uncached) { - if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) - kvm_flush_dcache_to_poc((void *)hva, size); - /* * If we are going to insert an instruction page and the icache is * either VIPT or PIPT, there is a potential problem where the host @@ -179,18 +177,77 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, * * VIVT caches are tagged using both the ASID and the VMID and doesn't * need any kind of flushing (DDI 0406C.b - Page B3-1392). + * + * We need to do this through a kernel mapping (using the + * user-space mapping has proved to be the wrong + * solution). For that, we need to kmap one page at a time, + * and iterate over the range. 
*/ - if (icache_is_pipt()) { - __cpuc_coherent_user_range(hva, hva + size); - } else if (!icache_is_vivt_asid_tagged()) { + + bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached; + + VM_BUG_ON(size & PAGE_MASK); + + if (!need_flush && !icache_is_pipt()) + goto vipt_cache; + + while (size) { + void *va = kmap_atomic_pfn(pfn); + + if (need_flush) + kvm_flush_dcache_to_poc(va, PAGE_SIZE); + + if (icache_is_pipt()) + __cpuc_coherent_user_range((unsigned long)va, + (unsigned long)va + PAGE_SIZE); + + size -= PAGE_SIZE; + pfn++; + + kunmap_atomic(va); + } + +vipt_cache: + if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) { /* any kind of VIPT cache */ __flush_icache_all(); } } +static inline void __kvm_flush_dcache_pte(pte_t pte) +{ + void *va = kmap_atomic(pte_page(pte)); + + kvm_flush_dcache_to_poc(va, PAGE_SIZE); + + kunmap_atomic(va); +} + +static inline void __kvm_flush_dcache_pmd(pmd_t pmd) +{ + unsigned long size = PMD_SIZE; + pfn_t pfn = pmd_pfn(pmd); + + while (size) { + void *va = kmap_atomic_pfn(pfn); + + kvm_flush_dcache_to_poc(va, PAGE_SIZE); + + pfn++; + size -= PAGE_SIZE; + + kunmap_atomic(va); + } +} + +static inline void __kvm_flush_dcache_pud(pud_t pud) +{ +} + #define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) -void stage2_flush_vm(struct kvm *kvm); +void kvm_set_way_flush(struct kvm_vcpu *vcpu); +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index 2260f1855820..8944f4991c3c 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -22,10 +22,12 @@ __invalid_entry: v7m_exception_entry +#ifdef CONFIG_PRINTK adr r0, strerr mrs r1, ipsr mov r2, lr bl printk +#endif mov r0, sp bl show_regs 1: b 1b diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 466bd299b1a8..3afee5f40f4f 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -23,6 +23,7 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO select KVM_ARM_HOST + select SRCU depends on ARM_VIRT_EXT && ARM_LPAE ---help--- Support hosting virtualized guest machines. You will also diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2d6d91001062..0b0d58a905c4 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -281,15 +281,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->cpu = cpu; vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); - /* - * Check whether this vcpu requires the cache to be flushed on - * this physical CPU. This is a consequence of doing dcache - * operations by set/way on this vcpu. We do it here to be in - * a non-preemptible section. - */ - if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush)) - flush_cache_all(); /* We'd really want v7_flush_dcache_all() */ - kvm_arm_set_running_vcpu(vcpu); } @@ -541,7 +532,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; - vcpu->arch.last_pcpu = smp_processor_id(); kvm_guest_exit(); trace_kvm_exit(*vcpu_pc(vcpu)); /* diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 7928dbdf2102..f3d88dc388bc 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -189,82 +189,40 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu, return true; } -/* See note at ARM ARM B1.14.4 */ +/* + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). 
+ */ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct coproc_params *p, const struct coproc_reg *r) { - unsigned long val; - int cpu; - if (!p->is_write) return read_from_write_only(vcpu, p); - cpu = get_cpu(); - - cpumask_setall(&vcpu->arch.require_dcache_flush); - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); - - /* If we were already preempted, take the long way around */ - if (cpu != vcpu->arch.last_pcpu) { - flush_cache_all(); - goto done; - } - - val = *vcpu_reg(vcpu, p->Rt1); - - switch (p->CRm) { - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ - case 14: /* DCCISW */ - asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val)); - break; - - case 10: /* DCCSW */ - asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val)); - break; - } - -done: - put_cpu(); - + kvm_set_way_flush(vcpu); return true; } /* * Generic accessor for VM registers. Only called as long as HCR_TVM - * is set. + * is set. If the guest enables the MMU, we stop trapping the VM + * sys_regs and leave it in complete control of the caches. + * + * Used by the cpu-specific code. */ -static bool access_vm_reg(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) +bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) { + bool was_enabled = vcpu_has_cache_enabled(vcpu); + BUG_ON(!p->is_write); vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1); if (p->is_64bit) vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2); - return true; -} - -/* - * SCTLR accessor. Only called as long as HCR_TVM is set. If the - * guest enables the MMU, we stop trapping the VM sys_regs and leave - * it in complete control of the caches. - * - * Used by the cpu-specific code. - */ -bool access_sctlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r) -{ - access_vm_reg(vcpu, p, r); - - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ - vcpu->arch.hcr &= ~HCR_TVM; - stage2_flush_vm(vcpu->kvm); - } - + kvm_toggle_cache(vcpu, was_enabled); return true; } diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index 1a44bbe39643..88d24a3a9778 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -153,8 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1, #define is64 .is_64 = true #define is32 .is_64 = false -bool access_sctlr(struct kvm_vcpu *vcpu, - const struct coproc_params *p, - const struct coproc_reg *r); +bool access_vm_reg(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r); #endif /* __ARM_KVM_COPROC_LOCAL_H__ */ diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c index e6f4ae48bda9..a7136757d373 100644 --- a/arch/arm/kvm/coproc_a15.c +++ b/arch/arm/kvm/coproc_a15.c @@ -34,7 +34,7 @@ static const struct coproc_reg a15_regs[] = { /* SCTLR: swapped by interrupt.S. */ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, - access_sctlr, reset_val, c1_SCTLR, 0x00C50078 }, + access_vm_reg, reset_val, c1_SCTLR, 0x00C50078 }, }; static struct kvm_coproc_target_table a15_target_table = { diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c index 17fc7cd479d3..b19e46d1b2c0 100644 --- a/arch/arm/kvm/coproc_a7.c +++ b/arch/arm/kvm/coproc_a7.c @@ -37,7 +37,7 @@ static const struct coproc_reg a7_regs[] = { /* SCTLR: swapped by interrupt.S. 
*/ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32, - access_sctlr, reset_val, c1_SCTLR, 0x00C50878 }, + access_vm_reg, reset_val, c1_SCTLR, 0x00C50878 }, }; static struct kvm_coproc_target_table a7_target_table = { diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1dc9778a00af..136662547ca6 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -58,6 +58,26 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); } +/* + * D-Cache management functions. They take the page table entries by + * value, as they are flushing the cache using the kernel mapping (or + * kmap on 32bit). + */ +static void kvm_flush_dcache_pte(pte_t pte) +{ + __kvm_flush_dcache_pte(pte); +} + +static void kvm_flush_dcache_pmd(pmd_t pmd) +{ + __kvm_flush_dcache_pmd(pmd); +} + +static void kvm_flush_dcache_pud(pud_t pud) +{ + __kvm_flush_dcache_pud(pud); +} + static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min, int max) { @@ -119,6 +139,26 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) put_page(virt_to_page(pmd)); } +/* + * Unmapping vs dcache management: + * + * If a guest maps certain memory pages as uncached, all writes will + * bypass the data cache and go directly to RAM. However, the CPUs + * can still speculate reads (not writes) and fill cache lines with + * data. + * + * Those cache lines will be *clean* cache lines though, so a + * clean+invalidate operation is equivalent to an invalidate + * operation, because no cache lines are marked dirty. + * + * Those clean cache lines could be filled prior to an uncached write + * by the guest, and the cache coherent IO subsystem would therefore + * end up writing old data to disk. + * + * This is why right after unmapping a page/section and invalidating + * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure + * the IO subsystem will never hit in the cache. 
+ */ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr, phys_addr_t end) { @@ -128,9 +168,16 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, start_pte = pte = pte_offset_kernel(pmd, addr); do { if (!pte_none(*pte)) { + pte_t old_pte = *pte; + kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); kvm_tlb_flush_vmid_ipa(kvm, addr); + + /* No need to invalidate the cache for device mappings */ + if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) + kvm_flush_dcache_pte(old_pte); + + put_page(virt_to_page(pte)); } } while (pte++, addr += PAGE_SIZE, addr != end); @@ -149,8 +196,13 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud, next = kvm_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { if (kvm_pmd_huge(*pmd)) { + pmd_t old_pmd = *pmd; + pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); + + kvm_flush_dcache_pmd(old_pmd); + put_page(virt_to_page(pmd)); } else { unmap_ptes(kvm, pmd, addr, next); @@ -173,8 +225,13 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd, next = kvm_pud_addr_end(addr, end); if (!pud_none(*pud)) { if (pud_huge(*pud)) { + pud_t old_pud = *pud; + pud_clear(pud); kvm_tlb_flush_vmid_ipa(kvm, addr); + + kvm_flush_dcache_pud(old_pud); + put_page(virt_to_page(pud)); } else { unmap_pmds(kvm, pud, addr, next); @@ -209,10 +266,9 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, pte = pte_offset_kernel(pmd, addr); do { - if (!pte_none(*pte)) { - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); - kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE); - } + if (!pte_none(*pte) && + (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE) + kvm_flush_dcache_pte(*pte); } while (pte++, addr += PAGE_SIZE, addr != end); } @@ -226,12 +282,10 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, do { next = kvm_pmd_addr_end(addr, end); if (!pmd_none(*pmd)) { - if (kvm_pmd_huge(*pmd)) { - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); - kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE); - } else { + if (kvm_pmd_huge(*pmd)) + kvm_flush_dcache_pmd(*pmd); + else stage2_flush_ptes(kvm, pmd, addr, next); - } } } while (pmd++, addr = next, addr != end); } @@ -246,12 +300,10 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, do { next = kvm_pud_addr_end(addr, end); if (!pud_none(*pud)) { - if (pud_huge(*pud)) { - hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT); - kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE); - } else { + if (pud_huge(*pud)) + kvm_flush_dcache_pud(*pud); + else stage2_flush_pmds(kvm, pud, addr, next); - } } } while (pud++, addr = next, addr != end); } @@ -278,7 +330,7 @@ static void stage2_flush_memslot(struct kvm *kvm, * Go through the stage 2 page tables and invalidate any cache lines * backing memory already mapped to the VM. 
*/ -void stage2_flush_vm(struct kvm *kvm) +static void stage2_flush_vm(struct kvm *kvm) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; @@ -905,6 +957,12 @@ static bool kvm_is_device_pfn(unsigned long pfn) return !pfn_valid(pfn); } +static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, + unsigned long size, bool uncached) +{ + __coherent_cache_guest_page(vcpu, pfn, size, uncached); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -994,8 +1052,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pmd_writable(&new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE, - fault_ipa_uncached); + coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached); ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, mem_type); @@ -1003,8 +1060,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_s2pte_writable(&new_pte); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, hva, PAGE_SIZE, - fault_ipa_uncached); + coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached); ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); } @@ -1411,3 +1467,71 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, unmap_stage2_range(kvm, gpa, size); spin_unlock(&kvm->mmu_lock); } + +/* + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). + * + * Main problems: + * - S/W ops are local to a CPU (not broadcast) + * - We have line migration behind our back (speculation) + * - System caches don't support S/W at all (damn!) + * + * In the face of the above, the best we can do is to try and convert + * S/W ops to VA ops. Because the guest is not allowed to infer the + * S/W to PA mapping, it can only use S/W to nuke the whole cache, + * which is a rather good thing for us. + * + * Also, it is only used when turning caches on/off ("The expected + * usage of the cache maintenance instructions that operate by set/way + * is associated with the cache maintenance instructions associated + * with the powerdown and powerup of caches, if this is required by + * the implementation."). + * + * We use the following policy: + * + * - If we trap a S/W operation, we enable VM trapping to detect + * caches being turned on/off, and do a full clean. + * + * - We flush the caches on both caches being turned on and off. + * + * - Once the caches are enabled, we stop trapping VM ops. + */ +void kvm_set_way_flush(struct kvm_vcpu *vcpu) +{ + unsigned long hcr = vcpu_get_hcr(vcpu); + + /* + * If this is the first time we do a S/W operation + * (i.e. HCR_TVM not set) flush the whole memory, and set the + * VM trapping. + * + * Otherwise, rely on the VM trapping to wait for the MMU + + * Caches to be turned off. At that point, we'll be able to + * clean the caches again. + */ + if (!(hcr & HCR_TVM)) { + trace_kvm_set_way_flush(*vcpu_pc(vcpu), + vcpu_has_cache_enabled(vcpu)); + stage2_flush_vm(vcpu->kvm); + vcpu_set_hcr(vcpu, hcr | HCR_TVM); + } +} + +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) +{ + bool now_enabled = vcpu_has_cache_enabled(vcpu); + + /* + * If switching the MMU+caches on, need to invalidate the caches. + * If switching it off, need to clean the caches. + * Clean + invalidate does the trick always. 
+ */ + if (now_enabled != was_enabled) + stage2_flush_vm(vcpu->kvm); + + /* Caches are now on, stop trapping VM ops (until a S/W op) */ + if (now_enabled) + vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); + + trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); +} diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index b1d640f78623..b6a6e7102201 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -223,6 +223,45 @@ TRACE_EVENT(kvm_hvc, __entry->vcpu_pc, __entry->r0, __entry->imm) ); +TRACE_EVENT(kvm_set_way_flush, + TP_PROTO(unsigned long vcpu_pc, bool cache), + TP_ARGS(vcpu_pc, cache), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( bool, cache ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->cache = cache; + ), + + TP_printk("S/W flush at 0x%016lx (cache %s)", + __entry->vcpu_pc, __entry->cache ? "on" : "off") +); + +TRACE_EVENT(kvm_toggle_cache, + TP_PROTO(unsigned long vcpu_pc, bool was, bool now), + TP_ARGS(vcpu_pc, was, now), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( bool, was ) + __field( bool, now ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->was = was; + __entry->now = now; + ), + + TP_printk("VM op at 0x%016lx (cache was %s, now %s)", + __entry->vcpu_pc, __entry->was ? "on" : "off", + __entry->now ? "on" : "off") +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index caa21e9b8cd9..ccef8806bb58 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -190,6 +190,13 @@ static void __init armada_375_380_coherency_init(struct device_node *np) arch_ioremap_caller = armada_pcie_wa_ioremap_caller; /* + * We should switch the PL310 to I/O coherency mode only if + * I/O coherency is actually enabled. + */ + if (!coherency_available()) + return; + + /* * Add the PL310 property "arm,io-coherent". 
This makes sure the * outer sync operation is not used, which allows to * workaround the system erratum that causes deadlocks when diff --git a/arch/arm/mach-shmobile/board-ape6evm.c b/arch/arm/mach-shmobile/board-ape6evm.c index 66f67816a844..444f22d370f0 100644 --- a/arch/arm/mach-shmobile/board-ape6evm.c +++ b/arch/arm/mach-shmobile/board-ape6evm.c @@ -18,6 +18,8 @@ #include <linux/gpio_keys.h> #include <linux/input.h> #include <linux/interrupt.h> +#include <linux/irqchip.h> +#include <linux/irqchip/arm-gic.h> #include <linux/kernel.h> #include <linux/mfd/tmio.h> #include <linux/mmc/host.h> @@ -273,6 +275,22 @@ static void __init ape6evm_add_standard_devices(void) sizeof(ape6evm_leds_pdata)); } +static void __init ape6evm_legacy_init_time(void) +{ + /* Do not invoke DT-based timers via clocksource_of_init() */ +} + +static void __init ape6evm_legacy_init_irq(void) +{ + void __iomem *gic_dist_base = ioremap_nocache(0xf1001000, 0x1000); + void __iomem *gic_cpu_base = ioremap_nocache(0xf1002000, 0x1000); + + gic_init(0, 29, gic_dist_base, gic_cpu_base); + + /* Do not invoke DT-based interrupt code via irqchip_init() */ +} + + static const char *ape6evm_boards_compat_dt[] __initdata = { "renesas,ape6evm", NULL, @@ -280,7 +298,9 @@ static const char *ape6evm_boards_compat_dt[] __initdata = { DT_MACHINE_START(APE6EVM_DT, "ape6evm") .init_early = shmobile_init_delay, + .init_irq = ape6evm_legacy_init_irq, .init_machine = ape6evm_add_standard_devices, .init_late = shmobile_init_late, .dt_compat = ape6evm_boards_compat_dt, + .init_time = ape6evm_legacy_init_time, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index f8197eb6e566..65b128dd4072 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -21,6 +21,8 @@ #include <linux/input.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/irqchip.h> +#include <linux/irqchip/arm-gic.h> #include <linux/kernel.h> #include <linux/leds.h> #include <linux/mfd/tmio.h> @@ -811,6 +813,16 @@ static void __init lager_init(void) lager_ksz8041_fixup); } +static void __init lager_legacy_init_irq(void) +{ + void __iomem *gic_dist_base = ioremap_nocache(0xf1001000, 0x1000); + void __iomem *gic_cpu_base = ioremap_nocache(0xf1002000, 0x1000); + + gic_init(0, 29, gic_dist_base, gic_cpu_base); + + /* Do not invoke DT-based interrupt code via irqchip_init() */ +} + static const char * const lager_boards_compat_dt[] __initconst = { "renesas,lager", NULL, @@ -819,6 +831,7 @@ static const char * const lager_boards_compat_dt[] __initconst = { DT_MACHINE_START(LAGER_DT, "lager") .smp = smp_ops(r8a7790_smp_ops), .init_early = shmobile_init_delay, + .init_irq = lager_legacy_init_irq, .init_time = rcar_gen2_timer_init, .init_machine = lager_init, .init_late = shmobile_init_late, diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c index 3dd6edd9bd1d..cc9470dfb1ce 100644 --- a/arch/arm/mach-shmobile/setup-rcar-gen2.c +++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c @@ -133,7 +133,9 @@ void __init rcar_gen2_timer_init(void) #ifdef CONFIG_COMMON_CLK rcar_gen2_clocks_init(mode); #endif +#ifdef CONFIG_ARCH_SHMOBILE_MULTI clocksource_of_init(); +#endif } struct memory_reserve_config { diff --git a/arch/arm/mach-shmobile/timer.c b/arch/arm/mach-shmobile/timer.c index f1d027aa7a81..0edf2a6d2bbe 100644 --- a/arch/arm/mach-shmobile/timer.c +++ b/arch/arm/mach-shmobile/timer.c @@ -70,6 +70,18 @@ void __init shmobile_init_delay(void) if 
(!max_freq) return; +#ifdef CONFIG_ARCH_SHMOBILE_LEGACY + /* Non-multiplatform r8a73a4 SoC cannot use arch timer due + * to GIC being initialized from C and arch timer via DT */ + if (of_machine_is_compatible("renesas,r8a73a4")) + has_arch_timer = false; + + /* Non-multiplatform r8a7790 SoC cannot use arch timer due + * to GIC being initialized from C and arch timer via DT */ + if (of_machine_is_compatible("renesas,r8a7790")) + has_arch_timer = false; +#endif + if (!has_arch_timer || !IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) { if (is_a7_a8_a9) shmobile_setup_delay_hz(max_freq, 1, 3); diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 03823e784f63..c43c71455566 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -1012,6 +1012,7 @@ config ARCH_SUPPORTS_BIG_ENDIAN config ARM_KERNMEM_PERMS bool "Restrict kernel memory permissions" + depends on MMU help If this is set, kernel memory other than kernel text (and rodata) will be made non-executable. The tradeoff is that each region is diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 91892569710f..845769e41332 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -144,21 +144,17 @@ static void flush_context(unsigned int cpu) /* Update the list of reserved ASIDs and the ASID bitmap. */ bitmap_clear(asid_map, 0, NUM_USER_ASIDS); for_each_possible_cpu(i) { - if (i == cpu) { - asid = 0; - } else { - asid = atomic64_xchg(&per_cpu(active_asids, i), 0); - /* - * If this CPU has already been through a - * rollover, but hasn't run another task in - * the meantime, we must preserve its reserved - * ASID, as this is the only trace we have of - * the process it is still running. - */ - if (asid == 0) - asid = per_cpu(reserved_asids, i); - __set_bit(asid & ~ASID_MASK, asid_map); - } + asid = atomic64_xchg(&per_cpu(active_asids, i), 0); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); per_cpu(reserved_asids, i) = asid; } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7864797609b3..903dba064a03 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1940,13 +1940,32 @@ void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) } EXPORT_SYMBOL_GPL(arm_iommu_release_mapping); +static int __arm_iommu_attach_device(struct device *dev, + struct dma_iommu_mapping *mapping) +{ + int err; + + err = iommu_attach_device(mapping->domain, dev); + if (err) + return err; + + kref_get(&mapping->kref); + dev->archdata.mapping = mapping; + + pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); + return 0; +} + /** * arm_iommu_attach_device * @dev: valid struct device pointer * @mapping: io address space mapping structure (returned from * arm_iommu_create_mapping) * - * Attaches specified io address space mapping to the provided device, + * Attaches specified io address space mapping to the provided device. + * This replaces the dma operations (dma_map_ops pointer) with the + * IOMMU aware version. + * * More than one client might be attached to the same io address space * mapping. 
*/ @@ -1955,25 +1974,16 @@ int arm_iommu_attach_device(struct device *dev, { int err; - err = iommu_attach_device(mapping->domain, dev); + err = __arm_iommu_attach_device(dev, mapping); if (err) return err; - kref_get(&mapping->kref); - dev->archdata.mapping = mapping; - - pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); + set_dma_ops(dev, &iommu_ops); return 0; } EXPORT_SYMBOL_GPL(arm_iommu_attach_device); -/** - * arm_iommu_detach_device - * @dev: valid struct device pointer - * - * Detaches the provided device from a previously attached map. - */ -void arm_iommu_detach_device(struct device *dev) +static void __arm_iommu_detach_device(struct device *dev) { struct dma_iommu_mapping *mapping; @@ -1989,6 +1999,19 @@ void arm_iommu_detach_device(struct device *dev) pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); } + +/** + * arm_iommu_detach_device + * @dev: valid struct device pointer + * + * Detaches the provided device from a previously attached map. + * This voids the dma operations (dma_map_ops pointer) + */ +void arm_iommu_detach_device(struct device *dev) +{ + __arm_iommu_detach_device(dev); + set_dma_ops(dev, NULL); +} EXPORT_SYMBOL_GPL(arm_iommu_detach_device); static struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent) @@ -2011,7 +2034,7 @@ static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, return false; } - if (arm_iommu_attach_device(dev, mapping)) { + if (__arm_iommu_attach_device(dev, mapping)) { pr_warn("Failed to attached device %s to IOMMU_mapping\n", dev_name(dev)); arm_iommu_release_mapping(mapping); @@ -2025,7 +2048,10 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { struct dma_iommu_mapping *mapping = dev->archdata.mapping; - arm_iommu_detach_device(dev); + if (!mapping) + return; + + __arm_iommu_detach_device(dev); arm_iommu_release_mapping(mapping); } diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 865a7e28ea2d..3cb4c856b10d 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -45,6 +45,16 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~HCR_RW; } +static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.hcr_el2; +} + +static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) +{ + vcpu->arch.hcr_el2 = hcr; +} + static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu) { return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0b7dfdb931df..acd101a9014d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -116,9 +116,6 @@ struct kvm_vcpu_arch { * Anything that is not used directly from assembly code goes * here. 
*/ - /* dcache set/way operation pending */ - int last_pcpu; - cpumask_t require_dcache_flush; /* Don't run the guest */ bool pause; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 14a74f136272..adcf49547301 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -243,24 +243,46 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } -static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva, - unsigned long size, - bool ipa_uncached) +static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, + unsigned long size, + bool ipa_uncached) { + void *va = page_address(pfn_to_page(pfn)); + if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached) - kvm_flush_dcache_to_poc((void *)hva, size); + kvm_flush_dcache_to_poc(va, size); if (!icache_is_aliasing()) { /* PIPT */ - flush_icache_range(hva, hva + size); + flush_icache_range((unsigned long)va, + (unsigned long)va + size); } else if (!icache_is_aivivt()) { /* non ASID-tagged VIVT */ /* any kind of VIPT cache */ __flush_icache_all(); } } +static inline void __kvm_flush_dcache_pte(pte_t pte) +{ + struct page *page = pte_page(pte); + kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE); +} + +static inline void __kvm_flush_dcache_pmd(pmd_t pmd) +{ + struct page *page = pmd_page(pmd); + kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE); +} + +static inline void __kvm_flush_dcache_pud(pud_t pud) +{ + struct page *page = pud_page(pud); + kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE); +} + #define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) -void stage2_flush_vm(struct kvm *kvm); +void kvm_set_way_flush(struct kvm_vcpu *vcpu); +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 8ba85e9ea388..b334084d3675 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -26,6 +26,7 @@ config KVM select KVM_ARM_HOST select KVM_ARM_VGIC select KVM_ARM_TIMER + select SRCU ---help--- Support hosting virtualized guest machines. diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3d7c2df89946..f31e8bb2bc5b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -69,68 +69,31 @@ static u32 get_ccsidr(u32 csselr) return ccsidr; } -static void do_dc_cisw(u32 val) -{ - asm volatile("dc cisw, %x0" : : "r" (val)); - dsb(ish); -} - -static void do_dc_csw(u32 val) -{ - asm volatile("dc csw, %x0" : : "r" (val)); - dsb(ish); -} - -/* See note at ARM ARM B1.14.4 */ +/* + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). 
+ */ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct sys_reg_params *p, const struct sys_reg_desc *r) { - unsigned long val; - int cpu; - if (!p->is_write) return read_from_write_only(vcpu, p); - cpu = get_cpu(); - - cpumask_setall(&vcpu->arch.require_dcache_flush); - cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush); - - /* If we were already preempted, take the long way around */ - if (cpu != vcpu->arch.last_pcpu) { - flush_cache_all(); - goto done; - } - - val = *vcpu_reg(vcpu, p->Rt); - - switch (p->CRm) { - case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */ - case 14: /* DCCISW */ - do_dc_cisw(val); - break; - - case 10: /* DCCSW */ - do_dc_csw(val); - break; - } - -done: - put_cpu(); - + kvm_set_way_flush(vcpu); return true; } /* * Generic accessor for VM registers. Only called as long as HCR_TVM - * is set. + * is set. If the guest enables the MMU, we stop trapping the VM + * sys_regs and leave it in complete control of the caches. */ static bool access_vm_reg(struct kvm_vcpu *vcpu, const struct sys_reg_params *p, const struct sys_reg_desc *r) { unsigned long val; + bool was_enabled = vcpu_has_cache_enabled(vcpu); BUG_ON(!p->is_write); @@ -143,25 +106,7 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; } - return true; -} - -/* - * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the - * guest enables the MMU, we stop trapping the VM sys_regs and leave - * it in complete control of the caches. - */ -static bool access_sctlr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - access_vm_reg(vcpu, p, r); - - if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */ - vcpu->arch.hcr_el2 &= ~HCR_TVM; - stage2_flush_vm(vcpu->kvm); - } - + kvm_toggle_cache(vcpu, was_enabled); return true; } @@ -377,7 +322,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { NULL, reset_mpidr, MPIDR_EL1 }, /* SCTLR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), - access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 }, + access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, /* CPACR_EL1 */ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), NULL, reset_val, CPACR_EL1, 0 }, @@ -657,7 +602,7 @@ static const struct sys_reg_desc cp14_64_regs[] = { * register). 
*/ static const struct sys_reg_desc cp15_regs[] = { - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index 0eca93327195..d223a8b57c1e 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -142,6 +142,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 1790f22e71a2..2686a7aa8ec8 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -176,6 +176,8 @@ retry: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index 9a66372fc7c7..ec4917ddf678 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -168,6 +168,8 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 7225dad87094..ba5ba7accd0d 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -172,6 +172,8 @@ retry: */ if (fault & VM_FAULT_OOM) { goto out_of_memory; + } else if (fault & VM_FAULT_SIGSEGV) { + goto bad_area; } else if (fault & VM_FAULT_SIGBUS) { signal = SIGBUS; goto bad_area; diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index e9c6a8014bd6..e3d4d4890104 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -200,6 +200,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 2bd7487440c4..b2f04aee46ec 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -145,6 +145,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto map_err; else if (fault & VM_FAULT_SIGBUS) goto bus_err; BUG(); diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 332680e5ebf2..2de5dc695a87 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -141,6 +141,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index fa4cf52aa7a6..d46a5ebb7570 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -224,6 +224,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/mips/Kconfig 
b/arch/mips/Kconfig index 3289969ee423..843713c05b79 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2656,27 +2656,21 @@ config TRAD_SIGNALS bool config MIPS32_COMPAT - bool "Kernel support for Linux/MIPS 32-bit binary compatibility" - depends on 64BIT - help - Select this option if you want Linux/MIPS 32-bit binary - compatibility. Since all software available for Linux/MIPS is - currently 32-bit you should say Y here. + bool config COMPAT bool - depends on MIPS32_COMPAT - select ARCH_WANT_OLD_COMPAT_IPC - default y config SYSVIPC_COMPAT bool - depends on COMPAT && SYSVIPC - default y config MIPS32_O32 bool "Kernel support for o32 binaries" - depends on MIPS32_COMPAT + depends on 64BIT + select ARCH_WANT_OLD_COMPAT_IPC + select COMPAT + select MIPS32_COMPAT + select SYSVIPC_COMPAT if SYSVIPC help Select this option if you want to run o32 binaries. These are pure 32-bit binaries as used by the 32-bit Linux/MIPS port. Most of @@ -2686,7 +2680,10 @@ config MIPS32_O32 config MIPS32_N32 bool "Kernel support for n32 binaries" - depends on MIPS32_COMPAT + depends on 64BIT + select COMPAT + select MIPS32_COMPAT + select SYSVIPC_COMPAT if SYSVIPC help Select this option if you want to run n32 binaries. These are 64-bit binaries using 32-bit quantities for addressing and certain diff --git a/arch/mips/boot/elf2ecoff.c b/arch/mips/boot/elf2ecoff.c index 8585078ae50e..2a4c52e27f41 100644 --- a/arch/mips/boot/elf2ecoff.c +++ b/arch/mips/boot/elf2ecoff.c @@ -49,7 +49,8 @@ /* * Some extra ELF definitions */ -#define PT_MIPS_REGINFO 0x70000000 /* Register usage information */ +#define PT_MIPS_REGINFO 0x70000000 /* Register usage information */ +#define PT_MIPS_ABIFLAGS 0x70000003 /* Records ABI related flags */ /* -------------------------------------------------------------------- */ @@ -349,39 +350,46 @@ int main(int argc, char *argv[]) for (i = 0; i < ex.e_phnum; i++) { /* Section types we can ignore... */ - if (ph[i].p_type == PT_NULL || ph[i].p_type == PT_NOTE || - ph[i].p_type == PT_PHDR - || ph[i].p_type == PT_MIPS_REGINFO) + switch (ph[i].p_type) { + case PT_NULL: + case PT_NOTE: + case PT_PHDR: + case PT_MIPS_REGINFO: + case PT_MIPS_ABIFLAGS: continue; - /* Section types we can't handle... */ - else if (ph[i].p_type != PT_LOAD) { - fprintf(stderr, - "Program header %d type %d can't be converted.\n", - ex.e_phnum, ph[i].p_type); - exit(1); - } - /* Writable (data) segment? */ - if (ph[i].p_flags & PF_W) { - struct sect ndata, nbss; - ndata.vaddr = ph[i].p_vaddr; - ndata.len = ph[i].p_filesz; - nbss.vaddr = ph[i].p_vaddr + ph[i].p_filesz; - nbss.len = ph[i].p_memsz - ph[i].p_filesz; + case PT_LOAD: + /* Writable (data) segment? */ + if (ph[i].p_flags & PF_W) { + struct sect ndata, nbss; + + ndata.vaddr = ph[i].p_vaddr; + ndata.len = ph[i].p_filesz; + nbss.vaddr = ph[i].p_vaddr + ph[i].p_filesz; + nbss.len = ph[i].p_memsz - ph[i].p_filesz; - combine(&data, &ndata, 0); - combine(&bss, &nbss, 1); - } else { - struct sect ntxt; + combine(&data, &ndata, 0); + combine(&bss, &nbss, 1); + } else { + struct sect ntxt; - ntxt.vaddr = ph[i].p_vaddr; - ntxt.len = ph[i].p_filesz; + ntxt.vaddr = ph[i].p_vaddr; + ntxt.len = ph[i].p_filesz; - combine(&text, &ntxt, 0); + combine(&text, &ntxt, 0); + } + /* Remember the lowest segment start address. */ + if (ph[i].p_vaddr < cur_vma) + cur_vma = ph[i].p_vaddr; + break; + + default: + /* Section types we can't handle... 
*/ + fprintf(stderr, + "Program header %d type %d can't be converted.\n", + ex.e_phnum, ph[i].p_type); + exit(1); } - /* Remember the lowest segment start address. */ - if (ph[i].p_vaddr < cur_vma) - cur_vma = ph[i].p_vaddr; } /* Sections must be in order to be converted... */ diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index ecd903dd1c45..8b1eeffa12ed 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -240,9 +240,7 @@ static int octeon_cpu_disable(void) set_cpu_online(cpu, false); cpu_clear(cpu, cpu_callin_map); - local_irq_disable(); octeon_fixup_irqs(); - local_irq_enable(); flush_cache_all(); local_flush_tlb_all(); diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index f57b96dcf7df..61a4460d67d3 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -132,7 +132,6 @@ CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m @@ -175,7 +174,6 @@ CONFIG_BRIDGE_EBT_MARK_T=m CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_ULOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_SCTP=m CONFIG_BRIDGE=m @@ -220,8 +218,6 @@ CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_CLS_IND=y CONFIG_CFG80211=m CONFIG_MAC80211=m -CONFIG_MAC80211_RC_PID=y -CONFIG_MAC80211_RC_DEFAULT_PID=y CONFIG_MAC80211_MESH=y CONFIG_RFKILL=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -248,19 +244,13 @@ CONFIG_ATA_OVER_ETH=m CONFIG_IDE=y CONFIG_BLK_DEV_IDECD=y CONFIG_IDE_GENERIC=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_BLK_DEV_IT8213=m -CONFIG_BLK_DEV_TC86C001=m CONFIG_RAID_ATTRS=m -CONFIG_SCSI=m -CONFIG_BLK_DEV_SD=m +CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=m CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y @@ -273,6 +263,8 @@ CONFIG_SCSI_AACRAID=m CONFIG_SCSI_AIC7XXX=m CONFIG_AIC7XXX_RESET_DELAY_MS=15000 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set +CONFIG_ATA=y +CONFIG_ATA_PIIX=y CONFIG_MD=y CONFIG_BLK_DEV_MD=m CONFIG_MD_LINEAR=m @@ -340,6 +332,7 @@ CONFIG_UIO=m CONFIG_UIO_CIF=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m CONFIG_REISERFS_PROC_INFO=y CONFIG_REISERFS_FS_XATTR=y @@ -441,4 +434,3 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC16=m diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 994d21939676..affebb78f5d6 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -64,7 +64,7 @@ static inline int __enable_fpu(enum fpu_mode mode) return SIGFPE; /* set FRE */ - write_c0_config5(read_c0_config5() | MIPS_CONF5_FRE); + set_c0_config5(MIPS_CONF5_FRE); goto fr_common; case FPU_64BIT: @@ -74,8 +74,10 @@ static inline int __enable_fpu(enum fpu_mode mode) #endif /* fall through */ case FPU_32BIT: - /* clear FRE */ - write_c0_config5(read_c0_config5() & ~MIPS_CONF5_FRE); + if (cpu_has_fre) { + /* clear FRE */ + clear_c0_config5(MIPS_CONF5_FRE); + } fr_common: /* set CU1 & change FR appropriately */ fr = (int)mode & FPU_FR_MASK; @@ -182,25 +184,32 @@ static inline int init_fpu(void) int ret = 0; if (cpu_has_fpu) { + unsigned int config5; + ret = __own_fpu(); - if (!ret) { - unsigned int config5 = read_c0_config5(); - - /* - * 
Ensure FRE is clear whilst running _init_fpu, since - * single precision FP instructions are used. If FRE - * was set then we'll just end up initialising all 32 - * 64b registers. - */ - write_c0_config5(config5 & ~MIPS_CONF5_FRE); - enable_fpu_hazard(); + if (ret) + return ret; + if (!cpu_has_fre) { _init_fpu(); - /* Restore FRE */ - write_c0_config5(config5); - enable_fpu_hazard(); + return 0; } + + /* + * Ensure FRE is clear whilst running _init_fpu, since + * single precision FP instructions are used. If FRE + * was set then we'll just end up initialising all 32 + * 64b registers. + */ + config5 = clear_c0_config5(MIPS_CONF5_FRE); + enable_fpu_hazard(); + + _init_fpu(); + + /* Restore FRE */ + write_c0_config5(config5); + enable_fpu_hazard(); } else fpu_emulator_init_fpu(); diff --git a/arch/mips/include/asm/fw/arc/hinv.h b/arch/mips/include/asm/fw/arc/hinv.h index f8d37d1df5de..9fac64a26353 100644 --- a/arch/mips/include/asm/fw/arc/hinv.h +++ b/arch/mips/include/asm/fw/arc/hinv.h @@ -119,7 +119,7 @@ union key_u { #define SGI_ARCS_REV 10 /* rev .10, 3/04/92 */ #endif -typedef struct component { +typedef struct { CONFIGCLASS Class; CONFIGTYPE Type; IDENTIFIERFLAG Flags; @@ -140,7 +140,7 @@ struct cfgdata { }; /* System ID */ -typedef struct systemid { +typedef struct { CHAR VendorId[8]; CHAR ProductId[8]; } SYSTEMID; @@ -166,7 +166,7 @@ typedef enum memorytype { #endif /* _NT_PROM */ } MEMORYTYPE; -typedef struct memorydescriptor { +typedef struct { MEMORYTYPE Type; LONG BasePage; LONG PageCount; diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index b95a827d763e..59c0901bdd84 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -89,9 +89,9 @@ static inline bool mips_cm_has_l2sync(void) /* Macros to ease the creation of register access functions */ #define BUILD_CM_R_(name, off) \ -static inline u32 *addr_gcr_##name(void) \ +static inline u32 __iomem *addr_gcr_##name(void) \ { \ - return (u32 *)(mips_cm_base + (off)); \ + return (u32 __iomem *)(mips_cm_base + (off)); \ } \ \ static inline u32 read_gcr_##name(void) \ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 5e4aef304b02..5b720d8c2745 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -1386,12 +1386,27 @@ do { \ __res; \ }) +#define _write_32bit_cp1_register(dest, val, gas_hardfloat) \ +do { \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set reorder \n" \ + " "STR(gas_hardfloat)" \n" \ + " ctc1 %0,"STR(dest)" \n" \ + " .set pop \n" \ + : : "r" (val)); \ +} while (0) + #ifdef GAS_HAS_SET_HARDFLOAT #define read_32bit_cp1_register(source) \ _read_32bit_cp1_register(source, .set hardfloat) +#define write_32bit_cp1_register(dest, val) \ + _write_32bit_cp1_register(dest, val, .set hardfloat) #else #define read_32bit_cp1_register(source) \ _read_32bit_cp1_register(source, ) +#define write_32bit_cp1_register(dest, val) \ + _write_32bit_cp1_register(dest, val, ) #endif #ifdef HAVE_AS_DSP diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index bb7963753730..6499d93ae68d 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -29,13 +29,7 @@ static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - /* O32 ABI syscall() - Either 64-bit with O32 or 32-bit */ - if ((config_enabled(CONFIG_32BIT) || - test_tsk_thread_flag(task, TIF_32BIT_REGS)) && - (regs->regs[2] == __NR_syscall)) - return regs->regs[4]; - else - 
return regs->regs[2]; + return current_thread_info()->syscall; } static inline unsigned long mips_get_syscall_arg(unsigned long *arg, diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 99eea59604e9..e4440f92b366 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -36,6 +36,7 @@ struct thread_info { */ struct restart_block restart_block; struct pt_regs *regs; + long syscall; /* syscall number */ }; /* diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index d001bb1ad177..c03088f9f514 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -376,16 +376,17 @@ #define __NR_getrandom (__NR_Linux + 353) #define __NR_memfd_create (__NR_Linux + 354) #define __NR_bpf (__NR_Linux + 355) +#define __NR_execveat (__NR_Linux + 356) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 355 +#define __NR_Linux_syscalls 356 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 355 +#define __NR_O32_Linux_syscalls 356 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -709,16 +710,17 @@ #define __NR_getrandom (__NR_Linux + 313) #define __NR_memfd_create (__NR_Linux + 314) #define __NR_bpf (__NR_Linux + 315) +#define __NR_execveat (__NR_Linux + 316) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 315 +#define __NR_Linux_syscalls 316 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 315 +#define __NR_64_Linux_syscalls 316 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1046,15 +1048,16 @@ #define __NR_getrandom (__NR_Linux + 317) #define __NR_memfd_create (__NR_Linux + 318) #define __NR_bpf (__NR_Linux + 319) +#define __NR_execveat (__NR_Linux + 320) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 319 +#define __NR_Linux_syscalls 320 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 319 +#define __NR_N32_Linux_syscalls 320 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/jz4740/irq.c b/arch/mips/jz4740/irq.c index 2531da1d3add..97206b3deb97 100644 --- a/arch/mips/jz4740/irq.c +++ b/arch/mips/jz4740/irq.c @@ -30,6 +30,9 @@ #include <asm/irq_cpu.h> #include <asm/mach-jz4740/base.h> +#include <asm/mach-jz4740/irq.h> + +#include "irq.h" static void __iomem *jz_intc_base; diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index c92b15df6893..a5b5b56485c1 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -19,8 +19,8 @@ enum { int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf, bool is_interp, struct arch_elf_state *state) { - struct elfhdr *ehdr = _ehdr; - struct elf_phdr *phdr = _phdr; + struct elf32_hdr *ehdr = _ehdr; + struct elf32_phdr *phdr = _phdr; struct mips_elf_abiflags_v0 abiflags; int ret; @@ -48,7 +48,7 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf, return 0; } -static inline unsigned get_fp_abi(struct elfhdr *ehdr, int in_abi) +static inline unsigned get_fp_abi(struct elf32_hdr *ehdr, int in_abi) { /* If the ABI requirement is provided, simply return that */ if (in_abi != -1) @@ -65,7 +65,7 @@ static inline unsigned get_fp_abi(struct elfhdr *ehdr, int in_abi) int arch_check_elf(void *_ehdr, bool has_interpreter, struct arch_elf_state *state) { - struct elfhdr *ehdr = _ehdr; + struct elf32_hdr *ehdr = _ehdr; unsigned fp_abi, interp_fp_abi, abi0, abi1; /* 
Ignore non-O32 binaries */ diff --git a/arch/mips/kernel/irq_cpu.c b/arch/mips/kernel/irq_cpu.c index 590c2c980fd3..6eb7a3f515fc 100644 --- a/arch/mips/kernel/irq_cpu.c +++ b/arch/mips/kernel/irq_cpu.c @@ -57,6 +57,8 @@ static struct irq_chip mips_cpu_irq_controller = { .irq_mask_ack = mask_mips_irq, .irq_unmask = unmask_mips_irq, .irq_eoi = unmask_mips_irq, + .irq_disable = mask_mips_irq, + .irq_enable = unmask_mips_irq, }; /* @@ -93,6 +95,8 @@ static struct irq_chip mips_mt_cpu_irq_controller = { .irq_mask_ack = mips_mt_cpu_irq_ack, .irq_unmask = unmask_mips_irq, .irq_eoi = unmask_mips_irq, + .irq_disable = mask_mips_irq, + .irq_enable = unmask_mips_irq, }; asmlinkage void __weak plat_irq_dispatch(void) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index eb76434828e8..85bff5d513e5 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -82,6 +82,30 @@ void flush_thread(void) { } +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + /* + * Save any process state which is live in hardware registers to the + * parent context prior to duplication. This prevents the new child + * state becoming stale if the parent is preempted before copy_thread() + * gets a chance to save the parent's live hardware registers to the + * child context. + */ + preempt_disable(); + + if (is_msa_enabled()) + save_msa(current); + else if (is_fpu_owner()) + _save_fp(current); + + save_dsp(current); + + preempt_enable(); + + *dst = *src; + return 0; +} + int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, struct task_struct *p) { @@ -92,18 +116,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; - preempt_disable(); - - if (is_msa_enabled()) - save_msa(p); - else if (is_fpu_owner()) - save_fp(p); - - if (cpu_has_dsp) - save_dsp(p); - - preempt_enable(); - /* set up new TSS. */ childregs = (struct pt_regs *) childksp - 1; /* Put the stack after the struct pt_regs. */ diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 9d1487d83293..510452812594 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -770,6 +770,8 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) long ret = 0; user_exit(); + current_thread_info()->syscall = syscall; + if (secure_computing() == -1) return -1; diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 00cad1005a16..6e8de80bb446 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -181,6 +181,7 @@ illegal_syscall: sll t1, t0, 2 beqz v0, einval lw t2, sys_call_table(t1) # syscall routine + sw a0, PT_R2(sp) # call routine directly on restart /* Some syscalls like execve get their arguments from struct pt_regs and claim zero arguments in the syscall table. 
Thus we have to @@ -580,3 +581,4 @@ EXPORT(sys_call_table) PTR sys_getrandom PTR sys_memfd_create PTR sys_bpf /* 4355 */ + PTR sys_execveat diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 5251565e344b..ad4d44635c76 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -435,4 +435,5 @@ EXPORT(sys_call_table) PTR sys_getrandom PTR sys_memfd_create PTR sys_bpf /* 5315 */ + PTR sys_execveat .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 77e74398b828..446cc654da56 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -428,4 +428,5 @@ EXPORT(sysn32_call_table) PTR sys_getrandom PTR sys_memfd_create PTR sys_bpf + PTR compat_sys_execveat /* 6320 */ .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 6f8db9f728e8..d07b210fbeff 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -186,6 +186,7 @@ LEAF(sys32_syscall) dsll t1, t0, 3 beqz v0, einval ld t2, sys32_call_table(t1) # syscall routine + sd a0, PT_R2(sp) # call routine directly on restart move a0, a1 # shift argument registers move a1, a2 @@ -565,4 +566,5 @@ EXPORT(sys32_call_table) PTR sys_getrandom PTR sys_memfd_create PTR sys_bpf /* 4355 */ + PTR compat_sys_execveat .size sys32_call_table,.-sys32_call_table diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index 1e0a93c5a3e7..e36a859af666 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -44,8 +44,8 @@ static void cmp_init_secondary(void) struct cpuinfo_mips *c __maybe_unused = ¤t_cpu_data; /* Assume GIC is present */ - change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 | STATUSF_IP6 | - STATUSF_IP7); + change_c0_status(ST0_IM, STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP4 | + STATUSF_IP5 | STATUSF_IP6 | STATUSF_IP7); /* Enable per-cpu interrupts: platform specific */ diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index ad86951b73bd..17ea705f6c40 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -161,7 +161,8 @@ static void vsmp_init_secondary(void) #ifdef CONFIG_MIPS_GIC /* This is Malta specific: IPI,performance and timer interrupts */ if (gic_present) - change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 | + change_c0_status(ST0_IM, STATUSF_IP2 | STATUSF_IP3 | + STATUSF_IP4 | STATUSF_IP5 | STATUSF_IP6 | STATUSF_IP7); else #endif diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index c94c4e92e17d..1c0d8c50b7e1 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -123,10 +123,10 @@ asmlinkage void start_secondary(void) unsigned int cpu; cpu_probe(); - cpu_report(); per_cpu_trap_init(false); mips_clockevent_init(); mp_ops->init_secondary(); + cpu_report(); /* * XXX parity protection should be folded in here when it's converted diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index ad3d2031c327..c3b41e24c05a 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1231,7 +1231,8 @@ static int enable_restore_fp_context(int msa) /* Restore the scalar FP control & status register */ if (!was_fpu_owner) - asm volatile("ctc1 %0, $31" : : "r"(current->thread.fpu.fcr31)); + write_32bit_cp1_register(CP1_STATUS, + current->thread.fpu.fcr31); } out: diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index 30e334e823bd..2ae12825529f 100644 --- a/arch/mips/kvm/Kconfig +++ 
b/arch/mips/kvm/Kconfig @@ -20,6 +20,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select KVM_MMIO + select SRCU ---help--- Support for hosting Guest kernels. Currently supported on MIPS32 processors. diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index becc42bb1849..70ab5d664332 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -158,6 +158,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index e90b2e899291..30639a6e9b8c 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -489,6 +489,8 @@ static void r4k_tlb_configure(void) #ifdef CONFIG_64BIT pg |= PG_ELPA; #endif + if (cpu_has_rixiex) + pg |= PG_IEC; write_c0_pagegrain(pg); } diff --git a/arch/mn10300/include/asm/cacheflush.h b/arch/mn10300/include/asm/cacheflush.h index faed90240ded..6d6df839948f 100644 --- a/arch/mn10300/include/asm/cacheflush.h +++ b/arch/mn10300/include/asm/cacheflush.h @@ -159,13 +159,6 @@ extern void flush_icache_range(unsigned long start, unsigned long end); #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ memcpy(dst, src, len) -/* - * Internal debugging function - */ -#ifdef CONFIG_DEBUG_PAGEALLOC -extern void kernel_map_pages(struct page *page, int numpages, int enable); -#endif - #endif /* __ASSEMBLY__ */ #endif /* _ASM_CACHEFLUSH_H */ diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 3516cbdf1ee9..0c2cc5d39c8e 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -262,6 +262,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 15a0bb5fc06d..d194c0427b26 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -135,6 +135,8 @@ survive: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); @@ -157,9 +159,11 @@ bad_area: bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { - pr_alert("%s: unhandled page fault (%d) at 0x%08lx, " - "cause %ld\n", current->comm, SIGSEGV, address, cause); - show_regs(regs); + if (unhandled_signal(current, SIGSEGV) && printk_ratelimit()) { + pr_info("%s: unhandled page fault (%d) at 0x%08lx, " + "cause %ld\n", current->comm, SIGSEGV, address, cause); + show_regs(regs); + } _exception(SIGSEGV, regs, code, address); return; } diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 0703acf7d327..230ac20ae794 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -171,6 +171,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 3ca9c1131cfe..e5120e653240 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -256,6 +256,8 @@ good_area: */ if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto bad_area; BUG(); 
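The change repeated across the fault handlers above (alpha, arc, mips, mn10300, nios2, openrisc, parisc, and the further architectures below) is the same one-line addition: the dispatch on the error bits returned by handle_mm_fault() gains a VM_FAULT_SIGSEGV branch that jumps to the existing bad_area path. A minimal stand-alone C model of that dispatch is sketched below; the flag values and the pick_fault_action() helper are illustrative stand-ins for this note, not the kernel's definitions.

/* Stand-alone model of the VM_FAULT_* dispatch used by the arch fault
 * handlers above. Flag values are illustrative, not the kernel's. */
#include <stdio.h>

#define VM_FAULT_OOM     0x0001
#define VM_FAULT_SIGBUS  0x0002
#define VM_FAULT_SIGSEGV 0x0040

enum fault_action { FA_OUT_OF_MEMORY, FA_BAD_AREA, FA_DO_SIGBUS };

/* Mirrors the if/else chain: OOM first, then the new SIGSEGV case,
 * then SIGBUS; anything else would be a kernel bug (BUG()). */
static enum fault_action pick_fault_action(unsigned int fault)
{
        if (fault & VM_FAULT_OOM)
                return FA_OUT_OF_MEMORY;
        else if (fault & VM_FAULT_SIGSEGV)
                return FA_BAD_AREA;
        else if (fault & VM_FAULT_SIGBUS)
                return FA_DO_SIGBUS;
        return FA_BAD_AREA;     /* not reached for valid error bits */
}

int main(void)
{
        printf("%d\n", pick_fault_action(VM_FAULT_SIGSEGV)); /* 1: bad_area */
        printf("%d\n", pick_fault_action(VM_FAULT_SIGBUS));  /* 2: do_sigbus */
        return 0;
}

Keeping OOM first and checking SIGSEGV ahead of SIGBUS preserves the original ordering of the chain, so behaviour is unchanged whenever the new bit is not set.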
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 5b9312220e84..30b35fff2dea 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -60,13 +60,6 @@ extern void flush_dcache_phys_range(unsigned long start, unsigned long stop); #define copy_from_user_page(vma, page, vaddr, dst, src, len) \ memcpy(dst, src, len) - - -#ifdef CONFIG_DEBUG_PAGEALLOC -/* internal debugging function */ -void kernel_map_pages(struct page *page, int numpages, int enable); -#endif - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_CACHEFLUSH_H */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index f5769f19ae25..11850f310fb4 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_EVENTFD + select SRCU config KVM_BOOK3S_HANDLER bool diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 5a236f082c78..1b5305d4bdab 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -76,7 +76,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; goto out_unlock; - } else if (*flt & VM_FAULT_SIGBUS) { + } else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { ret = -EFAULT; goto out_unlock; } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index eb79907f34fa..6154b0a2b063 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -437,6 +437,8 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { + if (fault & VM_FAULT_SIGSEGV) + goto bad_area; rc = mm_fault_error(regs, address, fault); if (rc >= MM_FAULT_RETURN) goto bail; diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h index 3e20383d0921..58fae7d098cf 100644 --- a/arch/s390/include/asm/cacheflush.h +++ b/arch/s390/include/asm/cacheflush.h @@ -4,10 +4,6 @@ /* Caches aren't brain-dead on the s390. */ #include <asm-generic/cacheflush.h> -#ifdef CONFIG_DEBUG_PAGEALLOC -void kernel_map_pages(struct page *page, int numpages, int enable); -#endif - int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 646db9c467d1..5fce52cf0e57 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -28,6 +28,7 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING + select SRCU ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 811937bb90be..9065d5aa3932 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -374,6 +374,12 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault) do_no_context(regs); else pagefault_out_of_memory(); + } else if (fault & VM_FAULT_SIGSEGV) { + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) + do_no_context(regs); + else + do_sigsegv(regs, SEGV_MAPERR); } else if (fault & VM_FAULT_SIGBUS) { /* Kernel mode? 
Handle exceptions or die */ if (!user_mode(regs)) diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 52238983527d..6860beb2a280 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -114,6 +114,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 541dc6101508..a58fec9b55e0 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -353,6 +353,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, } else { if (fault & VM_FAULT_SIGBUS) do_sigbus(regs, error_code, address); + else if (fault & VM_FAULT_SIGSEGV) + bad_area(regs, error_code, address); else BUG(); } diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h index 38965379e350..68513c41e10d 100644 --- a/arch/sparc/include/asm/cacheflush_64.h +++ b/arch/sparc/include/asm/cacheflush_64.h @@ -74,11 +74,6 @@ void flush_ptrace_access(struct vm_area_struct *, struct page *, #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) -#ifdef CONFIG_DEBUG_PAGEALLOC -/* internal debugging function */ -void kernel_map_pages(struct page *page, int numpages, int enable); -#endif - #endif /* !__ASSEMBLY__ */ #endif /* _SPARC64_CACHEFLUSH_H */ diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 908e8c17c902..70d817154fe8 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -249,6 +249,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 18fcd7167095..479823249429 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -446,6 +446,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/tile/kvm/Kconfig b/arch/tile/kvm/Kconfig index 2298cb1daff7..1e968f7550dc 100644 --- a/arch/tile/kvm/Kconfig +++ b/arch/tile/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM depends on HAVE_KVM && MODULES select PREEMPT_NOTIFIERS select ANON_INODES + select SRCU ---help--- Support hosting paravirtualized guest machines. 
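The s390 hunk above applies the same idea one level up: once a fault is known to be unrecoverable, do_fault_error() decides whether to fall back to the kernel exception fixups (kernel mode) or to deliver a signal to the task (user mode), and the patch adds the SIGSEGV/SEGV_MAPERR case to that split. The sketch below is a rough stand-alone model of that disposition logic, reusing the same simplified flag values as the previous example; the fault_disposition() helper is invented for illustration and does not exist in the kernel.

/* Rough model of the user/kernel split in a do_fault_error()-style
 * helper. Signal choices follow the hunk above; this is a stand-alone
 * illustration, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define VM_FAULT_OOM     0x0001
#define VM_FAULT_SIGBUS  0x0002
#define VM_FAULT_SIGSEGV 0x0040

static const char *fault_disposition(bool user_mode, unsigned int fault)
{
        if (fault & VM_FAULT_OOM)
                return user_mode ? "pagefault_out_of_memory()" : "kernel fixup or die";
        if (fault & VM_FAULT_SIGSEGV)   /* branch added by the patch */
                return user_mode ? "SIGSEGV (SEGV_MAPERR)" : "kernel fixup or die";
        if (fault & VM_FAULT_SIGBUS)
                return user_mode ? "SIGBUS" : "kernel fixup or die";
        return "BUG";
}

int main(void)
{
        printf("%s\n", fault_disposition(true,  VM_FAULT_SIGSEGV));
        printf("%s\n", fault_disposition(false, VM_FAULT_SIGSEGV));
        return 0;
}

Routing the kernel-mode cases through the fixup path first means a faulting kernel access covered by the exception tables is typically repaired there rather than escalating, while user-mode faults get the appropriate signal.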
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 565e25a98334..0f61a73534e6 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -442,6 +442,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG(); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 5678c3571e7c..209617302df8 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -80,6 +80,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) { goto out_of_memory; + } else if (fault & VM_FAULT_SIGSEGV) { + goto out; } else if (fault & VM_FAULT_SIGBUS) { err = -EACCES; goto out; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0dc9d0144a27..5e28e2be3a41 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -138,6 +138,7 @@ config X86 select HAVE_ACPI_APEI_NMI if ACPI select ACPI_LEGACY_TABLES_LOOKUP if ACPI select X86_FEATURE_NAMES if PROC_FS + select SRCU config INSTRUCTION_DECODER def_bool y @@ -855,6 +856,10 @@ config SCHED_MC source "kernel/Kconfig.preempt" +config UP_LATE_INIT + def_bool y + depends on !SMP && X86_LOCAL_APIC + config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on X86_32 && !SMP && !X86_32_NON_STANDARD diff --git a/arch/x86/boot/ctype.h b/arch/x86/boot/ctype.h index 25e13403193c..020f137df7a2 100644 --- a/arch/x86/boot/ctype.h +++ b/arch/x86/boot/ctype.h @@ -1,6 +1,5 @@ -#ifndef BOOT_ISDIGIT_H - -#define BOOT_ISDIGIT_H +#ifndef BOOT_CTYPE_H +#define BOOT_CTYPE_H static inline int isdigit(int ch) { diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c index 5df2869c874b..45a07684bbab 100644 --- a/arch/x86/boot/early_serial_console.c +++ b/arch/x86/boot/early_serial_console.c @@ -2,8 +2,6 @@ #define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */ -#define XMTRDY 0x20 - #define DLAB 0x80 #define TXR 0 /* Transmit register (WRITE) */ @@ -74,8 +72,8 @@ static void parse_earlyprintk(void) static const int bases[] = { 0x3f8, 0x2f8 }; int idx = 0; - if (!strncmp(arg + pos, "ttyS", 4)) - pos += 4; + /* += strlen("ttyS"); */ + pos += 4; if (arg[pos++] == '1') idx = 1; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 82e8a1d44658..156ebcab4ada 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -179,8 +179,8 @@ sysenter_dispatch: sysexit_from_sys_call: andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) /* clear IF, that popfq doesn't enable interrupts early */ - andl $~0x200,EFLAGS-R11(%rsp) - movl RIP-R11(%rsp),%edx /* User %eip */ + andl $~0x200,EFLAGS-ARGOFFSET(%rsp) + movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ CFI_REGISTER rip,rdx RESTORE_ARGS 0,24,0,0,0,0 xorq %r8,%r8 diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 465b309af254..92003f3c8a42 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -106,7 +106,14 @@ extern u32 native_safe_apic_wait_icr_idle(void); extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); -extern int x2apic_mode; +static inline bool apic_is_x2apic_enabled(void) +{ + u64 msr; + + if (rdmsrl_safe(MSR_IA32_APICBASE, &msr)) + return false; + return msr & X2APIC_ENABLE; +} #ifdef CONFIG_X86_X2APIC /* @@ -169,48 +176,23 @@ static inline u64 native_x2apic_icr_read(void) return val; } +extern int x2apic_mode; extern int x2apic_phys; -extern int x2apic_preenabled; -extern void 
check_x2apic(void); -extern void enable_x2apic(void); +extern void __init check_x2apic(void); +extern void x2apic_setup(void); static inline int x2apic_enabled(void) { - u64 msr; - - if (!cpu_has_x2apic) - return 0; - - rdmsrl(MSR_IA32_APICBASE, msr); - if (msr & X2APIC_ENABLE) - return 1; - return 0; + return cpu_has_x2apic && apic_is_x2apic_enabled(); } #define x2apic_supported() (cpu_has_x2apic) -static inline void x2apic_force_phys(void) -{ - x2apic_phys = 1; -} #else -static inline void disable_x2apic(void) -{ -} -static inline void check_x2apic(void) -{ -} -static inline void enable_x2apic(void) -{ -} -static inline int x2apic_enabled(void) -{ - return 0; -} -static inline void x2apic_force_phys(void) -{ -} +static inline void check_x2apic(void) { } +static inline void x2apic_setup(void) { } +static inline int x2apic_enabled(void) { return 0; } -#define x2apic_preenabled 0 -#define x2apic_supported() 0 +#define x2apic_mode (0) +#define x2apic_supported() (0) #endif extern void enable_IR_x2apic(void); @@ -219,7 +201,6 @@ extern int get_physical_broadcast(void); extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); -extern void connect_bsp_APIC(void); extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); extern void lapic_shutdown(void); @@ -227,8 +208,6 @@ extern int verify_local_APIC(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); extern void setup_local_APIC(void); -extern void end_local_APIC_setup(void); -extern void bsp_end_local_APIC_setup(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); extern void setup_boot_APIC_clock(void); @@ -236,6 +215,9 @@ extern void setup_secondary_APIC_clock(void); extern int APIC_init_uniprocessor(void); extern int apic_force_enable(unsigned long addr); +extern int apic_bsp_setup(bool upmode); +extern void apic_ap_setup(void); + /* * On 32bit this is mach-xxx local */ diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 76659b67fd11..1f1297b46f83 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -83,7 +83,6 @@ For 32-bit we have the following conventions - kernel is built with #define SS 160 #define ARGOFFSET R11 -#define SWFRAME ORIG_RAX .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 subq $9*8+\addskip, %rsp diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index aede2c347bde..90a54851aedc 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -174,6 +174,7 @@ #define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ +#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ /* @@ -388,6 +389,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT) +#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT) #if __GNUC__ >= 4 extern void warn_pre_alternatives(void); diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 61fd18b83b6c..12cb66f6d3a5 100644 --- a/arch/x86/include/asm/debugreg.h +++ 
b/arch/x86/include/asm/debugreg.h @@ -114,5 +114,10 @@ static inline void debug_stack_usage_inc(void) { } static inline void debug_stack_usage_dec(void) { } #endif /* X86_64 */ +#ifdef CONFIG_CPU_SUP_AMD +extern void set_dr_addr_mask(unsigned long mask, int dr); +#else +static inline void set_dr_addr_mask(unsigned long mask, int dr) { } +#endif #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index e97622f57722..0dbc08282291 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -207,7 +207,7 @@ static inline void fpu_fxsave(struct fpu *fpu) if (config_enabled(CONFIG_X86_32)) asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); else if (config_enabled(CONFIG_AS_FXSAVEQ)) - asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave)); + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave)); else { /* Using "rex64; fxsave %0" is broken because, if the memory * operand uses any extended registers for addressing, a second @@ -290,9 +290,11 @@ static inline int fpu_restore_checking(struct fpu *fpu) static inline int restore_fpu_checking(struct task_struct *tsk) { - /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception - is pending. Clear the x87 state here by setting it to fixed - values. "m" is a random variable that should be in L1 */ + /* + * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is + * pending. Clear the x87 state here by setting it to fixed values. + * "m" is a random variable that should be in L1. + */ if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { asm volatile( "fnclex\n\t" diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index ef1c4d2d41ec..6c98be864a75 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -12,6 +12,7 @@ */ struct arch_hw_breakpoint { unsigned long address; + unsigned long mask; u8 len; u8 type; }; diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index ed8089d69094..6eb6fcb83f63 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -40,8 +40,8 @@ extern void __kernel_fpu_end(void); static inline void kernel_fpu_begin(void) { - WARN_ON_ONCE(!irq_fpu_usable()); preempt_disable(); + WARN_ON_ONCE(!irq_fpu_usable()); __kernel_fpu_begin(); } @@ -51,6 +51,10 @@ static inline void kernel_fpu_end(void) preempt_enable(); } +/* Must be called with preempt disabled */ +extern void kernel_fpu_disable(void); +extern void kernel_fpu_enable(void); + /* * Some instructions like VIA's padlock instructions generate a spurious * DNA fault but don't modify SSE registers. 
And these instructions diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index bf006cce9418..2f91685fe1cd 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -279,6 +279,11 @@ static inline void disable_ioapic_support(void) { } #define native_ioapic_set_affinity NULL #define native_setup_ioapic_entry NULL #define native_eoi_ioapic_pin NULL + +static inline void setup_IO_APIC(void) { } +static inline void enable_IO_APIC(void) { } +static inline void setup_ioapic_dest(void) { } + #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index b7747c4c2cf2..6224d316c405 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -33,8 +33,6 @@ struct irq_cfg; #ifdef CONFIG_IRQ_REMAP -extern void setup_irq_remapping_ops(void); -extern int irq_remapping_supported(void); extern void set_irq_remapping_broken(void); extern int irq_remapping_prepare(void); extern int irq_remapping_enable(void); @@ -60,8 +58,6 @@ void irq_remap_modify_chip_defaults(struct irq_chip *chip); #else /* CONFIG_IRQ_REMAP */ -static inline void setup_irq_remapping_ops(void) { } -static inline int irq_remapping_supported(void) { return 0; } static inline void set_irq_remapping_broken(void) { } static inline int irq_remapping_prepare(void) { return -ENODEV; } static inline int irq_remapping_enable(void) { return -ENODEV; } diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 51b26e895933..9b3de99dc004 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -190,7 +190,6 @@ enum mcp_flags { void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); int mce_notify_irq(void); -void mce_notify_process(void); DECLARE_PER_CPU(struct mce, injectm); diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h index fc7a17c05d35..bc0fc0866553 100644 --- a/arch/x86/include/asm/pmc_atom.h +++ b/arch/x86/include/asm/pmc_atom.h @@ -53,6 +53,28 @@ /* Sleep state counter is in units of of 32us */ #define PMC_TMR_SHIFT 5 +/* Power status of power islands */ +#define PMC_PSS 0x98 + +#define PMC_PSS_BIT_GBE BIT(0) +#define PMC_PSS_BIT_SATA BIT(1) +#define PMC_PSS_BIT_HDA BIT(2) +#define PMC_PSS_BIT_SEC BIT(3) +#define PMC_PSS_BIT_PCIE BIT(4) +#define PMC_PSS_BIT_LPSS BIT(5) +#define PMC_PSS_BIT_LPE BIT(6) +#define PMC_PSS_BIT_DFX BIT(7) +#define PMC_PSS_BIT_USH_CTRL BIT(8) +#define PMC_PSS_BIT_USH_SUS BIT(9) +#define PMC_PSS_BIT_USH_VCCS BIT(10) +#define PMC_PSS_BIT_USH_VCCA BIT(11) +#define PMC_PSS_BIT_OTG_CTRL BIT(12) +#define PMC_PSS_BIT_OTG_VCCS BIT(13) +#define PMC_PSS_BIT_OTG_VCCA_CLK BIT(14) +#define PMC_PSS_BIT_OTG_VCCA BIT(15) +#define PMC_PSS_BIT_USB BIT(16) +#define PMC_PSS_BIT_USB_SUS BIT(17) + /* These registers reflect D3 status of functions */ #define PMC_D3_STS_0 0xA0 diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h deleted file mode 100644 index 0da7409f0bec..000000000000 --- a/arch/x86/include/asm/smpboot_hooks.h +++ /dev/null @@ -1,68 +0,0 @@ -/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws - * which needs to alter them. 
*/ - -static inline void smpboot_clear_io_apic_irqs(void) -{ -#ifdef CONFIG_X86_IO_APIC - io_apic_irqs = 0; -#endif -} - -static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) -{ - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(0xa, 0xf); - spin_unlock_irqrestore(&rtc_lock, flags); - local_flush_tlb(); - pr_debug("1.\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = - start_eip >> 4; - pr_debug("2.\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = - start_eip & 0xf; - pr_debug("3.\n"); -} - -static inline void smpboot_restore_warm_reset_vector(void) -{ - unsigned long flags; - - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - - /* - * Paranoid: Set warm reset code and vector here back - * to default values. - */ - spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(0, 0xf); - spin_unlock_irqrestore(&rtc_lock, flags); - - *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; -} - -static inline void __init smpboot_setup_io_apic(void) -{ -#ifdef CONFIG_X86_IO_APIC - /* - * Here we can be sure that there is an IO-APIC in the system. Let's - * go and set it up: - */ - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else { - nr_ioapics = 0; - } -#endif -} - -static inline void smpboot_clear_io_apic(void) -{ -#ifdef CONFIG_X86_IO_APIC - nr_ioapics = 0; -#endif -} diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 547e344a6dc6..e82e95abc92b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -75,7 +75,6 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ -#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -100,7 +99,6 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -140,7 +138,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) /* flags to check in __switch_to() */ @@ -170,6 +168,17 @@ static inline struct thread_info *current_thread_info(void) return ti; } +static inline unsigned long current_stack_pointer(void) +{ + unsigned long sp; +#ifdef CONFIG_X86_64 + asm("mov %%rsp,%0" : "=g" (sp)); +#else + asm("mov %%esp,%0" : "=g" (sp)); +#endif + return sp; +} + #else /* !__ASSEMBLY__ */ /* how to get the thread information struct from ASM */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 707adc6549d8..4e49d7dff78e 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_TRAPS_H #define _ASM_X86_TRAPS_H +#include <linux/context_tracking_state.h> #include <linux/kprobes.h> #include <asm/debugreg.h> @@ -110,6 +111,11 @@ asmlinkage void smp_thermal_interrupt(void); asmlinkage void 
mce_threshold_interrupt(void); #endif +extern enum ctx_state ist_enter(struct pt_regs *regs); +extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state); +extern void ist_begin_non_atomic(struct pt_regs *regs); +extern void ist_end_non_atomic(void); + /* Interrupts/Exceptions */ enum { X86_TRAP_DE = 0, /* 0, Divide-by-zero */ diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c8aa65d56027..d979e5abae55 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -251,6 +251,10 @@ /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 +#define MSR_F16H_DR1_ADDR_MASK 0xc0011019 +#define MSR_F16H_DR2_ADDR_MASK 0xc001101a +#define MSR_F16H_DR3_ADDR_MASK 0xc001101b +#define MSR_F16H_DR0_ADDR_MASK 0xc0011027 /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b9e30daa0881..a18fff361c7f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -653,6 +653,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi, return gsi; } +#ifdef CONFIG_X86_LOCAL_APIC static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi, int trigger, int polarity) { @@ -675,6 +676,7 @@ static void acpi_unregister_gsi_ioapic(u32 gsi) mutex_unlock(&acpi_ioapic_lock); #endif } +#endif int (*__acpi_register_gsi)(struct device *dev, u32 gsi, int trigger, int polarity) = acpi_register_gsi_pic; diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index b708738d016e..6a7c23ff21d3 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -135,14 +135,6 @@ static inline void apbt_clear_mapping(void) apbt_virt_address = NULL; } -/* - * APBT timer interrupt enable / disable - */ -static inline int is_apbt_capable(void) -{ - return apbt_virt_address ? 1 : 0; -} - static int __init apbt_clockevent_register(void) { struct sfi_timer_table_entry *mtmr; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 29b5b18afa27..b665d241efad 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -134,9 +134,6 @@ static inline void imcr_apic_to_pic(void) */ static int force_enable_local_apic __initdata; -/* Control whether x2APIC mode is enabled or not */ -static bool nox2apic __initdata; - /* * APIC command line parameters */ @@ -161,33 +158,6 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif -int x2apic_mode; -#ifdef CONFIG_X86_X2APIC -/* x2apic enabled before OS handover */ -int x2apic_preenabled; -static int x2apic_disabled; -static int __init setup_nox2apic(char *str) -{ - if (x2apic_enabled()) { - int apicid = native_apic_msr_read(APIC_ID); - - if (apicid >= 255) { - pr_warning("Apicid: %08x, cannot enforce nox2apic\n", - apicid); - return 0; - } - - pr_warning("x2apic already enabled. 
will disable it\n"); - } else - setup_clear_cpu_cap(X86_FEATURE_X2APIC); - - nox2apic = true; - - return 0; -} -early_param("nox2apic", setup_nox2apic); -#endif - unsigned long mp_lapic_addr; int disable_apic; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ @@ -1475,7 +1445,7 @@ void setup_local_APIC(void) #endif } -void end_local_APIC_setup(void) +static void end_local_APIC_setup(void) { lapic_setup_esr(); @@ -1492,116 +1462,184 @@ void end_local_APIC_setup(void) apic_pm_activate(); } -void __init bsp_end_local_APIC_setup(void) +/* + * APIC setup function for application processors. Called from smpboot.c + */ +void apic_ap_setup(void) { + setup_local_APIC(); end_local_APIC_setup(); - - /* - * Now that local APIC setup is completed for BP, configure the fault - * handling for interrupt remapping. - */ - irq_remap_enable_fault_handling(); - } #ifdef CONFIG_X86_X2APIC -/* - * Need to disable xapic and x2apic at the same time and then enable xapic mode - */ -static inline void __disable_x2apic(u64 msr) -{ - wrmsrl(MSR_IA32_APICBASE, - msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); - wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); -} +int x2apic_mode; -static __init void disable_x2apic(void) +enum { + X2APIC_OFF, + X2APIC_ON, + X2APIC_DISABLED, +}; +static int x2apic_state; + +static inline void __x2apic_disable(void) { u64 msr; - if (!cpu_has_x2apic) + if (cpu_has_apic) return; rdmsrl(MSR_IA32_APICBASE, msr); - if (msr & X2APIC_ENABLE) { - u32 x2apic_id = read_apic_id(); - - if (x2apic_id >= 255) - panic("Cannot disable x2apic, id: %08x\n", x2apic_id); + if (!(msr & X2APIC_ENABLE)) + return; + /* Disable xapic and x2apic first and then reenable xapic mode */ + wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); + wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); + printk_once(KERN_INFO "x2apic disabled\n"); +} - pr_info("Disabling x2apic\n"); - __disable_x2apic(msr); +static inline void __x2apic_enable(void) +{ + u64 msr; - if (nox2apic) { - clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC); - setup_clear_cpu_cap(X86_FEATURE_X2APIC); - } + rdmsrl(MSR_IA32_APICBASE, msr); + if (msr & X2APIC_ENABLE) + return; + wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); + printk_once(KERN_INFO "x2apic enabled\n"); +} - x2apic_disabled = 1; - x2apic_mode = 0; +static int __init setup_nox2apic(char *str) +{ + if (x2apic_enabled()) { + int apicid = native_apic_msr_read(APIC_ID); - register_lapic_address(mp_lapic_addr); + if (apicid >= 255) { + pr_warning("Apicid: %08x, cannot enforce nox2apic\n", + apicid); + return 0; + } + pr_warning("x2apic already enabled.\n"); + __x2apic_disable(); } + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + x2apic_state = X2APIC_DISABLED; + x2apic_mode = 0; + return 0; } +early_param("nox2apic", setup_nox2apic); -void check_x2apic(void) +/* Called from cpu_init() to enable x2apic on (secondary) cpus */ +void x2apic_setup(void) { - if (x2apic_enabled()) { - pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); - x2apic_preenabled = x2apic_mode = 1; + /* + * If x2apic is not in ON state, disable it if already enabled + * from BIOS. 
+ */ + if (x2apic_state != X2APIC_ON) { + __x2apic_disable(); + return; } + __x2apic_enable(); } -void enable_x2apic(void) +static __init void x2apic_disable(void) { - u64 msr; + u32 x2apic_id; - rdmsrl(MSR_IA32_APICBASE, msr); - if (x2apic_disabled) { - __disable_x2apic(msr); + if (x2apic_state != X2APIC_ON) + goto out; + + x2apic_id = read_apic_id(); + if (x2apic_id >= 255) + panic("Cannot disable x2apic, id: %08x\n", x2apic_id); + + __x2apic_disable(); + register_lapic_address(mp_lapic_addr); +out: + x2apic_state = X2APIC_DISABLED; + x2apic_mode = 0; +} + +static __init void x2apic_enable(void) +{ + if (x2apic_state != X2APIC_OFF) return; - } - if (!x2apic_mode) + x2apic_mode = 1; + x2apic_state = X2APIC_ON; + __x2apic_enable(); +} + +static __init void try_to_enable_x2apic(int remap_mode) +{ + if (x2apic_state == X2APIC_DISABLED) return; - if (!(msr & X2APIC_ENABLE)) { - printk_once(KERN_INFO "Enabling x2apic\n"); - wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); + if (remap_mode != IRQ_REMAP_X2APIC_MODE) { + /* IR is required if there is APIC ID > 255 even when running + * under KVM + */ + if (max_physical_apicid > 255 || + (IS_ENABLED(CONFIG_HYPERVISOR_GUEST) && + !hypervisor_x2apic_available())) { + pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); + x2apic_disable(); + return; + } + + /* + * without IR all CPUs can be addressed by IOAPIC/MSI + * only in physical mode + */ + x2apic_phys = 1; } + x2apic_enable(); } -#endif /* CONFIG_X86_X2APIC */ -int __init enable_IR(void) +void __init check_x2apic(void) { -#ifdef CONFIG_IRQ_REMAP - if (!irq_remapping_supported()) { - pr_debug("intr-remapping not supported\n"); - return -1; + if (x2apic_enabled()) { + pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n"); + x2apic_mode = 1; + x2apic_state = X2APIC_ON; + } else if (!cpu_has_x2apic) { + x2apic_state = X2APIC_DISABLED; } +} +#else /* CONFIG_X86_X2APIC */ +static int __init validate_x2apic(void) +{ + if (!apic_is_x2apic_enabled()) + return 0; + /* + * Checkme: Can we simply turn off x2apic here instead of panic? 
+ */ + panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n"); +} +early_initcall(validate_x2apic); - if (!x2apic_preenabled && skip_ioapic_setup) { - pr_info("Skipped enabling intr-remap because of skipping " - "io-apic setup\n"); +static inline void try_to_enable_x2apic(int remap_mode) { } +static inline void __x2apic_enable(void) { } +#endif /* !CONFIG_X86_X2APIC */ + +static int __init try_to_enable_IR(void) +{ +#ifdef CONFIG_X86_IO_APIC + if (!x2apic_enabled() && skip_ioapic_setup) { + pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); return -1; } - - return irq_remapping_enable(); #endif - return -1; + return irq_remapping_enable(); } void __init enable_IR_x2apic(void) { unsigned long flags; - int ret, x2apic_enabled = 0; - int hardware_init_ret; - - /* Make sure irq_remap_ops are initialized */ - setup_irq_remapping_ops(); + int ret, ir_stat; - hardware_init_ret = irq_remapping_prepare(); - if (hardware_init_ret && !x2apic_supported()) + ir_stat = irq_remapping_prepare(); + if (ir_stat < 0 && !x2apic_supported()) return; ret = save_ioapic_entries(); @@ -1614,49 +1652,13 @@ void __init enable_IR_x2apic(void) legacy_pic->mask_all(); mask_ioapic_entries(); - if (x2apic_preenabled && nox2apic) - disable_x2apic(); - - if (hardware_init_ret) - ret = -1; - else - ret = enable_IR(); - - if (!x2apic_supported()) - goto skip_x2apic; + /* If irq_remapping_prepare() succeded, try to enable it */ + if (ir_stat >= 0) + ir_stat = try_to_enable_IR(); + /* ir_stat contains the remap mode or an error code */ + try_to_enable_x2apic(ir_stat); - if (ret < 0) { - /* IR is required if there is APIC ID > 255 even when running - * under KVM - */ - if (max_physical_apicid > 255 || - !hypervisor_x2apic_available()) { - if (x2apic_preenabled) - disable_x2apic(); - goto skip_x2apic; - } - /* - * without IR all CPUs can be addressed by IOAPIC/MSI - * only in physical mode - */ - x2apic_force_phys(); - } - - if (ret == IRQ_REMAP_XAPIC_MODE) { - pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); - goto skip_x2apic; - } - - x2apic_enabled = 1; - - if (x2apic_supported() && !x2apic_mode) { - x2apic_mode = 1; - enable_x2apic(); - pr_info("Enabled x2apic\n"); - } - -skip_x2apic: - if (ret < 0) /* IR enabling failed */ + if (ir_stat < 0) restore_ioapic_entries(); legacy_pic->restore_mask(); local_irq_restore(flags); @@ -1847,82 +1849,8 @@ void __init register_lapic_address(unsigned long address) } } -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ int apic_version[MAX_LOCAL_APIC]; -int __init APIC_init_uniprocessor(void) -{ - if (disable_apic) { - pr_info("Apic disabled\n"); - return -1; - } -#ifdef CONFIG_X86_64 - if (!cpu_has_apic) { - disable_apic = 1; - pr_info("Apic disabled by BIOS\n"); - return -1; - } -#else - if (!smp_found_config && !cpu_has_apic) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!cpu_has_apic && - APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - pr_err("BIOS bug, local APIC 0x%x not detected!...\n", - boot_cpu_physical_apicid); - return -1; - } -#endif - - default_setup_apic_routing(); - - verify_local_APIC(); - connect_bsp_APIC(); - -#ifdef CONFIG_X86_64 - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); -#else - /* - * Hack: In case of kdump, after a crash, kernel might be booting - * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid - * might be zero if read from MP tables. Get it from LAPIC. 
- */ -# ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = read_apic_id(); -# endif -#endif - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - setup_local_APIC(); - -#ifdef CONFIG_X86_IO_APIC - /* - * Now enable IO-APICs, actually call clear_IO_APIC - * We need clear_IO_APIC before enabling error vector - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); -#endif - - bsp_end_local_APIC_setup(); - -#ifdef CONFIG_X86_IO_APIC - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); - else { - nr_ioapics = 0; - } -#endif - - x86_init.timers.setup_percpu_clockev(); - return 0; -} - /* * Local APIC interrupts */ @@ -2027,7 +1955,7 @@ __visible void smp_trace_error_interrupt(struct pt_regs *regs) /** * connect_bsp_APIC - attach the APIC to the interrupt system */ -void __init connect_bsp_APIC(void) +static void __init connect_bsp_APIC(void) { #ifdef CONFIG_X86_32 if (pic_mode) { @@ -2274,6 +2202,100 @@ void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) } } +static void __init apic_bsp_up_setup(void) +{ +#ifdef CONFIG_X86_64 + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); +#else + /* + * Hack: In case of kdump, after a crash, kernel might be booting + * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid + * might be zero if read from MP tables. Get it from LAPIC. + */ +# ifdef CONFIG_CRASH_DUMP + boot_cpu_physical_apicid = read_apic_id(); +# endif +#endif + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); +} + +/** + * apic_bsp_setup - Setup function for local apic and io-apic + * @upmode: Force UP mode (for APIC_init_uniprocessor) + * + * Returns: + * apic_id of BSP APIC + */ +int __init apic_bsp_setup(bool upmode) +{ + int id; + + connect_bsp_APIC(); + if (upmode) + apic_bsp_up_setup(); + setup_local_APIC(); + + if (x2apic_mode) + id = apic_read(APIC_LDR); + else + id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + + enable_IO_APIC(); + end_local_APIC_setup(); + irq_remap_enable_fault_handling(); + setup_IO_APIC(); + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); + return id; +} + +/* + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. + */ +int __init APIC_init_uniprocessor(void) +{ + if (disable_apic) { + pr_info("Apic disabled\n"); + return -1; + } +#ifdef CONFIG_X86_64 + if (!cpu_has_apic) { + disable_apic = 1; + pr_info("Apic disabled by BIOS\n"); + return -1; + } +#else + if (!smp_found_config && !cpu_has_apic) + return -1; + + /* + * Complain if the BIOS pretends there is one. 
+ */ + if (!cpu_has_apic && + APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + pr_err("BIOS bug, local APIC 0x%x not detected!...\n", + boot_cpu_physical_apicid); + return -1; + } +#endif + + if (!smp_found_config) + disable_ioapic_support(); + + default_setup_apic_routing(); + verify_local_APIC(); + apic_bsp_setup(true); + return 0; +} + +#ifdef CONFIG_UP_LATE_INIT +void __init up_late_init(void) +{ + APIC_init_uniprocessor(); +} +#endif + /* * Power management */ @@ -2359,9 +2381,9 @@ static void lapic_resume(void) mask_ioapic_entries(); legacy_pic->mask_all(); - if (x2apic_mode) - enable_x2apic(); - else { + if (x2apic_mode) { + __x2apic_enable(); + } else { /* * Make sure the APICBASE points to the right address * diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3f5f60406ab1..f4dc2462a1ac 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1507,7 +1507,10 @@ void __init enable_IO_APIC(void) int i8259_apic, i8259_pin; int apic, pin; - if (!nr_legacy_irqs()) + if (skip_ioapic_setup) + nr_ioapics = 0; + + if (!nr_legacy_irqs() || !nr_ioapics) return; for_each_ioapic_pin(apic, pin) { @@ -2295,7 +2298,7 @@ static inline void __init check_timer(void) } local_irq_disable(); apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); - if (x2apic_preenabled) + if (apic_is_x2apic_enabled()) apic_printk(APIC_QUIET, KERN_INFO "Perhaps problem with the pre-enabled x2apic mode\n" "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); @@ -2373,9 +2376,9 @@ void __init setup_IO_APIC(void) { int ioapic; - /* - * calling enable_IO_APIC() is moved to setup_local_APIC for BP - */ + if (skip_ioapic_setup || !nr_ioapics) + return; + io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 15c5df92f74e..a220239cea65 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -869,3 +869,22 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) return false; } + +void set_dr_addr_mask(unsigned long mask, int dr) +{ + if (!cpu_has_bpext) + return; + + switch (dr) { + case 0: + wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0); + break; + case 1: + case 2: + case 3: + wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0); + break; + default: + break; + } +} diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c6049650c093..b15bffcaba6d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -491,17 +491,18 @@ u16 __read_mostly tlb_lld_2m[NR_INFO]; u16 __read_mostly tlb_lld_4m[NR_INFO]; u16 __read_mostly tlb_lld_1g[NR_INFO]; -void cpu_detect_tlb(struct cpuinfo_x86 *c) +static void cpu_detect_tlb(struct cpuinfo_x86 *c) { if (this_cpu->c_detect_tlb) this_cpu->c_detect_tlb(c); - printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" - "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", + pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n", tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], - tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], - tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], - tlb_lld_1g[ENTRIES]); + tlb_lli_4m[ENTRIES]); + + pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", + tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], + tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); } void detect_ht(struct cpuinfo_x86 *c) @@ -1332,7 +1333,7 @@ void cpu_init(void) barrier(); x86_configure_nx(); - 
enable_x2apic(); + x2apic_setup(); /* * set up and load the per-CPU TSS diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 9cc6b6f25f42..94d7dcb12145 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -487,10 +487,8 @@ static void init_intel(struct cpuinfo_x86 *c) rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) { - printk_once(KERN_WARNING "ENERGY_PERF_BIAS:" - " Set to 'normal', was 'performance'\n" - "ENERGY_PERF_BIAS: View and update with" - " x86_energy_perf_policy(8)\n"); + pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); + pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d2c611699cd9..cdfed7953963 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -43,6 +43,7 @@ #include <linux/export.h> #include <asm/processor.h> +#include <asm/traps.h> #include <asm/mce.h> #include <asm/msr.h> @@ -115,7 +116,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); * CPU/chipset specific EDAC code can register a notifier call here to print * MCE errors in a human-readable form. */ -ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); /* Do initial initialization of a struct mce */ void mce_setup(struct mce *m) @@ -311,7 +312,7 @@ static void wait_for_panic(void) panic("Panicing machine check CPU died"); } -static void mce_panic(char *msg, struct mce *final, char *exp) +static void mce_panic(const char *msg, struct mce *final, char *exp) { int i, apei_err = 0; @@ -529,7 +530,7 @@ static void mce_schedule_work(void) schedule_work(this_cpu_ptr(&mce_work)); } -DEFINE_PER_CPU(struct irq_work, mce_irq_work); +static DEFINE_PER_CPU(struct irq_work, mce_irq_work); static void mce_irq_work_cb(struct irq_work *entry) { @@ -735,7 +736,7 @@ static atomic_t mce_callin; /* * Check if a timeout waiting for other CPUs happened. */ -static int mce_timed_out(u64 *t) +static int mce_timed_out(u64 *t, const char *msg) { /* * The others already did panic for some reason. @@ -750,8 +751,7 @@ static int mce_timed_out(u64 *t) goto out; if ((s64)*t < SPINUNIT) { if (mca_cfg.tolerant <= 1) - mce_panic("Timeout synchronizing machine check over CPUs", - NULL, NULL); + mce_panic(msg, NULL, NULL); cpu_missing = 1; return 1; } @@ -867,7 +867,8 @@ static int mce_start(int *no_way_out) * Wait for everyone. */ while (atomic_read(&mce_callin) != cpus) { - if (mce_timed_out(&timeout)) { + if (mce_timed_out(&timeout, + "Timeout: Not all CPUs entered broadcast exception handler")) { atomic_set(&global_nwo, 0); return -1; } @@ -892,7 +893,8 @@ static int mce_start(int *no_way_out) * only seen by one CPU before cleared, avoiding duplicates. */ while (atomic_read(&mce_executing) < order) { - if (mce_timed_out(&timeout)) { + if (mce_timed_out(&timeout, + "Timeout: Subject CPUs unable to finish machine check processing")) { atomic_set(&global_nwo, 0); return -1; } @@ -936,7 +938,8 @@ static int mce_end(int order) * loops. 
*/ while (atomic_read(&mce_executing) <= cpus) { - if (mce_timed_out(&timeout)) + if (mce_timed_out(&timeout, + "Timeout: Monarch CPU unable to finish machine check processing")) goto reset; ndelay(SPINUNIT); } @@ -949,7 +952,8 @@ static int mce_end(int order) * Subject: Wait for Monarch to finish. */ while (atomic_read(&mce_executing) != 0) { - if (mce_timed_out(&timeout)) + if (mce_timed_out(&timeout, + "Timeout: Monarch CPU did not finish machine check processing")) goto reset; ndelay(SPINUNIT); } @@ -1003,51 +1007,6 @@ static void mce_clear_state(unsigned long *toclear) } /* - * Need to save faulting physical address associated with a process - * in the machine check handler some place where we can grab it back - * later in mce_notify_process() - */ -#define MCE_INFO_MAX 16 - -struct mce_info { - atomic_t inuse; - struct task_struct *t; - __u64 paddr; - int restartable; -} mce_info[MCE_INFO_MAX]; - -static void mce_save_info(__u64 addr, int c) -{ - struct mce_info *mi; - - for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) { - if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { - mi->t = current; - mi->paddr = addr; - mi->restartable = c; - return; - } - } - - mce_panic("Too many concurrent recoverable errors", NULL, NULL); -} - -static struct mce_info *mce_find_info(void) -{ - struct mce_info *mi; - - for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) - if (atomic_read(&mi->inuse) && mi->t == current) - return mi; - return NULL; -} - -static void mce_clear_info(struct mce_info *mi) -{ - atomic_set(&mi->inuse, 0); -} - -/* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. * @@ -1063,6 +1022,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) { struct mca_config *cfg = &mca_cfg; struct mce m, *final; + enum ctx_state prev_state; int i; int worst = 0; int severity; @@ -1084,6 +1044,10 @@ void do_machine_check(struct pt_regs *regs, long error_code) DECLARE_BITMAP(toclear, MAX_NR_BANKS); DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); char *msg = "Unknown"; + u64 recover_paddr = ~0ull; + int flags = MF_ACTION_REQUIRED; + + prev_state = ist_enter(regs); this_cpu_inc(mce_exception_count); @@ -1203,9 +1167,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) if (no_way_out) mce_panic("Fatal machine check on current CPU", &m, msg); if (worst == MCE_AR_SEVERITY) { - /* schedule action before return to userland */ - mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); - set_thread_flag(TIF_MCE_NOTIFY); + recover_paddr = m.addr; + if (!(m.mcgstatus & MCG_STATUS_RIPV)) + flags |= MF_MUST_KILL; } else if (kill_it) { force_sig(SIGBUS, current); } @@ -1216,6 +1180,27 @@ void do_machine_check(struct pt_regs *regs, long error_code) mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); out: sync_core(); + + if (recover_paddr == ~0ull) + goto done; + + pr_err("Uncorrected hardware memory error in user-access at %llx", + recover_paddr); + /* + * We must call memory_failure() here even if the current process is + * doomed. We still need to mark the page as poisoned and alert any + * other users of the page. 
+ */ + ist_begin_non_atomic(regs); + local_irq_enable(); + if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) { + pr_err("Memory error not recovered"); + force_sig(SIGBUS, current); + } + local_irq_disable(); + ist_end_non_atomic(); +done: + ist_exit(regs, prev_state); } EXPORT_SYMBOL_GPL(do_machine_check); @@ -1233,42 +1218,6 @@ int memory_failure(unsigned long pfn, int vector, int flags) #endif /* - * Called in process context that interrupted by MCE and marked with - * TIF_MCE_NOTIFY, just before returning to erroneous userland. - * This code is allowed to sleep. - * Attempt possible recovery such as calling the high level VM handler to - * process any corrupted pages, and kill/signal current process if required. - * Action required errors are handled here. - */ -void mce_notify_process(void) -{ - unsigned long pfn; - struct mce_info *mi = mce_find_info(); - int flags = MF_ACTION_REQUIRED; - - if (!mi) - mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); - pfn = mi->paddr >> PAGE_SHIFT; - - clear_thread_flag(TIF_MCE_NOTIFY); - - pr_err("Uncorrected hardware memory error in user-access at %llx", - mi->paddr); - /* - * We must call memory_failure() here even if the current process is - * doomed. We still need to mark the page as poisoned and alert any - * other users of the page. - */ - if (!mi->restartable) - flags |= MF_MUST_KILL; - if (memory_failure(pfn, MCE_VECTOR, flags) < 0) { - pr_err("Memory error not recovered"); - force_sig(SIGBUS, current); - } - mce_clear_info(mi); -} - -/* * Action optional processing happens here (picking up * from the list of faulting pages that do_machine_check() * placed into the "ring"). diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index a3042989398c..ec2663a708e4 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -8,6 +8,7 @@ #include <linux/smp.h> #include <asm/processor.h> +#include <asm/traps.h> #include <asm/mce.h> #include <asm/msr.h> @@ -17,8 +18,11 @@ int mce_p5_enabled __read_mostly; /* Machine check handler for Pentium class Intel CPUs: */ static void pentium_machine_check(struct pt_regs *regs, long error_code) { + enum ctx_state prev_state; u32 loaddr, hi, lotype; + prev_state = ist_enter(regs); + rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); @@ -33,6 +37,8 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) } add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + + ist_exit(regs, prev_state); } /* Set up machine check reporting for processors with Intel style MCE: */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 7dc5564d0cdf..bd5d46a32210 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -7,14 +7,19 @@ #include <linux/types.h> #include <asm/processor.h> +#include <asm/traps.h> #include <asm/mce.h> #include <asm/msr.h> /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { + enum ctx_state prev_state = ist_enter(regs); + printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + + ist_exit(regs, prev_state); } /* Set up machine check reporting on the Winchip C6 series */ diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 15c29096136b..36a83617eb21 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ 
b/arch/x86/kernel/cpu/microcode/core.c @@ -552,7 +552,7 @@ static int __init microcode_init(void) int error; if (paravirt_enabled() || dis_ucode_ldr) - return 0; + return -EINVAL; if (c->x86_vendor == X86_VENDOR_INTEL) microcode_ops = init_intel_microcode(); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 944bf019b74f..498b6d967138 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2431,6 +2431,7 @@ __init int intel_pmu_init(void) break; case 55: /* 22nm Atom "Silvermont" */ + case 76: /* 14nm Atom "Airmont" */ case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 6e434f8e5fc8..c4bb8b8e5017 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -142,7 +142,7 @@ static inline u64 rapl_scale(u64 v) * or use ldexp(count, -32). * Watts = Joules/Time delta */ - return v << (32 - __this_cpu_read(rapl_pmu->hw_unit)); + return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit); } static u64 rapl_event_update(struct perf_event *event) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 10b8d3eaaf15..c635b8b49e93 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -840,7 +840,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id box->phys_id = phys_id; box->pci_dev = pdev; box->pmu = pmu; - uncore_box_init(box); pci_set_drvdata(pdev, box); raw_spin_lock(&uncore_box_lock); @@ -1004,10 +1003,8 @@ static int uncore_cpu_starting(int cpu) pmu = &type->pmus[j]; box = *per_cpu_ptr(pmu->box, cpu); /* called by uncore_cpu_init? 
*/ - if (box && box->phys_id >= 0) { - uncore_box_init(box); + if (box && box->phys_id >= 0) continue; - } for_each_online_cpu(k) { exist = *per_cpu_ptr(pmu->box, k); @@ -1023,10 +1020,8 @@ static int uncore_cpu_starting(int cpu) } } - if (box) { + if (box) box->phys_id = phys_id; - uncore_box_init(box); - } } } return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 863d9b02563e..6c8c1e7e69d8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -257,6 +257,14 @@ static inline int uncore_num_counters(struct intel_uncore_box *box) return box->pmu->type->num_counters; } +static inline void uncore_box_init(struct intel_uncore_box *box) +{ + if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { + if (box->pmu->type->ops->init_box) + box->pmu->type->ops->init_box(box); + } +} + static inline void uncore_disable_box(struct intel_uncore_box *box) { if (box->pmu->type->ops->disable_box) @@ -265,6 +273,8 @@ static inline void uncore_disable_box(struct intel_uncore_box *box) static inline void uncore_enable_box(struct intel_uncore_box *box) { + uncore_box_init(box); + if (box->pmu->type->ops->enable_box) box->pmu->type->ops->enable_box(box); } @@ -287,14 +297,6 @@ static inline u64 uncore_read_counter(struct intel_uncore_box *box, return box->pmu->type->ops->read_counter(box, event); } -static inline void uncore_box_init(struct intel_uncore_box *box) -{ - if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { - if (box->pmu->type->ops->init_box) - box->pmu->type->ops->init_box(box); - } -} - static inline bool uncore_box_is_fake(struct intel_uncore_box *box) { return (box->phys_id < 0); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index dd2f07ae9d0c..46201deee923 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -184,9 +184,9 @@ void __init e820_print_map(char *who) * overwritten in the same location, starting at biosmap. * * The integer pointed to by pnr_map must be valid on entry (the - * current number of valid entries located at biosmap) and will - * be updated on return, with the new number of valid entries - * (something no more than max_nr_map.) + * current number of valid entries located at biosmap). If the + * sanitizing succeeds the *pnr_map will be updated with the new + * number of valid entries (something no more than max_nr_map). 
* * The return value from sanitize_e820_map() is zero if it * successfully 'sanitized' the map entries passed in, and is -1 @@ -561,23 +561,15 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, void __init update_e820(void) { - u32 nr_map; - - nr_map = e820.nr_map; - if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map)) return; - e820.nr_map = nr_map; printk(KERN_INFO "e820: modified physical RAM map:\n"); e820_print_map("modified"); } static void __init update_e820_saved(void) { - u32 nr_map; - - nr_map = e820_saved.nr_map; - if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) - return; - e820_saved.nr_map = nr_map; + sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), + &e820_saved.nr_map); } #define MAX_GAP_END 0x100000000ull /* @@ -898,11 +890,9 @@ early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) { if (userdef) { - u32 nr = e820.nr_map; - - if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), + &e820.nr_map) < 0) early_panic("Invalid user supplied memory map"); - e820.nr_map = nr; printk(KERN_INFO "e820: user-defined physical RAM map:\n"); e820_print_map("user"); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 9ebaf63ba182..db13655c3a2a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -143,7 +143,8 @@ ENDPROC(native_usergs_sysret64) movq \tmp,RSP+\offset(%rsp) movq $__USER_DS,SS+\offset(%rsp) movq $__USER_CS,CS+\offset(%rsp) - movq $-1,RCX+\offset(%rsp) + movq RIP+\offset(%rsp),\tmp /* get rip */ + movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */ movq R11+\offset(%rsp),\tmp /* get eflags */ movq \tmp,EFLAGS+\offset(%rsp) .endm @@ -155,27 +156,6 @@ ENDPROC(native_usergs_sysret64) movq \tmp,R11+\offset(%rsp) .endm - .macro FAKE_STACK_FRAME child_rip - /* push in order ss, rsp, eflags, cs, rip */ - xorl %eax, %eax - pushq_cfi $__KERNEL_DS /* ss */ - /*CFI_REL_OFFSET ss,0*/ - pushq_cfi %rax /* rsp */ - CFI_REL_OFFSET rsp,0 - pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */ - /*CFI_REL_OFFSET rflags,0*/ - pushq_cfi $__KERNEL_CS /* cs */ - /*CFI_REL_OFFSET cs,0*/ - pushq_cfi \child_rip /* rip */ - CFI_REL_OFFSET rip,0 - pushq_cfi %rax /* orig rax */ - .endm - - .macro UNFAKE_STACK_FRAME - addq $8*6, %rsp - CFI_ADJUST_CFA_OFFSET -(6*8) - .endm - /* * initial frame state for interrupts (and exceptions without error code) */ @@ -238,51 +218,6 @@ ENDPROC(native_usergs_sysret64) CFI_REL_OFFSET r15, R15+\offset .endm -/* save partial stack frame */ - .macro SAVE_ARGS_IRQ - cld - /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, (RDI-RBP) - movq_cfi rsi, (RSI-RBP) - movq_cfi rdx, (RDX-RBP) - movq_cfi rcx, (RCX-RBP) - movq_cfi rax, (RAX-RBP) - movq_cfi r8, (R8-RBP) - movq_cfi r9, (R9-RBP) - movq_cfi r10, (R10-RBP) - movq_cfi r11, (R11-RBP) - - /* Save rbp so that we can unwind from get_irq_regs() */ - movq_cfi rbp, 0 - - /* Save previous stack value */ - movq %rsp, %rsi - - leaq -RBP(%rsp),%rdi /* arg1 for handler */ - testl $3, CS-RBP(%rsi) - je 1f - SWAPGS - /* - * irq_count is used to check if a CPU is already on an interrupt stack - * or not. 
While this is essentially redundant with preempt_count it is - * a little cheaper to use a separate counter in the PDA (short of - * moving irq_enter into assembly, which would be too much work) - */ -1: incl PER_CPU_VAR(irq_count) - cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp - CFI_DEF_CFA_REGISTER rsi - - /* Store previous stack value */ - pushq %rsi - CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ - 0x77 /* DW_OP_breg7 */, 0, \ - 0x06 /* DW_OP_deref */, \ - 0x08 /* DW_OP_const1u */, SS+8-RBP, \ - 0x22 /* DW_OP_plus */ - /* We entered an interrupt context - irqs are off: */ - TRACE_IRQS_OFF - .endm - ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld @@ -426,15 +361,12 @@ system_call_fastpath: * Has incomplete stack frame and undefined top of stack. */ ret_from_sys_call: - movl $_TIF_ALLWORK_MASK,%edi - /* edi: flagmask */ -sysret_check: + testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jnz int_ret_from_sys_call_fixup /* Go the the slow path */ + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx - andl %edi,%edx - jnz sysret_careful CFI_REMEMBER_STATE /* * sysretq will re-enable interrupts: @@ -448,49 +380,10 @@ sysret_check: USERGS_SYSRET64 CFI_RESTORE_STATE - /* Handle reschedules */ - /* edx: work, edi: workmask */ -sysret_careful: - bt $TIF_NEED_RESCHED,%edx - jnc sysret_signal - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - pushq_cfi %rdi - SCHEDULE_USER - popq_cfi %rdi - jmp sysret_check - /* Handle a signal */ -sysret_signal: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) -#ifdef CONFIG_AUDITSYSCALL - bt $TIF_SYSCALL_AUDIT,%edx - jc sysret_audit -#endif - /* - * We have a signal, or exit tracing or single-step. - * These all wind up with the iret return path anyway, - * so just join that path right now. - */ +int_ret_from_sys_call_fixup: FIXUP_TOP_OF_STACK %r11, -ARGOFFSET - jmp int_check_syscall_exit_work - -#ifdef CONFIG_AUDITSYSCALL - /* - * Return fast path for syscall audit. Call __audit_syscall_exit() - * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT - * masked off. - */ -sysret_audit: - movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ - cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? 
*/ - setbe %al /* 1 if so, 0 if not */ - movzbl %al,%edi /* zero-extend that into %edi */ - call __audit_syscall_exit - movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi - jmp sysret_check -#endif /* CONFIG_AUDITSYSCALL */ + jmp int_ret_from_sys_call /* Do syscall tracing */ tracesys: @@ -626,19 +519,6 @@ END(\label) FORK_LIKE vfork FIXED_FRAME stub_iopl, sys_iopl -ENTRY(ptregscall_common) - DEFAULT_FRAME 1 8 /* offset 8: return address */ - RESTORE_TOP_OF_STACK %r11, 8 - movq_cfi_restore R15+8, r15 - movq_cfi_restore R14+8, r14 - movq_cfi_restore R13+8, r13 - movq_cfi_restore R12+8, r12 - movq_cfi_restore RBP+8, rbp - movq_cfi_restore RBX+8, rbx - ret $REST_SKIP /* pop extended registers */ - CFI_ENDPROC -END(ptregscall_common) - ENTRY(stub_execve) CFI_STARTPROC addq $8, %rsp @@ -779,7 +659,48 @@ END(interrupt) /* reserve pt_regs for scratch regs and rbp */ subq $ORIG_RAX-RBP, %rsp CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP - SAVE_ARGS_IRQ + cld + /* start from rbp in pt_regs and jump over */ + movq_cfi rdi, (RDI-RBP) + movq_cfi rsi, (RSI-RBP) + movq_cfi rdx, (RDX-RBP) + movq_cfi rcx, (RCX-RBP) + movq_cfi rax, (RAX-RBP) + movq_cfi r8, (R8-RBP) + movq_cfi r9, (R9-RBP) + movq_cfi r10, (R10-RBP) + movq_cfi r11, (R11-RBP) + + /* Save rbp so that we can unwind from get_irq_regs() */ + movq_cfi rbp, 0 + + /* Save previous stack value */ + movq %rsp, %rsi + + leaq -RBP(%rsp),%rdi /* arg1 for handler */ + testl $3, CS-RBP(%rsi) + je 1f + SWAPGS + /* + * irq_count is used to check if a CPU is already on an interrupt stack + * or not. While this is essentially redundant with preempt_count it is + * a little cheaper to use a separate counter in the PDA (short of + * moving irq_enter into assembly, which would be too much work) + */ +1: incl PER_CPU_VAR(irq_count) + cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp + CFI_DEF_CFA_REGISTER rsi + + /* Store previous stack value */ + pushq %rsi + CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ + 0x77 /* DW_OP_breg7 */, 0, \ + 0x06 /* DW_OP_deref */, \ + 0x08 /* DW_OP_const1u */, SS+8-RBP, \ + 0x22 /* DW_OP_plus */ + /* We entered an interrupt context - irqs are off: */ + TRACE_IRQS_OFF + call \func .endm @@ -831,6 +752,60 @@ retint_swapgs: /* return to user-space */ */ DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_IRETQ + + /* + * Try to use SYSRET instead of IRET if we're returning to + * a completely clean 64-bit userspace context. + */ + movq (RCX-R11)(%rsp), %rcx + cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */ + jne opportunistic_sysret_failed + + /* + * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP + * in kernel space. This essentially lets the user take over + * the kernel, since userspace controls RSP. It's not worth + * testing for canonicalness exactly -- this check detects any + * of the 17 high bits set, which is true for non-canonical + * or kernel addresses. (This will pessimize vsyscall=native. + * Big deal.) + * + * If virtual addresses ever become wider, this will need + * to be updated to remain correct on both old and new CPUs. 
+ */ + .ifne __VIRTUAL_MASK_SHIFT - 47 + .error "virtual address width changed -- sysret checks need update" + .endif + shr $__VIRTUAL_MASK_SHIFT, %rcx + jnz opportunistic_sysret_failed + + cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */ + jne opportunistic_sysret_failed + + movq (R11-ARGOFFSET)(%rsp), %r11 + cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */ + jne opportunistic_sysret_failed + + testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */ + jnz opportunistic_sysret_failed + + /* nothing to check for RSP */ + + cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */ + jne opportunistic_sysret_failed + + /* + * We win! This label is here just for ease of understanding + * perf profiles. Nothing jumps here. + */ +irq_return_via_sysret: + CFI_REMEMBER_STATE + RESTORE_ARGS 1,8,1 + movq (RSP-RIP)(%rsp),%rsp + USERGS_SYSRET64 + CFI_RESTORE_STATE + +opportunistic_sysret_failed: SWAPGS jmp restore_args @@ -1048,6 +1023,11 @@ ENTRY(\sym) CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 .if \paranoid + .if \paranoid == 1 + CFI_REMEMBER_STATE + testl $3, CS(%rsp) /* If coming from userspace, switch */ + jnz 1f /* stacks. */ + .endif call save_paranoid .else call error_entry @@ -1088,6 +1068,36 @@ ENTRY(\sym) jmp error_exit /* %ebx: no swapgs flag */ .endif + .if \paranoid == 1 + CFI_RESTORE_STATE + /* + * Paranoid entry from userspace. Switch stacks and treat it + * as a normal entry. This means that paranoid handlers + * run in real process context if user_mode(regs). + */ +1: + call error_entry + + DEFAULT_FRAME 0 + + movq %rsp,%rdi /* pt_regs pointer */ + call sync_regs + movq %rax,%rsp /* switch stack */ + + movq %rsp,%rdi /* pt_regs pointer */ + + .if \has_error_code + movq ORIG_RAX(%rsp),%rsi /* get error code */ + movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ + .else + xorl %esi,%esi /* no error code */ + .endif + + call \do_sym + + jmp error_exit /* %ebx: no swapgs flag */ + .endif + CFI_ENDPROC END(\sym) .endm @@ -1108,7 +1118,7 @@ idtentry overflow do_overflow has_error_code=0 idtentry bounds do_bounds has_error_code=0 idtentry invalid_op do_invalid_op has_error_code=0 idtentry device_not_available do_device_not_available has_error_code=0 -idtentry double_fault do_double_fault has_error_code=1 paranoid=1 +idtentry double_fault do_double_fault has_error_code=1 paranoid=2 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 idtentry invalid_TSS do_invalid_TSS has_error_code=1 idtentry segment_not_present do_segment_not_present has_error_code=1 @@ -1289,16 +1299,14 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector( #endif /* - * "Paranoid" exit path from exception stack. - * Paranoid because this is used by NMIs and cannot take - * any kernel state for granted. - * We don't do kernel preemption checks here, because only - * NMI should be common and it does not enable IRQs and - * cannot get reschedule ticks. + * "Paranoid" exit path from exception stack. This is invoked + * only on return from non-NMI IST interrupts that came + * from kernel space. * - * "trace" is 0 for the NMI handler only, because irq-tracing - * is fundamentally NMI-unsafe. (we cannot change the soft and - * hard flags at once, atomically) + * We may be returning to very strange contexts (e.g. very early + * in syscall entry), so checking for preemption here would + * be complicated. Fortunately, we there's no good reason + * to try to handle preemption here. 
*/ /* ebx: no swapgs flag */ @@ -1308,43 +1316,14 @@ ENTRY(paranoid_exit) TRACE_IRQS_OFF_DEBUG testl %ebx,%ebx /* swapgs needed? */ jnz paranoid_restore - testl $3,CS(%rsp) - jnz paranoid_userspace -paranoid_swapgs: TRACE_IRQS_IRETQ 0 SWAPGS_UNSAFE_STACK RESTORE_ALL 8 - jmp irq_return + INTERRUPT_RETURN paranoid_restore: TRACE_IRQS_IRETQ_DEBUG 0 RESTORE_ALL 8 - jmp irq_return -paranoid_userspace: - GET_THREAD_INFO(%rcx) - movl TI_flags(%rcx),%ebx - andl $_TIF_WORK_MASK,%ebx - jz paranoid_swapgs - movq %rsp,%rdi /* &pt_regs */ - call sync_regs - movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx - jnz paranoid_schedule - movl %ebx,%edx /* arg3: thread flags */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - xorl %esi,%esi /* arg2: oldset */ - movq %rsp,%rdi /* arg1: &pt_regs */ - call do_notify_resume - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp paranoid_userspace -paranoid_schedule: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) - SCHEDULE_USER - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - jmp paranoid_userspace + INTERRUPT_RETURN CFI_ENDPROC END(paranoid_exit) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 3d5fb509bdeb..7114ba220fd4 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -126,6 +126,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp) *dr7 |= encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); + if (info->mask) + set_dr_addr_mask(info->mask, i); return 0; } @@ -161,29 +163,8 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) *dr7 &= ~__encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); -} - -static int get_hbp_len(u8 hbp_len) -{ - unsigned int len_in_bytes = 0; - - switch (hbp_len) { - case X86_BREAKPOINT_LEN_1: - len_in_bytes = 1; - break; - case X86_BREAKPOINT_LEN_2: - len_in_bytes = 2; - break; - case X86_BREAKPOINT_LEN_4: - len_in_bytes = 4; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - len_in_bytes = 8; - break; -#endif - } - return len_in_bytes; + if (info->mask) + set_dr_addr_mask(0, i); } /* @@ -196,7 +177,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) struct arch_hw_breakpoint *info = counter_arch_bp(bp); va = info->address; - len = get_hbp_len(info->len); + len = bp->attr.bp_len; return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } @@ -277,6 +258,8 @@ static int arch_build_bp_info(struct perf_event *bp) } /* Len */ + info->mask = 0; + switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: info->len = X86_BREAKPOINT_LEN_1; @@ -293,11 +276,17 @@ static int arch_build_bp_info(struct perf_event *bp) break; #endif default: - return -EINVAL; + if (!is_power_of_2(bp->attr.bp_len)) + return -EINVAL; + if (!cpu_has_bpext) + return -EOPNOTSUPP; + info->mask = bp->attr.bp_len - 1; + info->len = X86_BREAKPOINT_LEN_1; } return 0; } + /* * Validate the arch-specific HW Breakpoint register settings */ @@ -312,11 +301,11 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) if (ret) return ret; - ret = -EINVAL; - switch (info->len) { case X86_BREAKPOINT_LEN_1: align = 0; + if (info->mask) + align = info->mask; break; case X86_BREAKPOINT_LEN_2: align = 1; @@ -330,7 +319,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) break; #endif default: - return ret; + WARN_ON_ONCE(1); } /* diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index a9a4229f6161..81049ffab2d6 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -19,6 +19,19 @@ #include <asm/fpu-internal.h> 
#include <asm/user.h> +static DEFINE_PER_CPU(bool, in_kernel_fpu); + +void kernel_fpu_disable(void) +{ + WARN_ON(this_cpu_read(in_kernel_fpu)); + this_cpu_write(in_kernel_fpu, true); +} + +void kernel_fpu_enable(void) +{ + this_cpu_write(in_kernel_fpu, false); +} + /* * Were we in an interrupt that interrupted kernel mode? * @@ -33,6 +46,9 @@ */ static inline bool interrupted_kernel_fpu_idle(void) { + if (this_cpu_read(in_kernel_fpu)) + return false; + if (use_eager_fpu()) return __thread_has_fpu(current); @@ -73,10 +89,10 @@ void __kernel_fpu_begin(void) { struct task_struct *me = current; + this_cpu_write(in_kernel_fpu, true); + if (__thread_has_fpu(me)) { - __thread_clear_has_fpu(me); __save_init_fpu(me); - /* We do 'stts()' in __kernel_fpu_end() */ } else if (!use_eager_fpu()) { this_cpu_write(fpu_owner_task, NULL); clts(); @@ -86,19 +102,16 @@ EXPORT_SYMBOL(__kernel_fpu_begin); void __kernel_fpu_end(void) { - if (use_eager_fpu()) { - /* - * For eager fpu, most the time, tsk_used_math() is true. - * Restore the user math as we are done with the kernel usage. - * At few instances during thread exit, signal handling etc, - * tsk_used_math() is false. Those few places will take proper - * actions, so we don't need to restore the math here. - */ - if (likely(tsk_used_math(current))) - math_state_restore(); - } else { + struct task_struct *me = current; + + if (__thread_has_fpu(me)) { + if (WARN_ON(restore_fpu_checking(me))) + drop_init_fpu(me); + } else if (!use_eager_fpu()) { stts(); } + + this_cpu_write(in_kernel_fpu, false); } EXPORT_SYMBOL(__kernel_fpu_end); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 63ce838e5a54..28d28f5eb8f4 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -69,16 +69,9 @@ static void call_on_stack(void *func, void *stack) : "memory", "cc", "edx", "ecx", "eax"); } -/* how to get the current stack pointer from C */ -#define current_stack_pointer ({ \ - unsigned long sp; \ - asm("mov %%esp,%0" : "=g" (sp)); \ - sp; \ -}) - static inline void *current_stack(void) { - return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); + return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); } static inline int @@ -103,7 +96,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) /* Save the next esp at the bottom of the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer; + *prev_esp = current_stack_pointer(); if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -156,7 +149,7 @@ void do_softirq_own_stack(void) /* Push the previous esp onto the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer; + *prev_esp = current_stack_pointer(); call_on_stack(__do_softirq, isp); } diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c index 0ee5025e0fa4..d66a4fe6caee 100644 --- a/arch/x86/kernel/pmc_atom.c +++ b/arch/x86/kernel/pmc_atom.c @@ -25,8 +25,6 @@ #include <asm/pmc_atom.h> -#define DRIVER_NAME KBUILD_MODNAME - struct pmc_dev { u32 base_addr; void __iomem *regmap; @@ -38,12 +36,12 @@ struct pmc_dev { static struct pmc_dev pmc_device; static u32 acpi_base_addr; -struct pmc_dev_map { +struct pmc_bit_map { const char *name; u32 bit_mask; }; -static const struct pmc_dev_map dev_map[] = { +static const struct pmc_bit_map dev_map[] = { {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, @@ -82,6 +80,27 @@ static const struct pmc_dev_map dev_map[] = { {"35 - DFX", BIT_DFX}, }; 
+static const struct pmc_bit_map pss_map[] = { + {"0 - GBE", PMC_PSS_BIT_GBE}, + {"1 - SATA", PMC_PSS_BIT_SATA}, + {"2 - HDA", PMC_PSS_BIT_HDA}, + {"3 - SEC", PMC_PSS_BIT_SEC}, + {"4 - PCIE", PMC_PSS_BIT_PCIE}, + {"5 - LPSS", PMC_PSS_BIT_LPSS}, + {"6 - LPE", PMC_PSS_BIT_LPE}, + {"7 - DFX", PMC_PSS_BIT_DFX}, + {"8 - USH_CTRL", PMC_PSS_BIT_USH_CTRL}, + {"9 - USH_SUS", PMC_PSS_BIT_USH_SUS}, + {"10 - USH_VCCS", PMC_PSS_BIT_USH_VCCS}, + {"11 - USH_VCCA", PMC_PSS_BIT_USH_VCCA}, + {"12 - OTG_CTRL", PMC_PSS_BIT_OTG_CTRL}, + {"13 - OTG_VCCS", PMC_PSS_BIT_OTG_VCCS}, + {"14 - OTG_VCCA_CLK", PMC_PSS_BIT_OTG_VCCA_CLK}, + {"15 - OTG_VCCA", PMC_PSS_BIT_OTG_VCCA}, + {"16 - USB", PMC_PSS_BIT_USB}, + {"17 - USB_SUS", PMC_PSS_BIT_USB_SUS}, +}; + static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) { return readl(pmc->regmap + reg_offset); @@ -169,6 +188,32 @@ static const struct file_operations pmc_dev_state_ops = { .release = single_release, }; +static int pmc_pss_state_show(struct seq_file *s, void *unused) +{ + struct pmc_dev *pmc = s->private; + u32 pss = pmc_reg_read(pmc, PMC_PSS); + int pss_index; + + for (pss_index = 0; pss_index < ARRAY_SIZE(pss_map); pss_index++) { + seq_printf(s, "Island: %-32s\tState: %s\n", + pss_map[pss_index].name, + pss_map[pss_index].bit_mask & pss ? "Off" : "On"); + } + return 0; +} + +static int pmc_pss_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, pmc_pss_state_show, inode->i_private); +} + +static const struct file_operations pmc_pss_state_ops = { + .open = pmc_pss_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) { struct pmc_dev *pmc = s->private; @@ -202,11 +247,7 @@ static const struct file_operations pmc_sleep_tmr_ops = { static void pmc_dbgfs_unregister(struct pmc_dev *pmc) { - if (!pmc->dbgfs_dir) - return; - debugfs_remove_recursive(pmc->dbgfs_dir); - pmc->dbgfs_dir = NULL; } static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) @@ -217,19 +258,29 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) if (!dir) return -ENOMEM; + pmc->dbgfs_dir = dir; + f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, dir, pmc, &pmc_dev_state_ops); if (!f) { - dev_err(&pdev->dev, "dev_states register failed\n"); + dev_err(&pdev->dev, "dev_state register failed\n"); goto err; } + + f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO, + dir, pmc, &pmc_pss_state_ops); + if (!f) { + dev_err(&pdev->dev, "pss_state register failed\n"); + goto err; + } + f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, dir, pmc, &pmc_sleep_tmr_ops); if (!f) { dev_err(&pdev->dev, "sleep_state register failed\n"); goto err; } - pmc->dbgfs_dir = dir; + return 0; err: pmc_dbgfs_unregister(pmc); @@ -292,7 +343,6 @@ MODULE_DEVICE_TABLE(pci, pmc_pci_ids); static int __init pmc_atom_init(void) { - int err = -ENODEV; struct pci_dev *pdev = NULL; const struct pci_device_id *ent; @@ -306,14 +356,11 @@ static int __init pmc_atom_init(void) */ for_each_pci_dev(pdev) { ent = pci_match_id(pmc_pci_ids, pdev); - if (ent) { - err = pmc_setup_dev(pdev); - goto out; - } + if (ent) + return pmc_setup_dev(pdev); } /* Device not found. 
*/ -out: - return err; + return -ENODEV; } module_init(pmc_atom_init); diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index ca9622a25e95..fe3dbfe0c4a5 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -170,7 +170,7 @@ static struct platform_device rtc_device = { static __init int add_rtc_cmos(void) { #ifdef CONFIG_PNP - static const char * const const ids[] __initconst = + static const char * const ids[] __initconst = { "PNP0b00", "PNP0b01", "PNP0b02", }; struct pnp_dev *dev; struct pnp_id *id; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ab4734e5411d..c4648adadd7d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -431,15 +431,13 @@ static void __init parse_setup_data(void) pa_data = boot_params.hdr.setup_data; while (pa_data) { - u32 data_len, map_len, data_type; + u32 data_len, data_type; - map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), - (u64)sizeof(struct setup_data)); - data = early_memremap(pa_data, map_len); + data = early_memremap(pa_data, sizeof(*data)); data_len = data->len + sizeof(struct setup_data); data_type = data->type; pa_next = data->next; - early_iounmap(data, map_len); + early_iounmap(data, sizeof(*data)); switch (data_type) { case SETUP_E820_EXT: diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ed37a768d0fc..2a33c8f68319 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -740,12 +740,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { user_exit(); -#ifdef CONFIG_X86_MCE - /* notify userspace of pending MCEs */ - if (thread_info_flags & _TIF_MCE_NOTIFY) - mce_notify_process(); -#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ - if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6d7022c683e3..febc6aabc72e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -73,7 +73,6 @@ #include <asm/setup.h> #include <asm/uv/uv.h> #include <linux/mc146818rtc.h> -#include <asm/smpboot_hooks.h> #include <asm/i8259.h> #include <asm/realmode.h> #include <asm/misc.h> @@ -104,6 +103,43 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); atomic_t init_deasserted; +static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) +{ + unsigned long flags; + + spin_lock_irqsave(&rtc_lock, flags); + CMOS_WRITE(0xa, 0xf); + spin_unlock_irqrestore(&rtc_lock, flags); + local_flush_tlb(); + pr_debug("1.\n"); + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = + start_eip >> 4; + pr_debug("2.\n"); + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = + start_eip & 0xf; + pr_debug("3.\n"); +} + +static inline void smpboot_restore_warm_reset_vector(void) +{ + unsigned long flags; + + /* + * Install writable page 0 entry to set BIOS data area. + */ + local_flush_tlb(); + + /* + * Paranoid: Set warm reset code and vector here back + * to default values. + */ + spin_lock_irqsave(&rtc_lock, flags); + CMOS_WRITE(0, 0xf); + spin_unlock_irqrestore(&rtc_lock, flags); + + *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; +} + /* * Report back to the Boot Processor during boot time or to the caller processor * during CPU online. @@ -136,8 +172,7 @@ static void smp_callin(void) * CPU, first the APIC. (this is probably redundant on most * boards) */ - setup_local_APIC(); - end_local_APIC_setup(); + apic_ap_setup(); /* * Need to setup vector mappings before we enable interrupts. 
@@ -955,9 +990,12 @@ void arch_disable_smp_support(void) */ static __init void disable_smp(void) { + pr_info("SMP disabled\n"); + + disable_ioapic_support(); + init_cpu_present(cpumask_of(0)); init_cpu_possible(cpumask_of(0)); - smpboot_clear_io_apic_irqs(); if (smp_found_config) physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); @@ -967,6 +1005,13 @@ static __init void disable_smp(void) cpumask_set_cpu(0, cpu_core_mask(0)); } +enum { + SMP_OK, + SMP_NO_CONFIG, + SMP_NO_APIC, + SMP_FORCE_UP, +}; + /* * Various sanity checks. */ @@ -1014,10 +1059,7 @@ static int __init smp_sanity_check(unsigned max_cpus) if (!smp_found_config && !acpi_lapic) { preempt_enable(); pr_notice("SMP motherboard not detected\n"); - disable_smp(); - if (APIC_init_uniprocessor()) - pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); - return -1; + return SMP_NO_CONFIG; } /* @@ -1041,9 +1083,7 @@ static int __init smp_sanity_check(unsigned max_cpus) boot_cpu_physical_apicid); pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); } - smpboot_clear_io_apic(); - disable_ioapic_support(); - return -1; + return SMP_NO_APIC; } verify_local_APIC(); @@ -1053,15 +1093,10 @@ static int __init smp_sanity_check(unsigned max_cpus) */ if (!max_cpus) { pr_info("SMP mode deactivated\n"); - smpboot_clear_io_apic(); - - connect_bsp_APIC(); - setup_local_APIC(); - bsp_end_local_APIC_setup(); - return -1; + return SMP_FORCE_UP; } - return 0; + return SMP_OK; } static void __init smp_cpu_index_default(void) @@ -1101,10 +1136,21 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } set_cpu_sibling_map(0); - if (smp_sanity_check(max_cpus) < 0) { - pr_info("SMP disabled\n"); + switch (smp_sanity_check(max_cpus)) { + case SMP_NO_CONFIG: disable_smp(); + if (APIC_init_uniprocessor()) + pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); return; + case SMP_NO_APIC: + disable_smp(); + return; + case SMP_FORCE_UP: + disable_smp(); + apic_bsp_setup(false); + return; + case SMP_OK: + break; } default_setup_apic_routing(); @@ -1115,33 +1161,10 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) /* Or can we switch back to PIC here? */ } - connect_bsp_APIC(); - - /* - * Switch from PIC to APIC mode. - */ - setup_local_APIC(); - - if (x2apic_mode) - cpu0_logical_apicid = apic_read(APIC_LDR); - else - cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - - /* - * Enable IO APIC before setting up error vector - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); - - bsp_end_local_APIC_setup(); - smpboot_setup_io_apic(); - /* - * Set up local APIC timer on boot CPU. - */ + cpu0_logical_apicid = apic_bsp_setup(false); pr_info("CPU%d: ", 0); print_cpu_info(&cpu_data(0)); - x86_init.timers.setup_percpu_clockev(); if (is_uv_system()) uv_system_init(); @@ -1177,9 +1200,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) nmi_selftest(); impress_friends(); -#ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); -#endif mtrr_aps_init(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 88900e288021..9d2073e2ecc9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -108,6 +108,88 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_count_dec(); } +enum ctx_state ist_enter(struct pt_regs *regs) +{ + enum ctx_state prev_state; + + if (user_mode_vm(regs)) { + /* Other than that, we're just an exception. 
*/ + prev_state = exception_enter(); + } else { + /* + * We might have interrupted pretty much anything. In + * fact, if we're a machine check, we can even interrupt + * NMI processing. We don't want in_nmi() to return true, + * but we need to notify RCU. + */ + rcu_nmi_enter(); + prev_state = IN_KERNEL; /* the value is irrelevant. */ + } + + /* + * We are atomic because we're on the IST stack (or we're on x86_32, + * in which case we still shouldn't schedule). + * + * This must be after exception_enter(), because exception_enter() + * won't do anything if in_interrupt() returns true. + */ + preempt_count_add(HARDIRQ_OFFSET); + + /* This code is a bit fragile. Test it. */ + rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work"); + + return prev_state; +} + +void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) +{ + /* Must be before exception_exit. */ + preempt_count_sub(HARDIRQ_OFFSET); + + if (user_mode_vm(regs)) + return exception_exit(prev_state); + else + rcu_nmi_exit(); +} + +/** + * ist_begin_non_atomic() - begin a non-atomic section in an IST exception + * @regs: regs passed to the IST exception handler + * + * IST exception handlers normally cannot schedule. As a special + * exception, if the exception interrupted userspace code (i.e. + * user_mode_vm(regs) would return true) and the exception was not + * a double fault, it can be safe to schedule. ist_begin_non_atomic() + * begins a non-atomic section within an ist_enter()/ist_exit() region. + * Callers are responsible for enabling interrupts themselves inside + * the non-atomic section, and callers must call is_end_non_atomic() + * before ist_exit(). + */ +void ist_begin_non_atomic(struct pt_regs *regs) +{ + BUG_ON(!user_mode_vm(regs)); + + /* + * Sanity check: we need to be on the normal thread stack. This + * will catch asm bugs and any attempt to use ist_preempt_enable + * from double_fault. + */ + BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack)) + & ~(THREAD_SIZE - 1)) != 0); + + preempt_count_sub(HARDIRQ_OFFSET); +} + +/** + * ist_end_non_atomic() - begin a non-atomic section in an IST exception + * + * Ends a non-atomic section started with ist_begin_non_atomic(). + */ +void ist_end_non_atomic(void) +{ + preempt_count_add(HARDIRQ_OFFSET); +} + static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, struct pt_regs *regs, long error_code) @@ -251,6 +333,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) * end up promoting it to a doublefault. In that case, modify * the stack to make it look like we just entered the #GP * handler from user space, similar to bad_iret. + * + * No need for ist_enter here because we don't use RCU. */ if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && regs->cs == __KERNEL_CS && @@ -263,12 +347,12 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ regs->ip = (unsigned long)general_protection; regs->sp = (unsigned long)&normal_regs->orig_ax; + return; } #endif - exception_enter(); - /* Return not checked because double check cannot be ignored */ + ist_enter(regs); /* Discard prev_state because we won't return. 
*/ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); tsk->thread.error_code = error_code; @@ -434,7 +518,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; - prev_state = exception_enter(); + prev_state = ist_enter(regs); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) @@ -460,33 +544,20 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); debug_stack_usage_dec(); exit: - exception_exit(prev_state); + ist_exit(regs, prev_state); } NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* - * Help handler running on IST stack to switch back to user stack - * for scheduling or signal handling. The actual stack switch is done in - * entry.S + * Help handler running on IST stack to switch off the IST stack if the + * interrupted code was in user mode. The actual stack switch is done in + * entry_64.S */ asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) { - struct pt_regs *regs = eregs; - /* Did already sync */ - if (eregs == (struct pt_regs *)eregs->sp) - ; - /* Exception from user space */ - else if (user_mode(eregs)) - regs = task_pt_regs(current); - /* - * Exception from kernel and interrupts are enabled. Move to - * kernel process stack. - */ - else if (eregs->flags & X86_EFLAGS_IF) - regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); - if (eregs != regs) - *regs = *eregs; + struct pt_regs *regs = task_pt_regs(current); + *regs = *eregs; return regs; } NOKPROBE_SYMBOL(sync_regs); @@ -554,7 +625,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; - prev_state = exception_enter(); + prev_state = ist_enter(regs); get_debugreg(dr6, 6); @@ -629,7 +700,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: - exception_exit(prev_state); + ist_exit(regs, prev_state); } NOKPROBE_SYMBOL(do_debug); @@ -788,18 +859,16 @@ void math_state_restore(void) local_irq_disable(); } + /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ + kernel_fpu_disable(); __thread_fpu_begin(tsk); - - /* - * Paranoid restore. send a SIGSEGV if we fail to restore the state. - */ if (unlikely(restore_fpu_checking(tsk))) { drop_init_fpu(tsk); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); - return; + } else { + tsk->thread.fpu_counter++; } - - tsk->thread.fpu_counter++; + kernel_fpu_enable(); } EXPORT_SYMBOL_GPL(math_state_restore); diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index f9d16ff56c6b..7dc7ba577ecd 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -40,6 +40,7 @@ config KVM select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_VFIO + select SRCU ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. 
You will need a fairly recent diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4f0c0b954686..d52dcf0776ea 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -192,6 +192,9 @@ static void recalculate_apic_map(struct kvm *kvm) u16 cid, lid; u32 ldr, aid; + if (!kvm_apic_present(vcpu)) + continue; + aid = kvm_apic_id(apic); ldr = kvm_apic_get_reg(apic, APIC_LDR); cid = apic_cluster_id(new, ldr); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 38dcec403b46..e3ff27a5b634 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -898,6 +898,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) do_sigbus(regs, error_code, address, fault); + else if (fault & VM_FAULT_SIGSEGV) + bad_area_nosemaphore(regs, error_code, address); else BUG(); } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 7b20bccf3648..2fb384724ebb 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -448,6 +448,22 @@ static const struct dmi_system_id pciprobe_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "ftServer"), }, }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Express5800/R32"), + }, + }, + { + .callback = set_scan_all, + .ident = "Stratus/NEC ftServer", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Express5800/R31"), + }, + }, {} }; diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 44b9271580b5..852aa4c92da0 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -293,7 +293,6 @@ static void mrst_power_off_unused_dev(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); /* diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 326198a4434e..676e5e04e4d4 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -610,6 +610,32 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) return 0; } +#ifdef CONFIG_ACPI_APEI +extern int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size, + void *data), void *data); + +static int pci_mmcfg_for_each_region(int (*func)(__u64 start, __u64 size, + void *data), void *data) +{ + struct pci_mmcfg_region *cfg; + int rc; + + if (list_empty(&pci_mmcfg_list)) + return 0; + + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + rc = func(cfg->res.start, resource_size(&cfg->res), data); + if (rc) + return rc; + } + + return 0; +} +#define set_apei_filter() (arch_apei_filter_addr = pci_mmcfg_for_each_region) +#else +#define set_apei_filter() +#endif + static void __init __pci_mmcfg_init(int early) { pci_mmcfg_reject_broken(early); @@ -644,6 +670,8 @@ void __init pci_mmcfg_early_init(void) else acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); __pci_mmcfg_init(1); + + set_apei_filter(); } } diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 5a4affe025e8..09297c8e1fcd 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -205,4 +205,4 @@ $(vdso_img_insttargets): 
install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE PHONY += vdso_install $(vdso_img_insttargets) vdso_install: $(vdso_img_insttargets) FORCE -clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* +clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index b57c4f91f487..9e3571a6535c 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -117,6 +117,8 @@ good_area: if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; else if (fault & VM_FAULT_SIGBUS) goto do_sigbus; BUG();
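
The VM_FAULT_SIGSEGV plumbing added to the fault handlers above (alpha, arc, x86, xtensa) follows the same dispatch shape in each architecture: OOM, SIGSEGV and SIGBUS each get their own exit path, anything else is a bug. A minimal user-space sketch of that dispatch; the flag values and handler names here are illustrative stand-ins, not the kernel's own definitions:

#include <stdio.h>

#define VM_FAULT_OOM     0x0001u
#define VM_FAULT_SIGBUS  0x0002u
#define VM_FAULT_SIGSEGV 0x0040u
#define VM_FAULT_ERROR   (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)

/* Map a fault result to the exit path the arch handlers above jump to. */
static const char *fault_dispatch(unsigned int fault)
{
	if (!(fault & VM_FAULT_ERROR))
		return "ok";			/* minor/major fault, just return */
	if (fault & VM_FAULT_OOM)
		return "out_of_memory";		/* defer to the OOM machinery */
	if (fault & VM_FAULT_SIGSEGV)
		return "bad_area";		/* new in this series: SIGSEGV, not SIGBUS */
	if (fault & VM_FAULT_SIGBUS)
		return "do_sigbus";
	return "BUG";				/* unexpected error bit */
}

int main(void)
{
	unsigned int cases[] = { 0, VM_FAULT_OOM, VM_FAULT_SIGSEGV, VM_FAULT_SIGBUS };
	unsigned int i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("fault=0x%04x -> %s\n", cases[i], fault_dispatch(cases[i]));
	return 0;
}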
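
The hw_breakpoint change above accepts a bp_len that is not one of the fixed 1/2/4/8 DR7 encodings, provided it is a power of two, and turns it into an address mask of bp_len - 1 (programmed through the AMD DR address-mask MSRs when cpu_has_bpext is set). A rough sketch of that length-to-mask rule, with local stand-in names rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>

static bool is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

/* Returns 0 and fills *mask on success, -1 for an unsupported length. */
static int bp_len_to_mask(unsigned long bp_len, unsigned long *mask)
{
	switch (bp_len) {
	case 1: case 2: case 4: case 8:
		*mask = 0;		/* classic DR7-encoded lengths, no mask */
		return 0;
	default:
		if (!is_power_of_2(bp_len))
			return -1;	/* -EINVAL in the patch above */
		*mask = bp_len - 1;	/* e.g. 4096 covers a whole page */
		return 0;
	}
}

int main(void)
{
	unsigned long lens[] = { 1, 8, 16, 4096, 24 };
	unsigned int i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
		unsigned long mask;

		if (bp_len_to_mask(lens[i], &mask) == 0)
			printf("len=%lu -> mask=0x%lx\n", lens[i], mask);
		else
			printf("len=%lu -> unsupported\n", lens[i]);
	}
	return 0;
}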
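
The rapl_scale() fix above only corrects the misplaced per-cpu dereference; the arithmetic is unchanged: the raw counter ticks in units of 2^-hw_unit Joules and is shifted left by (32 - hw_unit) so every event ends up in a common 2^-32 Joule fixed-point unit. A small sketch of that scaling, assuming an illustrative hw_unit of 16:

#include <stdio.h>
#include <stdint.h>

/* Rescale a raw RAPL count (2^-hw_unit J per tick) to 2^-32 J units. */
static uint64_t rapl_scale(uint64_t v, unsigned int hw_unit)
{
	return v << (32 - hw_unit);
}

int main(void)
{
	unsigned int hw_unit = 16;	/* assumed energy-status unit: 2^-16 J per tick */
	uint64_t raw = 65536;		/* 65536 ticks at 2^-16 J each = 1 Joule */
	uint64_t scaled = rapl_scale(raw, hw_unit);

	/* scaled is in 2^-32 J; divide by 2^32 to print Joules */
	printf("%llu ticks -> %.3f J\n",
	       (unsigned long long)raw, scaled / 4294967296.0);
	return 0;
}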