summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/uapi/asm/Kbuild2
-rw-r--r--arch/arc/include/uapi/asm/Kbuild1
-rw-r--r--arch/arm/boot/dts/am33xx.dtsi2
-rw-r--r--arch/arm/boot/dts/am4372.dtsi6
-rw-r--r--arch/arm/boot/dts/am437x-cm-t43.dts4
-rw-r--r--arch/arm/boot/dts/armada-385-db-ap.dts1
-rw-r--r--arch/arm/boot/dts/armada-385-linksys.dtsi1
-rw-r--r--arch/arm/boot/dts/armada-385-synology-ds116.dts2
-rw-r--r--arch/arm/boot/dts/armada-388-gp.dts2
-rw-r--r--arch/arm/boot/dts/bcm-nsp.dtsi4
-rw-r--r--arch/arm/boot/dts/bcm283x.dtsi1
-rw-r--r--arch/arm/boot/dts/bcm958623hr.dts4
-rw-r--r--arch/arm/boot/dts/bcm958625hr.dts4
-rw-r--r--arch/arm/boot/dts/dm814x.dtsi2
-rw-r--r--arch/arm/boot/dts/imx53.dtsi9
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts3
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv.dtsi17
-rw-r--r--arch/arm/boot/dts/meson.dtsi18
-rw-r--r--arch/arm/boot/dts/nspire.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-beagle-xm.dts1
-rw-r--r--arch/arm/boot/dts/omap3-beagle.dts1
-rw-r--r--arch/arm/boot/dts/omap3-cm-t3x.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-evm-common.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-gta04.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-igep0020-common.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-igep0030-common.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-lilly-a83x.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-overo-base.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-pandora-common.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-tao3530.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3.dtsi1
-rw-r--r--arch/arm/boot/dts/omap4-droid4-xt894.dts1
-rw-r--r--arch/arm/boot/dts/omap4-duovero.dtsi1
-rw-r--r--arch/arm/boot/dts/omap4-panda-common.dtsi1
-rw-r--r--arch/arm/boot/dts/omap4-var-som-om44.dtsi1
-rw-r--r--arch/arm/boot/dts/omap4.dtsi5
-rw-r--r--arch/arm/boot/dts/omap5-board-common.dtsi2
-rw-r--r--arch/arm/boot/dts/omap5-cm-t54.dts2
-rw-r--r--arch/arm/boot/dts/omap5.dtsi1
-rw-r--r--arch/arm/boot/dts/r8a7790.dtsi1
-rw-r--r--arch/arm/boot/dts/r8a7792.dtsi1
-rw-r--r--arch/arm/boot/dts/r8a7793.dtsi1
-rw-r--r--arch/arm/boot/dts/r8a7794.dtsi1
-rw-r--r--arch/arm/boot/dts/vf610-zii-dev-rev-c.dts6
-rw-r--r--arch/arm/common/sa1111.c328
-rw-r--r--arch/arm/include/asm/hardware/sa1111.h32
-rw-r--r--arch/arm/include/asm/kvm_arm.h3
-rw-r--r--arch/arm/include/asm/kvm_host.h5
-rw-r--r--arch/arm/include/asm/pgtable-3level.h1
-rw-r--r--arch/arm/include/uapi/asm/Kbuild1
-rw-r--r--arch/arm/kernel/entry-header.S4
-rw-r--r--arch/arm/lib/csumpartialcopyuser.S4
-rw-r--r--arch/arm/mach-meson/platsmp.c2
-rw-r--r--arch/arm/mach-omap2/cm_common.c6
-rw-r--r--arch/arm/mach-omap2/omap-secure.c21
-rw-r--r--arch/arm/mach-omap2/omap-secure.h4
-rw-r--r--arch/arm/mach-omap2/omap_device.c10
-rw-r--r--arch/arm/mach-omap2/omap_hwmod_3xxx_data.c1
-rw-r--r--arch/arm/mach-omap2/pm.h4
-rw-r--r--arch/arm/mach-omap2/pm34xx.c13
-rw-r--r--arch/arm/mach-omap2/prcm-common.h1
-rw-r--r--arch/arm/mach-omap2/prm33xx.c12
-rw-r--r--arch/arm/mach-omap2/sleep34xx.S26
-rw-r--r--arch/arm/mach-sa1100/Kconfig1
-rw-r--r--arch/arm/mach-sa1100/assabet.c71
-rw-r--r--arch/arm/mach-sa1100/neponset.c159
-rw-r--r--arch/arm/net/bpf_jit_32.c225
-rw-r--r--arch/arm64/Kconfig12
-rw-r--r--arch/arm64/Makefile3
-rw-r--r--arch/arm64/boot/dts/Makefile2
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl.dtsi6
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts1
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts1
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts3
-rw-r--r--arch/arm64/include/asm/assembler.h10
-rw-r--r--arch/arm64/include/asm/cacheflush.h2
-rw-r--r--arch/arm64/include/asm/cpufeature.h3
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/efi.h4
-rw-r--r--arch/arm64/include/asm/kvm_arm.h3
-rw-r--r--arch/arm64/include/asm/kvm_host.h1
-rw-r--r--arch/arm64/include/asm/mmu_context.h46
-rw-r--r--arch/arm64/include/asm/module.h46
-rw-r--r--arch/arm64/include/asm/perf_event.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h42
-rw-r--r--arch/arm64/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--arch/arm64/kernel/Makefile3
-rw-r--r--arch/arm64/kernel/cpu-reset.S1
-rw-r--r--arch/arm64/kernel/cpu_ops.c6
-rw-r--r--arch/arm64/kernel/cpufeature.c3
-rw-r--r--arch/arm64/kernel/efi-entry.S2
-rw-r--r--arch/arm64/kernel/fpsimd.c57
-rw-r--r--arch/arm64/kernel/ftrace-mod.S18
-rw-r--r--arch/arm64/kernel/ftrace.c14
-rw-r--r--arch/arm64/kernel/head.S1
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c2
-rw-r--r--arch/arm64/kernel/module-plts.c50
-rw-r--r--arch/arm64/kernel/module.lds1
-rw-r--r--arch/arm64/kernel/perf_event.c6
-rw-r--r--arch/arm64/kernel/process.c9
-rw-r--r--arch/arm64/kernel/relocate_kernel.S1
-rw-r--r--arch/arm64/kvm/debug.c21
-rw-r--r--arch/arm64/kvm/handle_exit.c57
-rw-r--r--arch/arm64/kvm/hyp-init.S1
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c3
-rw-r--r--arch/arm64/kvm/hyp/switch.c37
-rw-r--r--arch/arm64/mm/context.c28
-rw-r--r--arch/arm64/mm/dump.c2
-rw-r--r--arch/arm64/mm/fault.c5
-rw-r--r--arch/arm64/mm/init.c3
-rw-r--r--arch/arm64/mm/pgd.c2
-rw-r--r--arch/blackfin/include/uapi/asm/Kbuild1
-rw-r--r--arch/c6x/include/uapi/asm/Kbuild1
-rw-r--r--arch/cris/include/uapi/asm/Kbuild1
-rw-r--r--arch/frv/include/uapi/asm/Kbuild2
-rw-r--r--arch/h8300/include/uapi/asm/Kbuild1
-rw-r--r--arch/hexagon/include/uapi/asm/Kbuild1
-rw-r--r--arch/ia64/include/uapi/asm/Kbuild1
-rw-r--r--arch/m32r/include/uapi/asm/Kbuild1
-rw-r--r--arch/m68k/configs/stmark2_defconfig1
-rw-r--r--arch/m68k/include/uapi/asm/Kbuild1
-rw-r--r--arch/m68k/kernel/vmlinux-nommu.lds2
-rw-r--r--arch/m68k/kernel/vmlinux-std.lds2
-rw-r--r--arch/m68k/kernel/vmlinux-sun3.lds2
-rw-r--r--arch/metag/include/uapi/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/mmu_context_mm.h1
-rw-r--r--arch/microblaze/include/uapi/asm/Kbuild1
-rw-r--r--arch/mips/include/asm/Kbuild1
-rw-r--r--arch/mips/include/asm/pgtable.h2
-rw-r--r--arch/mips/include/asm/serial.h22
-rw-r--r--arch/mips/include/uapi/asm/Kbuild1
-rw-r--r--arch/mips/kvm/mips.c7
-rw-r--r--arch/mn10300/include/uapi/asm/Kbuild1
-rw-r--r--arch/nios2/include/uapi/asm/Kbuild1
-rw-r--r--arch/openrisc/include/uapi/asm/Kbuild1
-rw-r--r--arch/parisc/boot/compressed/misc.c4
-rw-r--r--arch/parisc/include/asm/thread_info.h5
-rw-r--r--arch/parisc/include/uapi/asm/Kbuild1
-rw-r--r--arch/parisc/kernel/entry.S12
-rw-r--r--arch/parisc/kernel/hpmc.S1
-rw-r--r--arch/parisc/kernel/unwind.c1
-rw-r--r--arch/parisc/lib/delay.c2
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h1
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h1
-rw-r--r--arch/powerpc/include/asm/machdep.h1
-rw-r--r--arch/powerpc/include/asm/mmu_context.h5
-rw-r--r--arch/powerpc/include/asm/setup.h1
-rw-r--r--arch/powerpc/include/uapi/asm/Kbuild1
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S2
-rw-r--r--arch/powerpc/kernel/fadump.c22
-rw-r--r--arch/powerpc/kernel/misc_64.S2
-rw-r--r--arch/powerpc/kernel/process.c14
-rw-r--r--arch/powerpc/kernel/setup-common.c27
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c37
-rw-r--r--arch/powerpc/kvm/book3s_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_xive.c7
-rw-r--r--arch/powerpc/kvm/powerpc.c7
-rw-r--r--arch/powerpc/mm/hash_native_64.c15
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c6
-rw-r--r--arch/powerpc/perf/core-book3s.c12
-rw-r--r--arch/powerpc/perf/imc-pmu.c17
-rw-r--r--arch/powerpc/platforms/ps3/setup.c15
-rw-r--r--arch/powerpc/platforms/pseries/setup.c1
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c4
-rw-r--r--arch/powerpc/xmon/xmon.c10
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/include/asm/asm.h12
-rw-r--r--arch/riscv/include/asm/atomic.h103
-rw-r--r--arch/riscv/include/asm/barrier.h36
-rw-r--r--arch/riscv/include/asm/bitops.h2
-rw-r--r--arch/riscv/include/asm/bug.h6
-rw-r--r--arch/riscv/include/asm/cacheflush.h30
-rw-r--r--arch/riscv/include/asm/io.h18
-rw-r--r--arch/riscv/include/asm/mmu.h4
-rw-r--r--arch/riscv/include/asm/mmu_context.h45
-rw-r--r--arch/riscv/include/asm/pgtable.h58
-rw-r--r--arch/riscv/include/asm/spinlock.h11
-rw-r--r--arch/riscv/include/asm/timex.h3
-rw-r--r--arch/riscv/include/asm/tlbflush.h7
-rw-r--r--arch/riscv/include/asm/vdso-syscalls.h28
-rw-r--r--arch/riscv/include/asm/vdso.h4
-rw-r--r--arch/riscv/include/uapi/asm/Kbuild1
-rw-r--r--arch/riscv/kernel/head.S3
-rw-r--r--arch/riscv/kernel/riscv_ksyms.c3
-rw-r--r--arch/riscv/kernel/setup.c16
-rw-r--r--arch/riscv/kernel/smp.c55
-rw-r--r--arch/riscv/kernel/sys_riscv.c33
-rw-r--r--arch/riscv/kernel/syscall_table.c2
-rw-r--r--arch/riscv/kernel/vdso/Makefile7
-rw-r--r--arch/riscv/kernel/vdso/clock_getres.S26
-rw-r--r--arch/riscv/kernel/vdso/clock_gettime.S26
-rw-r--r--arch/riscv/kernel/vdso/flush_icache.S31
-rw-r--r--arch/riscv/kernel/vdso/getcpu.S26
-rw-r--r--arch/riscv/kernel/vdso/gettimeofday.S26
-rw-r--r--arch/riscv/kernel/vdso/vdso.lds.S7
-rw-r--r--arch/riscv/lib/delay.c1
-rw-r--r--arch/riscv/mm/Makefile1
-rw-r--r--arch/riscv/mm/cacheflush.c23
-rw-r--r--arch/riscv/mm/ioremap.c2
-rw-r--r--arch/s390/Kbuild1
-rw-r--r--arch/s390/Makefile5
-rw-r--r--arch/s390/appldata/Makefile1
-rw-r--r--arch/s390/appldata/appldata_base.c1
-rw-r--r--arch/s390/appldata/appldata_mem.c1
-rw-r--r--arch/s390/appldata/appldata_net_sum.c1
-rw-r--r--arch/s390/appldata/appldata_os.c1
-rw-r--r--arch/s390/boot/compressed/vmlinux.scr1
-rw-r--r--arch/s390/boot/install.sh5
-rw-r--r--arch/s390/crypto/aes_s390.c7
-rw-r--r--arch/s390/crypto/arch_random.c6
-rw-r--r--arch/s390/crypto/crc32-vx.c1
-rw-r--r--arch/s390/crypto/des_s390.c7
-rw-r--r--arch/s390/crypto/ghash_s390.c1
-rw-r--r--arch/s390/crypto/paes_s390.c6
-rw-r--r--arch/s390/crypto/prng.c1
-rw-r--r--arch/s390/crypto/sha.h7
-rw-r--r--arch/s390/crypto/sha1_s390.c7
-rw-r--r--arch/s390/crypto/sha256_s390.c7
-rw-r--r--arch/s390/crypto/sha512_s390.c7
-rw-r--r--arch/s390/crypto/sha_common.c7
-rw-r--r--arch/s390/hypfs/Makefile1
-rw-r--r--arch/s390/hypfs/inode.c2
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/alternative.h1
-rw-r--r--arch/s390/include/asm/ap.h5
-rw-r--r--arch/s390/include/asm/bugs.h1
-rw-r--r--arch/s390/include/asm/cpu_mf.h5
-rw-r--r--arch/s390/include/asm/elf.h15
-rw-r--r--arch/s390/include/asm/kprobes.h15
-rw-r--r--arch/s390/include/asm/kvm_host.h5
-rw-r--r--arch/s390/include/asm/kvm_para.h7
-rw-r--r--arch/s390/include/asm/livepatch.h8
-rw-r--r--arch/s390/include/asm/mmu_context.h2
-rw-r--r--arch/s390/include/asm/perf_event.h1
-rw-r--r--arch/s390/include/asm/pgtable.h2
-rw-r--r--arch/s390/include/asm/ptrace.h11
-rw-r--r--arch/s390/include/asm/segment.h1
-rw-r--r--arch/s390/include/asm/switch_to.h27
-rw-r--r--arch/s390/include/asm/syscall.h5
-rw-r--r--arch/s390/include/asm/sysinfo.h5
-rw-r--r--arch/s390/include/asm/topology.h1
-rw-r--r--arch/s390/include/asm/vga.h1
-rw-r--r--arch/s390/include/uapi/asm/Kbuild1
-rw-r--r--arch/s390/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--arch/s390/include/uapi/asm/kvm.h4
-rw-r--r--arch/s390/include/uapi/asm/kvm_para.h4
-rw-r--r--arch/s390/include/uapi/asm/kvm_perf.h4
-rw-r--r--arch/s390/include/uapi/asm/perf_regs.h1
-rw-r--r--arch/s390/include/uapi/asm/ptrace.h125
-rw-r--r--arch/s390/include/uapi/asm/sthyi.h1
-rw-r--r--arch/s390/include/uapi/asm/virtio-ccw.h6
-rw-r--r--arch/s390/include/uapi/asm/vmcp.h1
-rw-r--r--arch/s390/include/uapi/asm/zcrypt.h14
-rw-r--r--arch/s390/kernel/alternative.c1
-rw-r--r--arch/s390/kernel/compat_linux.c1
-rw-r--r--arch/s390/kernel/debug.c2
-rw-r--r--arch/s390/kernel/dis.c10
-rw-r--r--arch/s390/kernel/dumpstack.c1
-rw-r--r--arch/s390/kernel/entry.S15
-rw-r--r--arch/s390/kernel/ipl.c1
-rw-r--r--arch/s390/kernel/kprobes.c15
-rw-r--r--arch/s390/kernel/lgr.c1
-rw-r--r--arch/s390/kernel/module.c15
-rw-r--r--arch/s390/kernel/nmi.c1
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c5
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c5
-rw-r--r--arch/s390/kernel/perf_event.c5
-rw-r--r--arch/s390/kernel/perf_regs.c1
-rw-r--r--arch/s390/kernel/ptrace.c8
-rw-r--r--arch/s390/kernel/setup.c1
-rw-r--r--arch/s390/kernel/smp.c1
-rw-r--r--arch/s390/kernel/stacktrace.c1
-rw-r--r--arch/s390/kernel/sthyi.c5
-rw-r--r--arch/s390/kernel/syscalls.S6
-rw-r--r--arch/s390/kernel/time.c1
-rw-r--r--arch/s390/kernel/topology.c1
-rw-r--r--arch/s390/kernel/vdso.c5
-rw-r--r--arch/s390/kernel/vdso32/clock_getres.S5
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S5
-rw-r--r--arch/s390/kernel/vdso32/gettimeofday.S5
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S5
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S5
-rw-r--r--arch/s390/kernel/vdso64/gettimeofday.S5
-rw-r--r--arch/s390/kernel/vdso64/note.S1
-rw-r--r--arch/s390/kernel/vtime.c1
-rw-r--r--arch/s390/kvm/Makefile5
-rw-r--r--arch/s390/kvm/diag.c5
-rw-r--r--arch/s390/kvm/gaccess.h5
-rw-r--r--arch/s390/kvm/guestdbg.c5
-rw-r--r--arch/s390/kvm/intercept.c5
-rw-r--r--arch/s390/kvm/interrupt.c5
-rw-r--r--arch/s390/kvm/irq.h5
-rw-r--r--arch/s390/kvm/kvm-s390.c18
-rw-r--r--arch/s390/kvm/kvm-s390.h5
-rw-r--r--arch/s390/kvm/priv.c16
-rw-r--r--arch/s390/kvm/sigp.c5
-rw-r--r--arch/s390/kvm/vsie.c5
-rw-r--r--arch/s390/mm/cmm.c1
-rw-r--r--arch/s390/mm/gmap.c1
-rw-r--r--arch/s390/mm/mmap.c16
-rw-r--r--arch/s390/mm/pgalloc.c2
-rw-r--r--arch/s390/mm/pgtable.c1
-rw-r--r--arch/s390/net/Makefile1
-rw-r--r--arch/s390/net/bpf_jit_comp.c11
-rw-r--r--arch/s390/numa/Makefile1
-rw-r--r--arch/s390/pci/Makefile1
-rw-r--r--arch/s390/pci/pci.c1
-rw-r--r--arch/s390/pci/pci_debug.c1
-rw-r--r--arch/s390/pci/pci_dma.c1
-rw-r--r--arch/s390/pci/pci_insn.c1
-rw-r--r--arch/s390/tools/gen_opcode_table.c1
-rw-r--r--arch/score/include/uapi/asm/Kbuild1
-rw-r--r--arch/sh/include/uapi/asm/Kbuild1
-rw-r--r--arch/sparc/include/asm/pgtable_64.h2
-rw-r--r--arch/sparc/include/uapi/asm/Kbuild1
-rw-r--r--arch/sparc/lib/Makefile2
-rw-r--r--arch/sparc/lib/hweight.S4
-rw-r--r--arch/sparc/mm/fault_32.c2
-rw-r--r--arch/sparc/mm/fault_64.c2
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c6
-rw-r--r--arch/tile/include/asm/pgtable.h1
-rw-r--r--arch/tile/include/uapi/asm/Kbuild1
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/um/include/asm/mmu_context.h3
-rw-r--r--arch/um/kernel/trap.c2
-rw-r--r--arch/unicore32/include/asm/mmu_context.h5
-rw-r--r--arch/unicore32/include/uapi/asm/Kbuild1
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/Kconfig.debug1
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/head_64.S16
-rw-r--r--arch/x86/boot/compressed/misc.c16
-rw-r--r--arch/x86/boot/compressed/pagetable.c3
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c28
-rw-r--r--arch/x86/boot/genimage.sh32
-rw-r--r--arch/x86/crypto/salsa20_glue.c7
-rw-r--r--arch/x86/entry/calling.h145
-rw-r--r--arch/x86/entry/entry_32.S14
-rw-r--r--arch/x86/entry/entry_64.S237
-rw-r--r--arch/x86/entry/entry_64_compat.S31
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c2
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c38
-rw-r--r--arch/x86/events/intel/core.c5
-rw-r--r--arch/x86/events/intel/ds.c130
-rw-r--r--arch/x86/events/perf_event.h23
-rw-r--r--arch/x86/include/asm/asm.h2
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h81
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h5
-rw-r--r--arch/x86/include/asm/desc.h14
-rw-r--r--arch/x86/include/asm/disabled-features.h8
-rw-r--r--arch/x86/include/asm/espfix.h7
-rw-r--r--arch/x86/include/asm/fixmap.h7
-rw-r--r--arch/x86/include/asm/hypervisor.h25
-rw-r--r--arch/x86/include/asm/intel_ds.h36
-rw-r--r--arch/x86/include/asm/invpcid.h53
-rw-r--r--arch/x86/include/asm/irqdomain.h2
-rw-r--r--arch/x86/include/asm/irqflags.h3
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/kmemcheck.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h19
-rw-r--r--arch/x86/include/asm/mmu.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h113
-rw-r--r--arch/x86/include/asm/paravirt.h9
-rw-r--r--arch/x86/include/asm/pgalloc.h11
-rw-r--r--arch/x86/include/asm/pgtable.h38
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h15
-rw-r--r--arch/x86/include/asm/pgtable_64.h92
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h51
-rw-r--r--arch/x86/include/asm/processor-flags.h5
-rw-r--r--arch/x86/include/asm/processor.h82
-rw-r--r--arch/x86/include/asm/pti.h14
-rw-r--r--arch/x86/include/asm/segment.h12
-rw-r--r--arch/x86/include/asm/stacktrace.h3
-rw-r--r--arch/x86/include/asm/suspend_32.h8
-rw-r--r--arch/x86/include/asm/suspend_64.h19
-rw-r--r--arch/x86/include/asm/switch_to.h13
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h347
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h16
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/include/asm/unwind.h7
-rw-r--r--arch/x86/include/asm/vsyscall.h1
-rw-r--r--arch/x86/include/uapi/asm/Kbuild1
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h7
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_noop.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c2
-rw-r--r--arch/x86/kernel/apic/msi.c8
-rw-r--r--arch/x86/kernel/apic/probe_32.c2
-rw-r--r--arch/x86/kernel/apic/vector.c24
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c10
-rw-r--r--arch/x86/kernel/asm-offsets_32.c9
-rw-r--r--arch/x86/kernel/asm-offsets_64.c4
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c103
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c13
-rw-r--r--arch/x86/kernel/doublefault.c36
-rw-r--r--arch/x86/kernel/dumpstack.c81
-rw-r--r--arch/x86/kernel/dumpstack_32.c6
-rw-r--r--arch/x86/kernel/dumpstack_64.c12
-rw-r--r--arch/x86/kernel/head_64.S30
-rw-r--r--arch/x86/kernel/ioport.c2
-rw-r--r--arch/x86/kernel/irq.c12
-rw-r--r--arch/x86/kernel/irq_64.c4
-rw-r--r--arch/x86/kernel/ldt.c198
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c2
-rw-r--r--arch/x86/kernel/process.c21
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c14
-rw-r--r--arch/x86/kernel/smpboot.c21
-rw-r--r--arch/x86/kernel/stacktrace.c6
-rw-r--r--arch/x86/kernel/tls.c11
-rw-r--r--arch/x86/kernel/traps.c77
-rw-r--r--arch/x86/kernel/unwind_orc.c88
-rw-r--r--arch/x86/kernel/vmlinux.lds.S17
-rw-r--r--arch/x86/kvm/cpuid.h2
-rw-r--r--arch/x86/kvm/emulate.c93
-rw-r--r--arch/x86/kvm/ioapic.c34
-rw-r--r--arch/x86/kvm/lapic.c12
-rw-r--r--arch/x86/kvm/mmu.c8
-rw-r--r--arch/x86/kvm/svm.c11
-rw-r--r--arch/x86/kvm/vmx.c81
-rw-r--r--arch/x86/kvm/x86.c139
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/lib/x86-opcode-map.txt13
-rw-r--r--arch/x86/mm/Makefile9
-rw-r--r--arch/x86/mm/cpu_entry_area.c166
-rw-r--r--arch/x86/mm/debug_pagetables.c80
-rw-r--r--arch/x86/mm/dump_pagetables.c141
-rw-r--r--arch/x86/mm/extable.c6
-rw-r--r--arch/x86/mm/fault.c4
-rw-r--r--arch/x86/mm/init.c80
-rw-r--r--arch/x86/mm/init_32.c6
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/mm/kasan_init_64.c23
-rw-r--r--arch/x86/mm/kmemcheck/error.c1
-rw-r--r--arch/x86/mm/kmemcheck/error.h1
-rw-r--r--arch/x86/mm/kmemcheck/opcode.c1
-rw-r--r--arch/x86/mm/kmemcheck/opcode.h1
-rw-r--r--arch/x86/mm/kmemcheck/pte.c1
-rw-r--r--arch/x86/mm/kmemcheck/pte.h1
-rw-r--r--arch/x86/mm/kmemcheck/selftest.c1
-rw-r--r--arch/x86/mm/kmemcheck/selftest.h1
-rw-r--r--arch/x86/mm/kmemcheck/shadow.h1
-rw-r--r--arch/x86/mm/kmmio.c12
-rw-r--r--arch/x86/mm/mem_encrypt.c4
-rw-r--r--arch/x86/mm/pgtable.c5
-rw-r--r--arch/x86/mm/pgtable_32.c1
-rw-r--r--arch/x86/mm/pti.c387
-rw-r--r--arch/x86/mm/tlb.c64
-rw-r--r--arch/x86/pci/broadcom_bus.c2
-rw-r--r--arch/x86/pci/fixup.c27
-rw-r--r--arch/x86/platform/efi/efi_64.c5
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2
-rw-r--r--arch/x86/platform/uv/uv_irq.c2
-rw-r--r--arch/x86/platform/uv/uv_nmi.c4
-rw-r--r--arch/x86/power/cpu.c112
-rw-r--r--arch/x86/xen/apic.c2
-rw-r--r--arch/x86/xen/enlighten.c81
-rw-r--r--arch/x86/xen/enlighten_pv.c42
-rw-r--r--arch/x86/xen/mmu_pv.c14
-rw-r--r--arch/x86/xen/setup.c6
-rw-r--r--arch/x86/xen/xen-asm_64.S14
-rw-r--r--arch/xtensa/include/uapi/asm/Kbuild1
471 files changed, 5260 insertions, 2373 deletions
diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
index b15bf6bc0e94..14a2e9af97e9 100644
--- a/arch/alpha/include/uapi/asm/Kbuild
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bpf_perf_event.h
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index fa6d0ff4ff89..170b5db64afe 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 1b81c4e75772..d37f95025807 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -630,6 +630,7 @@
reg-names = "phy";
status = "disabled";
ti,ctrl_mod = <&usb_ctrl_mod>;
+ #phy-cells = <0>;
};
usb0: usb@47401000 {
@@ -678,6 +679,7 @@
reg-names = "phy";
status = "disabled";
ti,ctrl_mod = <&usb_ctrl_mod>;
+ #phy-cells = <0>;
};
usb1: usb@47401800 {
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index e5b061469bf8..4714a59fd86d 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -927,7 +927,8 @@
reg = <0x48038000 0x2000>,
<0x46000000 0x400000>;
reg-names = "mpu", "dat";
- interrupts = <80>, <81>;
+ interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "tx", "rx";
status = "disabled";
dmas = <&edma 8 2>,
@@ -941,7 +942,8 @@
reg = <0x4803C000 0x2000>,
<0x46400000 0x400000>;
reg-names = "mpu", "dat";
- interrupts = <82>, <83>;
+ interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "tx", "rx";
status = "disabled";
dmas = <&edma 10 2>,
diff --git a/arch/arm/boot/dts/am437x-cm-t43.dts b/arch/arm/boot/dts/am437x-cm-t43.dts
index 9e92d480576b..3b9a94c274a7 100644
--- a/arch/arm/boot/dts/am437x-cm-t43.dts
+++ b/arch/arm/boot/dts/am437x-cm-t43.dts
@@ -301,8 +301,8 @@
status = "okay";
pinctrl-names = "default";
pinctrl-0 = <&spi0_pins>;
- dmas = <&edma 16
- &edma 17>;
+ dmas = <&edma 16 0
+ &edma 17 0>;
dma-names = "tx0", "rx0";
flash: w25q64cvzpig@0 {
diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts b/arch/arm/boot/dts/armada-385-db-ap.dts
index 25d2d720dc0e..678aa023335d 100644
--- a/arch/arm/boot/dts/armada-385-db-ap.dts
+++ b/arch/arm/boot/dts/armada-385-db-ap.dts
@@ -236,6 +236,7 @@
usb3_phy: usb3_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_xhci0_vbus>;
+ #phy-cells = <0>;
};
reg_xhci0_vbus: xhci0-vbus {
diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi
index e1f355ffc8f7..434dc9aaa5e4 100644
--- a/arch/arm/boot/dts/armada-385-linksys.dtsi
+++ b/arch/arm/boot/dts/armada-385-linksys.dtsi
@@ -66,6 +66,7 @@
usb3_1_phy: usb3_1-phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&usb3_1_vbus>;
+ #phy-cells = <0>;
};
usb3_1_vbus: usb3_1-vbus {
diff --git a/arch/arm/boot/dts/armada-385-synology-ds116.dts b/arch/arm/boot/dts/armada-385-synology-ds116.dts
index 36ad571e76f3..0a3552ebda3b 100644
--- a/arch/arm/boot/dts/armada-385-synology-ds116.dts
+++ b/arch/arm/boot/dts/armada-385-synology-ds116.dts
@@ -191,11 +191,13 @@
usb3_0_phy: usb3_0_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_usb3_0_vbus>;
+ #phy-cells = <0>;
};
usb3_1_phy: usb3_1_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_usb3_1_vbus>;
+ #phy-cells = <0>;
};
reg_usb3_0_vbus: usb3-vbus0 {
diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts
index f503955dbd3b..51b4ee6df130 100644
--- a/arch/arm/boot/dts/armada-388-gp.dts
+++ b/arch/arm/boot/dts/armada-388-gp.dts
@@ -276,11 +276,13 @@
usb2_1_phy: usb2_1_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_usb2_1_vbus>;
+ #phy-cells = <0>;
};
usb3_phy: usb3_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_usb3_vbus>;
+ #phy-cells = <0>;
};
reg_usb3_vbus: usb3-vbus {
diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi
index 528b9e3bc1da..dcc55aa84583 100644
--- a/arch/arm/boot/dts/bcm-nsp.dtsi
+++ b/arch/arm/boot/dts/bcm-nsp.dtsi
@@ -85,7 +85,7 @@
timer@20200 {
compatible = "arm,cortex-a9-global-timer";
reg = <0x20200 0x100>;
- interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
clocks = <&periph_clk>;
};
@@ -93,7 +93,7 @@
compatible = "arm,cortex-a9-twd-timer";
reg = <0x20600 0x20>;
interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) |
- IRQ_TYPE_LEVEL_HIGH)>;
+ IRQ_TYPE_EDGE_RISING)>;
clocks = <&periph_clk>;
};
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 013431e3d7c3..dcde93c85c2d 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -639,5 +639,6 @@
usbphy: phy {
compatible = "usb-nop-xceiv";
+ #phy-cells = <0>;
};
};
diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts
index 3bc50849d013..b8bde13de90a 100644
--- a/arch/arm/boot/dts/bcm958623hr.dts
+++ b/arch/arm/boot/dts/bcm958623hr.dts
@@ -141,10 +141,6 @@
status = "okay";
};
-&sata {
- status = "okay";
-};
-
&qspi {
bspi-sel = <0>;
flash: m25p80@0 {
diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts
index d94d14b3c745..6a44b8021702 100644
--- a/arch/arm/boot/dts/bcm958625hr.dts
+++ b/arch/arm/boot/dts/bcm958625hr.dts
@@ -177,10 +177,6 @@
status = "okay";
};
-&sata {
- status = "okay";
-};
-
&srab {
compatible = "brcm,bcm58625-srab", "brcm,nsp-srab";
status = "okay";
diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi
index 9708157f5daf..681f5487406e 100644
--- a/arch/arm/boot/dts/dm814x.dtsi
+++ b/arch/arm/boot/dts/dm814x.dtsi
@@ -75,6 +75,7 @@
reg = <0x47401300 0x100>;
reg-names = "phy";
ti,ctrl_mod = <&usb_ctrl_mod>;
+ #phy-cells = <0>;
};
usb0: usb@47401000 {
@@ -385,6 +386,7 @@
reg = <0x1b00 0x100>;
reg-names = "phy";
ti,ctrl_mod = <&usb_ctrl_mod>;
+ #phy-cells = <0>;
};
};
diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
index 589a67c5f796..84f17f7abb71 100644
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@ -433,15 +433,6 @@
clock-names = "ipg", "per";
};
- srtc: srtc@53fa4000 {
- compatible = "fsl,imx53-rtc", "fsl,imx25-rtc";
- reg = <0x53fa4000 0x4000>;
- interrupts = <24>;
- interrupt-parent = <&tzic>;
- clocks = <&clks IMX5_CLK_SRTC_GATE>;
- clock-names = "ipg";
- };
-
iomuxc: iomuxc@53fa8000 {
compatible = "fsl,imx53-iomuxc";
reg = <0x53fa8000 0x4000>;
diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
index 38faa90007d7..2fa5eb4bd402 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
@@ -72,7 +72,8 @@
};
&gpmc {
- ranges = <1 0 0x08000000 0x1000000>; /* CS1: 16MB for LAN9221 */
+ ranges = <0 0 0x30000000 0x1000000 /* CS0: 16MB for NAND */
+ 1 0 0x2c000000 0x1000000>; /* CS1: 16MB for LAN9221 */
ethernet@gpmc {
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index 26cce4d18405..29cb804d10cc 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -33,11 +33,12 @@
hsusb2_phy: hsusb2_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; /* gpio_4 */
+ #phy-cells = <0>;
};
};
&gpmc {
- ranges = <0 0 0x00000000 0x1000000>; /* CS0: 16MB for NAND */
+ ranges = <0 0 0x30000000 0x1000000>; /* CS0: 16MB for NAND */
nand@0,0 {
compatible = "ti,omap2-nand";
@@ -121,7 +122,7 @@
&mmc3 {
interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>;
- pinctrl-0 = <&mmc3_pins>;
+ pinctrl-0 = <&mmc3_pins &wl127x_gpio>;
pinctrl-names = "default";
vmmc-supply = <&wl12xx_vmmc>;
non-removable;
@@ -132,8 +133,8 @@
wlcore: wlcore@2 {
compatible = "ti,wl1273";
reg = <2>;
- interrupt-parent = <&gpio5>;
- interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */
+ interrupt-parent = <&gpio1>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; /* gpio 2 */
ref-clock-frequency = <26000000>;
};
};
@@ -157,8 +158,6 @@
OMAP3_CORE1_IOPAD(0x2166, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat5.sdmmc3_dat1 */
OMAP3_CORE1_IOPAD(0x2168, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat6.sdmmc3_dat2 */
OMAP3_CORE1_IOPAD(0x216a, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat6.sdmmc3_dat3 */
- OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT_PULLUP | MUX_MODE4) /* mcbsp4_clkx.gpio_152 */
- OMAP3_CORE1_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */
OMAP3_CORE1_IOPAD(0x21d0, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs1.sdmmc3_cmd */
OMAP3_CORE1_IOPAD(0x21d2, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs2.sdmmc_clk */
>;
@@ -228,6 +227,12 @@
OMAP3_WKUP_IOPAD(0x2a0e, PIN_OUTPUT | MUX_MODE4) /* sys_boot2.gpio_4 */
>;
};
+ wl127x_gpio: pinmux_wl127x_gpio_pin {
+ pinctrl-single,pins = <
+ OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4) /* sys_boot0.gpio_2 */
+ OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */
+ >;
+ };
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi
index 4926133077b3..0d9faf1a51ea 100644
--- a/arch/arm/boot/dts/meson.dtsi
+++ b/arch/arm/boot/dts/meson.dtsi
@@ -85,15 +85,6 @@
reg = <0x7c00 0x200>;
};
- gpio_intc: interrupt-controller@9880 {
- compatible = "amlogic,meson-gpio-intc";
- reg = <0xc1109880 0x10>;
- interrupt-controller;
- #interrupt-cells = <2>;
- amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>;
- status = "disabled";
- };
-
hwrng: rng@8100 {
compatible = "amlogic,meson-rng";
reg = <0x8100 0x8>;
@@ -191,6 +182,15 @@
status = "disabled";
};
+ gpio_intc: interrupt-controller@9880 {
+ compatible = "amlogic,meson-gpio-intc";
+ reg = <0x9880 0x10>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>;
+ status = "disabled";
+ };
+
wdt: watchdog@9900 {
compatible = "amlogic,meson6-wdt";
reg = <0x9900 0x8>;
diff --git a/arch/arm/boot/dts/nspire.dtsi b/arch/arm/boot/dts/nspire.dtsi
index ec2283b1a638..1a5ae4cd107f 100644
--- a/arch/arm/boot/dts/nspire.dtsi
+++ b/arch/arm/boot/dts/nspire.dtsi
@@ -56,6 +56,7 @@
usb_phy: usb_phy {
compatible = "usb-nop-xceiv";
+ #phy-cells = <0>;
};
vbus_reg: vbus_reg {
diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts
index 683b96a8f73e..0349fcc9dc26 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -90,6 +90,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
tfp410: encoder0 {
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index 4d2eaf843fa9..3ca8991a6c3e 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -64,6 +64,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
sound {
diff --git a/arch/arm/boot/dts/omap3-cm-t3x.dtsi b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
index 31d5ebf38892..ab6003fe5a43 100644
--- a/arch/arm/boot/dts/omap3-cm-t3x.dtsi
+++ b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
@@ -43,12 +43,14 @@
hsusb1_phy: hsusb1_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&hsusb1_power>;
+ #phy-cells = <0>;
};
/* HS USB Host PHY on PORT 2 */
hsusb2_phy: hsusb2_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
ads7846reg: ads7846-reg {
diff --git a/arch/arm/boot/dts/omap3-evm-common.dtsi b/arch/arm/boot/dts/omap3-evm-common.dtsi
index dbc3f030a16c..ee64191e41ca 100644
--- a/arch/arm/boot/dts/omap3-evm-common.dtsi
+++ b/arch/arm/boot/dts/omap3-evm-common.dtsi
@@ -29,6 +29,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio1 21 GPIO_ACTIVE_LOW>; /* gpio_21 */
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
leds {
diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index 4504908c23fe..3dc56fb156b7 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -120,6 +120,7 @@
hsusb2_phy: hsusb2_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio6 14 GPIO_ACTIVE_LOW>;
+ #phy-cells = <0>;
};
tv0: connector {
diff --git a/arch/arm/boot/dts/omap3-igep0020-common.dtsi b/arch/arm/boot/dts/omap3-igep0020-common.dtsi
index 667f96245729..ecbec23af49f 100644
--- a/arch/arm/boot/dts/omap3-igep0020-common.dtsi
+++ b/arch/arm/boot/dts/omap3-igep0020-common.dtsi
@@ -58,6 +58,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio1 24 GPIO_ACTIVE_LOW>; /* gpio_24 */
vcc-supply = <&hsusb1_power>;
+ #phy-cells = <0>;
};
tfp410: encoder {
diff --git a/arch/arm/boot/dts/omap3-igep0030-common.dtsi b/arch/arm/boot/dts/omap3-igep0030-common.dtsi
index e94d9427450c..443f71707437 100644
--- a/arch/arm/boot/dts/omap3-igep0030-common.dtsi
+++ b/arch/arm/boot/dts/omap3-igep0030-common.dtsi
@@ -37,6 +37,7 @@
hsusb2_phy: hsusb2_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio2 22 GPIO_ACTIVE_LOW>; /* gpio_54 */
+ #phy-cells = <0>;
};
};
diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
index 343a36d8031d..7ada1e93e166 100644
--- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
+++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
@@ -51,6 +51,7 @@
hsusb1_phy: hsusb1_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_vcc3>;
+ #phy-cells = <0>;
};
};
diff --git a/arch/arm/boot/dts/omap3-overo-base.dtsi b/arch/arm/boot/dts/omap3-overo-base.dtsi
index f25e158e7163..ac141fcd1742 100644
--- a/arch/arm/boot/dts/omap3-overo-base.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-base.dtsi
@@ -51,6 +51,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio6 23 GPIO_ACTIVE_LOW>; /* gpio_183 */
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
/* Regulator to trigger the nPoweron signal of the Wifi module */
diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi
index 53e007abdc71..cd53dc6c0051 100644
--- a/arch/arm/boot/dts/omap3-pandora-common.dtsi
+++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi
@@ -205,6 +205,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio1 16 GPIO_ACTIVE_LOW>; /* GPIO_16 */
vcc-supply = <&vaux2>;
+ #phy-cells = <0>;
};
/* HS USB Host VBUS supply
diff --git a/arch/arm/boot/dts/omap3-tao3530.dtsi b/arch/arm/boot/dts/omap3-tao3530.dtsi
index 9a601d15247b..6f5bd027b717 100644
--- a/arch/arm/boot/dts/omap3-tao3530.dtsi
+++ b/arch/arm/boot/dts/omap3-tao3530.dtsi
@@ -46,6 +46,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio6 2 GPIO_ACTIVE_LOW>; /* gpio_162 */
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
sound {
diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
index 90b5c7148feb..bb33935df7b0 100644
--- a/arch/arm/boot/dts/omap3.dtsi
+++ b/arch/arm/boot/dts/omap3.dtsi
@@ -715,6 +715,7 @@
compatible = "ti,ohci-omap3";
reg = <0x48064400 0x400>;
interrupts = <76>;
+ remote-wakeup-connected;
};
usbhsehci: ehci@48064800 {
diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts
index 8b93d37310f2..24a463f8641f 100644
--- a/arch/arm/boot/dts/omap4-droid4-xt894.dts
+++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts
@@ -73,6 +73,7 @@
/* HS USB Host PHY on PORT 1 */
hsusb1_phy: hsusb1_phy {
compatible = "usb-nop-xceiv";
+ #phy-cells = <0>;
};
/* LCD regulator from sw5 source */
diff --git a/arch/arm/boot/dts/omap4-duovero.dtsi b/arch/arm/boot/dts/omap4-duovero.dtsi
index 6e6810c258eb..eb123b24c8e3 100644
--- a/arch/arm/boot/dts/omap4-duovero.dtsi
+++ b/arch/arm/boot/dts/omap4-duovero.dtsi
@@ -43,6 +43,7 @@
hsusb1_phy: hsusb1_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>; /* gpio_62 */
+ #phy-cells = <0>;
pinctrl-names = "default";
pinctrl-0 = <&hsusb1phy_pins>;
diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi
index 22c1eee9b07a..5501d1b4e6cd 100644
--- a/arch/arm/boot/dts/omap4-panda-common.dtsi
+++ b/arch/arm/boot/dts/omap4-panda-common.dtsi
@@ -89,6 +89,7 @@
hsusb1_phy: hsusb1_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>; /* gpio_62 */
+ #phy-cells = <0>;
vcc-supply = <&hsusb1_power>;
clocks = <&auxclk3_ck>;
clock-names = "main_clk";
diff --git a/arch/arm/boot/dts/omap4-var-som-om44.dtsi b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
index 6500bfc8d130..10fce28ceb5b 100644
--- a/arch/arm/boot/dts/omap4-var-som-om44.dtsi
+++ b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
@@ -44,6 +44,7 @@
reset-gpios = <&gpio6 17 GPIO_ACTIVE_LOW>; /* gpio 177 */
vcc-supply = <&vbat>;
+ #phy-cells = <0>;
clocks = <&auxclk3_ck>;
clock-names = "main_clk";
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 1dc5a76b3c71..cc1a07a3620f 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -398,7 +398,7 @@
elm: elm@48078000 {
compatible = "ti,am3352-elm";
reg = <0x48078000 0x2000>;
- interrupts = <4>;
+ interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
ti,hwmods = "elm";
status = "disabled";
};
@@ -1081,14 +1081,13 @@
usbhsohci: ohci@4a064800 {
compatible = "ti,ohci-omap3";
reg = <0x4a064800 0x400>;
- interrupt-parent = <&gic>;
interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+ remote-wakeup-connected;
};
usbhsehci: ehci@4a064c00 {
compatible = "ti,ehci-omap";
reg = <0x4a064c00 0x400>;
- interrupt-parent = <&gic>;
interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
};
};
diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi
index 575ecffb0e9e..1b20838bb9a4 100644
--- a/arch/arm/boot/dts/omap5-board-common.dtsi
+++ b/arch/arm/boot/dts/omap5-board-common.dtsi
@@ -73,12 +73,14 @@
clocks = <&auxclk1_ck>;
clock-names = "main_clk";
clock-frequency = <19200000>;
+ #phy-cells = <0>;
};
/* HS USB Host PHY on PORT 3 */
hsusb3_phy: hsusb3_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio3 15 GPIO_ACTIVE_LOW>; /* gpio3_79 ETH_NRESET */
+ #phy-cells = <0>;
};
tpd12s015: encoder {
diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts
index 5b172a04b6f1..5e21fb430a65 100644
--- a/arch/arm/boot/dts/omap5-cm-t54.dts
+++ b/arch/arm/boot/dts/omap5-cm-t54.dts
@@ -63,12 +63,14 @@
hsusb2_phy: hsusb2_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio3 12 GPIO_ACTIVE_LOW>; /* gpio3_76 HUB_RESET */
+ #phy-cells = <0>;
};
/* HS USB Host PHY on PORT 3 */
hsusb3_phy: hsusb3_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio3 19 GPIO_ACTIVE_LOW>; /* gpio3_83 ETH_RESET */
+ #phy-cells = <0>;
};
leds {
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 4cd0005e462f..51a7fb3d7b9a 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -940,6 +940,7 @@
compatible = "ti,ohci-omap3";
reg = <0x4a064800 0x400>;
interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+ remote-wakeup-connected;
};
usbhsehci: ehci@4a064c00 {
diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi
index 2f017fee4009..62baabd757b6 100644
--- a/arch/arm/boot/dts/r8a7790.dtsi
+++ b/arch/arm/boot/dts/r8a7790.dtsi
@@ -1201,6 +1201,7 @@
clock-names = "extal", "usb_extal";
#clock-cells = <2>;
#power-domain-cells = <0>;
+ #reset-cells = <1>;
};
prr: chipid@ff000044 {
diff --git a/arch/arm/boot/dts/r8a7792.dtsi b/arch/arm/boot/dts/r8a7792.dtsi
index 131f65b0426e..3d080e07374c 100644
--- a/arch/arm/boot/dts/r8a7792.dtsi
+++ b/arch/arm/boot/dts/r8a7792.dtsi
@@ -829,6 +829,7 @@
clock-names = "extal";
#clock-cells = <2>;
#power-domain-cells = <0>;
+ #reset-cells = <1>;
};
};
diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi
index 58eae569b4e0..0cd1035de1a4 100644
--- a/arch/arm/boot/dts/r8a7793.dtsi
+++ b/arch/arm/boot/dts/r8a7793.dtsi
@@ -1088,6 +1088,7 @@
clock-names = "extal", "usb_extal";
#clock-cells = <2>;
#power-domain-cells = <0>;
+ #reset-cells = <1>;
};
rst: reset-controller@e6160000 {
diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi
index 905e50c9b524..5643976c1356 100644
--- a/arch/arm/boot/dts/r8a7794.dtsi
+++ b/arch/arm/boot/dts/r8a7794.dtsi
@@ -1099,6 +1099,7 @@
clock-names = "extal", "usb_extal";
#clock-cells = <2>;
#power-domain-cells = <0>;
+ #reset-cells = <1>;
};
rst: reset-controller@e6160000 {
diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
index 02a6227c717c..4b8edc8982cf 100644
--- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
+++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
@@ -121,7 +121,7 @@
switch0port10: port@10 {
reg = <10>;
label = "dsa";
- phy-mode = "xgmii";
+ phy-mode = "xaui";
link = <&switch1port10>;
};
};
@@ -208,7 +208,7 @@
switch1port10: port@10 {
reg = <10>;
label = "dsa";
- phy-mode = "xgmii";
+ phy-mode = "xaui";
link = <&switch0port10>;
};
};
@@ -359,7 +359,7 @@
};
&i2c1 {
- at24mac602@0 {
+ at24mac602@50 {
compatible = "atmel,24c02";
reg = <0x50>;
read-only;
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 4ecd5120fce7..a2c878769eaf 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -108,6 +108,7 @@ struct sa1111 {
spinlock_t lock;
void __iomem *base;
struct sa1111_platform_data *pdata;
+ struct irq_domain *irqdomain;
struct gpio_chip gc;
#ifdef CONFIG_PM
void *saved_state;
@@ -125,7 +126,7 @@ struct sa1111_dev_info {
unsigned long skpcr_mask;
bool dma;
unsigned int devid;
- unsigned int irq[6];
+ unsigned int hwirq[6];
};
static struct sa1111_dev_info sa1111_devices[] = {
@@ -134,7 +135,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.skpcr_mask = SKPCR_UCLKEN,
.dma = true,
.devid = SA1111_DEVID_USB,
- .irq = {
+ .hwirq = {
IRQ_USBPWR,
IRQ_HCIM,
IRQ_HCIBUFFACC,
@@ -148,7 +149,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.skpcr_mask = SKPCR_I2SCLKEN | SKPCR_L3CLKEN,
.dma = true,
.devid = SA1111_DEVID_SAC,
- .irq = {
+ .hwirq = {
AUDXMTDMADONEA,
AUDXMTDMADONEB,
AUDRCVDMADONEA,
@@ -164,7 +165,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.offset = SA1111_KBD,
.skpcr_mask = SKPCR_PTCLKEN,
.devid = SA1111_DEVID_PS2_KBD,
- .irq = {
+ .hwirq = {
IRQ_TPRXINT,
IRQ_TPTXINT
},
@@ -173,7 +174,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.offset = SA1111_MSE,
.skpcr_mask = SKPCR_PMCLKEN,
.devid = SA1111_DEVID_PS2_MSE,
- .irq = {
+ .hwirq = {
IRQ_MSRXINT,
IRQ_MSTXINT
},
@@ -182,7 +183,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.offset = 0x1800,
.skpcr_mask = 0,
.devid = SA1111_DEVID_PCMCIA,
- .irq = {
+ .hwirq = {
IRQ_S0_READY_NINT,
IRQ_S0_CD_VALID,
IRQ_S0_BVD1_STSCHG,
@@ -193,6 +194,19 @@ static struct sa1111_dev_info sa1111_devices[] = {
},
};
+static int sa1111_map_irq(struct sa1111 *sachip, irq_hw_number_t hwirq)
+{
+ return irq_create_mapping(sachip->irqdomain, hwirq);
+}
+
+static void sa1111_handle_irqdomain(struct irq_domain *irqdomain, int irq)
+{
+ struct irq_desc *d = irq_to_desc(irq_linear_revmap(irqdomain, irq));
+
+ if (d)
+ generic_handle_irq_desc(d);
+}
+
/*
* SA1111 interrupt support. Since clearing an IRQ while there are
* active IRQs causes the interrupt output to pulse, the upper levels
@@ -202,49 +216,45 @@ static void sa1111_irq_handler(struct irq_desc *desc)
{
unsigned int stat0, stat1, i;
struct sa1111 *sachip = irq_desc_get_handler_data(desc);
+ struct irq_domain *irqdomain;
void __iomem *mapbase = sachip->base + SA1111_INTC;
- stat0 = sa1111_readl(mapbase + SA1111_INTSTATCLR0);
- stat1 = sa1111_readl(mapbase + SA1111_INTSTATCLR1);
+ stat0 = readl_relaxed(mapbase + SA1111_INTSTATCLR0);
+ stat1 = readl_relaxed(mapbase + SA1111_INTSTATCLR1);
- sa1111_writel(stat0, mapbase + SA1111_INTSTATCLR0);
+ writel_relaxed(stat0, mapbase + SA1111_INTSTATCLR0);
desc->irq_data.chip->irq_ack(&desc->irq_data);
- sa1111_writel(stat1, mapbase + SA1111_INTSTATCLR1);
+ writel_relaxed(stat1, mapbase + SA1111_INTSTATCLR1);
if (stat0 == 0 && stat1 == 0) {
do_bad_IRQ(desc);
return;
}
+ irqdomain = sachip->irqdomain;
+
for (i = 0; stat0; i++, stat0 >>= 1)
if (stat0 & 1)
- generic_handle_irq(i + sachip->irq_base);
+ sa1111_handle_irqdomain(irqdomain, i);
for (i = 32; stat1; i++, stat1 >>= 1)
if (stat1 & 1)
- generic_handle_irq(i + sachip->irq_base);
+ sa1111_handle_irqdomain(irqdomain, i);
/* For level-based interrupts */
desc->irq_data.chip->irq_unmask(&desc->irq_data);
}
-#define SA1111_IRQMASK_LO(x) (1 << (x - sachip->irq_base))
-#define SA1111_IRQMASK_HI(x) (1 << (x - sachip->irq_base - 32))
-
static u32 sa1111_irqmask(struct irq_data *d)
{
- struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-
- return BIT((d->irq - sachip->irq_base) & 31);
+ return BIT(irqd_to_hwirq(d) & 31);
}
static int sa1111_irqbank(struct irq_data *d)
{
- struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-
- return ((d->irq - sachip->irq_base) / 32) * 4;
+ return (irqd_to_hwirq(d) / 32) * 4;
}
static void sa1111_ack_irq(struct irq_data *d)
@@ -257,9 +267,9 @@ static void sa1111_mask_irq(struct irq_data *d)
void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
u32 ie;
- ie = sa1111_readl(mapbase + SA1111_INTEN0);
+ ie = readl_relaxed(mapbase + SA1111_INTEN0);
ie &= ~sa1111_irqmask(d);
- sa1111_writel(ie, mapbase + SA1111_INTEN0);
+ writel(ie, mapbase + SA1111_INTEN0);
}
static void sa1111_unmask_irq(struct irq_data *d)
@@ -268,9 +278,9 @@ static void sa1111_unmask_irq(struct irq_data *d)
void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
u32 ie;
- ie = sa1111_readl(mapbase + SA1111_INTEN0);
+ ie = readl_relaxed(mapbase + SA1111_INTEN0);
ie |= sa1111_irqmask(d);
- sa1111_writel(ie, mapbase + SA1111_INTEN0);
+ writel_relaxed(ie, mapbase + SA1111_INTEN0);
}
/*
@@ -287,11 +297,11 @@ static int sa1111_retrigger_irq(struct irq_data *d)
u32 ip, mask = sa1111_irqmask(d);
int i;
- ip = sa1111_readl(mapbase + SA1111_INTPOL0);
+ ip = readl_relaxed(mapbase + SA1111_INTPOL0);
for (i = 0; i < 8; i++) {
- sa1111_writel(ip ^ mask, mapbase + SA1111_INTPOL0);
- sa1111_writel(ip, mapbase + SA1111_INTPOL0);
- if (sa1111_readl(mapbase + SA1111_INTSTATCLR0) & mask)
+ writel_relaxed(ip ^ mask, mapbase + SA1111_INTPOL0);
+ writel_relaxed(ip, mapbase + SA1111_INTPOL0);
+ if (readl_relaxed(mapbase + SA1111_INTSTATCLR0) & mask)
break;
}
@@ -313,13 +323,13 @@ static int sa1111_type_irq(struct irq_data *d, unsigned int flags)
if ((!(flags & IRQ_TYPE_EDGE_RISING) ^ !(flags & IRQ_TYPE_EDGE_FALLING)) == 0)
return -EINVAL;
- ip = sa1111_readl(mapbase + SA1111_INTPOL0);
+ ip = readl_relaxed(mapbase + SA1111_INTPOL0);
if (flags & IRQ_TYPE_EDGE_RISING)
ip &= ~mask;
else
ip |= mask;
- sa1111_writel(ip, mapbase + SA1111_INTPOL0);
- sa1111_writel(ip, mapbase + SA1111_WAKEPOL0);
+ writel_relaxed(ip, mapbase + SA1111_INTPOL0);
+ writel_relaxed(ip, mapbase + SA1111_WAKEPOL0);
return 0;
}
@@ -330,12 +340,12 @@ static int sa1111_wake_irq(struct irq_data *d, unsigned int on)
void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
u32 we, mask = sa1111_irqmask(d);
- we = sa1111_readl(mapbase + SA1111_WAKEEN0);
+ we = readl_relaxed(mapbase + SA1111_WAKEEN0);
if (on)
we |= mask;
else
we &= ~mask;
- sa1111_writel(we, mapbase + SA1111_WAKEEN0);
+ writel_relaxed(we, mapbase + SA1111_WAKEEN0);
return 0;
}
@@ -350,10 +360,30 @@ static struct irq_chip sa1111_irq_chip = {
.irq_set_wake = sa1111_wake_irq,
};
+static int sa1111_irqdomain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ struct sa1111 *sachip = d->host_data;
+
+ /* Disallow unavailable interrupts */
+ if (hwirq > SSPROR && hwirq < AUDXMTDMADONEA)
+ return -EINVAL;
+
+ irq_set_chip_data(irq, sachip);
+ irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
+ irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
+
+ return 0;
+}
+
+static const struct irq_domain_ops sa1111_irqdomain_ops = {
+ .map = sa1111_irqdomain_map,
+ .xlate = irq_domain_xlate_twocell,
+};
+
static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
{
void __iomem *irqbase = sachip->base + SA1111_INTC;
- unsigned i, irq;
int ret;
/*
@@ -373,38 +403,40 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
sachip->irq_base = ret;
/* disable all IRQs */
- sa1111_writel(0, irqbase + SA1111_INTEN0);
- sa1111_writel(0, irqbase + SA1111_INTEN1);
- sa1111_writel(0, irqbase + SA1111_WAKEEN0);
- sa1111_writel(0, irqbase + SA1111_WAKEEN1);
+ writel_relaxed(0, irqbase + SA1111_INTEN0);
+ writel_relaxed(0, irqbase + SA1111_INTEN1);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN0);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN1);
/*
* detect on rising edge. Note: Feb 2001 Errata for SA1111
* specifies that S0ReadyInt and S1ReadyInt should be '1'.
*/
- sa1111_writel(0, irqbase + SA1111_INTPOL0);
- sa1111_writel(BIT(IRQ_S0_READY_NINT & 31) |
- BIT(IRQ_S1_READY_NINT & 31),
- irqbase + SA1111_INTPOL1);
+ writel_relaxed(0, irqbase + SA1111_INTPOL0);
+ writel_relaxed(BIT(IRQ_S0_READY_NINT & 31) |
+ BIT(IRQ_S1_READY_NINT & 31),
+ irqbase + SA1111_INTPOL1);
/* clear all IRQs */
- sa1111_writel(~0, irqbase + SA1111_INTSTATCLR0);
- sa1111_writel(~0, irqbase + SA1111_INTSTATCLR1);
-
- for (i = IRQ_GPAIN0; i <= SSPROR; i++) {
- irq = sachip->irq_base + i;
- irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
- irq_set_chip_data(irq, sachip);
- irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
- }
+ writel_relaxed(~0, irqbase + SA1111_INTSTATCLR0);
+ writel_relaxed(~0, irqbase + SA1111_INTSTATCLR1);
- for (i = AUDXMTDMADONEA; i <= IRQ_S1_BVD1_STSCHG; i++) {
- irq = sachip->irq_base + i;
- irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
- irq_set_chip_data(irq, sachip);
- irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
+ sachip->irqdomain = irq_domain_add_linear(NULL, SA1111_IRQ_NR,
+ &sa1111_irqdomain_ops,
+ sachip);
+ if (!sachip->irqdomain) {
+ irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
+ return -ENOMEM;
}
+ irq_domain_associate_many(sachip->irqdomain,
+ sachip->irq_base + IRQ_GPAIN0,
+ IRQ_GPAIN0, SSPROR + 1 - IRQ_GPAIN0);
+ irq_domain_associate_many(sachip->irqdomain,
+ sachip->irq_base + AUDXMTDMADONEA,
+ AUDXMTDMADONEA,
+ IRQ_S1_BVD1_STSCHG + 1 - AUDXMTDMADONEA);
+
/*
* Register SA1111 interrupt
*/
@@ -420,20 +452,22 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
static void sa1111_remove_irq(struct sa1111 *sachip)
{
+ struct irq_domain *domain = sachip->irqdomain;
void __iomem *irqbase = sachip->base + SA1111_INTC;
+ int i;
/* disable all IRQs */
- sa1111_writel(0, irqbase + SA1111_INTEN0);
- sa1111_writel(0, irqbase + SA1111_INTEN1);
- sa1111_writel(0, irqbase + SA1111_WAKEEN0);
- sa1111_writel(0, irqbase + SA1111_WAKEEN1);
+ writel_relaxed(0, irqbase + SA1111_INTEN0);
+ writel_relaxed(0, irqbase + SA1111_INTEN1);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN0);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN1);
- if (sachip->irq != NO_IRQ) {
- irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
- irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
+ irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
+ for (i = 0; i < SA1111_IRQ_NR; i++)
+ irq_dispose_mapping(irq_find_mapping(domain, i));
+ irq_domain_remove(domain);
- release_mem_region(sachip->phys + SA1111_INTC, 512);
- }
+ release_mem_region(sachip->phys + SA1111_INTC, 512);
}
enum {
@@ -572,7 +606,7 @@ static int sa1111_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
{
struct sa1111 *sachip = gc_to_sa1111(gc);
- return sachip->irq_base + offset;
+ return sa1111_map_irq(sachip, offset);
}
static int sa1111_setup_gpios(struct sa1111 *sachip)
@@ -618,11 +652,11 @@ static void sa1111_wake(struct sa1111 *sachip)
/*
* Turn VCO on, and disable PLL Bypass.
*/
- r = sa1111_readl(sachip->base + SA1111_SKCR);
+ r = readl_relaxed(sachip->base + SA1111_SKCR);
r &= ~SKCR_VCO_OFF;
- sa1111_writel(r, sachip->base + SA1111_SKCR);
+ writel_relaxed(r, sachip->base + SA1111_SKCR);
r |= SKCR_PLL_BYPASS | SKCR_OE_EN;
- sa1111_writel(r, sachip->base + SA1111_SKCR);
+ writel_relaxed(r, sachip->base + SA1111_SKCR);
/*
* Wait lock time. SA1111 manual _doesn't_
@@ -634,7 +668,7 @@ static void sa1111_wake(struct sa1111 *sachip)
* Enable RCLK. We also ensure that RDYEN is set.
*/
r |= SKCR_RCLKEN | SKCR_RDYEN;
- sa1111_writel(r, sachip->base + SA1111_SKCR);
+ writel_relaxed(r, sachip->base + SA1111_SKCR);
/*
* Wait 14 RCLK cycles for the chip to finish coming out
@@ -645,7 +679,7 @@ static void sa1111_wake(struct sa1111 *sachip)
/*
* Ensure all clocks are initially off.
*/
- sa1111_writel(0, sachip->base + SA1111_SKPCR);
+ writel_relaxed(0, sachip->base + SA1111_SKPCR);
spin_unlock_irqrestore(&sachip->lock, flags);
}
@@ -675,7 +709,7 @@ sa1111_configure_smc(struct sa1111 *sachip, int sdram, unsigned int drac,
if (cas_latency == 3)
smcr |= SMCR_CLAT;
- sa1111_writel(smcr, sachip->base + SA1111_SMCR);
+ writel_relaxed(smcr, sachip->base + SA1111_SMCR);
/*
* Now clear the bits in the DMA mask to work around the SA1111
@@ -723,8 +757,8 @@ sa1111_init_one_child(struct sa1111 *sachip, struct resource *parent,
dev->mapbase = sachip->base + info->offset;
dev->skpcr_mask = info->skpcr_mask;
- for (i = 0; i < ARRAY_SIZE(info->irq); i++)
- dev->irq[i] = sachip->irq_base + info->irq[i];
+ for (i = 0; i < ARRAY_SIZE(info->hwirq); i++)
+ dev->hwirq[i] = info->hwirq[i];
/*
* If the parent device has a DMA mask associated with it, and
@@ -814,7 +848,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
/*
* Probe for the chip. Only touch the SBI registers.
*/
- id = sa1111_readl(sachip->base + SA1111_SKID);
+ id = readl_relaxed(sachip->base + SA1111_SKID);
if ((id & SKID_ID_MASK) != SKID_SA1111_ID) {
printk(KERN_DEBUG "SA1111 not detected: ID = %08lx\n", id);
ret = -ENODEV;
@@ -833,11 +867,9 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
* The interrupt controller must be initialised before any
* other device to ensure that the interrupts are available.
*/
- if (sachip->irq != NO_IRQ) {
- ret = sa1111_setup_irq(sachip, pd->irq_base);
- if (ret)
- goto err_clk;
- }
+ ret = sa1111_setup_irq(sachip, pd->irq_base);
+ if (ret)
+ goto err_clk;
/* Setup the GPIOs - should really be done after the IRQ setup */
ret = sa1111_setup_gpios(sachip);
@@ -864,8 +896,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
* DMA. It can otherwise be held firmly in the off position.
* (currently, we always enable it.)
*/
- val = sa1111_readl(sachip->base + SA1111_SKPCR);
- sa1111_writel(val | SKPCR_DCLKEN, sachip->base + SA1111_SKPCR);
+ val = readl_relaxed(sachip->base + SA1111_SKPCR);
+ writel_relaxed(val | SKPCR_DCLKEN, sachip->base + SA1111_SKPCR);
/*
* Enable the SA1110 memory bus request and grant signals.
@@ -962,31 +994,31 @@ static int sa1111_suspend_noirq(struct device *dev)
* Save state.
*/
base = sachip->base;
- save->skcr = sa1111_readl(base + SA1111_SKCR);
- save->skpcr = sa1111_readl(base + SA1111_SKPCR);
- save->skcdr = sa1111_readl(base + SA1111_SKCDR);
- save->skaud = sa1111_readl(base + SA1111_SKAUD);
- save->skpwm0 = sa1111_readl(base + SA1111_SKPWM0);
- save->skpwm1 = sa1111_readl(base + SA1111_SKPWM1);
+ save->skcr = readl_relaxed(base + SA1111_SKCR);
+ save->skpcr = readl_relaxed(base + SA1111_SKPCR);
+ save->skcdr = readl_relaxed(base + SA1111_SKCDR);
+ save->skaud = readl_relaxed(base + SA1111_SKAUD);
+ save->skpwm0 = readl_relaxed(base + SA1111_SKPWM0);
+ save->skpwm1 = readl_relaxed(base + SA1111_SKPWM1);
- sa1111_writel(0, sachip->base + SA1111_SKPWM0);
- sa1111_writel(0, sachip->base + SA1111_SKPWM1);
+ writel_relaxed(0, sachip->base + SA1111_SKPWM0);
+ writel_relaxed(0, sachip->base + SA1111_SKPWM1);
base = sachip->base + SA1111_INTC;
- save->intpol0 = sa1111_readl(base + SA1111_INTPOL0);
- save->intpol1 = sa1111_readl(base + SA1111_INTPOL1);
- save->inten0 = sa1111_readl(base + SA1111_INTEN0);
- save->inten1 = sa1111_readl(base + SA1111_INTEN1);
- save->wakepol0 = sa1111_readl(base + SA1111_WAKEPOL0);
- save->wakepol1 = sa1111_readl(base + SA1111_WAKEPOL1);
- save->wakeen0 = sa1111_readl(base + SA1111_WAKEEN0);
- save->wakeen1 = sa1111_readl(base + SA1111_WAKEEN1);
+ save->intpol0 = readl_relaxed(base + SA1111_INTPOL0);
+ save->intpol1 = readl_relaxed(base + SA1111_INTPOL1);
+ save->inten0 = readl_relaxed(base + SA1111_INTEN0);
+ save->inten1 = readl_relaxed(base + SA1111_INTEN1);
+ save->wakepol0 = readl_relaxed(base + SA1111_WAKEPOL0);
+ save->wakepol1 = readl_relaxed(base + SA1111_WAKEPOL1);
+ save->wakeen0 = readl_relaxed(base + SA1111_WAKEEN0);
+ save->wakeen1 = readl_relaxed(base + SA1111_WAKEEN1);
/*
* Disable.
*/
- val = sa1111_readl(sachip->base + SA1111_SKCR);
- sa1111_writel(val | SKCR_SLEEP, sachip->base + SA1111_SKCR);
+ val = readl_relaxed(sachip->base + SA1111_SKCR);
+ writel_relaxed(val | SKCR_SLEEP, sachip->base + SA1111_SKCR);
clk_disable(sachip->clk);
@@ -1023,7 +1055,7 @@ static int sa1111_resume_noirq(struct device *dev)
* Ensure that the SA1111 is still here.
* FIXME: shouldn't do this here.
*/
- id = sa1111_readl(sachip->base + SA1111_SKID);
+ id = readl_relaxed(sachip->base + SA1111_SKID);
if ((id & SKID_ID_MASK) != SKID_SA1111_ID) {
__sa1111_remove(sachip);
dev_set_drvdata(dev, NULL);
@@ -1047,26 +1079,26 @@ static int sa1111_resume_noirq(struct device *dev)
*/
spin_lock_irqsave(&sachip->lock, flags);
- sa1111_writel(0, sachip->base + SA1111_INTC + SA1111_INTEN0);
- sa1111_writel(0, sachip->base + SA1111_INTC + SA1111_INTEN1);
+ writel_relaxed(0, sachip->base + SA1111_INTC + SA1111_INTEN0);
+ writel_relaxed(0, sachip->base + SA1111_INTC + SA1111_INTEN1);
base = sachip->base;
- sa1111_writel(save->skcr, base + SA1111_SKCR);
- sa1111_writel(save->skpcr, base + SA1111_SKPCR);
- sa1111_writel(save->skcdr, base + SA1111_SKCDR);
- sa1111_writel(save->skaud, base + SA1111_SKAUD);
- sa1111_writel(save->skpwm0, base + SA1111_SKPWM0);
- sa1111_writel(save->skpwm1, base + SA1111_SKPWM1);
+ writel_relaxed(save->skcr, base + SA1111_SKCR);
+ writel_relaxed(save->skpcr, base + SA1111_SKPCR);
+ writel_relaxed(save->skcdr, base + SA1111_SKCDR);
+ writel_relaxed(save->skaud, base + SA1111_SKAUD);
+ writel_relaxed(save->skpwm0, base + SA1111_SKPWM0);
+ writel_relaxed(save->skpwm1, base + SA1111_SKPWM1);
base = sachip->base + SA1111_INTC;
- sa1111_writel(save->intpol0, base + SA1111_INTPOL0);
- sa1111_writel(save->intpol1, base + SA1111_INTPOL1);
- sa1111_writel(save->inten0, base + SA1111_INTEN0);
- sa1111_writel(save->inten1, base + SA1111_INTEN1);
- sa1111_writel(save->wakepol0, base + SA1111_WAKEPOL0);
- sa1111_writel(save->wakepol1, base + SA1111_WAKEPOL1);
- sa1111_writel(save->wakeen0, base + SA1111_WAKEEN0);
- sa1111_writel(save->wakeen1, base + SA1111_WAKEEN1);
+ writel_relaxed(save->intpol0, base + SA1111_INTPOL0);
+ writel_relaxed(save->intpol1, base + SA1111_INTPOL1);
+ writel_relaxed(save->inten0, base + SA1111_INTEN0);
+ writel_relaxed(save->inten1, base + SA1111_INTEN1);
+ writel_relaxed(save->wakepol0, base + SA1111_WAKEPOL0);
+ writel_relaxed(save->wakepol1, base + SA1111_WAKEPOL1);
+ writel_relaxed(save->wakeen0, base + SA1111_WAKEEN0);
+ writel_relaxed(save->wakeen1, base + SA1111_WAKEEN1);
spin_unlock_irqrestore(&sachip->lock, flags);
@@ -1153,7 +1185,7 @@ static unsigned int __sa1111_pll_clock(struct sa1111 *sachip)
{
unsigned int skcdr, fbdiv, ipdiv, opdiv;
- skcdr = sa1111_readl(sachip->base + SA1111_SKCDR);
+ skcdr = readl_relaxed(sachip->base + SA1111_SKCDR);
fbdiv = (skcdr & 0x007f) + 2;
ipdiv = ((skcdr & 0x0f80) >> 7) + 2;
@@ -1195,13 +1227,13 @@ void sa1111_select_audio_mode(struct sa1111_dev *sadev, int mode)
spin_lock_irqsave(&sachip->lock, flags);
- val = sa1111_readl(sachip->base + SA1111_SKCR);
+ val = readl_relaxed(sachip->base + SA1111_SKCR);
if (mode == SA1111_AUDIO_I2S) {
val &= ~SKCR_SELAC;
} else {
val |= SKCR_SELAC;
}
- sa1111_writel(val, sachip->base + SA1111_SKCR);
+ writel_relaxed(val, sachip->base + SA1111_SKCR);
spin_unlock_irqrestore(&sachip->lock, flags);
}
@@ -1226,7 +1258,7 @@ int sa1111_set_audio_rate(struct sa1111_dev *sadev, int rate)
if (div > 128)
div = 128;
- sa1111_writel(div - 1, sachip->base + SA1111_SKAUD);
+ writel_relaxed(div - 1, sachip->base + SA1111_SKAUD);
return 0;
}
@@ -1244,7 +1276,7 @@ int sa1111_get_audio_rate(struct sa1111_dev *sadev)
if (sadev->devid != SA1111_DEVID_SAC)
return -EINVAL;
- div = sa1111_readl(sachip->base + SA1111_SKAUD) + 1;
+ div = readl_relaxed(sachip->base + SA1111_SKAUD) + 1;
return __sa1111_pll_clock(sachip) / (256 * div);
}
@@ -1261,10 +1293,10 @@ void sa1111_set_io_dir(struct sa1111_dev *sadev,
#define MODIFY_BITS(port, mask, dir) \
if (mask) { \
- val = sa1111_readl(port); \
+ val = readl_relaxed(port); \
val &= ~(mask); \
val |= (dir) & (mask); \
- sa1111_writel(val, port); \
+ writel_relaxed(val, port); \
}
spin_lock_irqsave(&sachip->lock, flags);
@@ -1329,8 +1361,8 @@ int sa1111_enable_device(struct sa1111_dev *sadev)
if (ret == 0) {
spin_lock_irqsave(&sachip->lock, flags);
- val = sa1111_readl(sachip->base + SA1111_SKPCR);
- sa1111_writel(val | sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
+ val = readl_relaxed(sachip->base + SA1111_SKPCR);
+ writel_relaxed(val | sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
spin_unlock_irqrestore(&sachip->lock, flags);
}
return ret;
@@ -1348,8 +1380,8 @@ void sa1111_disable_device(struct sa1111_dev *sadev)
unsigned int val;
spin_lock_irqsave(&sachip->lock, flags);
- val = sa1111_readl(sachip->base + SA1111_SKPCR);
- sa1111_writel(val & ~sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
+ val = readl_relaxed(sachip->base + SA1111_SKPCR);
+ writel_relaxed(val & ~sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
spin_unlock_irqrestore(&sachip->lock, flags);
if (sachip->pdata && sachip->pdata->disable)
@@ -1359,9 +1391,10 @@ EXPORT_SYMBOL(sa1111_disable_device);
int sa1111_get_irq(struct sa1111_dev *sadev, unsigned num)
{
- if (num >= ARRAY_SIZE(sadev->irq))
+ struct sa1111 *sachip = sa1111_chip_driver(sadev);
+ if (num >= ARRAY_SIZE(sadev->hwirq))
return -EINVAL;
- return sadev->irq[num];
+ return sa1111_map_irq(sachip, sadev->hwirq[num]);
}
EXPORT_SYMBOL_GPL(sa1111_get_irq);
@@ -1379,36 +1412,6 @@ static int sa1111_match(struct device *_dev, struct device_driver *_drv)
return !!(dev->devid & drv->devid);
}
-static int sa1111_bus_suspend(struct device *dev, pm_message_t state)
-{
- struct sa1111_dev *sadev = to_sa1111_device(dev);
- struct sa1111_driver *drv = SA1111_DRV(dev->driver);
- int ret = 0;
-
- if (drv && drv->suspend)
- ret = drv->suspend(sadev, state);
- return ret;
-}
-
-static int sa1111_bus_resume(struct device *dev)
-{
- struct sa1111_dev *sadev = to_sa1111_device(dev);
- struct sa1111_driver *drv = SA1111_DRV(dev->driver);
- int ret = 0;
-
- if (drv && drv->resume)
- ret = drv->resume(sadev);
- return ret;
-}
-
-static void sa1111_bus_shutdown(struct device *dev)
-{
- struct sa1111_driver *drv = SA1111_DRV(dev->driver);
-
- if (drv && drv->shutdown)
- drv->shutdown(to_sa1111_device(dev));
-}
-
static int sa1111_bus_probe(struct device *dev)
{
struct sa1111_dev *sadev = to_sa1111_device(dev);
@@ -1436,9 +1439,6 @@ struct bus_type sa1111_bus_type = {
.match = sa1111_match,
.probe = sa1111_bus_probe,
.remove = sa1111_bus_remove,
- .suspend = sa1111_bus_suspend,
- .resume = sa1111_bus_resume,
- .shutdown = sa1111_bus_shutdown,
};
EXPORT_SYMBOL(sa1111_bus_type);
diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h
index 0bbf163d1ed3..798e520e8a49 100644
--- a/arch/arm/include/asm/hardware/sa1111.h
+++ b/arch/arm/include/asm/hardware/sa1111.h
@@ -16,33 +16,6 @@
#include <mach/bitfield.h>
/*
- * The SA1111 is always located at virtual 0xf4000000, and is always
- * "native" endian.
- */
-
-#define SA1111_VBASE 0xf4000000
-
-/* Don't use these! */
-#define SA1111_p2v( x ) ((x) - SA1111_BASE + SA1111_VBASE)
-#define SA1111_v2p( x ) ((x) - SA1111_VBASE + SA1111_BASE)
-
-#ifndef __ASSEMBLY__
-#define _SA1111(x) ((x) + sa1111->resource.start)
-#endif
-
-#define sa1111_writel(val,addr) __raw_writel(val, addr)
-#define sa1111_readl(addr) __raw_readl(addr)
-
-/*
- * 26 bits of the SA-1110 address bus are available to the SA-1111.
- * Use these when feeding target addresses to the DMA engines.
- */
-
-#define SA1111_ADDR_WIDTH (26)
-#define SA1111_ADDR_MASK ((1<<SA1111_ADDR_WIDTH)-1)
-#define SA1111_DMA_ADDR(x) ((x)&SA1111_ADDR_MASK)
-
-/*
* Don't ask the (SAC) DMA engines to move less than this amount.
*/
@@ -417,7 +390,7 @@ struct sa1111_dev {
struct resource res;
void __iomem *mapbase;
unsigned int skpcr_mask;
- unsigned int irq[6];
+ unsigned int hwirq[6];
u64 dma_mask;
};
@@ -431,9 +404,6 @@ struct sa1111_driver {
unsigned int devid;
int (*probe)(struct sa1111_dev *);
int (*remove)(struct sa1111_dev *);
- int (*suspend)(struct sa1111_dev *, pm_message_t);
- int (*resume)(struct sa1111_dev *);
- void (*shutdown)(struct sa1111_dev *);
};
#define SA1111_DRV(_d) container_of((_d), struct sa1111_driver, drv)
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index c8781450905b..3ab8b3781bfe 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,8 +161,7 @@
#else
#define VTTBR_X (5 - KVM_T0SZ)
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT _AC(48, ULL)
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 242151ea6908..a9f7d3f47134 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -285,6 +285,11 @@ static inline void kvm_arm_init_debug(void) {}
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
+static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu,
+ struct kvm_run *run)
+{
+ return false;
+}
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 2a029bceaf2f..1a7a17b2a1ba 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -221,7 +221,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
}
#define __HAVE_ARCH_PTE_SPECIAL
-#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY))
#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY))
#define pud_page(pud) pmd_page(__pmd(pud_val(pud)))
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 4d53de308ee0..4d1cc1847edf 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -7,6 +7,7 @@ generated-y += unistd-oabi.h
generated-y += unistd-eabi.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += ioctl.h
generic-y += ipcbuf.h
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 7f4d80c2db6b..0f07579af472 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -300,7 +300,7 @@
mov r2, sp
ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr
ldr lr, [r2, #\offset + S_PC]! @ get pc
- tst r1, #0xcf
+ tst r1, #PSR_I_BIT | 0x0f
bne 1f
msr spsr_cxsf, r1 @ save in spsr_svc
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
@@ -332,7 +332,7 @@
ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr
ldr lr, [sp, #\offset + S_PC] @ get pc
add sp, sp, #\offset + S_SP
- tst r1, #0xcf
+ tst r1, #PSR_I_BIT | 0x0f
bne 1f
msr spsr_cxsf, r1 @ save in spsr_svc
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 1712f132b80d..b83fdc06286a 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -85,7 +85,11 @@
.pushsection .text.fixup,"ax"
.align 4
9001: mov r4, #-EFAULT
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+ ldr r5, [sp, #9*4] @ *err_ptr
+#else
ldr r5, [sp, #8*4] @ *err_ptr
+#endif
str r4, [r5]
ldmia sp, {r1, r2} @ retrieve dst, len
add r2, r2, r1
diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c
index 2555f9056a33..cad7ee8f0d6b 100644
--- a/arch/arm/mach-meson/platsmp.c
+++ b/arch/arm/mach-meson/platsmp.c
@@ -102,7 +102,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible,
scu_base = of_iomap(node, 0);
if (!scu_base) {
- pr_err("Couln't map SCU registers\n");
+ pr_err("Couldn't map SCU registers\n");
return;
}
diff --git a/arch/arm/mach-omap2/cm_common.c b/arch/arm/mach-omap2/cm_common.c
index d555791cf349..83c6fa74cc31 100644
--- a/arch/arm/mach-omap2/cm_common.c
+++ b/arch/arm/mach-omap2/cm_common.c
@@ -68,14 +68,17 @@ void __init omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2)
int cm_split_idlest_reg(struct clk_omap_reg *idlest_reg, s16 *prcm_inst,
u8 *idlest_reg_id)
{
+ int ret;
if (!cm_ll_data->split_idlest_reg) {
WARN_ONCE(1, "cm: %s: no low-level function defined\n",
__func__);
return -EINVAL;
}
- return cm_ll_data->split_idlest_reg(idlest_reg, prcm_inst,
+ ret = cm_ll_data->split_idlest_reg(idlest_reg, prcm_inst,
idlest_reg_id);
+ *prcm_inst -= cm_base.offset;
+ return ret;
}
/**
@@ -337,6 +340,7 @@ int __init omap2_cm_base_init(void)
if (mem) {
mem->pa = res.start + data->offset;
mem->va = data->mem + data->offset;
+ mem->offset = data->offset;
}
data->np = np;
diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c
index 5ac122e88f67..fa7f308c9027 100644
--- a/arch/arm/mach-omap2/omap-secure.c
+++ b/arch/arm/mach-omap2/omap-secure.c
@@ -73,6 +73,27 @@ phys_addr_t omap_secure_ram_mempool_base(void)
return omap_secure_memblock_base;
}
+#if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_PM)
+u32 omap3_save_secure_ram(void __iomem *addr, int size)
+{
+ u32 ret;
+ u32 param[5];
+
+ if (size != OMAP3_SAVE_SECURE_RAM_SZ)
+ return OMAP3_SAVE_SECURE_RAM_SZ;
+
+ param[0] = 4; /* Number of arguments */
+ param[1] = __pa(addr); /* Physical address for saving */
+ param[2] = 0;
+ param[3] = 1;
+ param[4] = 1;
+
+ ret = save_secure_ram_context(__pa(param));
+
+ return ret;
+}
+#endif
+
/**
* rx51_secure_dispatcher: Routine to dispatch secure PPA API calls
* @idx: The PPA API index
diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h
index bae263fba640..c509cde71f93 100644
--- a/arch/arm/mach-omap2/omap-secure.h
+++ b/arch/arm/mach-omap2/omap-secure.h
@@ -31,6 +31,8 @@
/* Maximum Secure memory storage size */
#define OMAP_SECURE_RAM_STORAGE (88 * SZ_1K)
+#define OMAP3_SAVE_SECURE_RAM_SZ 0x803F
+
/* Secure low power HAL API index */
#define OMAP4_HAL_SAVESECURERAM_INDEX 0x1a
#define OMAP4_HAL_SAVEHW_INDEX 0x1b
@@ -65,6 +67,8 @@ extern u32 omap_smc2(u32 id, u32 falg, u32 pargs);
extern u32 omap_smc3(u32 id, u32 process, u32 flag, u32 pargs);
extern phys_addr_t omap_secure_ram_mempool_base(void);
extern int omap_secure_ram_reserve_memblock(void);
+extern u32 save_secure_ram_context(u32 args_pa);
+extern u32 omap3_save_secure_ram(void __iomem *save_regs, int size);
extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs,
u32 arg1, u32 arg2, u32 arg3, u32 arg4);
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index d45cbfdb4be6..f0388058b7da 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -391,10 +391,8 @@ omap_device_copy_resources(struct omap_hwmod *oh,
const char *name;
int error, irq = 0;
- if (!oh || !oh->od || !oh->od->pdev) {
- error = -EINVAL;
- goto error;
- }
+ if (!oh || !oh->od || !oh->od->pdev)
+ return -EINVAL;
np = oh->od->pdev->dev.of_node;
if (!np) {
@@ -516,8 +514,10 @@ struct platform_device __init *omap_device_build(const char *pdev_name,
goto odbs_exit1;
od = omap_device_alloc(pdev, &oh, 1);
- if (IS_ERR(od))
+ if (IS_ERR(od)) {
+ ret = PTR_ERR(od);
goto odbs_exit1;
+ }
ret = platform_device_add_data(pdev, pdata, pdata_len);
if (ret)
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index d2106ae4410a..52c9d585b44d 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -1646,6 +1646,7 @@ static struct omap_hwmod omap3xxx_mmc3_hwmod = {
.main_clk = "mmchs3_fck",
.prcm = {
.omap2 = {
+ .module_offs = CORE_MOD,
.prcm_reg_id = 1,
.module_bit = OMAP3430_EN_MMC3_SHIFT,
.idlest_reg_id = 1,
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h
index b668719b9b25..8e30772cfe32 100644
--- a/arch/arm/mach-omap2/pm.h
+++ b/arch/arm/mach-omap2/pm.h
@@ -81,10 +81,6 @@ extern unsigned int omap3_do_wfi_sz;
/* ... and its pointer from SRAM after copy */
extern void (*omap3_do_wfi_sram)(void);
-/* save_secure_ram_context function pointer and size, for copy to SRAM */
-extern int save_secure_ram_context(u32 *addr);
-extern unsigned int save_secure_ram_context_sz;
-
extern void omap3_save_scratchpad_contents(void);
#define PM_RTA_ERRATUM_i608 (1 << 0)
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 841ba19d64a6..36c55547137c 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -48,6 +48,7 @@
#include "prm3xxx.h"
#include "pm.h"
#include "sdrc.h"
+#include "omap-secure.h"
#include "sram.h"
#include "control.h"
#include "vc.h"
@@ -66,7 +67,6 @@ struct power_state {
static LIST_HEAD(pwrst_list);
-static int (*_omap_save_secure_sram)(u32 *addr);
void (*omap3_do_wfi_sram)(void);
static struct powerdomain *mpu_pwrdm, *neon_pwrdm;
@@ -121,8 +121,8 @@ static void omap3_save_secure_ram_context(void)
* will hang the system.
*/
pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON);
- ret = _omap_save_secure_sram((u32 *)(unsigned long)
- __pa(omap3_secure_ram_storage));
+ ret = omap3_save_secure_ram(omap3_secure_ram_storage,
+ OMAP3_SAVE_SECURE_RAM_SZ);
pwrdm_set_next_pwrst(mpu_pwrdm, mpu_next_state);
/* Following is for error tracking, it should not happen */
if (ret) {
@@ -434,15 +434,10 @@ static int __init pwrdms_setup(struct powerdomain *pwrdm, void *unused)
*
* The minimum set of functions is pushed to SRAM for execution:
* - omap3_do_wfi for erratum i581 WA,
- * - save_secure_ram_context for security extensions.
*/
void omap_push_sram_idle(void)
{
omap3_do_wfi_sram = omap_sram_push(omap3_do_wfi, omap3_do_wfi_sz);
-
- if (omap_type() != OMAP2_DEVICE_TYPE_GP)
- _omap_save_secure_sram = omap_sram_push(save_secure_ram_context,
- save_secure_ram_context_sz);
}
static void __init pm_errata_configure(void)
@@ -553,7 +548,7 @@ int __init omap3_pm_init(void)
clkdm_add_wkdep(neon_clkdm, mpu_clkdm);
if (omap_type() != OMAP2_DEVICE_TYPE_GP) {
omap3_secure_ram_storage =
- kmalloc(0x803F, GFP_KERNEL);
+ kmalloc(OMAP3_SAVE_SECURE_RAM_SZ, GFP_KERNEL);
if (!omap3_secure_ram_storage)
pr_err("Memory allocation failed when allocating for secure sram context\n");
diff --git a/arch/arm/mach-omap2/prcm-common.h b/arch/arm/mach-omap2/prcm-common.h
index 0592b23902c6..0977da0dab76 100644
--- a/arch/arm/mach-omap2/prcm-common.h
+++ b/arch/arm/mach-omap2/prcm-common.h
@@ -528,6 +528,7 @@ struct omap_prcm_irq_setup {
struct omap_domain_base {
u32 pa;
void __iomem *va;
+ s16 offset;
};
/**
diff --git a/arch/arm/mach-omap2/prm33xx.c b/arch/arm/mach-omap2/prm33xx.c
index d2c5bcabdbeb..ebaf80d72a10 100644
--- a/arch/arm/mach-omap2/prm33xx.c
+++ b/arch/arm/mach-omap2/prm33xx.c
@@ -176,17 +176,6 @@ static int am33xx_pwrdm_read_pwrst(struct powerdomain *pwrdm)
return v;
}
-static int am33xx_pwrdm_read_prev_pwrst(struct powerdomain *pwrdm)
-{
- u32 v;
-
- v = am33xx_prm_read_reg(pwrdm->prcm_offs, pwrdm->pwrstst_offs);
- v &= AM33XX_LASTPOWERSTATEENTERED_MASK;
- v >>= AM33XX_LASTPOWERSTATEENTERED_SHIFT;
-
- return v;
-}
-
static int am33xx_pwrdm_set_lowpwrstchange(struct powerdomain *pwrdm)
{
am33xx_prm_rmw_reg_bits(AM33XX_LOWPOWERSTATECHANGE_MASK,
@@ -357,7 +346,6 @@ struct pwrdm_ops am33xx_pwrdm_operations = {
.pwrdm_set_next_pwrst = am33xx_pwrdm_set_next_pwrst,
.pwrdm_read_next_pwrst = am33xx_pwrdm_read_next_pwrst,
.pwrdm_read_pwrst = am33xx_pwrdm_read_pwrst,
- .pwrdm_read_prev_pwrst = am33xx_pwrdm_read_prev_pwrst,
.pwrdm_set_logic_retst = am33xx_pwrdm_set_logic_retst,
.pwrdm_read_logic_pwrst = am33xx_pwrdm_read_logic_pwrst,
.pwrdm_read_logic_retst = am33xx_pwrdm_read_logic_retst,
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index fa5fd24f524c..22daf4efed68 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -93,20 +93,13 @@ ENTRY(enable_omap3630_toggle_l2_on_restore)
ENDPROC(enable_omap3630_toggle_l2_on_restore)
/*
- * Function to call rom code to save secure ram context. This gets
- * relocated to SRAM, so it can be all in .data section. Otherwise
- * we need to initialize api_params separately.
+ * Function to call rom code to save secure ram context.
+ *
+ * r0 = physical address of the parameters
*/
- .data
- .align 3
ENTRY(save_secure_ram_context)
stmfd sp!, {r4 - r11, lr} @ save registers on stack
- adr r3, api_params @ r3 points to parameters
- str r0, [r3,#0x4] @ r0 has sdram address
- ldr r12, high_mask
- and r3, r3, r12
- ldr r12, sram_phy_addr_mask
- orr r3, r3, r12
+ mov r3, r0 @ physical address of parameters
mov r0, #25 @ set service ID for PPA
mov r12, r0 @ copy secure service ID in r12
mov r1, #0 @ set task id for ROM code in r1
@@ -120,18 +113,7 @@ ENTRY(save_secure_ram_context)
nop
nop
ldmfd sp!, {r4 - r11, pc}
- .align
-sram_phy_addr_mask:
- .word SRAM_BASE_P
-high_mask:
- .word 0xffff
-api_params:
- .word 0x4, 0x0, 0x0, 0x1, 0x1
ENDPROC(save_secure_ram_context)
-ENTRY(save_secure_ram_context_sz)
- .word . - save_secure_ram_context
-
- .text
/*
* ======================
diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig
index 36e3c79f4973..07df3a59b13f 100644
--- a/arch/arm/mach-sa1100/Kconfig
+++ b/arch/arm/mach-sa1100/Kconfig
@@ -5,6 +5,7 @@ menu "SA11x0 Implementations"
config SA1100_ASSABET
bool "Assabet"
select ARM_SA1110_CPUFREQ
+ select GPIO_REG
help
Say Y here if you are using the Intel(R) StrongARM(R) SA-1110
Microprocessor Development Board (also known as the Assabet).
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index d28ecb9ef172..f68241d995f2 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
+#include <linux/gpio/gpio-reg.h>
#include <linux/ioport.h>
#include <linux/platform_data/sa11x0-serial.h>
#include <linux/serial_core.h>
@@ -61,20 +62,45 @@
unsigned long SCR_value = ASSABET_SCR_INIT;
EXPORT_SYMBOL(SCR_value);
-static unsigned long BCR_value = ASSABET_BCR_DB1110;
+static struct gpio_chip *assabet_bcr_gc;
+
+static const char *assabet_names[] = {
+ "cf_pwr", "cf_gfx_reset", "nsoft_reset", "irda_fsel",
+ "irda_md0", "irda_md1", "stereo_loopback", "ncf_bus_on",
+ "audio_pwr_on", "light_pwr_on", "lcd16data", "lcd_pwr_on",
+ "rs232_on", "nred_led", "ngreen_led", "vib_on",
+ "com_dtr", "com_rts", "radio_wake_mod", "i2c_enab",
+ "tvir_enab", "qmute", "radio_pwr_on", "spkr_off",
+ "rs232_valid", "com_dcd", "com_cts", "com_dsr",
+ "radio_cts", "radio_dsr", "radio_dcd", "radio_ri",
+};
+/* The old deprecated interface */
void ASSABET_BCR_frob(unsigned int mask, unsigned int val)
{
- unsigned long flags;
+ unsigned long m = mask, v = val;
- local_irq_save(flags);
- BCR_value = (BCR_value & ~mask) | val;
- ASSABET_BCR = BCR_value;
- local_irq_restore(flags);
+ assabet_bcr_gc->set_multiple(assabet_bcr_gc, &m, &v);
}
-
EXPORT_SYMBOL(ASSABET_BCR_frob);
+static int __init assabet_init_gpio(void __iomem *reg, u32 def_val)
+{
+ struct gpio_chip *gc;
+
+ writel_relaxed(def_val, reg);
+
+ gc = gpio_reg_init(NULL, reg, -1, 32, "assabet", 0xff000000, def_val,
+ assabet_names, NULL, NULL);
+
+ if (IS_ERR(gc))
+ return PTR_ERR(gc);
+
+ assabet_bcr_gc = gc;
+
+ return gc->base;
+}
+
/*
* The codec reset goes to three devices, so we need to release
* the rest when any one of these requests it. However, that
@@ -146,7 +172,7 @@ static void adv7171_write(unsigned reg, unsigned val)
unsigned gpdr = GPDR;
unsigned gplr = GPLR;
- ASSABET_BCR = BCR_value | ASSABET_BCR_AUDIO_ON;
+ ASSABET_BCR_frob(ASSABET_BCR_AUDIO_ON, ASSABET_BCR_AUDIO_ON);
udelay(100);
GPCR = SDA | SCK | MOD; /* clear L3 mode to ensure UDA1341 doesn't respond */
@@ -457,14 +483,6 @@ static void __init assabet_init(void)
sa11x0_ppc_configure_mcp();
if (machine_has_neponset()) {
- /*
- * Angel sets this, but other bootloaders may not.
- *
- * This must precede any driver calls to BCR_set()
- * or BCR_clear().
- */
- ASSABET_BCR = BCR_value = ASSABET_BCR_DB1111;
-
#ifndef CONFIG_ASSABET_NEPONSET
printk( "Warning: Neponset detected but full support "
"hasn't been configured in the kernel\n" );
@@ -748,12 +766,31 @@ static int __init assabet_leds_init(void)
fs_initcall(assabet_leds_init);
#endif
+void __init assabet_init_irq(void)
+{
+ u32 def_val;
+
+ sa1100_init_irq();
+
+ if (machine_has_neponset())
+ def_val = ASSABET_BCR_DB1111;
+ else
+ def_val = ASSABET_BCR_DB1110;
+
+ /*
+ * Angel sets this, but other bootloaders may not.
+ *
+ * This must precede any driver calls to BCR_set() or BCR_clear().
+ */
+ assabet_init_gpio((void *)&ASSABET_BCR, def_val);
+}
+
MACHINE_START(ASSABET, "Intel-Assabet")
.atag_offset = 0x100,
.fixup = fixup_assabet,
.map_io = assabet_map_io,
.nr_irqs = SA1100_NR_IRQS,
- .init_irq = sa1100_init_irq,
+ .init_irq = assabet_init_irq,
.init_time = sa1100_timer_init,
.init_machine = assabet_init,
.init_late = sa11x0_init_late,
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index a61a2432766b..b1823f445358 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -3,6 +3,8 @@
* linux/arch/arm/mach-sa1100/neponset.c
*/
#include <linux/err.h>
+#include <linux/gpio/driver.h>
+#include <linux/gpio/gpio-reg.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/irq.h>
@@ -45,10 +47,13 @@
#define IRR_USAR (1 << 1)
#define IRR_SA1111 (1 << 2)
+#define NCR_NGPIO 7
+
#define MDM_CTL0_RTS1 (1 << 0)
#define MDM_CTL0_DTR1 (1 << 1)
#define MDM_CTL0_RTS2 (1 << 2)
#define MDM_CTL0_DTR2 (1 << 3)
+#define MDM_CTL0_NGPIO 4
#define MDM_CTL1_CTS1 (1 << 0)
#define MDM_CTL1_DSR1 (1 << 1)
@@ -56,80 +61,87 @@
#define MDM_CTL1_CTS2 (1 << 3)
#define MDM_CTL1_DSR2 (1 << 4)
#define MDM_CTL1_DCD2 (1 << 5)
+#define MDM_CTL1_NGPIO 6
#define AUD_SEL_1341 (1 << 0)
#define AUD_MUTE_1341 (1 << 1)
+#define AUD_NGPIO 2
extern void sa1110_mb_disable(void);
+#define to_neponset_gpio_chip(x) container_of(x, struct neponset_gpio_chip, gc)
+
+static const char *neponset_ncr_names[] = {
+ "gp01_off", "tp_power", "ms_power", "enet_osc",
+ "spi_kb_wk_up", "a0vpp", "a1vpp"
+};
+
+static const char *neponset_mdmctl0_names[] = {
+ "rts3", "dtr3", "rts1", "dtr1",
+};
+
+static const char *neponset_mdmctl1_names[] = {
+ "cts3", "dsr3", "dcd3", "cts1", "dsr1", "dcd1"
+};
+
+static const char *neponset_aud_names[] = {
+ "sel_1341", "mute_1341",
+};
+
struct neponset_drvdata {
void __iomem *base;
struct platform_device *sa1111;
struct platform_device *smc91x;
unsigned irq_base;
-#ifdef CONFIG_PM_SLEEP
- u32 ncr0;
- u32 mdm_ctl_0;
-#endif
+ struct gpio_chip *gpio[4];
};
-static void __iomem *nep_base;
+static struct neponset_drvdata *nep;
void neponset_ncr_frob(unsigned int mask, unsigned int val)
{
- void __iomem *base = nep_base;
-
- if (base) {
- unsigned long flags;
- unsigned v;
-
- local_irq_save(flags);
- v = readb_relaxed(base + NCR_0);
- writeb_relaxed((v & ~mask) | val, base + NCR_0);
- local_irq_restore(flags);
- } else {
- WARN(1, "nep_base unset\n");
- }
+ struct neponset_drvdata *n = nep;
+ unsigned long m = mask, v = val;
+
+ if (nep)
+ n->gpio[0]->set_multiple(n->gpio[0], &m, &v);
+ else
+ WARN(1, "nep unset\n");
}
EXPORT_SYMBOL(neponset_ncr_frob);
static void neponset_set_mctrl(struct uart_port *port, u_int mctrl)
{
- void __iomem *base = nep_base;
- u_int mdm_ctl0;
+ struct neponset_drvdata *n = nep;
+ unsigned long mask, val = 0;
- if (!base)
+ if (!n)
return;
- mdm_ctl0 = readb_relaxed(base + MDM_CTL_0);
if (port->mapbase == _Ser1UTCR0) {
- if (mctrl & TIOCM_RTS)
- mdm_ctl0 &= ~MDM_CTL0_RTS2;
- else
- mdm_ctl0 |= MDM_CTL0_RTS2;
-
- if (mctrl & TIOCM_DTR)
- mdm_ctl0 &= ~MDM_CTL0_DTR2;
- else
- mdm_ctl0 |= MDM_CTL0_DTR2;
+ mask = MDM_CTL0_RTS2 | MDM_CTL0_DTR2;
+
+ if (!(mctrl & TIOCM_RTS))
+ val |= MDM_CTL0_RTS2;
+
+ if (!(mctrl & TIOCM_DTR))
+ val |= MDM_CTL0_DTR2;
} else if (port->mapbase == _Ser3UTCR0) {
- if (mctrl & TIOCM_RTS)
- mdm_ctl0 &= ~MDM_CTL0_RTS1;
- else
- mdm_ctl0 |= MDM_CTL0_RTS1;
-
- if (mctrl & TIOCM_DTR)
- mdm_ctl0 &= ~MDM_CTL0_DTR1;
- else
- mdm_ctl0 |= MDM_CTL0_DTR1;
+ mask = MDM_CTL0_RTS1 | MDM_CTL0_DTR1;
+
+ if (!(mctrl & TIOCM_RTS))
+ val |= MDM_CTL0_RTS1;
+
+ if (!(mctrl & TIOCM_DTR))
+ val |= MDM_CTL0_DTR1;
}
- writeb_relaxed(mdm_ctl0, base + MDM_CTL_0);
+ n->gpio[1]->set_multiple(n->gpio[1], &mask, &val);
}
static u_int neponset_get_mctrl(struct uart_port *port)
{
- void __iomem *base = nep_base;
+ void __iomem *base = nep->base;
u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR;
u_int mdm_ctl1;
@@ -231,6 +243,22 @@ static struct irq_chip nochip = {
.irq_unmask = nochip_noop,
};
+static int neponset_init_gpio(struct gpio_chip **gcp,
+ struct device *dev, const char *label, void __iomem *reg,
+ unsigned num, bool in, const char *const * names)
+{
+ struct gpio_chip *gc;
+
+ gc = gpio_reg_init(dev, reg, -1, num, label, in ? 0xffffffff : 0,
+ readl_relaxed(reg), names, NULL, NULL);
+ if (IS_ERR(gc))
+ return PTR_ERR(gc);
+
+ *gcp = gc;
+
+ return 0;
+}
+
static struct sa1111_platform_data sa1111_info = {
.disable_devs = SA1111_DEVID_PS2_MSE,
};
@@ -274,7 +302,7 @@ static int neponset_probe(struct platform_device *dev)
};
int ret, irq;
- if (nep_base)
+ if (nep)
return -EBUSY;
irq = ret = platform_get_irq(dev, 0);
@@ -330,6 +358,22 @@ static int neponset_probe(struct platform_device *dev)
irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
irq_set_chained_handler_and_data(irq, neponset_irq_handler, d);
+ /* Disable GPIO 0/1 drivers so the buttons work on the Assabet */
+ writeb_relaxed(NCR_GP01_OFF, d->base + NCR_0);
+
+ neponset_init_gpio(&d->gpio[0], &dev->dev, "neponset-ncr",
+ d->base + NCR_0, NCR_NGPIO, false,
+ neponset_ncr_names);
+ neponset_init_gpio(&d->gpio[1], &dev->dev, "neponset-mdm-ctl0",
+ d->base + MDM_CTL_0, MDM_CTL0_NGPIO, false,
+ neponset_mdmctl0_names);
+ neponset_init_gpio(&d->gpio[2], &dev->dev, "neponset-mdm-ctl1",
+ d->base + MDM_CTL_1, MDM_CTL1_NGPIO, true,
+ neponset_mdmctl1_names);
+ neponset_init_gpio(&d->gpio[3], &dev->dev, "neponset-aud-ctl",
+ d->base + AUD_CTL, AUD_NGPIO, false,
+ neponset_aud_names);
+
/*
* We would set IRQ_GPIO25 to be a wake-up IRQ, but unfortunately
* something on the Neponset activates this IRQ on sleep (eth?)
@@ -340,16 +384,13 @@ static int neponset_probe(struct platform_device *dev)
dev_info(&dev->dev, "Neponset daughter board, providing IRQ%u-%u\n",
d->irq_base, d->irq_base + NEP_IRQ_NR - 1);
- nep_base = d->base;
+ nep = d;
sa1100_register_uart_fns(&neponset_port_fns);
/* Ensure that the memory bus request/grant signals are setup */
sa1110_mb_disable();
- /* Disable GPIO 0/1 drivers so the buttons work on the Assabet */
- writeb_relaxed(NCR_GP01_OFF, d->base + NCR_0);
-
sa1111_resources[0].parent = sa1111_res;
sa1111_resources[1].start = d->irq_base + NEP_IRQ_SA1111;
sa1111_resources[1].end = d->irq_base + NEP_IRQ_SA1111;
@@ -385,7 +426,7 @@ static int neponset_remove(struct platform_device *dev)
platform_device_unregister(d->smc91x);
irq_set_chained_handler(irq, NULL);
irq_free_descs(d->irq_base, NEP_IRQ_NR);
- nep_base = NULL;
+ nep = NULL;
iounmap(d->base);
kfree(d);
@@ -393,30 +434,22 @@ static int neponset_remove(struct platform_device *dev)
}
#ifdef CONFIG_PM_SLEEP
-static int neponset_suspend(struct device *dev)
-{
- struct neponset_drvdata *d = dev_get_drvdata(dev);
-
- d->ncr0 = readb_relaxed(d->base + NCR_0);
- d->mdm_ctl_0 = readb_relaxed(d->base + MDM_CTL_0);
-
- return 0;
-}
-
static int neponset_resume(struct device *dev)
{
struct neponset_drvdata *d = dev_get_drvdata(dev);
+ int i, ret = 0;
- writeb_relaxed(d->ncr0, d->base + NCR_0);
- writeb_relaxed(d->mdm_ctl_0, d->base + MDM_CTL_0);
+ for (i = 0; i < ARRAY_SIZE(d->gpio); i++) {
+ ret = gpio_reg_resume(d->gpio[i]);
+ if (ret)
+ break;
+ }
- return 0;
+ return ret;
}
static const struct dev_pm_ops neponset_pm_ops = {
- .suspend_noirq = neponset_suspend,
.resume_noirq = neponset_resume,
- .freeze_noirq = neponset_suspend,
.restore_noirq = neponset_resume,
};
#define PM_OPS &neponset_pm_ops
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index c199990e12b6..323a4df59a6c 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -27,14 +27,58 @@
int bpf_jit_enable __read_mostly;
+/*
+ * eBPF prog stack layout:
+ *
+ * high
+ * original ARM_SP => +-----+
+ * | | callee saved registers
+ * +-----+ <= (BPF_FP + SCRATCH_SIZE)
+ * | ... | eBPF JIT scratch space
+ * eBPF fp register => +-----+
+ * (BPF_FP) | ... | eBPF prog stack
+ * +-----+
+ * |RSVD | JIT scratchpad
+ * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
+ * | |
+ * | ... | Function call stack
+ * | |
+ * +-----+
+ * low
+ *
+ * The callee saved registers depends on whether frame pointers are enabled.
+ * With frame pointers (to be compliant with the ABI):
+ *
+ * high
+ * original ARM_SP => +------------------+ \
+ * | pc | |
+ * current ARM_FP => +------------------+ } callee saved registers
+ * |r4-r8,r10,fp,ip,lr| |
+ * +------------------+ /
+ * low
+ *
+ * Without frame pointers:
+ *
+ * high
+ * original ARM_SP => +------------------+
+ * | r4-r8,r10,fp,lr | callee saved registers
+ * current ARM_FP => +------------------+
+ * low
+ *
+ * When popping registers off the stack at the end of a BPF function, we
+ * reference them via the current ARM_FP register.
+ */
+#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
+ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \
+ 1 << ARM_FP)
+#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
+#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC)
+
#define STACK_OFFSET(k) (k)
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */
#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */
-/* Flags used for JIT optimization */
-#define SEEN_CALL (1 << 0)
-
#define FLAG_IMM_OVERFLOW (1 << 0)
/*
@@ -95,7 +139,6 @@ static const u8 bpf2a32[][2] = {
* idx : index of current last JITed instruction.
* prologue_bytes : bytes used in prologue.
* epilogue_offset : offset of epilogue starting.
- * seen : bit mask used for JIT optimization.
* offsets : array of eBPF instruction offsets in
* JITed code.
* target : final JITed code.
@@ -110,7 +153,6 @@ struct jit_ctx {
unsigned int idx;
unsigned int prologue_bytes;
unsigned int epilogue_offset;
- u32 seen;
u32 flags;
u32 *offsets;
u32 *target;
@@ -179,8 +221,13 @@ static void jit_fill_hole(void *area, unsigned int size)
*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
}
-/* Stack must be multiples of 16 Bytes */
-#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
+#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
+/* EABI requires the stack to be aligned to 64-bit boundaries */
+#define STACK_ALIGNMENT 8
+#else
+/* Stack must be aligned to 32-bit boundaries */
+#define STACK_ALIGNMENT 4
+#endif
/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
* BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
@@ -194,7 +241,7 @@ static void jit_fill_hole(void *area, unsigned int size)
+ SCRATCH_SIZE + \
+ 4 /* extra for skb_copy_bits buffer */)
-#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
+#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off) (STACK_SIZE-off-4)
@@ -285,16 +332,19 @@ static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
emit_mov_i_no8m(rd, val, ctx);
}
-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
+static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
- ctx->seen |= SEEN_CALL;
-#if __LINUX_ARM_ARCH__ < 5
- emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
-
if (elf_hwcap & HWCAP_THUMB)
emit(ARM_BX(tgt_reg), ctx);
else
emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
+}
+
+static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
+{
+#if __LINUX_ARM_ARCH__ < 5
+ emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+ emit_bx_r(tgt_reg, ctx);
#else
emit(ARM_BLX_R(tgt_reg), ctx);
#endif
@@ -354,7 +404,6 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
}
/* Call appropriate function */
- ctx->seen |= SEEN_CALL;
emit_mov_i(ARM_IP, op == BPF_DIV ?
(u32)jit_udiv32 : (u32)jit_mod32, ctx);
emit_blx_r(ARM_IP, ctx);
@@ -620,8 +669,6 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
/* Do LSH operation */
emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
- /* As we are using ARM_LR */
- ctx->seen |= SEEN_CALL;
emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
@@ -656,8 +703,6 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
/* Do the ARSH operation */
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
- /* As we are using ARM_LR */
- ctx->seen |= SEEN_CALL;
emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
_emit(ARM_COND_MI, ARM_B(0), ctx);
@@ -692,8 +737,6 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
/* Do LSH operation */
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
- /* As we are using ARM_LR */
- ctx->seen |= SEEN_CALL;
emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
@@ -828,8 +871,6 @@ static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
/* Do Multiplication */
emit(ARM_MUL(ARM_IP, rd, rn), ctx);
emit(ARM_MUL(ARM_LR, rm, rt), ctx);
- /* As we are using ARM_LR */
- ctx->seen |= SEEN_CALL;
emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
@@ -872,33 +913,53 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
}
/* dst = *(size*)(src + off) */
-static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
- const s32 off, struct jit_ctx *ctx, const u8 sz){
+static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
+ s32 off, struct jit_ctx *ctx, const u8 sz){
const u8 *tmp = bpf2a32[TMP_REG_1];
- u8 rd = dstk ? tmp[1] : dst;
+ const u8 *rd = dstk ? tmp : dst;
u8 rm = src;
+ s32 off_max;
- if (off) {
+ if (sz == BPF_H)
+ off_max = 0xff;
+ else
+ off_max = 0xfff;
+
+ if (off < 0 || off > off_max) {
emit_a32_mov_i(tmp[0], off, false, ctx);
emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
rm = tmp[0];
+ off = 0;
+ } else if (rd[1] == rm) {
+ emit(ARM_MOV_R(tmp[0], rm), ctx);
+ rm = tmp[0];
}
switch (sz) {
- case BPF_W:
- /* Load a Word */
- emit(ARM_LDR_I(rd, rm, 0), ctx);
+ case BPF_B:
+ /* Load a Byte */
+ emit(ARM_LDRB_I(rd[1], rm, off), ctx);
+ emit_a32_mov_i(dst[0], 0, dstk, ctx);
break;
case BPF_H:
/* Load a HalfWord */
- emit(ARM_LDRH_I(rd, rm, 0), ctx);
+ emit(ARM_LDRH_I(rd[1], rm, off), ctx);
+ emit_a32_mov_i(dst[0], 0, dstk, ctx);
break;
- case BPF_B:
- /* Load a Byte */
- emit(ARM_LDRB_I(rd, rm, 0), ctx);
+ case BPF_W:
+ /* Load a Word */
+ emit(ARM_LDR_I(rd[1], rm, off), ctx);
+ emit_a32_mov_i(dst[0], 0, dstk, ctx);
+ break;
+ case BPF_DW:
+ /* Load a Double Word */
+ emit(ARM_LDR_I(rd[1], rm, off), ctx);
+ emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
break;
}
if (dstk)
- emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+ emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx);
+ if (dstk && sz == BPF_DW)
+ emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx);
}
/* Arithmatic Operation */
@@ -906,7 +967,6 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
const u8 rn, struct jit_ctx *ctx, u8 op) {
switch (op) {
case BPF_JSET:
- ctx->seen |= SEEN_CALL;
emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
@@ -945,7 +1005,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
const u8 *tcc = bpf2a32[TCALL_CNT];
const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
-#define jmp_offset (out_offset - (cur_offset))
+#define jmp_offset (out_offset - (cur_offset) - 2)
u32 off, lo, hi;
/* if (index >= array->map.max_entries)
@@ -956,7 +1016,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit_a32_mov_i(tmp[1], off, false, ctx);
emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
- /* index (64 bit) */
+ /* index is 32-bit for arrays */
emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
/* index >= array->map.max_entries */
emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
@@ -997,7 +1057,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit_a32_mov_i(tmp2[1], off, false, ctx);
emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
- emit(ARM_BX(tmp[1]), ctx);
+ emit_bx_r(tmp[1], ctx);
/* out: */
if (out_offset == -1)
@@ -1070,54 +1130,22 @@ static void build_prologue(struct jit_ctx *ctx)
const u8 r2 = bpf2a32[BPF_REG_1][1];
const u8 r3 = bpf2a32[BPF_REG_1][0];
const u8 r4 = bpf2a32[BPF_REG_6][1];
- const u8 r5 = bpf2a32[BPF_REG_6][0];
- const u8 r6 = bpf2a32[TMP_REG_1][1];
- const u8 r7 = bpf2a32[TMP_REG_1][0];
- const u8 r8 = bpf2a32[TMP_REG_2][1];
- const u8 r10 = bpf2a32[TMP_REG_2][0];
const u8 fplo = bpf2a32[BPF_REG_FP][1];
const u8 fphi = bpf2a32[BPF_REG_FP][0];
- const u8 sp = ARM_SP;
const u8 *tcc = bpf2a32[TCALL_CNT];
- u16 reg_set = 0;
-
- /*
- * eBPF prog stack layout
- *
- * high
- * original ARM_SP => +-----+ eBPF prologue
- * |FP/LR|
- * current ARM_FP => +-----+
- * | ... | callee saved registers
- * eBPF fp register => +-----+ <= (BPF_FP)
- * | ... | eBPF JIT scratch space
- * | | eBPF prog stack
- * +-----+
- * |RSVD | JIT scratchpad
- * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE)
- * | |
- * | ... | Function call stack
- * | |
- * +-----+
- * low
- */
-
/* Save callee saved registers. */
- reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
#ifdef CONFIG_FRAME_POINTER
- reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
- emit(ARM_MOV_R(ARM_IP, sp), ctx);
+ u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
+ emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
emit(ARM_PUSH(reg_set), ctx);
emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
#else
- /* Check if call instruction exists in BPF body */
- if (ctx->seen & SEEN_CALL)
- reg_set |= (1<<ARM_LR);
- emit(ARM_PUSH(reg_set), ctx);
+ emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
+ emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
#endif
/* Save frame pointer for later */
- emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
+ emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
ctx->stack_size = imm8m(STACK_SIZE);
@@ -1140,33 +1168,19 @@ static void build_prologue(struct jit_ctx *ctx)
/* end of prologue */
}
+/* restore callee saved registers. */
static void build_epilogue(struct jit_ctx *ctx)
{
- const u8 r4 = bpf2a32[BPF_REG_6][1];
- const u8 r5 = bpf2a32[BPF_REG_6][0];
- const u8 r6 = bpf2a32[TMP_REG_1][1];
- const u8 r7 = bpf2a32[TMP_REG_1][0];
- const u8 r8 = bpf2a32[TMP_REG_2][1];
- const u8 r10 = bpf2a32[TMP_REG_2][0];
- u16 reg_set = 0;
-
- /* unwind function call stack */
- emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
-
- /* restore callee saved registers. */
- reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
#ifdef CONFIG_FRAME_POINTER
- /* the first instruction of the prologue was: mov ip, sp */
- reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
+ /* When using frame pointers, some additional registers need to
+ * be loaded. */
+ u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
+ emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
emit(ARM_LDM(ARM_SP, reg_set), ctx);
#else
- if (ctx->seen & SEEN_CALL)
- reg_set |= (1<<ARM_PC);
/* Restore callee saved registers. */
- emit(ARM_POP(reg_set), ctx);
- /* Return back to the callee function */
- if (!(ctx->seen & SEEN_CALL))
- emit(ARM_BX(ARM_LR), ctx);
+ emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
+ emit(ARM_POP(CALLEE_POP_MASK), ctx);
#endif
}
@@ -1394,8 +1408,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_rev32(rt, rt, ctx);
goto emit_bswap_uxt;
case 64:
- /* Because of the usage of ARM_LR */
- ctx->seen |= SEEN_CALL;
emit_rev32(ARM_LR, rt, ctx);
emit_rev32(rt, rd, ctx);
emit(ARM_MOV_R(rd, ARM_LR), ctx);
@@ -1448,22 +1460,7 @@ exit:
rn = sstk ? tmp2[1] : src_lo;
if (sstk)
emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
- switch (BPF_SIZE(code)) {
- case BPF_W:
- /* Load a Word */
- case BPF_H:
- /* Load a Half-Word */
- case BPF_B:
- /* Load a Byte */
- emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
- emit_a32_mov_i(dst_hi, 0, dstk, ctx);
- break;
- case BPF_DW:
- /* Load a double word */
- emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
- emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
- break;
- }
+ emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
break;
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
case BPF_LD | BPF_ABS | BPF_W:
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a93339f5178f..c9a7e9e1414f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065
If unsure, say Y.
-
config SOCIONEXT_SYNQUACER_PREITS
bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
default y
@@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802
a 128kB offset to be applied to the target address in this commands.
If unsure, say Y.
+
+config QCOM_FALKOR_ERRATUM_E1041
+ bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
+ default y
+ help
+ Falkor CPU may speculatively fetch instructions from an improper
+ memory location when MMU translation is changed from SCTLR_ELn[M]=1
+ to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem.
+
+ If unsure, say Y.
+
endmenu
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b35788c909f1..b481b4a7c011 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -83,9 +83,6 @@ endif
ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
-ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
-KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o
-endif
endif
# Default value
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index d7c22d51bc50..4aa50b9b26bc 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -12,6 +12,7 @@ subdir-y += cavium
subdir-y += exynos
subdir-y += freescale
subdir-y += hisilicon
+subdir-y += lg
subdir-y += marvell
subdir-y += mediatek
subdir-y += nvidia
@@ -22,5 +23,4 @@ subdir-y += rockchip
subdir-y += socionext
subdir-y += sprd
subdir-y += xilinx
-subdir-y += lg
subdir-y += zte
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
index ead895a4e9a5..1fb8b9d6cb4e 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
@@ -753,12 +753,12 @@
&uart_B {
clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>;
- clock-names = "xtal", "core", "baud";
+ clock-names = "xtal", "pclk", "baud";
};
&uart_C {
clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>;
- clock-names = "xtal", "core", "baud";
+ clock-names = "xtal", "pclk", "baud";
};
&vpu {
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
index 8ed981f59e5a..6524b89e7115 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
@@ -688,7 +688,7 @@
&uart_A {
clocks = <&xtal>, <&clkc CLKID_UART0>, <&xtal>;
- clock-names = "xtal", "core", "baud";
+ clock-names = "xtal", "pclk", "baud";
};
&uart_AO {
@@ -703,12 +703,12 @@
&uart_B {
clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>;
- clock-names = "xtal", "core", "baud";
+ clock-names = "xtal", "pclk", "baud";
};
&uart_C {
clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>;
- clock-names = "xtal", "core", "baud";
+ clock-names = "xtal", "pclk", "baud";
};
&vpu {
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
index dd7193acc7df..6bdefb26b329 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
@@ -40,7 +40,6 @@
};
&ethsc {
- interrupt-parent = <&gpio>;
interrupts = <0 8>;
};
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
index d99e3731358c..254d6795c67e 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
@@ -40,7 +40,6 @@
};
&ethsc {
- interrupt-parent = <&gpio>;
interrupts = <0 8>;
};
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
index 864feeb35180..f9f06fcfb94a 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
@@ -38,8 +38,7 @@
};
&ethsc {
- interrupt-parent = <&gpio>;
- interrupts = <0 8>;
+ interrupts = <4 8>;
};
&serial0 {
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index aef72d886677..8b168280976f 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -512,4 +512,14 @@ alternative_else_nop_endif
#endif
.endm
+/**
+ * Errata workaround prior to disable MMU. Insert an ISB immediately prior
+ * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
+ */
+ .macro pre_disable_mmu_workaround
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
+ isb
+#endif
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 76d1cc85d5b1..955130762a3c 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -38,7 +38,7 @@
*
* See Documentation/cachetlb.txt for more information. Please note that
* the implementation assumes non-aliasing VIPT D-cache and (aliasing)
- * VIPT or ASID-tagged VIVT I-cache.
+ * VIPT I-cache.
*
* flush_cache_mm(mm)
*
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index ac67cfc2585a..060e3a4008ab 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -60,6 +60,9 @@ enum ftr_type {
#define FTR_VISIBLE true /* Feature visible to the user space */
#define FTR_HIDDEN false /* Feature is hidden from the user */
+#define FTR_VISIBLE_IF_IS_ENABLED(config) \
+ (IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)
+
struct arm64_ftr_bits {
bool sign; /* Value is signed ? */
bool visible;
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 235e77d98261..cbf08d7cbf30 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -91,6 +91,7 @@
#define BRCM_CPU_PART_VULCAN 0x516
#define QCOM_CPU_PART_FALKOR_V1 0x800
+#define QCOM_CPU_PART_FALKOR 0xC00
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
@@ -99,6 +100,7 @@
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
+#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 650344d01124..c4cd5081d78b 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -132,11 +132,9 @@ static inline void efi_set_pgd(struct mm_struct *mm)
* Defer the switch to the current thread's TTBR0_EL1
* until uaccess_enable(). Restore the current
* thread's saved ttbr0 corresponding to its active_mm
- * (if different from init_mm).
*/
cpu_set_reserved_ttbr0();
- if (current->active_mm != &init_mm)
- update_saved_ttbr0(current, current->active_mm);
+ update_saved_ttbr0(current, current->active_mm);
}
}
}
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 7f069ff37f06..715d395ef45b 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -170,8 +170,7 @@
#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 674912d7a571..ea6cb5b24258 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -370,6 +370,7 @@ void kvm_arm_init_debug(void);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 3257895a9b5e..9d155fa9a507 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -156,29 +156,21 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; })
-/*
- * This is called when "tsk" is about to enter lazy TLB mode.
- *
- * mm: describes the currently active mm context
- * tsk: task which is entering lazy tlb
- * cpu: cpu number which is entering lazy tlb
- *
- * tsk->mm will be NULL
- */
-static inline void
-enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
-
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void update_saved_ttbr0(struct task_struct *tsk,
struct mm_struct *mm)
{
- if (system_uses_ttbr0_pan()) {
- BUG_ON(mm->pgd == swapper_pg_dir);
- task_thread_info(tsk)->ttbr0 =
- virt_to_phys(mm->pgd) | ASID(mm) << 48;
- }
+ u64 ttbr;
+
+ if (!system_uses_ttbr0_pan())
+ return;
+
+ if (mm == &init_mm)
+ ttbr = __pa_symbol(empty_zero_page);
+ else
+ ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
+
+ task_thread_info(tsk)->ttbr0 = ttbr;
}
#else
static inline void update_saved_ttbr0(struct task_struct *tsk,
@@ -187,6 +179,16 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
}
#endif
+static inline void
+enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+ /*
+ * We don't actually care about the ttbr0 mapping, so point it at the
+ * zero page.
+ */
+ update_saved_ttbr0(tsk, &init_mm);
+}
+
static inline void __switch_mm(struct mm_struct *next)
{
unsigned int cpu = smp_processor_id();
@@ -214,11 +216,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
* Update the saved TTBR0_EL1 of the scheduled-in task as the previous
* value may have not been initialised yet (activate_mm caller) or the
* ASID has changed since the last run (following the context switch
- * of another thread of the same process). Avoid setting the reserved
- * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit).
+ * of another thread of the same process).
*/
- if (next != &init_mm)
- update_saved_ttbr0(tsk, next);
+ update_saved_ttbr0(tsk, next);
}
#define deactivate_mm(tsk,mm) do { } while (0)
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 19bd97671bb8..4f766178fa6f 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -32,7 +32,7 @@ struct mod_arch_specific {
struct mod_plt_sec init;
/* for CONFIG_DYNAMIC_FTRACE */
- void *ftrace_trampoline;
+ struct plt_entry *ftrace_trampoline;
};
#endif
@@ -45,4 +45,48 @@ extern u64 module_alloc_base;
#define module_alloc_base ((u64)_etext - MODULES_VSIZE)
#endif
+struct plt_entry {
+ /*
+ * A program that conforms to the AArch64 Procedure Call Standard
+ * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+ * IP1 (x17) may be inserted at any branch instruction that is
+ * exposed to a relocation that supports long branches. Since that
+ * is exactly what we are dealing with here, we are free to use x16
+ * as a scratch register in the PLT veneers.
+ */
+ __le32 mov0; /* movn x16, #0x.... */
+ __le32 mov1; /* movk x16, #0x...., lsl #16 */
+ __le32 mov2; /* movk x16, #0x...., lsl #32 */
+ __le32 br; /* br x16 */
+};
+
+static inline struct plt_entry get_plt_entry(u64 val)
+{
+ /*
+ * MOVK/MOVN/MOVZ opcode:
+ * +--------+------------+--------+-----------+-------------+---------+
+ * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+ * +--------+------------+--------+-----------+-------------+---------+
+ *
+ * Rd := 0x10 (x16)
+ * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+ * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+ * sf := 1 (64-bit variant)
+ */
+ return (struct plt_entry){
+ cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5),
+ cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+ cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+ cpu_to_le32(0xd61f0200)
+ };
+}
+
+static inline bool plt_entries_equal(const struct plt_entry *a,
+ const struct plt_entry *b)
+{
+ return a->mov0 == b->mov0 &&
+ a->mov1 == b->mov1 &&
+ a->mov2 == b->mov2;
+}
+
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 8d5cbec17d80..f9ccc36d3dc3 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -18,6 +18,7 @@
#define __ASM_PERF_EVENT_H
#include <asm/stack_pointer.h>
+#include <asm/ptrace.h>
#define ARMV8_PMU_MAX_COUNTERS 32
#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
@@ -79,6 +80,7 @@ struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
#endif
#define perf_arch_fetch_caller_regs(regs, __ip) { \
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c9530b5b5ca8..bdcc7f1c9d06 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -42,6 +42,8 @@
#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
#include <linux/mmdebug.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
extern void __pte_error(const char *file, int line, unsigned long val);
extern void __pmd_error(const char *file, int line, unsigned long val);
@@ -149,12 +151,20 @@ static inline pte_t pte_mkwrite(pte_t pte)
static inline pte_t pte_mkclean(pte_t pte)
{
- return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+ pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+ pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+ return pte;
}
static inline pte_t pte_mkdirty(pte_t pte)
{
- return set_pte_bit(pte, __pgprot(PTE_DIRTY));
+ pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+ if (pte_write(pte))
+ pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+ return pte;
}
static inline pte_t pte_mkold(pte_t pte)
@@ -207,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
}
}
-struct mm_struct;
-struct vm_area_struct;
-
extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
/*
@@ -238,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
- if (pte_valid(*ptep) && pte_valid(pte)) {
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+ (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(*ptep), pte_val(pte));
@@ -345,7 +353,6 @@ static inline int pmd_protnone(pmd_t pmd)
#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
-#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
@@ -642,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * ptep_set_wrprotect - mark read-only while preserving the hardware update of
- * the Access Flag.
+ * ptep_set_wrprotect - mark read-only while trasferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
*/
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
pte_t old_pte, pte;
- /*
- * ptep_set_wrprotect() is only called on CoW mappings which are
- * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
- * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
- * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
- * protection_map[]. There is no race with the hardware update of the
- * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
- * is set.
- */
- VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
- "%s: potential race with hardware DBM", __func__);
pte = READ_ONCE(*ptep);
do {
old_pte = pte;
+ /*
+ * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+ * clear), set the PTE_DIRTY bit.
+ */
+ if (pte_hw_dirty(pte))
+ pte = pte_mkdirty(pte);
pte = pte_wrprotect(pte);
pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
pte_val(old_pte), pte_val(pte));
diff --git a/arch/arm64/include/uapi/asm/bpf_perf_event.h b/arch/arm64/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..b551b741653d
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 8265dd790895..067baace74a0 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
endif
-
-# will be included by each individual module but not by the core kernel itself
-extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 65f42d257414..2a752cb2a0f3 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart)
mrs x12, sctlr_el1
ldr x13, =SCTLR_ELx_FLAGS
bic x12, x12, x13
+ pre_disable_mmu_workaround
msr sctlr_el1, x12
isb
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index d16978213c5b..ea001241bdd4 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops;
const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
-static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
&smp_spin_table_ops,
&cpu_psci_ops,
NULL,
};
-static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
&acpi_parking_protocol_ops,
#endif
@@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
static const struct cpu_operations * __init cpu_get_ops(const char *name)
{
- const struct cpu_operations **ops;
+ const struct cpu_operations *const *ops;
ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c5ba0097887f..a73a5928f09b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 4e6ad355bd05..6b9736c3fb56 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -96,6 +96,7 @@ ENTRY(entry)
mrs x0, sctlr_el2
bic x0, x0, #1 << 0 // clear SCTLR.M
bic x0, x0, #1 << 2 // clear SCTLR.C
+ pre_disable_mmu_workaround
msr sctlr_el2, x0
isb
b 2f
@@ -103,6 +104,7 @@ ENTRY(entry)
mrs x0, sctlr_el1
bic x0, x0, #1 << 0 // clear SCTLR.M
bic x0, x0, #1 << 2 // clear SCTLR.C
+ pre_disable_mmu_workaround
msr sctlr_el1, x0
isb
2:
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 143b3e72c25e..fae81f7964b4 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -114,7 +114,12 @@
* returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
*/
-static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+struct fpsimd_last_state_struct {
+ struct fpsimd_state *st;
+ bool sve_in_use;
+};
+
+static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
/* Default VL for tasks that don't set it explicitly: */
static int sve_default_vl = -1;
@@ -905,7 +910,7 @@ void fpsimd_thread_switch(struct task_struct *next)
*/
struct fpsimd_state *st = &next->thread.fpsimd_state;
- if (__this_cpu_read(fpsimd_last_state) == st
+ if (__this_cpu_read(fpsimd_last_state.st) == st
&& st->cpu == smp_processor_id())
clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
else
@@ -992,6 +997,21 @@ void fpsimd_signal_preserve_current_state(void)
}
/*
+ * Associate current's FPSIMD context with this cpu
+ * Preemption must be disabled when calling this function.
+ */
+static void fpsimd_bind_to_cpu(void)
+{
+ struct fpsimd_last_state_struct *last =
+ this_cpu_ptr(&fpsimd_last_state);
+ struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+ last->st = st;
+ last->sve_in_use = test_thread_flag(TIF_SVE);
+ st->cpu = smp_processor_id();
+}
+
+/*
* Load the userland FPSIMD state of 'current' from memory, but only if the
* FPSIMD state already held in the registers is /not/ the most recent FPSIMD
* state of 'current'
@@ -1004,11 +1024,8 @@ void fpsimd_restore_current_state(void)
local_bh_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
- struct fpsimd_state *st = &current->thread.fpsimd_state;
-
task_fpsimd_load();
- __this_cpu_write(fpsimd_last_state, st);
- st->cpu = smp_processor_id();
+ fpsimd_bind_to_cpu();
}
local_bh_enable();
@@ -1026,18 +1043,14 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
local_bh_disable();
- if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
- current->thread.fpsimd_state = *state;
+ current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
+ if (system_supports_sve() && test_thread_flag(TIF_SVE))
fpsimd_to_sve(current);
- }
- task_fpsimd_load();
- if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
- struct fpsimd_state *st = &current->thread.fpsimd_state;
+ task_fpsimd_load();
- __this_cpu_write(fpsimd_last_state, st);
- st->cpu = smp_processor_id();
- }
+ if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
+ fpsimd_bind_to_cpu();
local_bh_enable();
}
@@ -1052,7 +1065,7 @@ void fpsimd_flush_task_state(struct task_struct *t)
static inline void fpsimd_flush_cpu_state(void)
{
- __this_cpu_write(fpsimd_last_state, NULL);
+ __this_cpu_write(fpsimd_last_state.st, NULL);
}
/*
@@ -1065,14 +1078,10 @@ static inline void fpsimd_flush_cpu_state(void)
#ifdef CONFIG_ARM64_SVE
void sve_flush_cpu_state(void)
{
- struct fpsimd_state *const fpstate = __this_cpu_read(fpsimd_last_state);
- struct task_struct *tsk;
-
- if (!fpstate)
- return;
+ struct fpsimd_last_state_struct const *last =
+ this_cpu_ptr(&fpsimd_last_state);
- tsk = container_of(fpstate, struct task_struct, thread.fpsimd_state);
- if (test_tsk_thread_flag(tsk, TIF_SVE))
+ if (last->st && last->sve_in_use)
fpsimd_flush_cpu_state();
}
#endif /* CONFIG_ARM64_SVE */
@@ -1267,7 +1276,7 @@ static inline void fpsimd_pm_init(void) { }
#ifdef CONFIG_HOTPLUG_CPU
static int fpsimd_cpu_dead(unsigned int cpu)
{
- per_cpu(fpsimd_last_state, cpu) = NULL;
+ per_cpu(fpsimd_last_state.st, cpu) = NULL;
return 0;
}
diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S
deleted file mode 100644
index 00c4025be4ff..000000000000
--- a/arch/arm64/kernel/ftrace-mod.S
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
- .section ".text.ftrace_trampoline", "ax"
- .align 3
-0: .quad 0
-__ftrace_trampoline:
- ldr x16, 0b
- br x16
-ENDPROC(__ftrace_trampoline)
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index c13b1fca0e5b..50986e388d2b 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
if (offset < -SZ_128M || offset >= SZ_128M) {
#ifdef CONFIG_ARM64_MODULE_PLTS
- unsigned long *trampoline;
+ struct plt_entry trampoline;
struct module *mod;
/*
@@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* is added in the future, but for now, the pr_err() below
* deals with a theoretical issue only.
*/
- trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
- if (trampoline[0] != addr) {
- if (trampoline[0] != 0) {
+ trampoline = get_plt_entry(addr);
+ if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+ &trampoline)) {
+ if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+ &(struct plt_entry){})) {
pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
return -EINVAL;
}
/* point the trampoline to our ftrace entry point */
module_disable_ro(mod);
- trampoline[0] = addr;
+ *mod->arch.ftrace_trampoline = trampoline;
module_enable_ro(mod, true);
/* update trampoline before patching in the branch */
smp_wmb();
}
- addr = (unsigned long)&trampoline[1];
+ addr = (unsigned long)(void *)mod->arch.ftrace_trampoline;
#else /* CONFIG_ARM64_MODULE_PLTS */
return -EINVAL;
#endif /* CONFIG_ARM64_MODULE_PLTS */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 67e86a0f57ac..e3cb9fbf96b6 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -750,6 +750,7 @@ __primary_switch:
* to take into account by discarding the current kernel mapping and
* creating a new one.
*/
+ pre_disable_mmu_workaround
msr sctlr_el1, x20 // disable the MMU
isb
bl __create_page_tables // recreate kernel mapping
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 749f81779420..74bb56f656ef 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -28,6 +28,7 @@
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/smp.h>
+#include <linux/uaccess.h>
#include <asm/compat.h>
#include <asm/current.h>
@@ -36,7 +37,6 @@
#include <asm/traps.h>
#include <asm/cputype.h>
#include <asm/system_misc.h>
-#include <asm/uaccess.h>
/* Breakpoint currently in use for each BRP. */
static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index d05dbe658409..ea640f92fe5a 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -11,21 +11,6 @@
#include <linux/module.h>
#include <linux/sort.h>
-struct plt_entry {
- /*
- * A program that conforms to the AArch64 Procedure Call Standard
- * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
- * IP1 (x17) may be inserted at any branch instruction that is
- * exposed to a relocation that supports long branches. Since that
- * is exactly what we are dealing with here, we are free to use x16
- * as a scratch register in the PLT veneers.
- */
- __le32 mov0; /* movn x16, #0x.... */
- __le32 mov1; /* movk x16, #0x...., lsl #16 */
- __le32 mov2; /* movk x16, #0x...., lsl #32 */
- __le32 br; /* br x16 */
-};
-
static bool in_init(const struct module *mod, void *loc)
{
return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
@@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
int i = pltsec->plt_num_entries;
u64 val = sym->st_value + rela->r_addend;
- /*
- * MOVK/MOVN/MOVZ opcode:
- * +--------+------------+--------+-----------+-------------+---------+
- * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
- * +--------+------------+--------+-----------+-------------+---------+
- *
- * Rd := 0x10 (x16)
- * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
- * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
- * sf := 1 (64-bit variant)
- */
- plt[i] = (struct plt_entry){
- cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5),
- cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
- cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
- cpu_to_le32(0xd61f0200)
- };
+ plt[i] = get_plt_entry(val);
/*
* Check if the entry we just created is a duplicate. Given that the
* relocations are sorted, this will be the last entry we allocated.
* (if one exists).
*/
- if (i > 0 &&
- plt[i].mov0 == plt[i - 1].mov0 &&
- plt[i].mov1 == plt[i - 1].mov1 &&
- plt[i].mov2 == plt[i - 1].mov2)
+ if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
return (u64)&plt[i - 1];
pltsec->plt_num_entries++;
@@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
unsigned long core_plts = 0;
unsigned long init_plts = 0;
Elf64_Sym *syms = NULL;
+ Elf_Shdr *tramp = NULL;
int i;
/*
@@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
mod->arch.core.plt = sechdrs + i;
else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
mod->arch.init.plt = sechdrs + i;
+ else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
+ !strcmp(secstrings + sechdrs[i].sh_name,
+ ".text.ftrace_trampoline"))
+ tramp = sechdrs + i;
else if (sechdrs[i].sh_type == SHT_SYMTAB)
syms = (Elf64_Sym *)sechdrs[i].sh_addr;
}
@@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
mod->arch.init.plt_num_entries = 0;
mod->arch.init.plt_max_entries = init_plts;
+ if (tramp) {
+ tramp->sh_type = SHT_NOBITS;
+ tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ tramp->sh_addralign = __alignof__(struct plt_entry);
+ tramp->sh_size = sizeof(struct plt_entry);
+ }
+
return 0;
}
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
index f7c9781a9d48..22e36a21c113 100644
--- a/arch/arm64/kernel/module.lds
+++ b/arch/arm64/kernel/module.lds
@@ -1,4 +1,5 @@
SECTIONS {
.plt (NOLOAD) : { BYTE(0) }
.init.plt (NOLOAD) : { BYTE(0) }
+ .text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 9eaef51f83ff..3affca3dd96a 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
-
- [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
- [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-
- [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
- [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
};
static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b2adcce7bc18..6b7dcf4310ac 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -314,6 +314,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
clear_tsk_thread_flag(p, TIF_SVE);
p->thread.sve_state = NULL;
+ /*
+ * In case p was allocated the same task_struct pointer as some
+ * other recently-exited task, make sure p is disassociated from
+ * any cpu that may have run that now-exited task recently.
+ * Otherwise we could erroneously skip reloading the FPSIMD
+ * registers for p.
+ */
+ fpsimd_flush_task_state(p);
+
if (likely(!(p->flags & PF_KTHREAD))) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index ce704a4aeadd..f407e422a720 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel)
mrs x0, sctlr_el2
ldr x1, =SCTLR_ELx_FLAGS
bic x0, x0, x1
+ pre_disable_mmu_workaround
msr sctlr_el2, x0
isb
1:
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index dbadfaf850a7..fa63b28c65e0 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -221,3 +221,24 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
}
}
}
+
+
+/*
+ * After successfully emulating an instruction, we might want to
+ * return to user space with a KVM_EXIT_DEBUG. We can only do this
+ * once the emulation is complete, though, so for userspace emulations
+ * we have to wait until we have re-entered KVM before calling this
+ * helper.
+ *
+ * Return true (and set exit_reason) to return to userspace or false
+ * if no further action is required.
+ */
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT;
+ return true;
+ }
+ return false;
+}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index b71247995469..304203fa9e33 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -28,6 +28,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_psci.h>
+#include <asm/debug-monitors.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -187,14 +188,46 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
}
/*
+ * We may be single-stepping an emulated instruction. If the emulation
+ * has been completed in the kernel, we can return to userspace with a
+ * KVM_EXIT_DEBUG, otherwise userspace needs to complete its
+ * emulation first.
+ */
+static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int handled;
+
+ /*
+ * See ARM ARM B1.14.1: "Hyp traps on instructions
+ * that fail their condition code check"
+ */
+ if (!kvm_condition_valid(vcpu)) {
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ handled = 1;
+ } else {
+ exit_handle_fn exit_handler;
+
+ exit_handler = kvm_get_exit_handler(vcpu);
+ handled = exit_handler(vcpu, run);
+ }
+
+ /*
+ * kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run
+ * structure if we need to return to userspace.
+ */
+ if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run))
+ handled = 0;
+
+ return handled;
+}
+
+/*
* Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
* proper exit to userspace.
*/
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index)
{
- exit_handle_fn exit_handler;
-
if (ARM_SERROR_PENDING(exception_index)) {
u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
@@ -220,20 +253,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
return 1;
case ARM_EXCEPTION_EL1_SERROR:
kvm_inject_vabt(vcpu);
- return 1;
- case ARM_EXCEPTION_TRAP:
- /*
- * See ARM ARM B1.14.1: "Hyp traps on instructions
- * that fail their condition code check"
- */
- if (!kvm_condition_valid(vcpu)) {
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ /* We may still need to return for single-step */
+ if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
+ && kvm_arm_handle_step_debug(vcpu, run))
+ return 0;
+ else
return 1;
- }
-
- exit_handler = kvm_get_exit_handler(vcpu);
-
- return exit_handler(vcpu, run);
+ case ARM_EXCEPTION_TRAP:
+ return handle_trap_exceptions(vcpu, run);
case ARM_EXCEPTION_HYP_GONE:
/*
* EL2 has been reset to the hyp-stub. This happens when a guest
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 3f9615582377..870828c364c5 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -151,6 +151,7 @@ reset:
mrs x5, sctlr_el2
ldr x6, =SCTLR_ELx_FLAGS
bic x5, x5, x6 // Clear SCTL_M and etc
+ pre_disable_mmu_workaround
msr sctlr_el2, x5
isb
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 321c9c05dd9e..f4363d40e2cd 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
{
u64 reg;
+ /* Clear pmscr in case of early return */
+ *pmscr_el1 = 0;
+
/* SPE present on this CPU? */
if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
ID_AA64DFR0_PMSVER_SHIFT))
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 525c01f48867..f7c651f3a8c0 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -22,6 +22,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
static bool __hyp_text __fpsimd_enabled_nvhe(void)
{
@@ -269,7 +270,11 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
return true;
}
-static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
+/* Skip an instruction which has been emulated. Returns true if
+ * execution can continue or false if we need to exit hyp mode because
+ * single-step was in effect.
+ */
+static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
{
*vcpu_pc(vcpu) = read_sysreg_el2(elr);
@@ -282,6 +287,14 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
}
write_sysreg_el2(*vcpu_pc(vcpu), elr);
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ vcpu->arch.fault.esr_el2 =
+ (ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22;
+ return false;
+ } else {
+ return true;
+ }
}
int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
@@ -342,13 +355,21 @@ again:
int ret = __vgic_v2_perform_cpuif_access(vcpu);
if (ret == 1) {
- __skip_instr(vcpu);
- goto again;
+ if (__skip_instr(vcpu))
+ goto again;
+ else
+ exit_code = ARM_EXCEPTION_TRAP;
}
if (ret == -1) {
- /* Promote an illegal access to an SError */
- __skip_instr(vcpu);
+ /* Promote an illegal access to an
+ * SError. If we would be returning
+ * due to single-step clear the SS
+ * bit so handle_exit knows what to
+ * do after dealing with the error.
+ */
+ if (!__skip_instr(vcpu))
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
exit_code = ARM_EXCEPTION_EL1_SERROR;
}
@@ -363,8 +384,10 @@ again:
int ret = __vgic_v3_perform_cpuif_access(vcpu);
if (ret == 1) {
- __skip_instr(vcpu);
- goto again;
+ if (__skip_instr(vcpu))
+ goto again;
+ else
+ exit_code = ARM_EXCEPTION_TRAP;
}
/* 0 falls through to be handled out of EL2 */
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index ab9f5f0fb2c7..6f4017046323 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu)
set_reserved_asid_bits();
- /*
- * Ensure the generation bump is observed before we xchg the
- * active_asids.
- */
- smp_wmb();
-
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
/*
@@ -117,7 +111,10 @@ static void flush_context(unsigned int cpu)
per_cpu(reserved_asids, i) = asid;
}
- /* Queue a TLB invalidate and flush the I-cache if necessary. */
+ /*
+ * Queue a TLB invalidation for each CPU to perform on next
+ * context-switch
+ */
cpumask_setall(&tlb_flush_pending);
}
@@ -202,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
asid = atomic64_read(&mm->context.id);
/*
- * The memory ordering here is subtle. We rely on the control
- * dependency between the generation read and the update of
- * active_asids to ensure that we are synchronised with a
- * parallel rollover (i.e. this pairs with the smp_wmb() in
- * flush_context).
+ * The memory ordering here is subtle.
+ * If our ASID matches the current generation, then we update
+ * our active_asids entry with a relaxed xchg. Racing with a
+ * concurrent rollover means that either:
+ *
+ * - We get a zero back from the xchg and end up waiting on the
+ * lock. Taking the lock synchronises with the rollover and so
+ * we are forced to see the updated generation.
+ *
+ * - We get a valid ASID back from the xchg, which means the
+ * relaxed xchg in flush_context will treat us as reserved
+ * because atomic RmWs are totally ordered for a given location.
*/
if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
&& atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index ca74a2aace42..7b60d62ac593 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -389,7 +389,7 @@ void ptdump_check_wx(void)
.check_wx = true,
};
- walk_pgd(&st, &init_mm, 0);
+ walk_pgd(&st, &init_mm, VA_START);
note_page(&st, 0, 0, 0);
if (st.wx_pages || st.uxn_pages)
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 22168cd0dde7..9b7f89df49db 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct siginfo info;
const struct fault_info *inf;
- int ret = 0;
inf = esr_to_fault_info(esr);
pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
@@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
if (interrupts_enabled(regs))
nmi_enter();
- ret = ghes_notify_sea();
+ ghes_notify_sea();
if (interrupts_enabled(regs))
nmi_exit();
@@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
info.si_addr = (void __user *)addr;
arm64_notify_die("", regs, &info, esr);
- return ret;
+ return 0;
}
static const struct fault_info fault_info[] = {
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 5960bef0170d..00e7b900ca41 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -476,6 +476,8 @@ void __init arm64_memblock_init(void)
reserve_elfcorehdr();
+ high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
+
dma_contiguous_reserve(arm64_dma_phys_limit);
memblock_allow_resize();
@@ -502,7 +504,6 @@ void __init bootmem_init(void)
sparse_init();
zone_sizes_init(min, max);
- high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
memblock_dump_all();
}
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 371c5f03a170..051e71ec3335 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -26,7 +26,7 @@
#include <asm/page.h>
#include <asm/tlbflush.h>
-static struct kmem_cache *pgd_cache;
+static struct kmem_cache *pgd_cache __ro_after_init;
pgd_t *pgd_alloc(struct mm_struct *mm)
{
diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
index aa624b4ab655..2240b38c2915 100644
--- a/arch/blackfin/include/uapi/asm/Kbuild
+++ b/arch/blackfin/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += ioctl.h
generic-y += ipcbuf.h
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 67ee896a76a7..26644e15d854 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index 3687b54bb18e..3470c6e9c7b9 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild
index b15bf6bc0e94..14a2e9af97e9 100644
--- a/arch/frv/include/uapi/asm/Kbuild
+++ b/arch/frv/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bpf_perf_event.h
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index 187aed820e71..2f65f78792cb 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index cb5df3aad3a8..41a176dbb53e 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index 13a97aa2285f..f5c6967a93bb 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += kvm_para.h
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
index 1c44d3b3eba0..451bf6071c6e 100644
--- a/arch/m32r/include/uapi/asm/Kbuild
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -1,5 +1,6 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += kvm_para.h
generic-y += siginfo.h
diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig
index 55e55dbc2fb6..3d07b1de7eb0 100644
--- a/arch/m68k/configs/stmark2_defconfig
+++ b/arch/m68k/configs/stmark2_defconfig
@@ -5,7 +5,6 @@ CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_NAMESPACES=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../uClinux-dist/romfs"
# CONFIG_RD_BZIP2 is not set
# CONFIG_RD_LZMA is not set
# CONFIG_RD_XZ is not set
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 3717b64a620d..c2e26a44c482 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += ioctl.h
generic-y += ipcbuf.h
diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds
index 3aa571a513b5..cf6edda38971 100644
--- a/arch/m68k/kernel/vmlinux-nommu.lds
+++ b/arch/m68k/kernel/vmlinux-nommu.lds
@@ -45,6 +45,8 @@ SECTIONS {
.text : {
HEAD_TEXT
TEXT_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 89172b8974b9..625a5785804f 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -16,6 +16,8 @@ SECTIONS
.text : {
HEAD_TEXT
TEXT_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index 293990efc917..9868270b0984 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -16,6 +16,8 @@ SECTIONS
.text : {
HEAD_TEXT
TEXT_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
index 6ac763d9a3e3..f9eaf07d29f8 100644
--- a/arch/metag/include/uapi/asm/Kbuild
+++ b/arch/metag/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h
index 99472d2ca340..97559fe0b953 100644
--- a/arch/microblaze/include/asm/mmu_context_mm.h
+++ b/arch/microblaze/include/asm/mmu_context_mm.h
@@ -13,6 +13,7 @@
#include <linux/atomic.h>
#include <linux/mm_types.h>
+#include <linux/sched.h>
#include <asm/bitops.h>
#include <asm/mmu.h>
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 06609ca36115..2c6a6bffea32 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 7c8aab23bce8..b1f66699677d 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += sections.h
generic-y += segment.h
-generic-y += serial.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += user.h
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 9e9e94415d08..1a508a74d48d 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -552,7 +552,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_WRITE);
diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h
new file mode 100644
index 000000000000..1d830c6666c2
--- /dev/null
+++ b/arch/mips/include/asm/serial.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2017 MIPS Tech, LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#ifndef __ASM__SERIAL_H
+#define __ASM__SERIAL_H
+
+#ifdef CONFIG_MIPS_GENERIC
+/*
+ * Generic kernels cannot know a correct value for all platforms at
+ * compile time. Set it to 0 to prevent 8250_early using it
+ */
+#define BASE_BAUD 0
+#else
+#include <asm-generic/serial.h>
+#endif
+
+#endif /* __ASM__SERIAL_H */
diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild
index a0266feba9e6..7a4becd8963a 100644
--- a/arch/mips/include/uapi/asm/Kbuild
+++ b/arch/mips/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += ipcbuf.h
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index d535edc01434..75fdeaa8c62f 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -445,10 +445,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r = -EINTR;
- sigset_t sigsaved;
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+ kvm_sigset_activate(vcpu);
if (vcpu->mmio_needed) {
if (!vcpu->mmio_is_write)
@@ -480,8 +478,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
local_irq_enable();
out:
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ kvm_sigset_deactivate(vcpu);
return r;
}
diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild
index c94ee54210bc..81271d3af47c 100644
--- a/arch/mn10300/include/uapi/asm/Kbuild
+++ b/arch/mn10300/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += siginfo.h
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index ffca24da7647..13a3d77b4d7b 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 62286dbeb904..130c16ccba0a 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 9345b44b86f0..f57118e1f6b4 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -123,8 +123,8 @@ int puts(const char *s)
while ((nuline = strchr(s, '\n')) != NULL) {
if (nuline != s)
pdc_iodc_print(s, nuline - s);
- pdc_iodc_print("\r\n", 2);
- s = nuline + 1;
+ pdc_iodc_print("\r\n", 2);
+ s = nuline + 1;
}
if (*s != '\0')
pdc_iodc_print(s, strlen(s));
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index c980a02a52bc..598c8d60fa5e 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -35,7 +35,12 @@ struct thread_info {
/* thread information allocation */
+#ifdef CONFIG_IRQSTACKS
+#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */
+#else
#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */
+#endif
+
/* Be sure to hunt all references to this down when you change the size of
* the kernel stack */
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 196d2a4efb31..286ef5a5904b 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
+generic-y += bpf_perf_event.h
generic-y += kvm_para.h
generic-y += param.h
generic-y += poll.h
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index a4fd296c958e..f3cecf5117cf 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
STREG %r19,PT_SR7(%r16)
intr_return:
- /* NOTE: Need to enable interrupts incase we schedule. */
- ssm PSW_SM_I, %r0
-
/* check for reschedule */
mfctl %cr30,%r1
LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
LDREG PT_IASQ1(%r16), %r20
cmpib,COND(=),n 0,%r20,intr_restore /* backward */
+ /* NOTE: We need to enable interrupts if we have to deliver
+ * signals. We used to do this earlier but it caused kernel
+ * stack overflows. */
+ ssm PSW_SM_I, %r0
+
copy %r0, %r25 /* long in_syscall = 0 */
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
cmpib,COND(=) 0, %r20, intr_do_preempt
nop
+ /* NOTE: We need to enable interrupts if we schedule. We used
+ * to do this earlier but it caused kernel stack overflows. */
+ ssm PSW_SM_I, %r0
+
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#endif
diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S
index e3a8e5e4d5de..8d072c44f300 100644
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
__INITRODATA
+ .align 4
.export os_hpmc_size
os_hpmc_size:
.word .os_hpmc_end-.os_hpmc
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 5a657986ebbf..143f90e2f9f3 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/kallsyms.h>
#include <linux/sort.h>
-#include <linux/sched.h>
#include <linux/uaccess.h>
#include <asm/assembly.h>
diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c
index 7eab4bb8abe6..66e506520505 100644
--- a/arch/parisc/lib/delay.c
+++ b/arch/parisc/lib/delay.c
@@ -16,9 +16,7 @@
#include <linux/preempt.h>
#include <linux/init.h>
-#include <asm/processor.h>
#include <asm/delay.h>
-
#include <asm/special_insns.h> /* for mfctl() */
#include <asm/processor.h> /* for boot_cpu_data */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9a677cd5997f..44697817ccc6 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd)
}
#endif /* CONFIG_NUMA_BALANCING */
-#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#define __pmd_write(pmd) __pte_write(pmd_pte(pmd))
#define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd))
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 96753f3aac6d..941c2a3f231b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -180,6 +180,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
struct iommu_group *grp);
extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
+extern void kvmppc_setup_partition_table(struct kvm *kvm);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args);
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 73b92017b6d7..cd2fc1cc1cc7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -76,6 +76,7 @@ struct machdep_calls {
void __noreturn (*restart)(char *cmd);
void __noreturn (*halt)(void);
+ void (*panic)(char *str);
void (*cpu_die)(void);
long (*time_init)(void); /* Optional, may be NULL */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6177d43f0ce8..e2a2b8400490 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
#endif
}
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
{
+ return 0;
}
#ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 257d23dbf55d..cf00ec26303a 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -24,6 +24,7 @@ extern void reloc_got2(unsigned long);
void check_for_initrd(void);
void initmem_init(void);
+void setup_panic(void);
#define ARCH_PANIC_TIMEOUT 180
#ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 0d960ef78a9a..1a6ed5919ffd 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += param.h
generic-y += poll.h
generic-y += resource.h
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 610955fe8b81..679bbe714e85 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -102,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
li r0,0
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PID,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
@@ -126,6 +127,7 @@ _GLOBAL(__restore_cpu_power9)
li r0,0
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PID,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 04ea5c04fd24..3c2c2688918f 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1462,25 +1462,6 @@ static void fadump_init_files(void)
return;
}
-static int fadump_panic_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- /*
- * If firmware-assisted dump has been registered then trigger
- * firmware-assisted dump and let firmware handle everything
- * else. If this returns, then fadump was not registered, so
- * go through the rest of the panic path.
- */
- crash_fadump(NULL, ptr);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block fadump_panic_block = {
- .notifier_call = fadump_panic_event,
- .priority = INT_MIN /* may not return; must be done last */
-};
-
/*
* Prepare for firmware-assisted dump.
*/
@@ -1513,9 +1494,6 @@ int __init setup_fadump(void)
init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
fadump_init_files();
- atomic_notifier_chain_register(&panic_notifier_list,
- &fadump_panic_block);
-
return 1;
}
subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 8ac0bd2bddb0..3280953a82cf 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -623,7 +623,9 @@ BEGIN_FTR_SECTION
* NOTE, we rely on r0 being 0 from above.
*/
mtspr SPRN_IAMR,r0
+BEGIN_FTR_SECTION_NESTED(42)
mtspr SPRN_AMOR,r0
+END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
/* save regs for local vars on new stack.
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index bfdd783e3916..72be0c32e902 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
regs->nip, regs->link, regs->ctr);
- printk("REGS: %p TRAP: %04lx %s (%s)\n",
+ printk("REGS: %px TRAP: %04lx %s (%s)\n",
regs, regs->trap, print_tainted(), init_utsname()->release);
printk("MSR: "REG" ", regs->msr);
print_msr_bits(regs->msr);
@@ -1569,16 +1569,22 @@ void arch_release_task_struct(struct task_struct *t)
*/
int set_thread_tidr(struct task_struct *t)
{
+ int rc;
+
if (!cpu_has_feature(CPU_FTR_ARCH_300))
return -EINVAL;
if (t != current)
return -EINVAL;
- t->thread.tidr = assign_thread_tidr();
- if (t->thread.tidr < 0)
- return t->thread.tidr;
+ if (t->thread.tidr)
+ return 0;
+
+ rc = assign_thread_tidr();
+ if (rc < 0)
+ return rc;
+ t->thread.tidr = rc;
mtspr(SPRN_TIDR, t->thread.tidr);
return 0;
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2075322cd225..9d213542a48b 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -704,6 +704,30 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
+static int ppc_panic_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ /*
+ * If firmware-assisted dump has been registered then trigger
+ * firmware-assisted dump and let firmware handle everything else.
+ */
+ crash_fadump(NULL, ptr);
+ ppc_md.panic(ptr); /* May not return */
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_panic_block = {
+ .notifier_call = ppc_panic_event,
+ .priority = INT_MIN /* may not return; must be done last */
+};
+
+void __init setup_panic(void)
+{
+ if (!ppc_md.panic)
+ return;
+ atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
+}
+
#ifdef CONFIG_CHECK_CACHE_COHERENCY
/*
* For platforms that have configurable cache-coherency. This function
@@ -848,6 +872,9 @@ void __init setup_arch(char **cmdline_p)
/* Probe the machine type, establish ppc_md. */
probe_machine();
+ /* Setup panic notifier if requested by the platform. */
+ setup_panic();
+
/*
* Configure ppc_md.power_save (ppc32 only, 64-bit machines do
* it from their respective probe() function.
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 235319c2574e..966097232d21 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1238,8 +1238,9 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
unsigned long vpte, rpte, guest_rpte;
int ret;
struct revmap_entry *rev;
- unsigned long apsize, psize, avpn, pteg, hash;
+ unsigned long apsize, avpn, pteg, hash;
unsigned long new_idx, new_pteg, replace_vpte;
+ int pshift;
hptep = (__be64 *)(old->virt + (idx << 4));
@@ -1298,8 +1299,8 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
goto out;
rpte = be64_to_cpu(hptep[1]);
- psize = hpte_base_page_size(vpte, rpte);
- avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
+ pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
+ avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
pteg = idx / HPTES_PER_GROUP;
if (vpte & HPTE_V_SECONDARY)
pteg = ~pteg;
@@ -1311,20 +1312,20 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
offset = (avpn & 0x1f) << 23;
vsid = avpn >> 5;
/* We can find more bits from the pteg value */
- if (psize < (1ULL << 23))
- offset |= ((vsid ^ pteg) & old_hash_mask) * psize;
+ if (pshift < 23)
+ offset |= ((vsid ^ pteg) & old_hash_mask) << pshift;
- hash = vsid ^ (offset / psize);
+ hash = vsid ^ (offset >> pshift);
} else {
unsigned long offset, vsid;
/* We only have 40 - 23 bits of seg_off in avpn */
offset = (avpn & 0x1ffff) << 23;
vsid = avpn >> 17;
- if (psize < (1ULL << 23))
- offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize;
+ if (pshift < 23)
+ offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift;
- hash = vsid ^ (vsid << 25) ^ (offset / psize);
+ hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
}
new_pteg = hash & new_hash_mask;
@@ -1801,6 +1802,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
ssize_t nb;
long int err, ret;
int mmu_ready;
+ int pshift;
if (!access_ok(VERIFY_READ, buf, count))
return -EFAULT;
@@ -1855,6 +1857,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
err = -EINVAL;
if (!(v & HPTE_V_VALID))
goto out;
+ pshift = kvmppc_hpte_base_page_shift(v, r);
+ if (pshift <= 0)
+ goto out;
lbuf += 2;
nb += HPTE_SIZE;
@@ -1869,14 +1874,18 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
goto out;
}
if (!mmu_ready && is_vrma_hpte(v)) {
- unsigned long psize = hpte_base_page_size(v, r);
- unsigned long senc = slb_pgsize_encoding(psize);
- unsigned long lpcr;
+ unsigned long senc, lpcr;
+ senc = slb_pgsize_encoding(1ul << pshift);
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
(VRMA_VSID << SLB_VSID_SHIFT_1T);
- lpcr = senc << (LPCR_VRMASD_SH - 4);
- kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ lpcr = senc << (LPCR_VRMASD_SH - 4);
+ kvmppc_update_lpcr(kvm, lpcr,
+ LPCR_VRMASD);
+ } else {
+ kvmppc_setup_partition_table(kvm);
+ }
mmu_ready = 1;
}
++i;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 79ea3d9269db..2d46037ce936 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
-static void kvmppc_setup_partition_table(struct kvm *kvm);
static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
int *ip)
@@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
return;
}
-static void kvmppc_setup_partition_table(struct kvm *kvm)
+void kvmppc_setup_partition_table(struct kvm *kvm)
{
unsigned long dw0, dw1;
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index bf457843e032..0d750d274c4e 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
/* Return the per-cpu state for state saving/migration */
return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
- (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+ (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+ (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
}
int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
/*
* Restore P and Q. If the interrupt was pending, we
- * force both P and Q, which will trigger a resend.
+ * force Q and !P, which will trigger a resend.
*
* That means that a guest that had both an interrupt
* pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
* is perfectly fine as coalescing interrupts that haven't
* been presented yet is always allowed.
*/
- if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+ if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
state->old_p = true;
if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
state->old_q = true;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6b6c53c42ac9..1915e86cef6f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1407,7 +1407,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r;
- sigset_t sigsaved;
if (vcpu->mmio_needed) {
vcpu->mmio_needed = 0;
@@ -1448,16 +1447,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
#endif
}
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+ kvm_sigset_activate(vcpu);
if (run->immediate_exit)
r = -EINTR;
else
r = kvmppc_vcpu_run(run, vcpu);
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ kvm_sigset_deactivate(vcpu);
return r;
}
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 3848af167df9..640cf566e986 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -47,7 +47,8 @@
DEFINE_RAW_SPINLOCK(native_tlbie_lock);
-static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+static inline unsigned long ___tlbie(unsigned long vpn, int psize,
+ int apsize, int ssize)
{
unsigned long va;
unsigned int penc;
@@ -100,7 +101,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
: "memory");
break;
}
- trace_tlbie(0, 0, va, 0, 0, 0, 0);
+ return va;
+}
+
+static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+{
+ unsigned long rb;
+
+ rb = ___tlbie(vpn, psize, apsize, ssize);
+ trace_tlbie(0, 0, rb, 0, 0, 0, 0);
}
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
@@ -652,7 +661,7 @@ static void native_hpte_clear(void)
if (hpte_v & HPTE_V_VALID) {
hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
hptep->v = 0;
- __tlbie(vpn, psize, apsize, ssize);
+ ___tlbie(vpn, psize, apsize, ssize);
}
}
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 46d74e81aff1..d183b4801bdb 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -763,7 +763,8 @@ emit_clear:
func = (u8 *) __bpf_call_base + imm;
/* Save skb pointer if we need to re-cache skb data */
- if (bpf_helper_changes_pkt_data(func))
+ if ((ctx->seen & SEEN_SKB) &&
+ bpf_helper_changes_pkt_data(func))
PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
PPC_MR(b2p[BPF_REG_0], 3);
/* refresh skb cache */
- if (bpf_helper_changes_pkt_data(func)) {
+ if ((ctx->seen & SEEN_SKB) &&
+ bpf_helper_changes_pkt_data(func)) {
/* reload skb pointer to r3 */
PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 9e3da168d54c..fce545774d50 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
int ret;
__u64 target;
- if (is_kernel_addr(addr))
- return branch_target((unsigned int *)addr);
+ if (is_kernel_addr(addr)) {
+ if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+ return 0;
+
+ return branch_target(&instr);
+ }
/* Userspace: need copy instruction here then translate it */
pagefault_disable();
@@ -1415,7 +1419,7 @@ static int collect_events(struct perf_event *group, int max_count,
int n = 0;
struct perf_event *event;
- if (!is_software_event(group)) {
+ if (group->pmu->task_ctx_nr == perf_hw_context) {
if (n >= max_count)
return -1;
ctrs[n] = group;
@@ -1423,7 +1427,7 @@ static int collect_events(struct perf_event *group, int max_count,
events[n++] = group->hw.config;
}
list_for_each_entry(event, &group->sibling_list, group_entry) {
- if (!is_software_event(event) &&
+ if (event->pmu->task_ctx_nr == perf_hw_context &&
event->state != PERF_EVENT_STATE_OFF) {
if (n >= max_count)
return -1;
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 0ead3cd73caa..be4e7f84f70a 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -310,6 +310,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
return 0;
/*
+ * Check whether nest_imc is registered. We could end up here if the
+ * cpuhotplug callback registration fails. i.e, callback invokes the
+ * offline path for all successfully registered nodes. At this stage,
+ * nest_imc pmu will not be registered and we should return here.
+ *
+ * We return with a zero since this is not an offline failure. And
+ * cpuhp_setup_state() returns the actual failure reason to the caller,
+ * which in turn will call the cleanup routine.
+ */
+ if (!nest_pmus)
+ return 0;
+
+ /*
* Now that this cpu is one of the designated,
* find a next cpu a) which is online and b) in same chip.
*/
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
if (nest_pmus == 1) {
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
kfree(nest_imc_refc);
+ kfree(per_nest_pmu_arr);
}
if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
kfree(pmu_ptr);
- kfree(per_nest_pmu_arr);
return;
}
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
ret = nest_pmu_cpumask_init();
if (ret) {
mutex_unlock(&nest_init_lock);
+ kfree(nest_imc_refc);
+ kfree(per_nest_pmu_arr);
goto err_free;
}
}
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 9dabea6e1443..6244bc849469 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -104,6 +104,20 @@ static void __noreturn ps3_halt(void)
ps3_sys_manager_halt(); /* never returns */
}
+static void ps3_panic(char *str)
+{
+ DBG("%s:%d %s\n", __func__, __LINE__, str);
+
+ smp_send_stop();
+ printk("\n");
+ printk(" System does not reboot automatically.\n");
+ printk(" Please press POWER button.\n");
+ printk("\n");
+
+ while(1)
+ lv1_pause(1);
+}
+
#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \
defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
static void __init prealloc(struct ps3_prealloc *p)
@@ -255,6 +269,7 @@ define_machine(ps3) {
.probe = ps3_probe,
.setup_arch = ps3_setup_arch,
.init_IRQ = ps3_init_IRQ,
+ .panic = ps3_panic,
.get_boot_time = ps3_get_boot_time,
.set_dabr = ps3_set_dabr,
.calibrate_decr = ps3_calibrate_decr,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 5f1beb8367ac..a8531e012658 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -726,6 +726,7 @@ define_machine(pseries) {
.pcibios_fixup = pSeries_final_fixup,
.restart = rtas_restart,
.halt = rtas_halt,
+ .panic = rtas_os_term,
.get_boot_time = rtas_get_boot_time,
.get_rtc_time = rtas_get_rtc_time,
.set_rtc_time = rtas_set_rtc_time,
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 44cbf4c12ea1..df95102e732c 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
}
static struct lock_class_key fsl_msi_irq_class;
+static struct lock_class_key fsl_msi_irq_request_class;
static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
int offset, int irq_index)
@@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
dev_err(&dev->dev, "No memory for MSI cascade data\n");
return -ENOMEM;
}
- irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class);
+ irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
+ &fsl_msi_irq_request_class);
cascade_data->index = offset;
cascade_data->msi_data = msi;
cascade_data->virq = virt_msir;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1b2d8cb49abb..cab24f549e7c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1590,7 +1590,7 @@ static void print_bug_trap(struct pt_regs *regs)
printf("kernel BUG at %s:%u!\n",
bug->file, bug->line);
#else
- printf("kernel BUG at %p!\n", (void *)bug->bug_addr);
+ printf("kernel BUG at %px!\n", (void *)bug->bug_addr);
#endif
#endif /* CONFIG_BUG */
}
@@ -2329,7 +2329,7 @@ static void dump_one_paca(int cpu)
p = &paca[cpu];
- printf("paca for cpu 0x%x @ %p:\n", cpu, p);
+ printf("paca for cpu 0x%x @ %px:\n", cpu, p);
printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : "no");
printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no");
@@ -2945,7 +2945,7 @@ static void show_task(struct task_struct *tsk)
(tsk->exit_state & EXIT_DEAD) ? 'E' :
(tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
- printf("%p %016lx %6d %6d %c %2d %s\n", tsk,
+ printf("%px %016lx %6d %6d %c %2d %s\n", tsk,
tsk->thread.ksp,
tsk->pid, tsk->parent->pid,
state, task_thread_info(tsk)->cpu,
@@ -2988,7 +2988,7 @@ static void show_pte(unsigned long addr)
if (setjmp(bus_error_jmp) != 0) {
catch_memory_errors = 0;
- printf("*** Error dumping pte for task %p\n", tsk);
+ printf("*** Error dumping pte for task %px\n", tsk);
return;
}
@@ -3074,7 +3074,7 @@ static void show_tasks(void)
if (setjmp(bus_error_jmp) != 0) {
catch_memory_errors = 0;
- printf("*** Error dumping task %p\n", tsk);
+ printf("*** Error dumping task %px\n", tsk);
return;
}
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 18158be62a2b..970460a0b492 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -40,6 +40,7 @@ generic-y += resource.h
generic-y += scatterlist.h
generic-y += sections.h
generic-y += sembuf.h
+generic-y += serial.h
generic-y += setup.h
generic-y += shmbuf.h
generic-y += shmparam.h
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 6cbbb6a68d76..5ad4cb622bed 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -58,17 +58,17 @@
#endif
#if (__SIZEOF_INT__ == 4)
-#define INT __ASM_STR(.word)
-#define SZINT __ASM_STR(4)
-#define LGINT __ASM_STR(2)
+#define RISCV_INT __ASM_STR(.word)
+#define RISCV_SZINT __ASM_STR(4)
+#define RISCV_LGINT __ASM_STR(2)
#else
#error "Unexpected __SIZEOF_INT__"
#endif
#if (__SIZEOF_SHORT__ == 2)
-#define SHORT __ASM_STR(.half)
-#define SZSHORT __ASM_STR(2)
-#define LGSHORT __ASM_STR(1)
+#define RISCV_SHORT __ASM_STR(.half)
+#define RISCV_SZSHORT __ASM_STR(2)
+#define RISCV_LGSHORT __ASM_STR(1)
#else
#error "Unexpected __SIZEOF_SHORT__"
#endif
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index e2e37c57cbeb..e65d1cd89e28 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -50,30 +50,30 @@ static __always_inline void atomic64_set(atomic64_t *v, long i)
* have the AQ or RL bits set. These don't return anything, so there's only
* one version to worry about.
*/
-#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix) \
-static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
-{ \
- __asm__ __volatile__ ( \
- "amo" #asm_op "." #asm_type " zero, %1, %0" \
- : "+A" (v->counter) \
- : "r" (I) \
- : "memory"); \
+#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \
+static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
+{ \
+ __asm__ __volatile__ ( \
+ "amo" #asm_op "." #asm_type " zero, %1, %0" \
+ : "+A" (v->counter) \
+ : "r" (I) \
+ : "memory"); \
}
#ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I) \
- ATOMIC_OP (op, asm_op, c_op, I, w, int, )
+#define ATOMIC_OPS(op, asm_op, I) \
+ ATOMIC_OP (op, asm_op, I, w, int, )
#else
-#define ATOMIC_OPS(op, asm_op, c_op, I) \
- ATOMIC_OP (op, asm_op, c_op, I, w, int, ) \
- ATOMIC_OP (op, asm_op, c_op, I, d, long, 64)
+#define ATOMIC_OPS(op, asm_op, I) \
+ ATOMIC_OP (op, asm_op, I, w, int, ) \
+ ATOMIC_OP (op, asm_op, I, d, long, 64)
#endif
-ATOMIC_OPS(add, add, +, i)
-ATOMIC_OPS(sub, add, +, -i)
-ATOMIC_OPS(and, and, &, i)
-ATOMIC_OPS( or, or, |, i)
-ATOMIC_OPS(xor, xor, ^, i)
+ATOMIC_OPS(add, add, i)
+ATOMIC_OPS(sub, add, -i)
+ATOMIC_OPS(and, and, i)
+ATOMIC_OPS( or, or, i)
+ATOMIC_OPS(xor, xor, i)
#undef ATOMIC_OP
#undef ATOMIC_OPS
@@ -83,7 +83,7 @@ ATOMIC_OPS(xor, xor, ^, i)
* There's two flavors of these: the arithmatic ops have both fetch and return
* versions, while the logical ops only have fetch versions.
*/
-#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \
+#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \
static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \
{ \
register c_type ret; \
@@ -103,13 +103,13 @@ static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, ato
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
- ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \
+ ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, )
#else
#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
- ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \
+ ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \
- ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, d, long, 64) \
+ ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
#endif
@@ -126,28 +126,28 @@ ATOMIC_OPS(sub, add, +, -i, .aqrl, )
#undef ATOMIC_OPS
#ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
- ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, )
+#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \
+ ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, )
#else
-#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \
- ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \
- ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, d, long, 64)
+#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \
+ ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \
+ ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64)
#endif
-ATOMIC_OPS(and, and, &, i, , _relaxed)
-ATOMIC_OPS(and, and, &, i, .aq , _acquire)
-ATOMIC_OPS(and, and, &, i, .rl , _release)
-ATOMIC_OPS(and, and, &, i, .aqrl, )
+ATOMIC_OPS(and, and, i, , _relaxed)
+ATOMIC_OPS(and, and, i, .aq , _acquire)
+ATOMIC_OPS(and, and, i, .rl , _release)
+ATOMIC_OPS(and, and, i, .aqrl, )
-ATOMIC_OPS( or, or, |, i, , _relaxed)
-ATOMIC_OPS( or, or, |, i, .aq , _acquire)
-ATOMIC_OPS( or, or, |, i, .rl , _release)
-ATOMIC_OPS( or, or, |, i, .aqrl, )
+ATOMIC_OPS( or, or, i, , _relaxed)
+ATOMIC_OPS( or, or, i, .aq , _acquire)
+ATOMIC_OPS( or, or, i, .rl , _release)
+ATOMIC_OPS( or, or, i, .aqrl, )
-ATOMIC_OPS(xor, xor, ^, i, , _relaxed)
-ATOMIC_OPS(xor, xor, ^, i, .aq , _acquire)
-ATOMIC_OPS(xor, xor, ^, i, .rl , _release)
-ATOMIC_OPS(xor, xor, ^, i, .aqrl, )
+ATOMIC_OPS(xor, xor, i, , _relaxed)
+ATOMIC_OPS(xor, xor, i, .aq , _acquire)
+ATOMIC_OPS(xor, xor, i, .rl , _release)
+ATOMIC_OPS(xor, xor, i, .aqrl, )
#undef ATOMIC_OPS
@@ -182,13 +182,13 @@ ATOMIC_OPS(add_negative, add, <, 0)
#undef ATOMIC_OP
#undef ATOMIC_OPS
-#define ATOMIC_OP(op, func_op, c_op, I, c_type, prefix) \
+#define ATOMIC_OP(op, func_op, I, c_type, prefix) \
static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \
{ \
atomic##prefix##_##func_op(I, v); \
}
-#define ATOMIC_FETCH_OP(op, func_op, c_op, I, c_type, prefix) \
+#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \
static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \
{ \
return atomic##prefix##_fetch_##func_op(I, v); \
@@ -202,16 +202,16 @@ static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t
#ifdef CONFIG_GENERIC_ATOMIC64
#define ATOMIC_OPS(op, asm_op, c_op, I) \
- ATOMIC_OP (op, asm_op, c_op, I, int, ) \
- ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \
+ ATOMIC_OP (op, asm_op, I, int, ) \
+ ATOMIC_FETCH_OP (op, asm_op, I, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, )
#else
#define ATOMIC_OPS(op, asm_op, c_op, I) \
- ATOMIC_OP (op, asm_op, c_op, I, int, ) \
- ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \
+ ATOMIC_OP (op, asm_op, I, int, ) \
+ ATOMIC_FETCH_OP (op, asm_op, I, int, ) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \
- ATOMIC_OP (op, asm_op, c_op, I, long, 64) \
- ATOMIC_FETCH_OP (op, asm_op, c_op, I, long, 64) \
+ ATOMIC_OP (op, asm_op, I, long, 64) \
+ ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \
ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
#endif
@@ -300,8 +300,13 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
/*
* atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
- * {cmp,}xchg and the operations that return, so they need a barrier. We just
- * use the other implementations directly.
+ * {cmp,}xchg and the operations that return, so they need a barrier.
+ */
+/*
+ * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by
+ * assigning the same barrier to both the LR and SC operations, but that might
+ * not make any sense. We're waiting on a memory model specification to
+ * determine exactly what the right thing to do is here.
*/
#define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \
static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 183534b7c39b..c0319cbf1eec 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -39,27 +39,23 @@
#define smp_wmb() RISCV_FENCE(w,w)
/*
- * These fences exist to enforce ordering around the relaxed AMOs. The
- * documentation defines that
- * "
- * atomic_fetch_add();
- * is equivalent to:
- * smp_mb__before_atomic();
- * atomic_fetch_add_relaxed();
- * smp_mb__after_atomic();
- * "
- * So we emit full fences on both sides.
- */
-#define __smb_mb__before_atomic() smp_mb()
-#define __smb_mb__after_atomic() smp_mb()
-
-/*
- * These barriers prevent accesses performed outside a spinlock from being moved
- * inside a spinlock. Since RISC-V sets the aq/rl bits on our spinlock only
- * enforce release consistency, we need full fences here.
+ * This is a very specific barrier: it's currently only used in two places in
+ * the kernel, both in the scheduler. See include/linux/spinlock.h for the two
+ * orderings it guarantees, but the "critical section is RCsc" guarantee
+ * mandates a barrier on RISC-V. The sequence looks like:
+ *
+ * lr.aq lock
+ * sc lock <= LOCKED
+ * smp_mb__after_spinlock()
+ * // critical section
+ * lr lock
+ * sc.rl lock <= UNLOCKED
+ *
+ * The AQ/RL pair provides a RCpc critical section, but there's not really any
+ * way we can take advantage of that here because the ordering is only enforced
+ * on that one lock. Thus, we're just doing a full fence.
*/
-#define smb_mb__before_spinlock() smp_mb()
-#define smb_mb__after_spinlock() smp_mb()
+#define smp_mb__after_spinlock() RISCV_FENCE(rw,rw)
#include <asm-generic/barrier.h>
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 7c281ef1d583..f30daf26f08f 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -67,7 +67,7 @@
: "memory");
#define __test_and_op_bit(op, mod, nr, addr) \
- __test_and_op_bit_ord(op, mod, nr, addr, )
+ __test_and_op_bit_ord(op, mod, nr, addr, .aqrl)
#define __op_bit(op, mod, nr, addr) \
__op_bit_ord(op, mod, nr, addr, )
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index c3e13764a943..bfc7f099ab1f 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -27,8 +27,8 @@
typedef u32 bug_insn_t;
#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
-#define __BUG_ENTRY_ADDR INT " 1b - 2b"
-#define __BUG_ENTRY_FILE INT " %0 - 2b"
+#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b"
+#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b"
#else
#define __BUG_ENTRY_ADDR RISCV_PTR " 1b"
#define __BUG_ENTRY_FILE RISCV_PTR " %0"
@@ -38,7 +38,7 @@ typedef u32 bug_insn_t;
#define __BUG_ENTRY \
__BUG_ENTRY_ADDR "\n\t" \
__BUG_ENTRY_FILE "\n\t" \
- SHORT " %1"
+ RISCV_SHORT " %1"
#else
#define __BUG_ENTRY \
__BUG_ENTRY_ADDR
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 0595585013b0..efd89a88d2d0 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -18,22 +18,44 @@
#undef flush_icache_range
#undef flush_icache_user_range
+#undef flush_dcache_page
static inline void local_flush_icache_all(void)
{
asm volatile ("fence.i" ::: "memory");
}
+#define PG_dcache_clean PG_arch_1
+
+static inline void flush_dcache_page(struct page *page)
+{
+ if (test_bit(PG_dcache_clean, &page->flags))
+ clear_bit(PG_dcache_clean, &page->flags);
+}
+
+/*
+ * RISC-V doesn't have an instruction to flush parts of the instruction cache,
+ * so instead we just flush the whole thing.
+ */
+#define flush_icache_range(start, end) flush_icache_all()
+#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all()
+
#ifndef CONFIG_SMP
-#define flush_icache_range(start, end) local_flush_icache_all()
-#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all()
+#define flush_icache_all() local_flush_icache_all()
+#define flush_icache_mm(mm, local) flush_icache_all()
#else /* CONFIG_SMP */
-#define flush_icache_range(start, end) sbi_remote_fence_i(0)
-#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0)
+#define flush_icache_all() sbi_remote_fence_i(0)
+void flush_icache_mm(struct mm_struct *mm, bool local);
#endif /* CONFIG_SMP */
+/*
+ * Bits in sys_riscv_flush_icache()'s flags argument.
+ */
+#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL
+#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL)
+
#endif /* _ASM_RISCV_CACHEFLUSH_H */
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index c1f32cfcc79b..a82ce599b639 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -19,6 +19,8 @@
#ifndef _ASM_RISCV_IO_H
#define _ASM_RISCV_IO_H
+#include <linux/types.h>
+
#ifdef CONFIG_MMU
extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
@@ -32,7 +34,7 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
#define ioremap_wc(addr, size) ioremap((addr), (size))
#define ioremap_wt(addr, size) ioremap((addr), (size))
-extern void iounmap(void __iomem *addr);
+extern void iounmap(volatile void __iomem *addr);
#endif /* CONFIG_MMU */
@@ -250,7 +252,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
const ctype *buf = buffer; \
\
do { \
- __raw_writeq(*buf++, addr); \
+ __raw_write ## len(*buf++, addr); \
} while (--count); \
} \
afence; \
@@ -266,9 +268,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar())
__io_reads_ins(ins, u8, b, __io_pbr(), __io_par())
__io_reads_ins(ins, u16, w, __io_pbr(), __io_par())
__io_reads_ins(ins, u32, l, __io_pbr(), __io_par())
-#define insb(addr, buffer, count) __insb((void __iomem *)addr, buffer, count)
-#define insw(addr, buffer, count) __insw((void __iomem *)addr, buffer, count)
-#define insl(addr, buffer, count) __insl((void __iomem *)addr, buffer, count)
+#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count)
+#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count)
+#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count)
__io_writes_outs(writes, u8, b, __io_bw(), __io_aw())
__io_writes_outs(writes, u16, w, __io_bw(), __io_aw())
@@ -280,9 +282,9 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw())
__io_writes_outs(outs, u8, b, __io_pbw(), __io_paw())
__io_writes_outs(outs, u16, w, __io_pbw(), __io_paw())
__io_writes_outs(outs, u32, l, __io_pbw(), __io_paw())
-#define outsb(addr, buffer, count) __outsb((void __iomem *)addr, buffer, count)
-#define outsw(addr, buffer, count) __outsw((void __iomem *)addr, buffer, count)
-#define outsl(addr, buffer, count) __outsl((void __iomem *)addr, buffer, count)
+#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count)
+#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count)
+#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count)
#ifdef CONFIG_64BIT
__io_reads_ins(reads, u64, q, __io_br(), __io_ar())
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 66805cba9a27..5df2dccdba12 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -19,6 +19,10 @@
typedef struct {
void *vdso;
+#ifdef CONFIG_SMP
+ /* A local icache flush is needed before user execution can resume. */
+ cpumask_t icache_stale_mask;
+#endif
} mm_context_t;
#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h
index de1fc1631fc4..97424834dce2 100644
--- a/arch/riscv/include/asm/mmu_context.h
+++ b/arch/riscv/include/asm/mmu_context.h
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -14,11 +15,13 @@
#ifndef _ASM_RISCV_MMU_CONTEXT_H
#define _ASM_RISCV_MMU_CONTEXT_H
+#include <linux/mm_types.h>
#include <asm-generic/mm_hooks.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
static inline void enter_lazy_tlb(struct mm_struct *mm,
struct task_struct *task)
@@ -46,12 +49,54 @@ static inline void set_pgdir(pgd_t *pgd)
csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
}
+/*
+ * When necessary, performs a deferred icache flush for the given MM context,
+ * on the local CPU. RISC-V has no direct mechanism for instruction cache
+ * shoot downs, so instead we send an IPI that informs the remote harts they
+ * need to flush their local instruction caches. To avoid pathologically slow
+ * behavior in a common case (a bunch of single-hart processes on a many-hart
+ * machine, ie 'make -j') we avoid the IPIs for harts that are not currently
+ * executing a MM context and instead schedule a deferred local instruction
+ * cache flush to be performed before execution resumes on each hart. This
+ * actually performs that local instruction cache flush, which implicitly only
+ * refers to the current hart.
+ */
+static inline void flush_icache_deferred(struct mm_struct *mm)
+{
+#ifdef CONFIG_SMP
+ unsigned int cpu = smp_processor_id();
+ cpumask_t *mask = &mm->context.icache_stale_mask;
+
+ if (cpumask_test_cpu(cpu, mask)) {
+ cpumask_clear_cpu(cpu, mask);
+ /*
+ * Ensure the remote hart's writes are visible to this hart.
+ * This pairs with a barrier in flush_icache_mm.
+ */
+ smp_mb();
+ local_flush_icache_all();
+ }
+#endif
+}
+
static inline void switch_mm(struct mm_struct *prev,
struct mm_struct *next, struct task_struct *task)
{
if (likely(prev != next)) {
+ /*
+ * Mark the current MM context as inactive, and the next as
+ * active. This is at least used by the icache flushing
+ * routines in order to determine who should
+ */
+ unsigned int cpu = smp_processor_id();
+
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
set_pgdir(next->pgd);
local_flush_tlb_all();
+
+ flush_icache_deferred(next);
}
}
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 3399257780b2..2cbd92ed1629 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr))
#define pte_unmap(pte) ((void)(pte))
-/*
- * Certain architectures need to do special things when PTEs within
- * a page table are directly modified. Thus, the following hook is
- * made available.
- */
-static inline void set_pte(pte_t *ptep, pte_t pteval)
-{
- *ptep = pteval;
-}
-
-static inline void set_pte_at(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep, pte_t pteval)
-{
- set_pte(ptep, pteval);
-}
-
-static inline void pte_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- set_pte_at(mm, addr, ptep, __pte(0));
-}
-
static inline int pte_present(pte_t pte)
{
return (pte_val(pte) & _PAGE_PRESENT);
@@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte)
return (pte_val(pte) == 0);
}
-/* static inline int pte_read(pte_t pte) */
-
static inline int pte_write(pte_t pte)
{
return pte_val(pte) & _PAGE_WRITE;
}
+static inline int pte_exec(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_EXEC;
+}
+
static inline int pte_huge(pte_t pte)
{
return pte_present(pte)
&& (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
}
-/* static inline int pte_exec(pte_t pte) */
-
static inline int pte_dirty(pte_t pte)
{
return pte_val(pte) & _PAGE_DIRTY;
@@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
return pte_val(pte_a) == pte_val(pte_b);
}
+/*
+ * Certain architectures need to do special things when PTEs within
+ * a page table are directly modified. Thus, the following hook is
+ * made available.
+ */
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+ *ptep = pteval;
+}
+
+void flush_icache_pte(pte_t pte);
+
+static inline void set_pte_at(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pteval)
+{
+ if (pte_present(pteval) && pte_exec(pteval))
+ flush_icache_pte(pteval);
+
+ set_pte(ptep, pteval);
+}
+
+static inline void pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ set_pte_at(mm, addr, ptep, __pte(0));
+}
+
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h
index 04c71d938afd..2fd27e8ef1fd 100644
--- a/arch/riscv/include/asm/spinlock.h
+++ b/arch/riscv/include/asm/spinlock.h
@@ -24,7 +24,7 @@
/* FIXME: Replace this with a ticket lock, like MIPS. */
-#define arch_spin_is_locked(x) ((x)->lock != 0)
+#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
@@ -58,15 +58,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
}
}
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_rmb();
- do {
- cpu_relax();
- } while (arch_spin_is_locked(lock));
- smp_acquire__after_ctrl_dep();
-}
-
/***********************************************************/
static inline void arch_read_lock(arch_rwlock_t *lock)
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index 3df4932d8964..2f26989cb864 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -18,7 +18,7 @@
typedef unsigned long cycles_t;
-static inline cycles_t get_cycles(void)
+static inline cycles_t get_cycles_inline(void)
{
cycles_t n;
@@ -27,6 +27,7 @@ static inline cycles_t get_cycles(void)
: "=r" (n));
return n;
}
+#define get_cycles get_cycles_inline
#ifdef CONFIG_64BIT
static inline uint64_t get_cycles64(void)
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 5ee4ae370b5e..715b0f10af58 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -17,7 +17,12 @@
#ifdef CONFIG_MMU
-/* Flush entire local TLB */
+#include <linux/mm_types.h>
+
+/*
+ * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction
+ * cache as well, so a 'fence.i' is not necessary.
+ */
static inline void local_flush_tlb_all(void)
{
__asm__ __volatile__ ("sfence.vma" : : : "memory");
diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h
new file mode 100644
index 000000000000..a2ccf1894929
--- /dev/null
+++ b/arch/riscv/include/asm/vdso-syscalls.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
+#define _ASM_RISCV_VDSO_SYSCALLS_H
+
+#ifdef CONFIG_SMP
+
+/* These syscalls are only used by the vDSO and are not in the uapi. */
+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
+__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
+
+#endif
+
+#endif /* _ASM_RISCV_VDSO_H */
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 602f61257553..541544d64c33 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -38,4 +38,8 @@ struct vdso_data {
(void __user *)((unsigned long)(base) + __vdso_##name); \
})
+#ifdef CONFIG_SMP
+asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
+#endif
+
#endif /* _ASM_RISCV_VDSO_H */
diff --git a/arch/riscv/include/uapi/asm/Kbuild b/arch/riscv/include/uapi/asm/Kbuild
index 5ded96b06352..7e91f4850475 100644
--- a/arch/riscv/include/uapi/asm/Kbuild
+++ b/arch/riscv/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += setup.h
generic-y += unistd.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 76af908f87c1..78f670d70133 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -152,6 +152,3 @@ END(_start)
__PAGE_ALIGNED_BSS
/* Empty zero page */
.balign PAGE_SIZE
-ENTRY(empty_zero_page)
- .fill (empty_zero_page + PAGE_SIZE) - ., 1, 0x00
-END(empty_zero_page)
diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
index 23cc81ec9e94..551734248748 100644
--- a/arch/riscv/kernel/riscv_ksyms.c
+++ b/arch/riscv/kernel/riscv_ksyms.c
@@ -12,4 +12,7 @@
/*
* Assembly functions that may be used (directly or indirectly) by modules
*/
+EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(__copy_user);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index de7db114c315..cb7b0c63014e 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -38,10 +38,6 @@
#include <asm/tlbflush.h>
#include <asm/thread_info.h>
-#ifdef CONFIG_HVC_RISCV_SBI
-#include <asm/hvc_riscv_sbi.h>
-#endif
-
#ifdef CONFIG_DUMMY_CONSOLE
struct screen_info screen_info = {
.orig_video_lines = 30,
@@ -58,7 +54,12 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
#endif /* CONFIG_CMDLINE_BOOL */
unsigned long va_pa_offset;
+EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
+EXPORT_SYMBOL(pfn_base);
+
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
/* The lucky hart to first increment this variable will boot the other cores */
atomic_t hart_lottery;
@@ -207,13 +208,6 @@ static void __init setup_bootmem(void)
void __init setup_arch(char **cmdline_p)
{
-#if defined(CONFIG_HVC_RISCV_SBI)
- if (likely(early_console == NULL)) {
- early_console = &riscv_sbi_early_console_dev;
- register_console(early_console);
- }
-#endif
-
#ifdef CONFIG_CMDLINE_BOOL
#ifdef CONFIG_CMDLINE_OVERRIDE
strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index b4a71ec5906f..6d3962435720 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -38,6 +38,13 @@ enum ipi_message_type {
IPI_MAX
};
+
+/* Unsupported */
+int setup_profiling_timer(unsigned int multiplier)
+{
+ return -EINVAL;
+}
+
irqreturn_t handle_ipi(void)
{
unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
@@ -108,3 +115,51 @@ void smp_send_reschedule(int cpu)
{
send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
}
+
+/*
+ * Performs an icache flush for the given MM context. RISC-V has no direct
+ * mechanism for instruction cache shoot downs, so instead we send an IPI that
+ * informs the remote harts they need to flush their local instruction caches.
+ * To avoid pathologically slow behavior in a common case (a bunch of
+ * single-hart processes on a many-hart machine, ie 'make -j') we avoid the
+ * IPIs for harts that are not currently executing a MM context and instead
+ * schedule a deferred local instruction cache flush to be performed before
+ * execution resumes on each hart.
+ */
+void flush_icache_mm(struct mm_struct *mm, bool local)
+{
+ unsigned int cpu;
+ cpumask_t others, *mask;
+
+ preempt_disable();
+
+ /* Mark every hart's icache as needing a flush for this MM. */
+ mask = &mm->context.icache_stale_mask;
+ cpumask_setall(mask);
+ /* Flush this hart's I$ now, and mark it as flushed. */
+ cpu = smp_processor_id();
+ cpumask_clear_cpu(cpu, mask);
+ local_flush_icache_all();
+
+ /*
+ * Flush the I$ of other harts concurrently executing, and mark them as
+ * flushed.
+ */
+ cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
+ local |= cpumask_empty(&others);
+ if (mm != current->active_mm || !local)
+ sbi_remote_fence_i(others.bits);
+ else {
+ /*
+ * It's assumed that at least one strongly ordered operation is
+ * performed on this hart between setting a hart's cpumask bit
+ * and scheduling this MM context on that hart. Sending an SBI
+ * remote message will do this, but in the case where no
+ * messages are sent we still need to order this hart's writes
+ * with flush_icache_deferred().
+ */
+ smp_mb();
+ }
+
+ preempt_enable();
+}
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 4351be7d0533..79c78668258e 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -14,8 +14,8 @@
*/
#include <linux/syscalls.h>
-#include <asm/cmpxchg.h>
#include <asm/unistd.h>
+#include <asm/cacheflush.h>
static long riscv_sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
@@ -47,3 +47,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12);
}
#endif /* !CONFIG_64BIT */
+
+#ifdef CONFIG_SMP
+/*
+ * Allows the instruction cache to be flushed from userspace. Despite RISC-V
+ * having a direct 'fence.i' instruction available to userspace (which we
+ * can't trap!), that's not actually viable when running on Linux because the
+ * kernel might schedule a process on another hart. There is no way for
+ * userspace to handle this without invoking the kernel (as it doesn't know the
+ * thread->hart mappings), so we've defined a RISC-V specific system call to
+ * flush the instruction cache.
+ *
+ * sys_riscv_flush_icache() is defined to flush the instruction cache over an
+ * address range, with the flush applying to either all threads or just the
+ * caller. We don't currently do anything with the address range, that's just
+ * in there for forwards compatibility.
+ */
+SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
+ uintptr_t, flags)
+{
+ struct mm_struct *mm = current->mm;
+ bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
+
+ /* Check the reserved flags. */
+ if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
+ return -EINVAL;
+
+ flush_icache_mm(mm, local);
+
+ return 0;
+}
+#endif
diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c
index 4e30dc5fb593..a5bd6401f95e 100644
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -15,6 +15,7 @@
#include <linux/linkage.h>
#include <linux/syscalls.h>
#include <asm-generic/syscalls.h>
+#include <asm/vdso.h>
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
@@ -22,4 +23,5 @@
void *sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = sys_ni_syscall,
#include <asm/unistd.h>
+#include <asm/vdso-syscalls.h>
};
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 523d0a8ac8db..324568d33921 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -1,7 +1,12 @@
# Copied from arch/tile/kernel/vdso/Makefile
# Symbols present in the vdso
-vdso-syms = rt_sigreturn
+vdso-syms = rt_sigreturn
+vdso-syms += gettimeofday
+vdso-syms += clock_gettime
+vdso-syms += clock_getres
+vdso-syms += getcpu
+vdso-syms += flush_icache
# Files to link into the vdso
obj-vdso = $(patsubst %, %.o, $(vdso-syms))
diff --git a/arch/riscv/kernel/vdso/clock_getres.S b/arch/riscv/kernel/vdso/clock_getres.S
new file mode 100644
index 000000000000..edf7e2339648
--- /dev/null
+++ b/arch/riscv/kernel/vdso/clock_getres.S
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+ .text
+/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */
+ENTRY(__vdso_clock_getres)
+ .cfi_startproc
+ /* For now, just do the syscall. */
+ li a7, __NR_clock_getres
+ ecall
+ ret
+ .cfi_endproc
+ENDPROC(__vdso_clock_getres)
diff --git a/arch/riscv/kernel/vdso/clock_gettime.S b/arch/riscv/kernel/vdso/clock_gettime.S
new file mode 100644
index 000000000000..aac65676c6d5
--- /dev/null
+++ b/arch/riscv/kernel/vdso/clock_gettime.S
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+ .text
+/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */
+ENTRY(__vdso_clock_gettime)
+ .cfi_startproc
+ /* For now, just do the syscall. */
+ li a7, __NR_clock_gettime
+ ecall
+ ret
+ .cfi_endproc
+ENDPROC(__vdso_clock_gettime)
diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S
new file mode 100644
index 000000000000..b0fbad74e873
--- /dev/null
+++ b/arch/riscv/kernel/vdso/flush_icache.S
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+#include <asm/vdso-syscalls.h>
+
+ .text
+/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
+ENTRY(__vdso_flush_icache)
+ .cfi_startproc
+#ifdef CONFIG_SMP
+ li a7, __NR_riscv_flush_icache
+ ecall
+#else
+ fence.i
+ li a0, 0
+#endif
+ ret
+ .cfi_endproc
+ENDPROC(__vdso_flush_icache)
diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S
new file mode 100644
index 000000000000..cc7e98924484
--- /dev/null
+++ b/arch/riscv/kernel/vdso/getcpu.S
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+ .text
+/* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */
+ENTRY(__vdso_getcpu)
+ .cfi_startproc
+ /* For now, just do the syscall. */
+ li a7, __NR_getcpu
+ ecall
+ ret
+ .cfi_endproc
+ENDPROC(__vdso_getcpu)
diff --git a/arch/riscv/kernel/vdso/gettimeofday.S b/arch/riscv/kernel/vdso/gettimeofday.S
new file mode 100644
index 000000000000..da85d33e8990
--- /dev/null
+++ b/arch/riscv/kernel/vdso/gettimeofday.S
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+ .text
+/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */
+ENTRY(__vdso_gettimeofday)
+ .cfi_startproc
+ /* For now, just do the syscall. */
+ li a7, __NR_gettimeofday
+ ecall
+ ret
+ .cfi_endproc
+ENDPROC(__vdso_gettimeofday)
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index 8c9dce95c11d..cd1d47e0724b 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -70,8 +70,11 @@ VERSION
LINUX_4.15 {
global:
__vdso_rt_sigreturn;
- __vdso_cmpxchg32;
- __vdso_cmpxchg64;
+ __vdso_gettimeofday;
+ __vdso_clock_gettime;
+ __vdso_clock_getres;
+ __vdso_getcpu;
+ __vdso_flush_icache;
local: *;
};
}
diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c
index 1cc4ac3964b4..dce8ae24c6d3 100644
--- a/arch/riscv/lib/delay.c
+++ b/arch/riscv/lib/delay.c
@@ -84,6 +84,7 @@ void __delay(unsigned long cycles)
while ((unsigned long)(get_cycles() - t0) < cycles)
cpu_relax();
}
+EXPORT_SYMBOL(__delay);
void udelay(unsigned long usecs)
{
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 81f7d9ce6d88..eb22ab49b3e0 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -2,3 +2,4 @@ obj-y += init.o
obj-y += fault.o
obj-y += extable.o
obj-y += ioremap.o
+obj-y += cacheflush.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
new file mode 100644
index 000000000000..498c0a0814fe
--- /dev/null
+++ b/arch/riscv/mm/cacheflush.c
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
+
+void flush_icache_pte(pte_t pte)
+{
+ struct page *page = pte_page(pte);
+
+ if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+ flush_icache_all();
+}
diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c
index e99194a4077e..70ef2724cdf6 100644
--- a/arch/riscv/mm/ioremap.c
+++ b/arch/riscv/mm/ioremap.c
@@ -85,7 +85,7 @@ EXPORT_SYMBOL(ioremap);
*
* Caller must ensure there is only one unmapping for the same pointer.
*/
-void iounmap(void __iomem *addr)
+void iounmap(volatile void __iomem *addr)
{
vunmap((void *)((unsigned long)addr & PAGE_MASK));
}
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index eae2c64cf69d..9fdff3fe1a42 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-y += kernel/
obj-y += mm/
obj-$(CONFIG_KVM) += kvm/
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 6b3f41985f28..de54cfc6109d 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# s390/Makefile
#
@@ -6,10 +7,6 @@
# for "archclean" and "archdep" for cleaning up and making dependencies for
# this architecture
#
-# This file is subject to the terms and conditions of the GNU General Public
-# License. See the file "COPYING" in the main directory of this archive
-# for more details.
-#
# Copyright (C) 1994 by Linus Torvalds
#
diff --git a/arch/s390/appldata/Makefile b/arch/s390/appldata/Makefile
index 99f1cf071304..b06def4a4f2f 100644
--- a/arch/s390/appldata/Makefile
+++ b/arch/s390/appldata/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux - z/VM Monitor Stream.
#
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index ef3fb1b9201f..cb6e8066b1ad 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Base infrastructure for Linux-z/VM Monitor Stream, Stage 1.
* Exports appldata_register_ops() and appldata_unregister_ops() for the
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 598df5708501..e68136c3c23a 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Data gathering module for Linux-VM Monitor Stream, Stage 1.
* Collects data related to memory management.
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 66037d2622b4..8bc14b0d1def 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Data gathering module for Linux-VM Monitor Stream, Stage 1.
* Collects accumulated network statistics (Packets received/transmitted,
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 45b3178200ab..433a994b1a89 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Data gathering module for Linux-VM Monitor Stream, Stage 1.
* Collects misc. OS related data (CPU utilization, running processes).
diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr
index f02382ae5c48..42a242597f34 100644
--- a/arch/s390/boot/compressed/vmlinux.scr
+++ b/arch/s390/boot/compressed/vmlinux.scr
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
SECTIONS
{
.rodata.compressed : {
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index aed3069699bd..bed227f267ae 100644
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -1,11 +1,8 @@
#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
#
# arch/s390x/boot/install.sh
#
-# This file is subject to the terms and conditions of the GNU General Public
-# License. See the file "COPYING" in the main directory of this archive
-# for more details.
-#
# Copyright (C) 1995 by Linus Torvalds
#
# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index b48e20dd94e9..d60798737d86 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Cryptographic API.
*
@@ -11,12 +12,6 @@
* Harald Freudenberger <freude@de.ibm.com>
*
* Derived from "crypto/aes_generic.c"
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
*/
#define KMSG_COMPONENT "aes_s390"
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index 36aefc07d10c..8720e9203ecf 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* s390 arch random implementation.
*
* Copyright IBM Corp. 2017
* Author(s): Harald Freudenberger <freude@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
*/
#include <linux/kernel.h>
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 992e630c227b..436865926c26 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Crypto-API module for CRC-32 algorithms implemented with the
* z/Architecture Vector Extension Facility.
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 0d296662bbf0..5346b5a80bb6 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Cryptographic API.
*
@@ -6,12 +7,6 @@
* Copyright IBM Corp. 2003, 2011
* Author(s): Thomas Spatzier
* Jan Glauber (jan.glauber@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#include <linux/init.h>
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 564616d48d8b..3b7f96c9eead 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Cryptographic API.
*
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index a4e903ed7e21..003932db8d12 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Cryptographic API.
*
@@ -7,11 +8,6 @@
* Copyright IBM Corp. 2017
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Harald Freudenberger <freude@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
*/
#define KMSG_COMPONENT "paes_s390"
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 3e47c4a0f18b..a97a1802cfb4 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2006, 2015
* Author(s): Jan Glauber <jan.glauber@de.ibm.com>
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
index 10f200790079..d6f8258b44df 100644
--- a/arch/s390/crypto/sha.h
+++ b/arch/s390/crypto/sha.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Cryptographic API.
*
@@ -5,12 +6,6 @@
*
* Copyright IBM Corp. 2007
* Author(s): Jan Glauber (jang@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
*/
#ifndef _CRYPTO_ARCH_S390_SHA_H
#define _CRYPTO_ARCH_S390_SHA_H
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index c7de53d8da75..a00c17f761c1 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Cryptographic API.
*
@@ -16,12 +17,6 @@
* Copyright (c) Alan Smithee.
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 53c277999a28..944aa6b237cd 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Cryptographic API.
*
@@ -6,12 +7,6 @@
* s390 Version:
* Copyright IBM Corp. 2005, 2011
* Author(s): Jan Glauber (jang@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index 2f4caa1ef123..b17eded532b1 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Cryptographic API.
*
@@ -5,12 +6,6 @@
*
* Copyright IBM Corp. 2007
* Author(s): Jan Glauber (jang@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
*/
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index c740f77285b2..cf0718d121bc 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Cryptographic API.
*
@@ -5,12 +6,6 @@
*
* Copyright IBM Corp. 2007
* Author(s): Jan Glauber (jang@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
*/
#include <crypto/internal/hash.h>
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
index 2ee25ba252d6..06f601509ce9 100644
--- a/arch/s390/hypfs/Makefile
+++ b/arch/s390/hypfs/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux hypfs filesystem routines.
#
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index cf8a2d92467f..43bbe63e2992 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -1,9 +1,9 @@
+// SPDX-License-Identifier: GPL-1.0+
/*
* Hypervisor filesystem for Linux on s390.
*
* Copyright IBM Corp. 2006, 2008
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
- * License: GPL
*/
#define KMSG_COMPONENT "hypfs"
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 41c211a4d8b1..048450869328 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
generic-y += asm-offsets.h
generic-y += cacheflush.h
generic-y += clkdev.h
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index a72002056b54..c2cf7bcdef9b 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_ALTERNATIVE_H
#define _ASM_S390_ALTERNATIVE_H
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index c02f4aba88a6..cfce6835b109 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Adjunct processor (AP) interfaces
*
* Copyright IBM Corp. 2017
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Tony Krowiak <akrowia@linux.vnet.ibm.com>
* Martin Schwidefsky <schwidefsky@de.ibm.com>
* Harald Freudenberger <freude@de.ibm.com>
diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h
index 0f5bd894f4dc..aa42a179be33 100644
--- a/arch/s390/include/asm/bugs.h
+++ b/arch/s390/include/asm/bugs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* S390 version
* Copyright IBM Corp. 1999
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 792cda339af1..dd08db491b89 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* CPU-measurement facilities
*
* Copyright IBM Corp. 2012
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
* Jan Glauber <jang@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#ifndef _ASM_S390_CPU_MF_H
#define _ASM_S390_CPU_MF_H
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 9a3cb3983c01..1a61b1b997f2 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -194,13 +194,14 @@ struct arch_elf_state {
#define CORE_DUMP_USE_REGSET
#define ELF_EXEC_PAGESIZE PAGE_SIZE
-/*
- * This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is raised to 4GB to leave the entire 32-bit address
- * space open for things that want to use the area for 32-bit pointers.
- */
-#define ELF_ET_DYN_BASE (is_compat_task() ? 0x000400000UL : \
- 0x100000000UL)
+/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
+ use of this is to invoke "./ld.so someprog" to test out a new version of
+ the loader. We need to make sure that it is out of the way of the program
+ that it will "exec", and that there is sufficient room for the brk. 64-bit
+ tasks are aligned to 4GB. */
+#define ELF_ET_DYN_BASE (is_compat_task() ? \
+ (STACK_TOP / 3 * 2) : \
+ (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
/* This yields a mask that user programs can use to figure out what
instruction set this CPU supports. */
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 921391f2341e..13de80cf741c 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
#ifndef _ASM_S390_KPROBES_H
#define _ASM_S390_KPROBES_H
/*
* Kernel Probes (KProbes)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright IBM Corp. 2002, 2006
*
* 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index f3a9b5a445b6..e14f381757f6 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* definition for kernel virtual machines on s390
*
* Copyright IBM Corp. 2008, 2009
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index 41393052ac57..74eeec9c0a80 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* definition for paravirtual devices on s390
*
* Copyright IBM Corp. 2008
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
*/
/*
@@ -20,8 +17,6 @@
*
* Copyright IBM Corp. 2007,2008
* Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
*/
#ifndef __S390_KVM_PARA_H
#define __S390_KVM_PARA_H
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
index 6de5c6cb0061..672f95b12d40 100644
--- a/arch/s390/include/asm/livepatch.h
+++ b/arch/s390/include/asm/livepatch.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* livepatch.h - s390-specific Kernel Live Patching Core
*
@@ -7,13 +8,6 @@
* Jiri Slaby
*/
-/*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-
#ifndef ASM_LIVEPATCH_H
#define ASM_LIVEPATCH_H
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index f4a07f788f78..65154eaa3714 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -28,7 +28,7 @@ static inline int init_new_context(struct task_struct *tsk,
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste ||
test_thread_flag(TIF_PGSTE) ||
- current->mm->context.alloc_pgste;
+ (current->mm && current->mm->context.alloc_pgste);
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
mm->context.use_cmma = 0;
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index d6c9d1e0dc2d..b9c0e361748b 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -40,6 +40,7 @@ struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
/* Perf pt_regs extension for sample-data-entry indicators */
struct perf_sf_sde_regs {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index d7fe9838084d..0a6b0286c32e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -709,7 +709,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
}
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index a3788dafc0e1..6f70d81c40f2 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -74,9 +74,14 @@ enum {
*/
struct pt_regs
{
- unsigned long args[1];
- psw_t psw;
- unsigned long gprs[NUM_GPRS];
+ union {
+ user_pt_regs user_regs;
+ struct {
+ unsigned long args[1];
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+ };
+ };
unsigned long orig_gpr2;
unsigned int int_code;
unsigned int int_parm;
diff --git a/arch/s390/include/asm/segment.h b/arch/s390/include/asm/segment.h
index 8bfce3475b1c..97a0582b8d0f 100644
--- a/arch/s390/include/asm/segment.h
+++ b/arch/s390/include/asm/segment.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index ec7b476c1ac5..c61b2cc1a8a8 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -30,21 +30,20 @@ static inline void restore_access_regs(unsigned int *acrs)
asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
}
-#define switch_to(prev,next,last) do { \
- if (prev->mm) { \
- save_fpu_regs(); \
- save_access_regs(&prev->thread.acrs[0]); \
- save_ri_cb(prev->thread.ri_cb); \
- save_gs_cb(prev->thread.gs_cb); \
- } \
+#define switch_to(prev, next, last) do { \
+ /* save_fpu_regs() sets the CIF_FPU flag, which enforces \
+ * a restore of the floating point / vector registers as \
+ * soon as the next task returns to user space \
+ */ \
+ save_fpu_regs(); \
+ save_access_regs(&prev->thread.acrs[0]); \
+ save_ri_cb(prev->thread.ri_cb); \
+ save_gs_cb(prev->thread.gs_cb); \
update_cr_regs(next); \
- if (next->mm) { \
- set_cpu_flag(CIF_FPU); \
- restore_access_regs(&next->thread.acrs[0]); \
- restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
- restore_gs_cb(next->thread.gs_cb); \
- } \
- prev = __switch_to(prev,next); \
+ restore_access_regs(&next->thread.acrs[0]); \
+ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
+ restore_gs_cb(next->thread.gs_cb); \
+ prev = __switch_to(prev, next); \
} while (0)
#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 6bc941be6921..96f9a9151fde 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Access to user system call parameters and results
*
* Copyright IBM Corp. 2008
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#ifndef _ASM_SYSCALL_H
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index a702cb9d4269..25057c118d56 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* definition for store system information stsi
*
* Copyright IBM Corp. 2001, 2008
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Ulrich Weigand <weigand@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
*/
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 1807229b292f..cca406fdbe51 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -53,6 +53,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu);
static inline void topology_init_early(void) { }
static inline void topology_schedule_update(void) { }
static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
+static inline int topology_cpu_dedicated(int cpu_nr) { return 0; }
static inline void topology_expect_change(void) { }
#endif /* CONFIG_SCHED_TOPOLOGY */
diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h
index d375526c261f..605dc46bac5e 100644
--- a/arch/s390/include/asm/vga.h
+++ b/arch/s390/include/asm/vga.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_VGA_H
#define _ASM_S390_VGA_H
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 098f28778a13..92b7c9b3e641 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
diff --git a/arch/s390/include/uapi/asm/bpf_perf_event.h b/arch/s390/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..cefe7c7cd4f6
--- /dev/null
+++ b/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 9ad172dcd912..38535a57fef8 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -6,10 +6,6 @@
*
* Copyright IBM Corp. 2008
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
*/
diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h
index 0dc86b3a7cb0..b9ab584adf43 100644
--- a/arch/s390/include/uapi/asm/kvm_para.h
+++ b/arch/s390/include/uapi/asm/kvm_para.h
@@ -4,9 +4,5 @@
*
* Copyright IBM Corp. 2008
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
*/
diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h
index c36c97ffdc6f..84606b8cc49e 100644
--- a/arch/s390/include/uapi/asm/kvm_perf.h
+++ b/arch/s390/include/uapi/asm/kvm_perf.h
@@ -4,10 +4,6 @@
*
* Copyright 2014 IBM Corp.
* Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#ifndef __LINUX_KVM_PERF_S390_H
diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h
index 7c8564f98205..d17dd9e5d516 100644
--- a/arch/s390/include/uapi/asm/perf_regs.h
+++ b/arch/s390/include/uapi/asm/perf_regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_S390_PERF_REGS_H
#define _ASM_S390_PERF_REGS_H
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index 0d23c8ff2900..543dd70e12c8 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -162,7 +162,7 @@
#define GPR_SIZE 8
#define CR_SIZE 8
-#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
+#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
#endif /* __s390x__ */
@@ -179,17 +179,16 @@
#define ACR_SIZE 4
-#define PTRACE_OLDSETOPTIONS 21
+#define PTRACE_OLDSETOPTIONS 21
#ifndef __ASSEMBLY__
#include <linux/stddef.h>
#include <linux/types.h>
-typedef union
-{
- float f;
- double d;
- __u64 ui;
+typedef union {
+ float f;
+ double d;
+ __u64 ui;
struct
{
__u32 hi;
@@ -197,23 +196,21 @@ typedef union
} fp;
} freg_t;
-typedef struct
-{
- __u32 fpc;
+typedef struct {
+ __u32 fpc;
__u32 pad;
- freg_t fprs[NUM_FPRS];
+ freg_t fprs[NUM_FPRS];
} s390_fp_regs;
-#define FPC_EXCEPTION_MASK 0xF8000000
-#define FPC_FLAGS_MASK 0x00F80000
-#define FPC_DXC_MASK 0x0000FF00
-#define FPC_RM_MASK 0x00000003
+#define FPC_EXCEPTION_MASK 0xF8000000
+#define FPC_FLAGS_MASK 0x00F80000
+#define FPC_DXC_MASK 0x0000FF00
+#define FPC_RM_MASK 0x00000003
/* this typedef defines how a Program Status Word looks like */
-typedef struct
-{
- unsigned long mask;
- unsigned long addr;
+typedef struct {
+ unsigned long mask;
+ unsigned long addr;
} __attribute__ ((aligned(8))) psw_t;
#ifndef __s390x__
@@ -282,8 +279,7 @@ typedef struct
/*
* The s390_regs structure is used to define the elf_gregset_t.
*/
-typedef struct
-{
+typedef struct {
psw_t psw;
unsigned long gprs[NUM_GPRS];
unsigned int acrs[NUM_ACRS];
@@ -291,24 +287,32 @@ typedef struct
} s390_regs;
/*
+ * The user_pt_regs structure exports the beginning of
+ * the in-kernel pt_regs structure to user space.
+ */
+typedef struct {
+ unsigned long args[1];
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+} user_pt_regs;
+
+/*
* Now for the user space program event recording (trace) definitions.
* The following structures are used only for the ptrace interface, don't
* touch or even look at it if you don't want to modify the user-space
* ptrace interface. In particular stay away from it for in-kernel PER.
*/
-typedef struct
-{
+typedef struct {
unsigned long cr[NUM_CR_WORDS];
} per_cr_words;
#define PER_EM_MASK 0xE8000000UL
-typedef struct
-{
+typedef struct {
#ifdef __s390x__
- unsigned : 32;
+ unsigned : 32;
#endif /* __s390x__ */
- unsigned em_branching : 1;
+ unsigned em_branching : 1;
unsigned em_instruction_fetch : 1;
/*
* Switching on storage alteration automatically fixes
@@ -317,44 +321,41 @@ typedef struct
unsigned em_storage_alteration : 1;
unsigned em_gpr_alt_unused : 1;
unsigned em_store_real_address : 1;
- unsigned : 3;
+ unsigned : 3;
unsigned branch_addr_ctl : 1;
- unsigned : 1;
+ unsigned : 1;
unsigned storage_alt_space_ctl : 1;
- unsigned : 21;
+ unsigned : 21;
unsigned long starting_addr;
unsigned long ending_addr;
} per_cr_bits;
-typedef struct
-{
+typedef struct {
unsigned short perc_atmid;
unsigned long address;
unsigned char access_id;
} per_lowcore_words;
-typedef struct
-{
- unsigned perc_branching : 1;
+typedef struct {
+ unsigned perc_branching : 1;
unsigned perc_instruction_fetch : 1;
unsigned perc_storage_alteration : 1;
- unsigned perc_gpr_alt_unused : 1;
+ unsigned perc_gpr_alt_unused : 1;
unsigned perc_store_real_address : 1;
- unsigned : 3;
- unsigned atmid_psw_bit_31 : 1;
- unsigned atmid_validity_bit : 1;
- unsigned atmid_psw_bit_32 : 1;
- unsigned atmid_psw_bit_5 : 1;
- unsigned atmid_psw_bit_16 : 1;
- unsigned atmid_psw_bit_17 : 1;
- unsigned si : 2;
+ unsigned : 3;
+ unsigned atmid_psw_bit_31 : 1;
+ unsigned atmid_validity_bit : 1;
+ unsigned atmid_psw_bit_32 : 1;
+ unsigned atmid_psw_bit_5 : 1;
+ unsigned atmid_psw_bit_16 : 1;
+ unsigned atmid_psw_bit_17 : 1;
+ unsigned si : 2;
unsigned long address;
- unsigned : 4;
- unsigned access_id : 4;
+ unsigned : 4;
+ unsigned access_id : 4;
} per_lowcore_bits;
-typedef struct
-{
+typedef struct {
union {
per_cr_words words;
per_cr_bits bits;
@@ -364,9 +365,9 @@ typedef struct
* the kernel always sets them to zero. To enable single
* stepping use ptrace(PTRACE_SINGLESTEP) instead.
*/
- unsigned single_step : 1;
+ unsigned single_step : 1;
unsigned instruction_fetch : 1;
- unsigned : 30;
+ unsigned : 30;
/*
* These addresses are copied into cr10 & cr11 if single
* stepping is switched off
@@ -376,11 +377,10 @@ typedef struct
union {
per_lowcore_words words;
per_lowcore_bits bits;
- } lowcore;
+ } lowcore;
} per_struct;
-typedef struct
-{
+typedef struct {
unsigned int len;
unsigned long kernel_addr;
unsigned long process_addr;
@@ -390,12 +390,12 @@ typedef struct
* S/390 specific non posix ptrace requests. I chose unusual values so
* they are unlikely to clash with future ptrace definitions.
*/
-#define PTRACE_PEEKUSR_AREA 0x5000
-#define PTRACE_POKEUSR_AREA 0x5001
+#define PTRACE_PEEKUSR_AREA 0x5000
+#define PTRACE_POKEUSR_AREA 0x5001
#define PTRACE_PEEKTEXT_AREA 0x5002
#define PTRACE_PEEKDATA_AREA 0x5003
#define PTRACE_POKETEXT_AREA 0x5004
-#define PTRACE_POKEDATA_AREA 0x5005
+#define PTRACE_POKEDATA_AREA 0x5005
#define PTRACE_GET_LAST_BREAK 0x5006
#define PTRACE_PEEK_SYSTEM_CALL 0x5007
#define PTRACE_POKE_SYSTEM_CALL 0x5008
@@ -413,21 +413,19 @@ typedef struct
* PT_PROT definition is loosely based on hppa bsd definition in
* gdb/hppab-nat.c
*/
-#define PTRACE_PROT 21
+#define PTRACE_PROT 21
-typedef enum
-{
+typedef enum {
ptprot_set_access_watchpoint,
ptprot_set_write_watchpoint,
ptprot_disable_watchpoint
} ptprot_flags;
-typedef struct
-{
+typedef struct {
unsigned long lowaddr;
unsigned long hiaddr;
ptprot_flags prot;
-} ptprot_area;
+} ptprot_area;
/* Sequence of bytes for breakpoint illegal instruction. */
#define S390_BREAKPOINT {0x0,0x1}
@@ -439,8 +437,7 @@ typedef struct
* The user_regs_struct defines the way the user registers are
* store on the stack for signal handling.
*/
-struct user_regs_struct
-{
+struct user_regs_struct {
psw_t psw;
unsigned long gprs[NUM_GPRS];
unsigned int acrs[NUM_ACRS];
diff --git a/arch/s390/include/uapi/asm/sthyi.h b/arch/s390/include/uapi/asm/sthyi.h
index ec113db4eb7e..b1b022316983 100644
--- a/arch/s390/include/uapi/asm/sthyi.h
+++ b/arch/s390/include/uapi/asm/sthyi.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_ASM_STHYI_H
#define _UAPI_ASM_STHYI_H
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h
index 967aad390105..2b605f7e8483 100644
--- a/arch/s390/include/uapi/asm/virtio-ccw.h
+++ b/arch/s390/include/uapi/asm/virtio-ccw.h
@@ -1,13 +1,9 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
* Definitions for virtio-ccw devices.
*
* Copyright IBM Corp. 2013
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
*/
#ifndef __KVM_VIRTIO_CCW_H
diff --git a/arch/s390/include/uapi/asm/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h
index 4caf71714a55..aeaaa030030e 100644
--- a/arch/s390/include/uapi/asm/vmcp.h
+++ b/arch/s390/include/uapi/asm/vmcp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright IBM Corp. 2004, 2005
* Interface implementation for communication with the z/VM control program
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index 137ef473584e..d568307321fc 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -9,20 +9,6 @@
* Eric Rossman (edrossma@us.ibm.com)
*
* Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef __ASM_S390_ZCRYPT_H
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 315986a06cf5..574e77622c04 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <asm/alternative.h>
#include <asm/facility.h>
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index f04db3779b34..59eea9c65d3e 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 58b9e127b615..80e974adb9e8 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1392,7 +1392,7 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
else
except_str = "-";
caller = (unsigned long) entry->caller;
- rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %p ",
+ rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ",
area, sec, usec, level, except_str,
entry->id.fields.cpuid, (void *)caller);
return rc;
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 3be829721cf9..b2c68fbf2634 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Disassemble s390 instructions.
*
@@ -396,9 +397,14 @@ struct s390_insn *find_insn(unsigned char *code)
unsigned char opfrag;
int i;
+ /* Search the opcode offset table to find an entry which
+ * matches the beginning of the opcode. If there is no match
+ * the last entry will be used, which is the default entry for
+ * unknown instructions as well as 1-byte opcode instructions.
+ */
for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) {
entry = &opcode_offset[i];
- if (entry->opcode == code[0] || entry->opcode == 0)
+ if (entry->opcode == code[0])
break;
}
@@ -543,7 +549,7 @@ void show_code(struct pt_regs *regs)
start += opsize;
pr_cont("%s", buffer);
ptr = buffer;
- ptr += sprintf(ptr, "\n\t ");
+ ptr += sprintf(ptr, "\n ");
hops++;
}
pr_cont("\n");
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 2aa545dca4d5..5b23c4f6e50c 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Stack dumping functions
*
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a316cd6999ad..9e5f6cd8e4c2 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -180,18 +180,17 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
*/
ENTRY(__switch_to)
stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
- lgr %r1,%r2
- aghi %r1,__TASK_thread # thread_struct of prev task
- lg %r5,__TASK_stack(%r3) # start of kernel stack of next
- stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev
- lgr %r1,%r3
- aghi %r1,__TASK_thread # thread_struct of next task
+ lghi %r4,__TASK_stack
+ lghi %r1,__TASK_thread
+ lg %r5,0(%r4,%r3) # start of kernel stack of next
+ stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
lgr %r15,%r5
aghi %r15,STACK_INIT # end of kernel stack of next
stg %r3,__LC_CURRENT # store task struct of next
stg %r15,__LC_KERNEL_STACK # store end of kernel stack
- lg %r15,__THREAD_ksp(%r1) # load kernel stack of next
- mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next
+ lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
+ aghi %r3,__TASK_pid
+ mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
bzr %r14
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 310e59e6eb4b..8ecb8726ac47 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* ipl/reipl/dump support for Linux on s390.
*
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 1a6521af1751..af3722c28fd9 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Kernel Probes (KProbes)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright IBM Corp. 2002, 2006
*
* s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index bf9622f0e6b1..452502f9a0d9 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Linux Guest Relocation (LGR) detection
*
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 7b87991416fd..b7abfad4fd7d 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Kernel module help for s390.
*
@@ -8,20 +9,6 @@
*
* based on i386 version
* Copyright (C) 2001 Rusty Russell.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/module.h>
#include <linux/elf.h>
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 6ff169253cae..c7a627620e5e 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Machine check handler
*
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 746d03423333..cc085e2d2ce9 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Performance event support for s390x - CPU-measurement Counter Facility
*
* Copyright IBM Corp. 2012, 2017
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#define KMSG_COMPONENT "cpum_cf"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 227b38bd82c9..1c9ddd7aa5ec 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Performance event support for the System z CPU-measurement Sampling Facility
*
* Copyright IBM Corp. 2013
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#define KMSG_COMPONENT "cpum_sf"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 93a386f4a3b5..0d770e513abf 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Performance event support for s390x
*
* Copyright IBM Corp. 2012, 2013
* Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#define KMSG_COMPONENT "perf"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index f8603ebed669..54e2d634b849 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <linux/perf_regs.h>
#include <linux/kernel.h>
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 26c0523c1488..cd3df5514552 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -1651,6 +1651,14 @@ static const struct user_regset s390_compat_regsets[] = {
.set = s390_gs_cb_set,
},
{
+ .core_note_type = NT_S390_GS_BC,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_bc_get,
+ .set = s390_gs_bc_set,
+ },
+ {
.core_note_type = NT_S390_RI_CB,
.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
.size = sizeof(__u64),
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 090053cf279b..793da97f9a6e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* S390 version
* Copyright IBM Corp. 1999, 2012
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index cd4334e80b64..b8c1a85bcf2d 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -55,6 +55,7 @@
#include <asm/sigp.h>
#include <asm/idle.h>
#include <asm/nmi.h>
+#include <asm/topology.h>
#include "entry.h"
enum {
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index e66687dc6144..460dcfba7d4e 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Stack trace management functions
*
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 12981e197f01..80b862e9c53c 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* store hypervisor information instruction emulation functions.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Copyright IBM Corp. 2016
* Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
*/
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 308a7b63348b..f7fc63385553 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -370,10 +370,10 @@ SYSCALL(sys_recvmmsg,compat_sys_recvmmsg)
SYSCALL(sys_sendmmsg,compat_sys_sendmmsg)
SYSCALL(sys_socket,sys_socket)
SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */
-SYSCALL(sys_bind,sys_bind)
-SYSCALL(sys_connect,sys_connect)
+SYSCALL(sys_bind,compat_sys_bind)
+SYSCALL(sys_connect,compat_sys_connect)
SYSCALL(sys_listen,sys_listen)
-SYSCALL(sys_accept4,sys_accept4)
+SYSCALL(sys_accept4,compat_sys_accept4)
SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */
SYSCALL(sys_setsockopt,compat_sys_setsockopt)
SYSCALL(sys_getsockname,compat_sys_getsockname)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index be6198193ec2..cf561160ea88 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Time of day based timer functions.
*
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index f9b393d4a078..4d5b65e527b5 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2007, 2011
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 39a218703c50..f3a1c7c6824e 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* vdso setup for s390
*
* Copyright IBM Corp. 2008
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#include <linux/init.h>
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
index eca3f001f081..f61df5253c23 100644
--- a/arch/s390/kernel/vdso32/clock_getres.S
+++ b/arch/s390/kernel/vdso32/clock_getres.S
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Userland implementation of clock_getres() for 32 bits processes in a
* s390 kernel for use in the vDSO
*
* Copyright IBM Corp. 2008
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
index a5769b83d90e..2d6ec3abe095 100644
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Userland implementation of clock_gettime() for 32 bits processes in a
* s390 kernel for use in the vDSO
*
* Copyright IBM Corp. 2008
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
index 63b86dceb0bf..aa8bf13a2edb 100644
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ b/arch/s390/kernel/vdso32/gettimeofday.S
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Userland implementation of gettimeofday() for 32 bits processes in a
* s390 kernel for use in the vDSO
*
* Copyright IBM Corp. 2008
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index c8513deb8c66..faf5213b15df 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Userland implementation of clock_getres() for 64 bits processes in a
* s390 kernel for use in the vDSO
*
* Copyright IBM Corp. 2008
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 5d7b56b49458..6046b3bfca46 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Userland implementation of clock_gettime() for 64 bits processes in a
* s390 kernel for use in the vDSO
*
* Copyright IBM Corp. 2008
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
index b02e62f3bc12..cc9dbc27da6f 100644
--- a/arch/s390/kernel/vdso64/gettimeofday.S
+++ b/arch/s390/kernel/vdso64/gettimeofday.S
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Userland implementation of gettimeofday() for 64 bits processes in a
* s390 kernel for use in the vDSO
*
* Copyright IBM Corp. 2008
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
*/
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
index 79a071e4357e..db19d0680a0a 100644
--- a/arch/s390/kernel/vdso64/note.S
+++ b/arch/s390/kernel/vdso64/note.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
* Here we can supply some information useful to userland.
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index dd7178fbb4f3..f24395a01918 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Virtual cpu timer based timer functions.
*
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 6048b1c6e580..05ee90a5ea08 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -1,10 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
# Makefile for kernel virtual machines on s390
#
# Copyright IBM Corp. 2008
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License (version 2 only)
-# as published by the Free Software Foundation.
KVM := ../../../virt/kvm
common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index d93a2c0474bf..89aa114a2cba 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* handling diagnose instructions
*
* Copyright IBM Corp. 2008, 2011
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
*/
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index bec42b852246..f4c51756c462 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* access guest memory
*
* Copyright IBM Corp. 2008, 2014
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index bcbd86621d01..b5f3e82006d0 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* kvm guest debug support
*
* Copyright IBM Corp. 2014
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
*/
#include <linux/kvm_host.h>
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 8fe034beb623..9c7d70715862 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* in-kernel handling for sie intercepts
*
* Copyright IBM Corp. 2008, 2014
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
*/
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fa557372d600..024ad8bcc516 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* handling kvm guest interrupts
*
* Copyright IBM Corp. 2008, 2015
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index d98e4159643d..484608c71dd0 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* s390 irqchip routines
*
* Copyright IBM Corp. 2014
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
*/
#ifndef __KVM_IRQ_H
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 98ad8b9e0360..ec8b68e97d3c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * hosting zSeries kernel virtual machines
+ * hosting IBM Z kernel virtual machines (s390x)
*
- * Copyright IBM Corp. 2008, 2009
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
+ * Copyright IBM Corp. 2008, 2017
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -3372,7 +3369,6 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int rc;
- sigset_t sigsaved;
if (kvm_run->immediate_exit)
return -EINTR;
@@ -3382,8 +3378,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
}
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+ kvm_sigset_activate(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
@@ -3417,8 +3412,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
disable_cpu_timer_accounting(vcpu);
store_regs(vcpu, kvm_run);
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ kvm_sigset_deactivate(vcpu);
vcpu->stat.exit_userspace++;
return rc;
@@ -3811,6 +3805,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
break;
}
+ /* do not use irq_state.flags, it will break old QEMUs */
r = kvm_s390_set_irq_state(vcpu,
(void __user *) irq_state.buf,
irq_state.len);
@@ -3826,6 +3821,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
break;
}
+ /* do not use irq_state.flags, it will break old QEMUs */
r = kvm_s390_get_irq_state(vcpu,
(__u8 __user *) irq_state.buf,
irq_state.len);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 10d65dfbc306..5e46ba429bcb 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* definition for kvm on s390
*
* Copyright IBM Corp. 2008, 2009
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c954ac49eee4..572496c688cc 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* handling privileged instructions
*
* Copyright IBM Corp. 2008, 2013
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
*/
@@ -235,8 +232,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return -EAGAIN;
}
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
return 0;
}
@@ -247,6 +242,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
rc = try_handle_skey(vcpu);
if (rc)
return rc != -EAGAIN ? rc : 0;
@@ -276,6 +274,9 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
rc = try_handle_skey(vcpu);
if (rc)
return rc != -EAGAIN ? rc : 0;
@@ -311,6 +312,9 @@ static int handle_sske(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
rc = try_handle_skey(vcpu);
if (rc)
return rc != -EAGAIN ? rc : 0;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 9d592ef4104b..c1f5cde2c878 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* handling interprocessor communication
*
* Copyright IBM Corp. 2008, 2013
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a311938b63b3..5d6ae0326d9e 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* kvm nested virtualization support for s390x
*
* Copyright IBM Corp. 2016
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
*/
#include <linux/vmalloc.h>
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 3d017171ff8f..6cf024eb2085 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Collaborative memory management interface.
*
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index b2c140193b0a..05d459b638f5 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* KVM guest address space mapping code
*
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 5bea139517a2..831bdcf407bb 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -1,24 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* flexible mmap layout support
*
* Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
* All Rights Reserved.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *
* Started by Ingo Molnar <mingo@elte.hu>
*/
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 434a9564917b..cb364153c43c 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -83,8 +83,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
- if (end >= TASK_SIZE_MAX)
- return -ENOMEM;
rc = 0;
notify = 0;
while (mm->context.asce_limit < end) {
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ae677f814bc0..4f2b65d01a70 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2007, 2011
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
index 90568c33ddb0..e0d5f245e42b 100644
--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Arch-specific network modules
#
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e81c16838b90..9557d8b516df 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -55,8 +55,7 @@ struct bpf_jit {
#define SEEN_LITERAL 8 /* code uses literals */
#define SEEN_FUNC 16 /* calls C functions */
#define SEEN_TAIL_CALL 32 /* code uses tail calls */
-#define SEEN_SKB_CHANGE 64 /* code changes skb data */
-#define SEEN_REG_AX 128 /* code uses constant blinding */
+#define SEEN_REG_AX 64 /* code uses constant blinding */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
/*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
}
- if (jit->seen & SEEN_SKB)
+ if (jit->seen & SEEN_SKB) {
emit_load_skb_data_hlen(jit);
- if (jit->seen & SEEN_SKB_CHANGE)
/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
STK_OFF_SKBP);
+ }
}
/*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT2(0x0d00, REG_14, REG_W1);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
- if (bpf_helper_changes_pkt_data((void *)func)) {
- jit->seen |= SEEN_SKB_CHANGE;
+ if ((jit->seen & SEEN_SKB) &&
+ bpf_helper_changes_pkt_data((void *)func)) {
/* lg %b1,ST_OFF_SKBP(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
REG_15, STK_OFF_SKBP);
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
index f94ecaffa71b..66c2dff74895 100644
--- a/arch/s390/numa/Makefile
+++ b/arch/s390/numa/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-y += numa.o
obj-y += toptree.o
obj-$(CONFIG_NUMA_EMU) += mode_emu.o
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 805d8b29193a..22d0871291ee 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the s390 PCI subsystem.
#
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 0fe649c0d542..4902fed221c0 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2012
*
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index c2f786f0ea06..b482e95b6249 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2012,2015
*
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 0d300ee00f4e..f7aa5a77827e 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2012
*
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 81b840bc6e4e..19bcb3b45a70 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* s390 specific pci instructions
*
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
index 01d4c5a4bfe9..357d42681cef 100644
--- a/arch/s390/tools/gen_opcode_table.c
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Generate opcode table initializers for the in-kernel disassembler.
*
diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild
index c94ee54210bc..81271d3af47c 100644
--- a/arch/score/include/uapi/asm/Kbuild
+++ b/arch/score/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += siginfo.h
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index e28531333efa..ba4d39cb321d 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 5a9e96be1665..9937c5ff94a9 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -715,7 +715,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return pte_pfn(pte);
}
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
static inline unsigned long pmd_write(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
index 2178c78c7c1a..4680ba246b55 100644
--- a/arch/sparc/include/uapi/asm/Kbuild
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += types.h
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 0f0f76b4f6cd..063556fe2cb1 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -19,7 +19,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
lib-$(CONFIG_SPARC64) += multi3.o
lib-$(CONFIG_SPARC64) += fls.o
lib-$(CONFIG_SPARC64) += fls64.o
-obj-$(CONFIG_SPARC64) += NG4fls.o
+lib-$(CONFIG_SPARC64) += NG4fls.o
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S
index e5547b22cd18..0ddbbb031822 100644
--- a/arch/sparc/lib/hweight.S
+++ b/arch/sparc/lib/hweight.S
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
.previous
ENTRY(__arch_hweight64)
- sethi %hi(__sw_hweight16), %g1
- jmpl %g1 + %lo(__sw_hweight16), %g0
+ sethi %hi(__sw_hweight64), %g1
+ jmpl %g1 + %lo(__sw_hweight64), %g0
nop
ENDPROC(__arch_hweight64)
EXPORT_SYMBOL(__arch_hweight64)
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index be3136f142a9..a8103a84b4ac 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
if (!printk_ratelimit())
return;
- printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+ printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *)regs->pc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 815c03d7a765..41363f46797b 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
if (!printk_ratelimit())
return;
- printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+ printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 5765e7e711f7..ff5f9cb3039a 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
u8 *func = ((u8 *)__bpf_call_base) + imm;
ctx->saw_call = true;
+ if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+ emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
emit_call((u32 *)func, ctx);
emit_nop(ctx);
emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
- if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
- load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
+ if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+ load_skb_regs(ctx, L7);
break;
}
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 2a26cc4fefc2..adfa21b18488 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -475,7 +475,6 @@ static inline void pmd_clear(pmd_t *pmdp)
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd))
#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd)))
-#define __HAVE_ARCH_PMD_WRITE
#define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot)))
#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
index 5711de0a1b5e..cc439612bcd5 100644
--- a/arch/tile/include/uapi/asm/Kbuild
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 50a32c33d729..73c57f614c9e 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,4 +1,5 @@
generic-y += barrier.h
+generic-y += bpf_perf_event.h
generic-y += bug.h
generic-y += clkdev.h
generic-y += current.h
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index b668e351fd6c..fca34b2177e2 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
/*
* Needed since we do not use the asm-generic/mm_hooks.h:
*/
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
uml_setup_stubs(mm);
+ return 0;
}
extern void arch_exit_mmap(struct mm_struct *mm);
static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 4e6fcb32620f..428644175956 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
if (!printk_ratelimit())
return;
- printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+ printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
(void *)UPT_IP(regs), (void *)UPT_SP(regs),
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index 59b06b48f27d..5c205a9cb5a6 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -81,9 +81,10 @@ do { \
} \
} while (0)
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
{
+ return 0;
}
static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 759a71411169..8611ef980554 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f94bfc7..d4fc98c50378 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -926,7 +926,8 @@ config MAXSMP
config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
range 2 8 if SMP && X86_32 && !X86_BIGSMP
- range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+ range 2 64 if SMP && X86_32 && X86_BIGSMP
+ range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
default "1" if !SMP
default "8192" if MAXSMP
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 6293a8768a91..672441c008c7 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
+ depends on !STACKDEPOT
---help---
This option enables the "guess" unwinder for unwinding kernel stack
traces. It scans the stack and reports every kernel text address it
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 1e9c322e973a..f25e1530e064 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
vmlinux-objs-y += $(obj)/mem_encrypt.o
+ vmlinux-objs-y += $(obj)/pgtable_64.o
endif
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 20919b4f3133..fc313e29fe2c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,10 +305,18 @@ ENTRY(startup_64)
leaq boot_stack_end(%rbx), %rsp
#ifdef CONFIG_X86_5LEVEL
- /* Check if 5-level paging has already enabled */
- movq %cr4, %rax
- testl $X86_CR4_LA57, %eax
- jnz lvl5
+ /*
+ * Check if we need to enable 5-level paging.
+ * RSI holds real mode data and need to be preserved across
+ * a function call.
+ */
+ pushq %rsi
+ call l5_paging_required
+ popq %rsi
+
+ /* If l5_paging_required() returned zero, we're done here. */
+ cmpq $0, %rax
+ je lvl5
/*
* At this point we are in long mode with 4-level paging enabled,
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b50c42455e25..98761a1576ce 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
}
}
+static bool l5_supported(void)
+{
+ /* Check if leaf 7 is supported. */
+ if (native_cpuid_eax(0) < 7)
+ return 0;
+
+ /* Check if la57 is supported. */
+ return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
+}
+
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
console_init();
debug_putstr("early console in extract_kernel\n");
+ if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
+ error("This linux kernel as configured requires 5-level paging\n"
+ "This CPU does not support the required 'cr4.la57' feature\n"
+ "Unable to boot - please use a kernel appropriate for your CPU\n");
+ }
+
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index d5364ca2e3f9..b5e5e02f8cde 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -23,6 +23,9 @@
*/
#undef CONFIG_AMD_MEM_ENCRYPT
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
#include "misc.h"
/* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644
index 000000000000..b4469a37e9a1
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -0,0 +1,28 @@
+#include <asm/processor.h>
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+int l5_paging_required(void)
+{
+ /* Check if leaf 7 is supported. */
+
+ if (native_cpuid_eax(0) < 7)
+ return 0;
+
+ /* Check if la57 is supported. */
+ if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ return 0;
+
+ /* Check if 5-level paging has already been enabled. */
+ if (native_read_cr4() & X86_CR4_LA57)
+ return 0;
+
+ return 1;
+}
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index 49f4970f693b..6a10d52a4145 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -44,9 +44,9 @@ FDINITRD=$6
# Make sure the files actually exist
verify "$FBZIMAGE"
-verify "$MTOOLSRC"
genbzdisk() {
+ verify "$MTOOLSRC"
mformat a:
syslinux $FIMAGE
echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
}
genfdimage144() {
+ verify "$MTOOLSRC"
dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
mformat v:
syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
}
genfdimage288() {
+ verify "$MTOOLSRC"
dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
mformat w:
syslinux $FIMAGE
@@ -78,39 +80,43 @@ genfdimage288() {
mcopy $FBZIMAGE w:linux
}
-genisoimage() {
+geniso() {
tmp_dir=`dirname $FIMAGE`/isoimage
rm -rf $tmp_dir
mkdir $tmp_dir
- for i in lib lib64 share end ; do
+ for i in lib lib64 share ; do
for j in syslinux ISOLINUX ; do
if [ -f /usr/$i/$j/isolinux.bin ] ; then
isolinux=/usr/$i/$j/isolinux.bin
- cp $isolinux $tmp_dir
fi
done
for j in syslinux syslinux/modules/bios ; do
if [ -f /usr/$i/$j/ldlinux.c32 ]; then
ldlinux=/usr/$i/$j/ldlinux.c32
- cp $ldlinux $tmp_dir
fi
done
if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
break
fi
- if [ $i = end -a -z "$isolinux" ] ; then
- echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
- exit 1
- fi
done
+ if [ -z "$isolinux" ] ; then
+ echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
+ exit 1
+ fi
+ if [ -z "$ldlinux" ] ; then
+ echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
+ exit 1
+ fi
+ cp $isolinux $tmp_dir
+ cp $ldlinux $tmp_dir
cp $FBZIMAGE $tmp_dir/linux
echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
if [ -f "$FDINITRD" ] ; then
cp "$FDINITRD" $tmp_dir/initrd.img
fi
- mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \
- -c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \
- $tmp_dir
+ genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
+ -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
+ -boot-info-table $tmp_dir
isohybrid $FIMAGE 2>/dev/null || true
rm -rf $tmp_dir
}
@@ -119,6 +125,6 @@ case $1 in
bzdisk) genbzdisk;;
fdimage144) genfdimage144;;
fdimage288) genfdimage288;;
- isoimage) genisoimage;;
+ isoimage) geniso;;
*) echo 'Unknown image format'; exit 1;
esac
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index 399a29d067d6..cb91a64a99e7 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,
salsa20_ivsetup(ctx, walk.iv);
- if (likely(walk.nbytes == nbytes))
- {
- salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
- walk.dst.virt.addr, nbytes);
- return blkcipher_walk_done(desc, &walk, 0);
- }
-
while (walk.nbytes >= 64) {
salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
walk.dst.virt.addr,
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3fd8bc560fae..45a63e00a6af 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,6 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
+#include <asm/cpufeatures.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
/*
@@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with
#endif
.endm
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
+ * halves:
+ */
+#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT)
+#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+
+.macro SET_NOFLUSH_BIT reg:req
+ bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+.macro ADJUST_KERNEL_CR3 reg:req
+ ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
+ /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
+ andq $(~PTI_SWITCH_MASK), \reg
+.endm
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+ mov %cr3, \scratch_reg
+ ADJUST_KERNEL_CR3 \scratch_reg
+ mov \scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+#define THIS_CPU_user_pcid_flush_mask \
+ PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+ mov %cr3, \scratch_reg
+
+ ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+ /*
+ * Test if the ASID needs a flush.
+ */
+ movq \scratch_reg, \scratch_reg2
+ andq $(0x7FF), \scratch_reg /* mask ASID */
+ bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
+ jnc .Lnoflush_\@
+
+ /* Flush needed, clear the bit */
+ btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
+ movq \scratch_reg2, \scratch_reg
+ jmp .Lwrcr3_\@
+
+.Lnoflush_\@:
+ movq \scratch_reg2, \scratch_reg
+ SET_NOFLUSH_BIT \scratch_reg
+
+.Lwrcr3_\@:
+ /* Flip the PGD and ASID to the user version */
+ orq $(PTI_SWITCH_MASK), \scratch_reg
+ mov \scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+ pushq %rax
+ SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+ popq %rax
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+ ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+ movq %cr3, \scratch_reg
+ movq \scratch_reg, \save_reg
+ /*
+ * Is the "switch mask" all zero? That means that both of
+ * these are zero:
+ *
+ * 1. The user/kernel PCID bit, and
+ * 2. The user/kernel "bit" that points CR3 to the
+ * bottom half of the 8k PGD
+ *
+ * That indicates a kernel CR3 value, not a user CR3.
+ */
+ testq $(PTI_SWITCH_MASK), \scratch_reg
+ jz .Ldone_\@
+
+ ADJUST_KERNEL_CR3 \scratch_reg
+ movq \scratch_reg, %cr3
+
+.Ldone_\@:
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+ ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+ /*
+ * KERNEL pages can always resume with NOFLUSH as we do
+ * explicit flushes.
+ */
+ bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
+ jnc .Lnoflush_\@
+
+ /*
+ * Check if there's a pending flush for the user ASID we're
+ * about to set.
+ */
+ movq \save_reg, \scratch_reg
+ andq $(0x7FF), \scratch_reg
+ bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
+ jnc .Lnoflush_\@
+
+ btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
+ jmp .Lwrcr3_\@
+
+.Lnoflush_\@:
+ SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
+ /*
+ * The CR3 write could be avoided when not changing its value,
+ * but would require a CR3 read *and* a scratch register.
+ */
+ movq \save_reg, %cr3
+.Lend_\@:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+.endm
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+.endm
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.endm
+
+#endif
+
#endif /* CONFIG_X86_64 */
/*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4838037f97f6..ace8f321a5a1 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -941,9 +941,10 @@ ENTRY(debug)
movl %esp, %eax # pt_regs pointer
/* Are we currently on the SYSENTER stack? */
- PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
- subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
- cmpl $SIZEOF_SYSENTER_stack, %ecx
+ movl PER_CPU_VAR(cpu_entry_area), %ecx
+ addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+ subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
+ cmpl $SIZEOF_entry_stack, %ecx
jb .Ldebug_from_sysenter_stack
TRACE_IRQS_OFF
@@ -984,9 +985,10 @@ ENTRY(nmi)
movl %esp, %eax # pt_regs pointer
/* Are we currently on the SYSENTER stack? */
- PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
- subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
- cmpl $SIZEOF_SYSENTER_stack, %ecx
+ movl PER_CPU_VAR(cpu_entry_area), %ecx
+ addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+ subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
+ cmpl $SIZEOF_entry_stack, %ecx
jb .Lnmi_from_sysenter_stack
/* Not on SYSENTER stack. */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f81d50d7ceac..f048e384ff54 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -23,7 +23,6 @@
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
-#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
@@ -40,6 +39,8 @@
#include <asm/frame.h>
#include <linux/err.h>
+#include "calling.h"
+
.code64
.section .entry.text, "ax"
@@ -140,6 +141,67 @@ END(native_usergs_sysret64)
* with them due to bugs in both AMD and Intel CPUs.
*/
+ .pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline. This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address). So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped) entry
+ * trampoline. We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+ _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \
+ SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+ UNWIND_HINT_EMPTY
+ swapgs
+
+ /* Stash the user RSP. */
+ movq %rsp, RSP_SCRATCH
+
+ /* Note: using %rsp as a scratch reg. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+ /* Load the top of the task stack into RSP */
+ movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+ /* Start building the simulated IRET frame. */
+ pushq $__USER_DS /* pt_regs->ss */
+ pushq RSP_SCRATCH /* pt_regs->sp */
+ pushq %r11 /* pt_regs->flags */
+ pushq $__USER_CS /* pt_regs->cs */
+ pushq %rcx /* pt_regs->ip */
+
+ /*
+ * x86 lacks a near absolute jump, and we can't jump to the real
+ * entry text with a relative jump. We could push the target
+ * address and then use retq, but this destroys the pipeline on
+ * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
+ * spill RDI and restore it in a second-stage trampoline.
+ */
+ pushq %rdi
+ movq $entry_SYSCALL_64_stage2, %rdi
+ jmp *%rdi
+END(entry_SYSCALL_64_trampoline)
+
+ .popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+ UNWIND_HINT_EMPTY
+ popq %rdi
+ jmp entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
ENTRY(entry_SYSCALL_64)
UNWIND_HINT_EMPTY
/*
@@ -149,6 +211,10 @@ ENTRY(entry_SYSCALL_64)
*/
swapgs
+ /*
+ * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+ * is not required to switch CR3.
+ */
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -330,8 +396,25 @@ syscall_return_via_sysret:
popq %rsi /* skip rcx */
popq %rdx
popq %rsi
+
+ /*
+ * Now all regs are restored except RSP and RDI.
+ * Save old stack pointer and switch to trampoline stack.
+ */
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+ pushq RSP-RDI(%rdi) /* RSP */
+ pushq (%rdi) /* RDI */
+
+ /*
+ * We are on the trampoline stack. All regs except RDI are live.
+ * We can do future final exit work right here.
+ */
+ SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
popq %rdi
- movq RSP-ORIG_RAX(%rsp), %rsp
+ popq %rsp
USERGS_SYSRET64
END(entry_SYSCALL_64)
@@ -466,12 +549,13 @@ END(irq_entries_start)
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
- pushfq
- testl $X86_EFLAGS_IF, (%rsp)
+ pushq %rax
+ SAVE_FLAGS(CLBR_RAX)
+ testl $X86_EFLAGS_IF, %eax
jz .Lokay_\@
ud2
.Lokay_\@:
- addq $8, %rsp
+ popq %rax
#endif
.endm
@@ -563,6 +647,13 @@ END(irq_entries_start)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
cld
+
+ testb $3, CS-ORIG_RAX(%rsp)
+ jz 1f
+ SWAPGS
+ call switch_to_thread_stack
+1:
+
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
@@ -572,12 +663,8 @@ END(irq_entries_start)
jz 1f
/*
- * IRQ from user mode. Switch to kernel gsbase and inform context
- * tracking that we're in kernel mode.
- */
- SWAPGS
-
- /*
+ * IRQ from user mode.
+ *
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
* (which can take locks). Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +717,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
ud2
1:
#endif
- SWAPGS
POP_EXTRA_REGS
- POP_C_REGS
- addq $8, %rsp /* skip regs->orig_ax */
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+
+ /*
+ * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+ * Save old stack pointer and switch to trampoline stack.
+ */
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+ /* Copy the IRET frame to the trampoline stack. */
+ pushq 6*8(%rdi) /* SS */
+ pushq 5*8(%rdi) /* RSP */
+ pushq 4*8(%rdi) /* EFLAGS */
+ pushq 3*8(%rdi) /* CS */
+ pushq 2*8(%rdi) /* RIP */
+
+ /* Push user RDI on the trampoline stack. */
+ pushq (%rdi)
+
+ /*
+ * We are on the trampoline stack. All regs except RDI are live.
+ * We can do future final exit work right here.
+ */
+
+ SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
+ /* Restore RDI. */
+ popq %rdi
+ SWAPGS
INTERRUPT_RETURN
@@ -713,7 +833,9 @@ native_irq_return_ldt:
*/
pushq %rdi /* Stash user RDI */
- SWAPGS
+ SWAPGS /* to kernel GS */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
+
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
movq (1*8)(%rsp), %rax /* user RIP */
@@ -729,7 +851,6 @@ native_irq_return_ldt:
/* Now RAX == RSP. */
andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
- popq %rdi /* Restore user RDI */
/*
* espfix_stack[31:16] == 0. The page tables are set up such that
@@ -740,7 +861,11 @@ native_irq_return_ldt:
* still points to an RO alias of the ESPFIX stack.
*/
orq PER_CPU_VAR(espfix_stack), %rax
- SWAPGS
+
+ SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+ SWAPGS /* to user GS */
+ popq %rdi /* Restore user RDI */
+
movq %rax, %rsp
UNWIND_HINT_IRET_REGS offset=8
@@ -829,7 +954,35 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
/*
* Exception entry points.
*/
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack. This is called with the IRET frame and
+ * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+ UNWIND_HINT_FUNC
+
+ pushq %rdi
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+ pushq 7*8(%rdi) /* regs->ss */
+ pushq 6*8(%rdi) /* regs->rsp */
+ pushq 5*8(%rdi) /* regs->eflags */
+ pushq 4*8(%rdi) /* regs->cs */
+ pushq 3*8(%rdi) /* regs->ip */
+ pushq 2*8(%rdi) /* regs->orig_ax */
+ pushq 8(%rdi) /* return address */
+ UNWIND_HINT_FUNC
+
+ movq (%rdi), %rdi
+ ret
+END(switch_to_thread_stack)
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
@@ -848,11 +1001,12 @@ ENTRY(\sym)
ALLOC_PT_GPREGS_ON_STACK
- .if \paranoid
- .if \paranoid == 1
+ .if \paranoid < 2
testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
- jnz 1f
+ jnz .Lfrom_usermode_switch_stack_\@
.endif
+
+ .if \paranoid
call paranoid_entry
.else
call error_entry
@@ -894,20 +1048,15 @@ ENTRY(\sym)
jmp error_exit
.endif
- .if \paranoid == 1
+ .if \paranoid < 2
/*
- * Paranoid entry from userspace. Switch stacks and treat it
+ * Entry from userspace. Switch stacks and treat it
* as a normal entry. This means that paranoid handlers
* run in real process context if user_mode(regs).
*/
-1:
+.Lfrom_usermode_switch_stack_\@:
call error_entry
-
- movq %rsp, %rdi /* pt_regs pointer */
- call sync_regs
- movq %rax, %rsp /* switch stack */
-
movq %rsp, %rdi /* pt_regs pointer */
.if \has_error_code
@@ -1119,7 +1268,11 @@ ENTRY(paranoid_entry)
js 1f /* negative -> in kernel */
SWAPGS
xorl %ebx, %ebx
-1: ret
+
+1:
+ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
+ ret
END(paranoid_entry)
/*
@@ -1141,6 +1294,7 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
+ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
@@ -1168,8 +1322,18 @@ ENTRY(error_entry)
* from user mode due to an IRET fault.
*/
SWAPGS
+ /* We have user CR3. Change to kernel CR3. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
.Lerror_entry_from_usermode_after_swapgs:
+ /* Put us onto the real thread stack. */
+ popq %r12 /* save return addr in %12 */
+ movq %rsp, %rdi /* arg0 = pt_regs pointer */
+ call sync_regs
+ movq %rax, %rsp /* switch stack */
+ ENCODE_FRAME_POINTER
+ pushq %r12
+
/*
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1370,7 @@ ENTRY(error_entry)
* .Lgs_change's error handler with kernel gsbase.
*/
SWAPGS
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
jmp .Lerror_entry_done
.Lbstep_iret:
@@ -1215,10 +1380,11 @@ ENTRY(error_entry)
.Lerror_bad_iret:
/*
- * We came from an IRET to user mode, so we have user gsbase.
- * Switch to kernel gsbase:
+ * We came from an IRET to user mode, so we have user
+ * gsbase and CR3. Switch to kernel gsbase and CR3:
*/
SWAPGS
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
/*
* Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1416,10 @@ END(error_exit)
/*
* Runs on exception stack. Xen PV does not go through this path at all,
* so we can use real assembly here.
+ *
+ * Registers:
+ * %r14: Used to save/restore the CR3 of the interrupted context
+ * when PAGE_TABLE_ISOLATION is in use. Do not clobber.
*/
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
@@ -1313,6 +1483,7 @@ ENTRY(nmi)
swapgs
cld
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1736,8 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
+ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
testl %ebx, %ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 568e130d932c..40f17009ec20 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -48,7 +48,11 @@
*/
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
- SWAPGS_UNSAFE_STACK
+ SWAPGS
+
+ /* We are about to clobber %rsp anyway, clobbering here is OK */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/*
@@ -216,6 +220,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq $0 /* pt_regs->r15 = 0 */
/*
+ * We just saved %rdi so it is safe to clobber. It is not
+ * preserved during the C calls inside TRACE_IRQS_OFF anyway.
+ */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+
+ /*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
*/
@@ -256,10 +266,22 @@ sysret32_from_system_call:
* when the system call started, which is already known to user
* code. We zero R8-R10 to avoid info leaks.
*/
+ movq RSP-ORIG_RAX(%rsp), %rsp
+
+ /*
+ * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
+ * on the process stack which is not mapped to userspace and
+ * not readable after we SWITCH_TO_USER_CR3. Delay the CR3
+ * switch until after after the last reference to the process
+ * stack.
+ *
+ * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+ */
+ SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+
xorq %r8, %r8
xorq %r9, %r9
xorq %r10, %r10
- movq RSP-ORIG_RAX(%rsp), %rsp
swapgs
sysretl
END(entry_SYSCALL_compat)
@@ -306,8 +328,11 @@ ENTRY(entry_INT80_compat)
*/
movl %eax, %eax
- /* Construct struct pt_regs on stack (iret frame is already on stack) */
pushq %rax /* pt_regs->orig_ax */
+
+ /* switch to thread stack expects orig_ax to be pushed */
+ call switch_to_thread_stack
+
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 11b13c4b43d5..f19856d95c60 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -324,5 +324,5 @@ notrace time_t __vdso_time(time_t *t)
*t = result;
return result;
}
-int time(time_t *t)
+time_t time(time_t *t)
__attribute__((weak, alias("__vdso_time")));
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index f279ba2643dc..577fa8adb785 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -37,6 +37,7 @@
#include <asm/unistd.h>
#include <asm/fixmap.h>
#include <asm/traps.h>
+#include <asm/paravirt.h>
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
WARN_ON_ONCE(address != regs->ip);
+ /* This should be unreachable in NATIVE mode. */
+ if (WARN_ON(vsyscall_mode == NATIVE))
+ return false;
+
if (vsyscall_mode == NONE) {
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
}
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range. Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present. If so, we skip calling
+ * this.
+ */
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
+ set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+ p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+ p4d->p4d |= _PAGE_USER;
+#endif
+ pud = pud_offset(p4d, VSYSCALL_ADDR);
+ set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+ pmd = pmd_offset(pud, VSYSCALL_ADDR);
+ set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
void __init map_vsyscall(void)
{
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
- if (vsyscall_mode != NONE)
+ if (vsyscall_mode != NONE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
vsyscall_mode == NATIVE
? PAGE_KERNEL_VSYSCALL
: PAGE_KERNEL_VVAR);
+ set_vsyscall_pgtable_user_bits(swapper_pg_dir);
+ }
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 09c26a4f139c..731153a4681e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
__init int intel_pmu_init(void)
{
+ struct attribute **extra_attr = NULL;
+ struct attribute **to_free = NULL;
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
@@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
unsigned int unused;
struct extra_reg *er;
int version, i;
- struct attribute **extra_attr = NULL;
char *name;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
extra_attr = merge_attr(extra_attr, skl_format_attr);
+ to_free = extra_attr;
x86_pmu.cpu_events = get_hsw_events_attrs();
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
pr_cont("full-width counters, ");
}
+ kfree(to_free);
return 0;
}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3674a4b6f8bd..8f0aace08b87 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -3,16 +3,18 @@
#include <linux/types.h>
#include <linux/slab.h>
+#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include "../perf_event.h"
+/* Waste a full page so it can be mapped into the cpu_entry_area */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
-#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_FIXUP_SIZE PAGE_SIZE
/*
@@ -279,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu)
static DEFINE_PER_CPU(void *, insn_buffer);
-static int alloc_pebs_buffer(int cpu)
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ phys_addr_t pa;
+ size_t msz = 0;
+
+ pa = virt_to_phys(addr);
+ for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+ cea_set_pte(cea, pa, prot);
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+ size_t msz = 0;
+
+ for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+ cea_set_pte(cea, 0, PAGE_NONE);
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+ unsigned int order = get_order(size);
int node = cpu_to_node(cpu);
- int max;
- void *buffer, *ibuffer;
+ struct page *page;
+
+ page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+ return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+ if (buffer)
+ free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+ struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+ struct debug_store *ds = hwev->ds;
+ size_t bsiz = x86_pmu.pebs_buffer_size;
+ int max, node = cpu_to_node(cpu);
+ void *buffer, *ibuffer, *cea;
if (!x86_pmu.pebs)
return 0;
- buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+ buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
if (unlikely(!buffer))
return -ENOMEM;
@@ -300,25 +337,27 @@ static int alloc_pebs_buffer(int cpu)
if (x86_pmu.intel_cap.pebs_format < 2) {
ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
if (!ibuffer) {
- kfree(buffer);
+ dsfree_pages(buffer, bsiz);
return -ENOMEM;
}
per_cpu(insn_buffer, cpu) = ibuffer;
}
-
- max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
- ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+ hwev->ds_pebs_vaddr = buffer;
+ /* Update the cpu entry area mapping */
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+ ds->pebs_buffer_base = (unsigned long) cea;
+ ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
ds->pebs_index = ds->pebs_buffer_base;
- ds->pebs_absolute_maximum = ds->pebs_buffer_base +
- max * x86_pmu.pebs_record_size;
-
+ max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+ ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
return 0;
}
static void release_pebs_buffer(int cpu)
{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+ struct debug_store *ds = hwev->ds;
+ void *cea;
if (!ds || !x86_pmu.pebs)
return;
@@ -326,73 +365,70 @@ static void release_pebs_buffer(int cpu)
kfree(per_cpu(insn_buffer, cpu));
per_cpu(insn_buffer, cpu) = NULL;
- kfree((void *)(unsigned long)ds->pebs_buffer_base);
+ /* Clear the fixmap */
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+ ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
ds->pebs_buffer_base = 0;
+ dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+ hwev->ds_pebs_vaddr = NULL;
}
static int alloc_bts_buffer(int cpu)
{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- int node = cpu_to_node(cpu);
- int max, thresh;
- void *buffer;
+ struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+ struct debug_store *ds = hwev->ds;
+ void *buffer, *cea;
+ int max;
if (!x86_pmu.bts)
return 0;
- buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+ buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
if (unlikely(!buffer)) {
WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
return -ENOMEM;
}
-
- max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
- thresh = max / 16;
-
- ds->bts_buffer_base = (u64)(unsigned long)buffer;
+ hwev->ds_bts_vaddr = buffer;
+ /* Update the fixmap */
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+ ds->bts_buffer_base = (unsigned long) cea;
+ ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
ds->bts_index = ds->bts_buffer_base;
- ds->bts_absolute_maximum = ds->bts_buffer_base +
- max * BTS_RECORD_SIZE;
- ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
- thresh * BTS_RECORD_SIZE;
-
+ max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+ ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+ ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
return 0;
}
static void release_bts_buffer(int cpu)
{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+ struct debug_store *ds = hwev->ds;
+ void *cea;
if (!ds || !x86_pmu.bts)
return;
- kfree((void *)(unsigned long)ds->bts_buffer_base);
+ /* Clear the fixmap */
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+ ds_clear_cea(cea, BTS_BUFFER_SIZE);
ds->bts_buffer_base = 0;
+ dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+ hwev->ds_bts_vaddr = NULL;
}
static int alloc_ds_buffer(int cpu)
{
- int node = cpu_to_node(cpu);
- struct debug_store *ds;
-
- ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
- if (unlikely(!ds))
- return -ENOMEM;
+ struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
+ memset(ds, 0, sizeof(*ds));
per_cpu(cpu_hw_events, cpu).ds = ds;
-
return 0;
}
static void release_ds_buffer(int cpu)
{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
- if (!ds)
- return;
-
per_cpu(cpu_hw_events, cpu).ds = NULL;
- kfree(ds);
}
void release_ds_buffers(void)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f7aaadf9331f..8e4ea143ed96 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -14,6 +14,8 @@
#include <linux/perf_event.h>
+#include <asm/intel_ds.h>
+
/* To enable MSR tracing please use the generic trace points. */
/*
@@ -77,8 +79,6 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS 8
#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
/*
@@ -95,23 +95,6 @@ struct amd_nb {
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
- u64 bts_buffer_base;
- u64 bts_index;
- u64 bts_absolute_maximum;
- u64 bts_interrupt_threshold;
- u64 pebs_buffer_base;
- u64 pebs_index;
- u64 pebs_absolute_maximum;
- u64 pebs_interrupt_threshold;
- u64 pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
#define PEBS_REGS \
(PERF_REG_X86_AX | \
PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
* Intel DebugStore bits
*/
struct debug_store *ds;
+ void *ds_pebs_vaddr;
+ void *ds_bts_vaddr;
u64 pebs_enabled;
int n_pebs;
int n_large_pebs;
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 219faaec51df..386a6900e206 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -136,6 +136,7 @@
#endif
#ifndef __ASSEMBLY__
+#ifndef __BPF__
/*
* This output constraint should be used for any inline asm which has a "call"
* instruction. Otherwise the asm may be inserted before the frame pointer
@@ -145,5 +146,6 @@
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
#endif
+#endif
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644
index 000000000000..4a7884b8dca5
--- /dev/null
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+#include <asm/intel_ds.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code. Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+ char gdt[PAGE_SIZE];
+
+ /*
+ * The GDT is just below entry_stack and thus serves (on x86_64) as
+ * a a read-only guard page.
+ */
+ struct entry_stack_page entry_stack_page;
+
+ /*
+ * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
+ * we need task switches to work, and task switches write to the TSS.
+ */
+ struct tss_struct tss;
+
+ char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+ /*
+ * Exception stacks used for IST entries.
+ *
+ * In the future, this should have a separate slot for each stack
+ * with guard pages between them.
+ */
+ char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+ /*
+ * Per CPU debug store for Intel performance monitoring. Wastes a
+ * full page at the moment.
+ */
+ struct debug_store cpu_debug_store;
+ /*
+ * The actual PEBS/BTS buffers must be mapped to user space
+ * Reserve enough fixmap PTEs.
+ */
+ struct debug_store_buffers cpu_debug_buffers;
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
+#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE \
+ (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+
+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+static inline struct entry_stack *cpu_entry_stack(int cpu)
+{
+ return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+}
+
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bf6a76202a77..ea9a7dde62e5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c0b0e9e8aa66..07cdd1715705 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -197,11 +197,12 @@
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
-
+#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -266,6 +267,7 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -339,5 +341,6 @@
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4011cb03ef08..13c5ee878a47 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -7,6 +7,7 @@
#include <asm/mmu.h>
#include <asm/fixmap.h>
#include <asm/irq_vectors.h>
+#include <asm/cpu_entry_area.h>
#include <linux/smp.h>
#include <linux/percpu.h>
@@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
desc->type = (info->read_exec_only ^ 1) << 1;
desc->type |= info->contents << 2;
+ /* Set the ACCESS bit so it can be mapped RO */
+ desc->type |= 1;
desc->s = 1;
desc->dpl = 0x3;
@@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
return this_cpu_ptr(&gdt_page)->gdt;
}
-/* Get the fixmap index for a specific processor */
-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
-{
- return FIX_GDT_REMAP_BEGIN + cpu;
-}
-
/* Provide the fixmap address of the remapped GDT */
static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
{
- unsigned int idx = get_cpu_gdt_ro_index(cpu);
- return (struct desc_struct *)__fix_to_virt(idx);
+ return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
}
/* Provide the current read-only GDT */
@@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
#endif
}
-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
{
struct desc_struct *d = get_cpu_gdt_rw(cpu);
tss_desc tss;
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 14d6d5007314..b027633e7300 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,12 @@
# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
#endif
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI 0
+#else
+# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -60,7 +66,7 @@
#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
-#define DISABLED_MASK7 0
+#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 0
#define DISABLED_MASK9 (DISABLE_MPX)
#define DISABLED_MASK10 0
diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 0211029076ea..6777480d8a42 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_ESPFIX_H
#define _ASM_X86_ESPFIX_H
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
#include <asm/percpu.h>
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
extern void init_espfix_bsp(void);
extern void init_espfix_ap(int cpu);
-
-#endif /* CONFIG_X86_64 */
+#else
+static inline void init_espfix_ap(int cpu) { }
+#endif
#endif /* _ASM_X86_ESPFIX_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b0c505fe9a95..64c4a30e0d39 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
PAGE_SIZE)
#endif
-
/*
* Here we define all the compile-time 'special' virtual
* addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
#endif
- FIX_RO_IDT, /* Virtual mapping for read-only IDT */
#ifdef CONFIG_X86_32
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
- /* Fixmap entries to remap the GDTs, one per processor. */
- FIX_GDT_REMAP_BEGIN,
- FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
#ifdef CONFIG_ACPI_APEI_GHES
/* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
extern void reserve_top_address(unsigned long reserve);
#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
extern int fixmaps_set;
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 1b0a5abcd8ae..96aa6b9884dc 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,16 +20,7 @@
#ifndef _ASM_X86_HYPERVISOR_H
#define _ASM_X86_HYPERVISOR_H
-#ifdef CONFIG_HYPERVISOR_GUEST
-
-#include <asm/kvm_para.h>
-#include <asm/x86_init.h>
-#include <asm/xen/hypervisor.h>
-
-/*
- * x86 hypervisor information
- */
-
+/* x86 hypervisor types */
enum x86_hypervisor_type {
X86_HYPER_NATIVE = 0,
X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
X86_HYPER_KVM,
};
+#ifdef CONFIG_HYPERVISOR_GUEST
+
+#include <asm/kvm_para.h>
+#include <asm/x86_init.h>
+#include <asm/xen/hypervisor.h>
+
struct hypervisor_x86 {
/* Hypervisor name */
const char *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
extern enum x86_hypervisor_type x86_hyper_type;
extern void init_hypervisor_platform(void);
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+ return x86_hyper_type == type;
+}
#else
static inline void init_hypervisor_platform(void) { }
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+ return type == X86_HYPER_NATIVE;
+}
#endif /* CONFIG_HYPERVISOR_GUEST */
#endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
new file mode 100644
index 000000000000..62a9f4966b42
--- /dev/null
+++ b/arch/x86/include/asm/intel_ds.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_INTEL_DS_H
+#define _ASM_INTEL_DS_H
+
+#include <linux/percpu-defs.h>
+
+#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS 8
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+ u64 bts_buffer_base;
+ u64 bts_index;
+ u64 bts_absolute_maximum;
+ u64 bts_interrupt_threshold;
+ u64 pebs_buffer_base;
+ u64 pebs_index;
+ u64 pebs_absolute_maximum;
+ u64 pebs_interrupt_threshold;
+ u64 pebs_event_reset[MAX_PEBS_EVENTS];
+} __aligned(PAGE_SIZE);
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
+struct debug_store_buffers {
+ char bts_buffer[BTS_BUFFER_SIZE];
+ char pebs_buffer[PEBS_BUFFER_SIZE];
+};
+
+#endif
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644
index 000000000000..989cfa86de85
--- /dev/null
+++ b/arch/x86/include/asm/invpcid.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVPCID
+#define _ASM_X86_INVPCID
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+{
+ struct { u64 d[2]; } desc = { { pcid, addr } };
+
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+ * mappings, we don't want the compiler to reorder any subsequent
+ * memory accesses before the TLB flush.
+ *
+ * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+ * invpcid (%rcx), %rax in long mode.
+ */
+ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+ : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR 0
+#define INVPCID_TYPE_SINGLE_CTXT 1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
+#define INVPCID_TYPE_ALL_NON_GLOBAL 3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+ unsigned long addr)
+{
+ __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+ __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
+#endif /* _ASM_X86_INVPCID */
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index 139feef467f7..c066ffae222b 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs);
extern int mp_irqdomain_activate(struct irq_domain *domain,
- struct irq_data *irq_data, bool early);
+ struct irq_data *irq_data, bool reserve);
extern void mp_irqdomain_deactivate(struct irq_domain *domain,
struct irq_data *irq_data);
extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c8ef23f2c28f..89f08955fff7 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
swapgs; \
sysretl
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(x) pushfq; popq %rax
+#endif
#else
#define INTERRUPT_RETURN iret
#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f86a8caa561e..395c9631e000 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
extern void __show_regs(struct pt_regs *regs, int all);
+extern void show_iret_regs(struct pt_regs *regs);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/include/asm/kmemcheck.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 034caa1a084e..b24b1c8b3979 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -214,8 +214,6 @@ struct x86_emulate_ops {
void (*halt)(struct x86_emulate_ctxt *ctxt);
void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
- void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
- void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
int (*intercept)(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1bfb99770c34..516798431328 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,7 +536,20 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
+ /*
+ * QEMU userspace and the guest each have their own FPU state.
+ * In vcpu_run, we switch between the user and guest FPU contexts.
+ * While running a VCPU, the VCPU thread will have the guest FPU
+ * context.
+ *
+ * Note that while the PKRU state lives inside the fpu registers,
+ * it is switched out separately at VMENTER and VMEXIT time. The
+ * "guest_fpu" state here contains the guest FPU context, with the
+ * host PRKU bits.
+ */
+ struct fpu user_fpu;
struct fpu guest_fpu;
+
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
@@ -1161,7 +1174,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
static inline int emulate_instruction(struct kvm_vcpu *vcpu,
int emulation_type)
{
- return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+ return x86_emulate_instruction(vcpu, 0,
+ emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
}
void kvm_enable_efer_bits(u64);
@@ -1434,4 +1448,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 9ea26f167497..5ff3e8af2c20 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,6 +3,7 @@
#define _ASM_X86_MMU_H
#include <linux/spinlock.h>
+#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/atomic.h>
@@ -27,7 +28,8 @@ typedef struct {
atomic64_t tlb_gen;
#ifdef CONFIG_MODIFY_LDT_SYSCALL
- struct ldt_struct *ldt;
+ struct rw_semaphore ldt_usr_sem;
+ struct ldt_struct *ldt;
#endif
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6d16d15d09a0..c931b88982a0 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -50,22 +50,53 @@ struct ldt_struct {
* call gates. On native, we could merge the ldt_struct and LDT
* allocations, but it's not worth trying to optimize.
*/
- struct desc_struct *entries;
- unsigned int nr_entries;
+ struct desc_struct *entries;
+ unsigned int nr_entries;
+
+ /*
+ * If PTI is in use, then the entries array is not mapped while we're
+ * in user mode. The whole array will be aliased at the addressed
+ * given by ldt_slot_va(slot). We use two slots so that we can allocate
+ * and map, and enable a new LDT without invalidating the mapping
+ * of an older, still-in-use LDT.
+ *
+ * slot will be -1 if this LDT doesn't have an alias mapping.
+ */
+ int slot;
};
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+#ifdef CONFIG_X86_64
+ return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+#else
+ BUG();
+#endif
+}
+
/*
* Used for LDT copy/destruction.
*/
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+static inline void init_new_context_ldt(struct mm_struct *mm)
+{
+ mm->context.ldt = NULL;
+ init_rwsem(&mm->context.ldt_usr_sem);
+}
+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
+void ldt_arch_exit_mmap(struct mm_struct *mm);
#else /* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context_ldt(struct task_struct *tsk,
- struct mm_struct *mm)
+static inline void init_new_context_ldt(struct mm_struct *mm) { }
+static inline int ldt_dup_context(struct mm_struct *oldmm,
+ struct mm_struct *mm)
{
return 0;
}
-static inline void destroy_context_ldt(struct mm_struct *mm) {}
+static inline void destroy_context_ldt(struct mm_struct *mm) { }
+static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
static inline void load_mm_ldt(struct mm_struct *mm)
@@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
* that we can see.
*/
- if (unlikely(ldt))
- set_ldt(ldt->entries, ldt->nr_entries);
- else
+ if (unlikely(ldt)) {
+ if (static_cpu_has(X86_FEATURE_PTI)) {
+ if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+ /*
+ * Whoops -- either the new LDT isn't mapped
+ * (if slot == -1) or is mapped into a bogus
+ * slot (if slot > 1).
+ */
+ clear_LDT();
+ return;
+ }
+
+ /*
+ * If page table isolation is enabled, ldt->entries
+ * will not be mapped in the userspace pagetables.
+ * Tell the CPU to access the LDT through the alias
+ * at ldt_slot_va(ldt->slot).
+ */
+ set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+ } else {
+ set_ldt(ldt->entries, ldt->nr_entries);
+ }
+ } else {
clear_LDT();
+ }
#else
clear_LDT();
#endif
@@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ mutex_init(&mm->context.lock);
+
mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
atomic64_set(&mm->context.tlb_gen, 0);
- #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
/* pkey 0 is the default and always allocated */
mm->context.pkey_allocation_map = 0x1;
/* -1 means unallocated or invalid */
mm->context.execute_only_pkey = -1;
}
- #endif
- return init_new_context_ldt(tsk, mm);
+#endif
+ init_new_context_ldt(mm);
+ return 0;
}
static inline void destroy_context(struct mm_struct *mm)
{
@@ -176,15 +231,16 @@ do { \
} while (0)
#endif
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
paravirt_arch_dup_mmap(oldmm, mm);
+ return ldt_dup_context(oldmm, mm);
}
static inline void arch_exit_mmap(struct mm_struct *mm)
{
paravirt_arch_exit_mmap(mm);
+ ldt_arch_exit_mmap(mm);
}
#ifdef CONFIG_X86_64
@@ -282,33 +338,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
}
/*
- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
- * bits. This serves two purposes. It prevents a nasty situation in
- * which PCID-unaware code saves CR3, loads some other value (with PCID
- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
- * the saved ASID was nonzero. It also means that any bugs involving
- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
- * deterministically.
- */
-
-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
-{
- if (static_cpu_has(X86_FEATURE_PCID)) {
- VM_WARN_ON_ONCE(asid > 4094);
- return __sme_pa(mm->pgd) | (asid + 1);
- } else {
- VM_WARN_ON_ONCE(asid != 0);
- return __sme_pa(mm->pgd);
- }
-}
-
-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-{
- VM_WARN_ON_ONCE(asid > 4094);
- return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
-}
-
-/*
* This can be used from process context to figure out what the value of
* CR3 is without needing to do a (slow) __read_cr3().
*
@@ -317,7 +346,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
*/
static inline unsigned long __get_current_cr3_fast(void)
{
- unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+ unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
this_cpu_read(cpu_tlbstate.loaded_mm_asid));
/* For now, be very restrictive about when this can be called. */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 283efcaac8af..892df375b615 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -927,6 +927,15 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(clobbers) \
+ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
+ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif
+
#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 4b5e1eafada7..aff42e1da6ee 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
*/
extern gfp_t __userpte_alloc_gfp;
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Instead of one PGD, we acquire two PGDs. Being order-1, it is
+ * both 8k in size and 8k-aligned. That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#define PGD_ALLOCATION_ORDER 1
+#else
+#define PGD_ALLOCATION_ORDER 0
+#endif
+
/*
* Allocate and free page tables.
*/
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 09f9e1e00e3b..e42b8943cb1a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
void ptdump_walk_pgd_level_checkwx(void);
#ifdef CONFIG_DEBUG_WX
@@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
static inline int p4d_bad(p4d_t p4d)
{
- return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+ unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
+
+ if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ ignore_flags |= _PAGE_NX;
+
+ return (p4d_flags(p4d) & ~ignore_flags) != 0;
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
@@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
static inline int pgd_bad(pgd_t pgd)
{
- return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ unsigned long ignore_flags = _PAGE_USER;
+
+ if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ ignore_flags |= _PAGE_NX;
+
+ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
}
static inline int pgd_none(pgd_t pgd)
@@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
* pgd_offset() returns a (pgd_t *)
* pgd_index() is used get the offset into the pgd page's array of pgd_t's;
*/
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
/*
* a shortcut which implies the use of the kernel's pgd, instead
* of a process's
@@ -1061,7 +1076,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
-#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_RW;
@@ -1088,6 +1103,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp);
}
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_RW;
+}
+
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*
@@ -1100,7 +1121,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
*/
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
- memcpy(dst, src, count * sizeof(pgd_t));
+ memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+ /* Clone the user space pgd as well */
+ memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
+ count * sizeof(pgd_t));
+#endif
}
#define PTE_SHIFT ilog2(PTRS_PER_PTE)
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index f2ca9b28fd68..ce245b0cdfca 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
#define LAST_PKMAP 1024
#endif
-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
- & PMD_MASK)
+/*
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
+
+#define CPU_ENTRY_AREA_BASE \
+ ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+
+#define PKMAP_BASE \
+ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
#else
-# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
+# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
#endif
#define MODULES_VADDR VMALLOC_START
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e9f05331e732..81462e9a34f6 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
#endif
}
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
+ * the user one is in the last 4k. To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+ unsigned long __ptr = (unsigned long)ptr;
+
+ __ptr |= BIT(bit);
+ return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+ unsigned long __ptr = (unsigned long)ptr;
+
+ __ptr &= ~BIT(bit);
+ return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+ return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+ return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+ return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+ return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Page table pages are page-aligned. The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+ unsigned long ptr = (unsigned long)__ptr;
+
+ return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return pgd;
+ return __pti_set_user_pgd(pgdp, pgd);
+}
+#else
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ return pgd;
+}
+#endif
+
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+ p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
*p4dp = p4d;
+#endif
}
static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ *pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
*pgdp = pgd;
+#endif
}
static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6d5f45dcd4a1..b97a539bcdee 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -76,32 +76,45 @@ typedef struct { pteval_t pte; } pte_t;
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+
#ifdef CONFIG_X86_5LEVEL
-#define VMALLOC_SIZE_TB _AC(16384, UL)
-#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+# define VMALLOC_SIZE_TB _AC(12800, UL)
+# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
+# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+# define LDT_PGD_ENTRY _AC(-112, UL)
+# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#else
-#define VMALLOC_SIZE_TB _AC(32, UL)
-#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+# define VMALLOC_SIZE_TB _AC(32, UL)
+# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
+# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+# define LDT_PGD_ENTRY _AC(-4, UL)
+# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif
+
#ifdef CONFIG_RANDOMIZE_MEMORY
-#define VMALLOC_START vmalloc_base
-#define VMEMMAP_START vmemmap_base
+# define VMALLOC_START vmalloc_base
+# define VMEMMAP_START vmemmap_base
#else
-#define VMALLOC_START __VMALLOC_BASE
-#define VMEMMAP_START __VMEMMAP_BASE
+# define VMALLOC_START __VMALLOC_BASE
+# define VMEMMAP_START __VMEMMAP_BASE
#endif /* CONFIG_RANDOMIZE_MEMORY */
-#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+
+#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+
+#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
-#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
-#define MODULES_LEN (MODULES_END - MODULES_VADDR)
-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
-#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
+#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_LEN (MODULES_END - MODULES_VADDR)
+
+#define ESPFIX_PGD_ENTRY _AC(-2, UL)
+#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
+
+#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
+#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+
+#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
+#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
#define EARLY_DYNAMIC_PAGE_TABLES 64
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 43212a43ee69..6a60fea90b9d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -38,6 +38,11 @@
#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
#define CR3_PCID_MASK 0xFFFull
#define CR3_NOFLUSH BIT_ULL(63)
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define X86_CR3_PTI_SWITCH_BIT 11
+#endif
+
#else
/*
* CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cc16fa882e3e..d3a67fba200a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
-extern struct tss_struct doublefault_tss;
-extern __u32 cpu_caps_cleared[NCAPINTS];
-extern __u32 cpu_caps_set[NCAPINTS];
+extern struct x86_hw_tss doublefault_tss;
+extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
write_cr3(__sme_pa(pgdir));
}
+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
#ifdef CONFIG_X86_32
/* This is the TSS defined by the hardware. */
struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
struct x86_hw_tss {
u32 reserved1;
u64 sp0;
+
+ /*
+ * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+ * Linux does not use ring 1, so sp1 is not otherwise needed.
+ */
u64 sp1;
+
u64 sp2;
u64 reserved2;
u64 ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
#define IO_BITMAP_BITS 65536
#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
#define INVALID_IO_BITMAP_OFFSET 0x8000
+struct entry_stack {
+ unsigned long words[64];
+};
+
+struct entry_stack_page {
+ struct entry_stack stack;
+} __aligned(PAGE_SIZE);
+
struct tss_struct {
/*
- * The hardware state:
+ * The fixed hardware portion. This must not cross a page boundary
+ * at risk of violating the SDM's advice and potentially triggering
+ * errata.
*/
struct x86_hw_tss x86_tss;
@@ -339,18 +360,9 @@ struct tss_struct {
* be within the limit.
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+} __aligned(PAGE_SIZE);
-#ifdef CONFIG_X86_32
- /*
- * Space for the temporary SYSENTER stack.
- */
- unsigned long SYSENTER_stack_canary;
- unsigned long SYSENTER_stack[64];
-#endif
-
-} ____cacheline_aligned;
-
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
/*
* sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
#endif
/*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
static inline void
native_load_sp0(unsigned long sp0)
{
- this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+ this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
static inline unsigned long current_top_of_stack(void)
{
-#ifdef CONFIG_X86_64
- return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
- /* sp0 on x86_32 is special in and around vm86 mode. */
+ /*
+ * We can't read directly from tss.sp0: sp0 on x86_32 is special in
+ * and around vm86 mode and sp0 on x86_64 is special because of the
+ * entry trampoline.
+ */
return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
}
static inline bool on_thread_stack(void)
@@ -837,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x)
#else
/*
- * User space process size. 47bits minus one guard page. The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously. We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
+ * User space process size. This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen. This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
*/
#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
new file mode 100644
index 000000000000..0b5ef05b2d2d
--- /dev/null
+++ b/arch/x86/include/asm/pti.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _ASM_X86_PTI_H
+#define _ASM_X86_PTI_H
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern void pti_init(void);
+extern void pti_check_boottime_disable(void);
+#else
+static inline void pti_check_boottime_disable(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PTI_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index b20f9d623f9c..8f09012b92e7 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -236,11 +236,23 @@
*/
#define EARLY_IDT_HANDLER_SIZE 9
+/*
+ * xen_early_idt_handler_array is for Xen pv guests: for each entry in
+ * early_idt_handler_array it contains a prequel in the form of
+ * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to
+ * max 8 bytes.
+ */
+#define XEN_EARLY_IDT_HANDLER_SIZE 8
+
#ifndef __ASSEMBLY__
extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
extern void early_ignore_irq(void);
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
+#endif
+
/*
* Load a segment. Fall back on loading the zero segment if something goes
* wrong. This variant assumes that loading zero fully clears the segment.
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 8da111b3c342..f73706878772 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -16,6 +16,7 @@ enum stack_type {
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
STACK_TYPE_SOFTIRQ,
+ STACK_TYPE_ENTRY,
STACK_TYPE_EXCEPTION,
STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
};
@@ -28,6 +29,8 @@ struct stack_info {
bool in_task_stack(unsigned long *stack, struct task_struct *task,
struct stack_info *info);
+bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask);
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 982c325dad33..8be6afb58471 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,7 +12,13 @@
/* image of the saved processor state */
struct saved_context {
- u16 es, fs, gs, ss;
+ /*
+ * On x86_32, all segment registers, with the possible exception of
+ * gs, are saved at kernel entry in pt_regs.
+ */
+#ifdef CONFIG_X86_32_LAZY_GS
+ u16 gs;
+#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7306e911faee..a7af9f53c0cb 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -20,8 +20,20 @@
*/
struct saved_context {
struct pt_regs regs;
- u16 ds, es, fs, gs, ss;
- unsigned long gs_base, gs_kernel_base, fs_base;
+
+ /*
+ * User CS and SS are saved in current_pt_regs(). The rest of the
+ * segment selectors need to be saved and restored here.
+ */
+ u16 ds, es, fs, gs;
+
+ /*
+ * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
+ * so we save them separately. We save the kernelmode GSBASE to
+ * restore percpu access after resume.
+ */
+ unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
+
unsigned long cr0, cr2, cr3, cr4, cr8;
u64 misc_enable;
bool misc_enable_saved;
@@ -30,8 +42,7 @@ struct saved_context {
u16 gdt_pad; /* Unused */
struct desc_ptr gdt_desc;
u16 idt_pad;
- u16 idt_limit;
- unsigned long idt_base;
+ struct desc_ptr idt;
u16 ldt;
u16 tss;
unsigned long tr;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8c6bd6863db9..eb5f7999a893 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss);
/* This runs runs on the previous thread's stack. */
-static inline void prepare_switch_to(struct task_struct *prev,
- struct task_struct *next)
+static inline void prepare_switch_to(struct task_struct *next)
{
#ifdef CONFIG_VMAP_STACK
/*
@@ -70,7 +69,7 @@ struct fork_frame {
#define switch_to(prev, next, last) \
do { \
- prepare_switch_to(prev, next); \
+ prepare_switch_to(next); \
\
((last) = __switch_to_asm((prev), (next))); \
} while (0)
@@ -79,10 +78,10 @@ do { \
static inline void refresh_sysenter_cs(struct thread_struct *thread)
{
/* Only happens when SEP is enabled, no need to test "SEP"arately: */
- if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+ if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
return;
- this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+ this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
}
#endif
@@ -90,10 +89,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
/* This is used when switching tasks or entering/exiting vm86 mode. */
static inline void update_sp0(struct task_struct *task)
{
+ /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
#ifdef CONFIG_X86_32
load_sp0(task->thread.sp0);
#else
- load_sp0(task_top_of_stack(task));
+ if (static_cpu_has(X86_FEATURE_XENPV))
+ load_sp0(task_top_of_stack(task));
#endif
}
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 70f425947dc5..00223333821a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
#endif
#endif
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 509046cfa5ce..4a08dd2ab32a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -9,70 +9,130 @@
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#include <asm/smp.h>
+#include <asm/invpcid.h>
+#include <asm/pti.h>
+#include <asm/processor-flags.h>
-static inline void __invpcid(unsigned long pcid, unsigned long addr,
- unsigned long type)
-{
- struct { u64 d[2]; } desc = { { pcid, addr } };
+/*
+ * The x86 feature is called PCID (Process Context IDentifier). It is similar
+ * to what is traditionally called ASID on the RISC processors.
+ *
+ * We don't use the traditional ASID implementation, where each process/mm gets
+ * its own ASID and flush/restart when we run out of ASID space.
+ *
+ * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
+ * that came by on this CPU, allowing cheaper switch_mm between processes on
+ * this CPU.
+ *
+ * We end up with different spaces for different things. To avoid confusion we
+ * use different names for each of them:
+ *
+ * ASID - [0, TLB_NR_DYN_ASIDS-1]
+ * the canonical identifier for an mm
+ *
+ * kPCID - [1, TLB_NR_DYN_ASIDS]
+ * the value we write into the PCID part of CR3; corresponds to the
+ * ASID+1, because PCID 0 is special.
+ *
+ * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ * for KPTI each mm has two address spaces and thus needs two
+ * PCID values, but we can still do with a single ASID denomination
+ * for each mm. Corresponds to kPCID + 2048.
+ *
+ */
- /*
- * The memory clobber is because the whole point is to invalidate
- * stale TLB entries and, especially if we're flushing global
- * mappings, we don't want the compiler to reorder any subsequent
- * memory accesses before the TLB flush.
- *
- * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
- * invpcid (%rcx), %rax in long mode.
- */
- asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
- : : "m" (desc), "a" (type), "c" (&desc) : "memory");
-}
+/* There are 12 bits of space for ASIDS in CR3 */
+#define CR3_HW_ASID_BITS 12
-#define INVPCID_TYPE_INDIV_ADDR 0
-#define INVPCID_TYPE_SINGLE_CTXT 1
-#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
-#define INVPCID_TYPE_ALL_NON_GLOBAL 3
+/*
+ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+ * user/kernel switches
+ */
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define PTI_CONSUMED_PCID_BITS 1
+#else
+# define PTI_CONSUMED_PCID_BITS 0
+#endif
-/* Flush all mappings for a given pcid and addr, not including globals. */
-static inline void invpcid_flush_one(unsigned long pcid,
- unsigned long addr)
-{
- __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
-}
+#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+
+/*
+ * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
+ * for them being zero-based. Another -1 is because PCID 0 is reserved for
+ * use by non-PCID-aware users.
+ */
+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
-/* Flush all mappings for a given PCID, not including globals. */
-static inline void invpcid_flush_single_context(unsigned long pcid)
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in two cache
+ * lines.
+ */
+#define TLB_NR_DYN_ASIDS 6
+
+/*
+ * Given @asid, compute kPCID
+ */
+static inline u16 kern_pcid(u16 asid)
{
- __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+ VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Make sure that the dynamic ASID space does not confict with the
+ * bit we are using to switch between user and kernel ASIDs.
+ */
+ BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
+
+ /*
+ * The ASID being passed in here should have respected the
+ * MAX_ASID_AVAILABLE and thus never have the switch bit set.
+ */
+ VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
+#endif
+ /*
+ * The dynamically-assigned ASIDs that get passed in are small
+ * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set,
+ * so do not bother to clear it.
+ *
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the
+ * PCID bits. This serves two purposes. It prevents a nasty
+ * situation in which PCID-unaware code saves CR3, loads some other
+ * value (with PCID == 0), and then restores CR3, thus corrupting
+ * the TLB for ASID 0 if the saved ASID was nonzero. It also means
+ * that any bugs involving loading a PCID-enabled CR3 with
+ * CR4.PCIDE off will trigger deterministically.
+ */
+ return asid + 1;
}
-/* Flush all mappings, including globals, for all PCIDs. */
-static inline void invpcid_flush_all(void)
+/*
+ * Given @asid, compute uPCID
+ */
+static inline u16 user_pcid(u16 asid)
{
- __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+ u16 ret = kern_pcid(asid);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+#endif
+ return ret;
}
-/* Flush all mappings for all PCIDs except globals. */
-static inline void invpcid_flush_all_nonglobals(void)
+struct pgd_t;
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{
- __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+ return __sme_pa(pgd) | kern_pcid(asid);
+ } else {
+ VM_WARN_ON_ONCE(asid != 0);
+ return __sme_pa(pgd);
+ }
}
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
{
- u64 new_tlb_gen;
-
- /*
- * Bump the generation count. This also serves as a full barrier
- * that synchronizes with switch_mm(): callers are required to order
- * their read of mm_cpumask after their writes to the paging
- * structures.
- */
- smp_mb__before_atomic();
- new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
- smp_mb__after_atomic();
-
- return new_tlb_gen;
+ VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+ VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+ return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
}
#ifdef CONFIG_PARAVIRT
@@ -99,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
return !static_cpu_has(X86_FEATURE_PCID);
}
-/*
- * 6 because 6 should be plenty and struct tlb_state will fit in
- * two cache lines.
- */
-#define TLB_NR_DYN_ASIDS 6
-
struct tlb_context {
u64 ctx_id;
u64 tlb_gen;
@@ -139,6 +193,24 @@ struct tlb_state {
bool is_lazy;
/*
+ * If set we changed the page tables in such a way that we
+ * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
+ * This tells us to go invalidate all the non-loaded ctxs[]
+ * on the next context switch.
+ *
+ * The current ctx was kept up-to-date as it ran and does not
+ * need to be invalidated.
+ */
+ bool invalidate_other;
+
+ /*
+ * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
+ * the corresponding user PCID needs a flush next time we
+ * switch to it; see SWITCH_TO_USER_CR3.
+ */
+ unsigned short user_pcid_flush_mask;
+
+ /*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
*/
@@ -173,40 +245,43 @@ static inline void cr4_init_shadow(void)
this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}
+static inline void __cr4_set(unsigned long cr4)
+{
+ lockdep_assert_irqs_disabled();
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+}
+
/* Set in this cpu's CR4. */
static inline void cr4_set_bits(unsigned long mask)
{
- unsigned long cr4;
+ unsigned long cr4, flags;
+ local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- if ((cr4 | mask) != cr4) {
- cr4 |= mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
- }
+ if ((cr4 | mask) != cr4)
+ __cr4_set(cr4 | mask);
+ local_irq_restore(flags);
}
/* Clear in this cpu's CR4. */
static inline void cr4_clear_bits(unsigned long mask)
{
- unsigned long cr4;
+ unsigned long cr4, flags;
+ local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- if ((cr4 & ~mask) != cr4) {
- cr4 &= ~mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
- }
+ if ((cr4 & ~mask) != cr4)
+ __cr4_set(cr4 & ~mask);
+ local_irq_restore(flags);
}
-static inline void cr4_toggle_bits(unsigned long mask)
+static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
{
unsigned long cr4;
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- cr4 ^= mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
+ __cr4_set(cr4 ^ mask);
}
/* Read the CR4 shadow. */
@@ -216,6 +291,14 @@ static inline unsigned long cr4_read_shadow(void)
}
/*
+ * Mark all other ASIDs as invalid, preserves the current.
+ */
+static inline void invalidate_other_asid(void)
+{
+ this_cpu_write(cpu_tlbstate.invalidate_other, true);
+}
+
+/*
* Save some of cr4 feature set we're using (e.g. Pentium 4MB
* enable and PPro Global page enable), so that any CPU's that boot
* up after us can get the correct flags. This should only be used
@@ -234,37 +317,63 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
extern void initialize_tlbstate_and_flush(void);
-static inline void __native_flush_tlb(void)
+/*
+ * Given an ASID, flush the corresponding user ASID. We can delay this
+ * until the next time we switch to it.
+ *
+ * See SWITCH_TO_USER_CR3.
+ */
+static inline void invalidate_user_asid(u16 asid)
{
+ /* There is no user ASID if address space separation is off */
+ if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ return;
+
/*
- * If current->mm == NULL then we borrow a mm which may change during a
- * task switch and therefore we must not be preempted while we write CR3
- * back:
+ * We only have a single ASID if PCID is off and the CR3
+ * write will have flushed it.
*/
- preempt_disable();
- native_write_cr3(__native_read_cr3());
- preempt_enable();
+ if (!cpu_feature_enabled(X86_FEATURE_PCID))
+ return;
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ __set_bit(kern_pcid(asid),
+ (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}
-static inline void __native_flush_tlb_global_irq_disabled(void)
+/*
+ * flush the entire current user mapping
+ */
+static inline void __native_flush_tlb(void)
{
- unsigned long cr4;
+ /*
+ * Preemption or interrupts must be disabled to protect the access
+ * to the per CPU variable and to prevent being preempted between
+ * read_cr3() and write_cr3().
+ */
+ WARN_ON_ONCE(preemptible());
- cr4 = this_cpu_read(cpu_tlbstate.cr4);
- /* clear PGE */
- native_write_cr4(cr4 & ~X86_CR4_PGE);
- /* write old PGE again and flush TLBs */
- native_write_cr4(cr4);
+ invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+
+ /* If current->mm == NULL then the read_cr3() "borrows" an mm */
+ native_write_cr3(__native_read_cr3());
}
+/*
+ * flush everything
+ */
static inline void __native_flush_tlb_global(void)
{
- unsigned long flags;
+ unsigned long cr4, flags;
if (static_cpu_has(X86_FEATURE_INVPCID)) {
/*
* Using INVPCID is considerably faster than a pair of writes
* to CR4 sandwiched inside an IRQ flag save/restore.
+ *
+ * Note, this works with CR4.PCIDE=0 or 1.
*/
invpcid_flush_all();
return;
@@ -277,36 +386,69 @@ static inline void __native_flush_tlb_global(void)
*/
raw_local_irq_save(flags);
- __native_flush_tlb_global_irq_disabled();
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+ /* toggle PGE */
+ native_write_cr4(cr4 ^ X86_CR4_PGE);
+ /* write old PGE again and flush TLBs */
+ native_write_cr4(cr4);
raw_local_irq_restore(flags);
}
+/*
+ * flush one page in the user mapping
+ */
static inline void __native_flush_tlb_single(unsigned long addr)
{
+ u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ /*
+ * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
+ * Just use invalidate_user_asid() in case we are called early.
+ */
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
+ invalidate_user_asid(loaded_mm_asid);
+ else
+ invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
}
+/*
+ * flush everything
+ */
static inline void __flush_tlb_all(void)
{
- if (boot_cpu_has(X86_FEATURE_PGE))
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
__flush_tlb_global();
- else
+ } else {
+ /*
+ * !PGE -> !PCID (setup_pcid()), thus every flush is total.
+ */
__flush_tlb();
-
- /*
- * Note: if we somehow had PCID but not PGE, then this wouldn't work --
- * we'd end up flushing kernel translations for the current ASID but
- * we might fail to flush kernel translations for other cached ASIDs.
- *
- * To avoid this issue, we force PCID off if PGE is off.
- */
+ }
}
+/*
+ * flush one page in the kernel mapping
+ */
static inline void __flush_tlb_one(unsigned long addr)
{
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
__flush_tlb_single(addr);
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ /*
+ * __flush_tlb_single() will have cleared the TLB entry for this ASID,
+ * but since kernel space is replicated across all, we must also
+ * invalidate all others.
+ */
+ invalidate_other_asid();
}
#define TLB_FLUSH_ALL -1UL
@@ -367,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
void native_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+ /*
+ * Bump the generation count. This also serves as a full barrier
+ * that synchronizes with switch_mm(): callers are required to order
+ * their read of mm_cpumask after their writes to the paging
+ * structures.
+ */
+ return atomic64_inc_return(&mm->context.tlb_gen);
+}
+
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm)
{
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 84b9ec0c1bc0..22647a642e98 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed,
DECLARE_EVENT_CLASS(vector_activate,
TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
- bool early),
+ bool reserve),
- TP_ARGS(irq, is_managed, can_reserve, early),
+ TP_ARGS(irq, is_managed, can_reserve, reserve),
TP_STRUCT__entry(
__field( unsigned int, irq )
__field( bool, is_managed )
__field( bool, can_reserve )
- __field( bool, early )
+ __field( bool, reserve )
),
TP_fast_assign(
__entry->irq = irq;
__entry->is_managed = is_managed;
__entry->can_reserve = can_reserve;
- __entry->early = early;
+ __entry->reserve = reserve;
),
- TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d",
+ TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d",
__entry->irq, __entry->is_managed, __entry->can_reserve,
- __entry->early)
+ __entry->reserve)
);
#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \
DEFINE_EVENT_FN(vector_activate, name, \
TP_PROTO(unsigned int irq, bool is_managed, \
- bool can_reserve, bool early), \
- TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \
+ bool can_reserve, bool reserve), \
+ TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \
DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1fadd310ff68..31051f35cbb7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
dotraplinkage void do_stack_segment(struct pt_regs *, long);
#ifdef CONFIG_X86_64
dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
#endif
dotraplinkage void do_general_protection(struct pt_regs *, long);
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9cc6fe1fc6f..c1688c2d0a12 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -7,6 +7,9 @@
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
+#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
+#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
+
struct unwind_state {
struct stack_info stack_info;
unsigned long stack_mask;
@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
}
#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
+/*
+ * WARNING: The entire pt_regs may not be safe to dereference. In some cases,
+ * only the iret frame registers are accessible. Use with caution!
+ */
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
if (unwind_done(state))
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d9a7c659009c..b986b2ca688a 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -7,6 +7,7 @@
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
/*
* Called on instruction fetch fault in vsyscall page.
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index da1489cb64dc..1e901e421f2d 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generated-y += unistd_32.h
generated-y += unistd_64.h
generated-y += unistd_x32.h
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 7e1e730396ae..bcba3c643e63 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -78,7 +78,12 @@
#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
+
+#define X86_CR3_PCID_BITS 12
+#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
+
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
/*
* Intel CPU features in CR4
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6e272f3ea984..880441f24146 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg)
apic_verbosity = APIC_DEBUG;
else if (strcmp("verbose", arg) == 0)
apic_verbosity = APIC_VERBOSE;
+#ifdef CONFIG_X86_64
else {
pr_warning("APIC Verbosity level %s not recognised"
" use apic=verbose or apic=debug\n", arg);
return -EINVAL;
}
+#endif
return 0;
}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index aa85690e9b64..25a87028cb3f 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = flat_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
+ .irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 1, /* logical */
.disable_esr = 0,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 7b659c4480c9..5078b5ce63a7 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = noop_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
+ .irq_delivery_mode = dest_Fixed,
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 201579dc5242..8a7963421460 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
}
int mp_irqdomain_activate(struct irq_domain *domain,
- struct irq_data *irq_data, bool early)
+ struct irq_data *irq_data, bool reserve)
{
unsigned long flags;
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 9b18be764422..ce503c99f5c4 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
((apic->irq_dest_mode == 0) ?
MSI_ADDR_DEST_MODE_PHYSICAL :
MSI_ADDR_DEST_MODE_LOGICAL) |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU :
- MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_REDIRECTION_CPU |
MSI_ADDR_DEST_ID(cfg->dest_apicid);
msg->data =
MSI_DATA_TRIGGER_EDGE |
MSI_DATA_LEVEL_ASSERT |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- MSI_DATA_DELIVERY_FIXED :
- MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_DELIVERY_FIXED |
MSI_DATA_VECTOR(cfg->vector);
}
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index fa22017de806..02e8acb134f8 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = default_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
+ .irq_delivery_mode = dest_Fixed,
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6a823a25eaff..f8b03bb8e725 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd)
irq_matrix_reserve(vector_matrix);
apicd->can_reserve = true;
apicd->has_reserved = true;
+ irqd_set_can_reserve(irqd);
trace_vector_reserve(irqd->irq, 0);
vector_assign_managed_shutdown(irqd);
}
@@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd)
int ret;
ret = assign_irq_vector_any_locked(irqd);
- if (!ret)
+ if (!ret) {
apicd->has_reserved = false;
+ /*
+ * Core might have disabled reservation mode after
+ * allocating the irq descriptor. Ideally this should
+ * happen before allocation time, but that would require
+ * completely convoluted ways of transporting that
+ * information.
+ */
+ if (!irqd_can_reserve(irqd))
+ apicd->can_reserve = false;
+ }
return ret;
}
@@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd)
}
static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
- bool early)
+ bool reserve)
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
unsigned long flags;
int ret = 0;
trace_vector_activate(irqd->irq, apicd->is_managed,
- apicd->can_reserve, early);
+ apicd->can_reserve, reserve);
/* Nothing to do for fixed assigned vectors */
if (!apicd->can_reserve && !apicd->is_managed)
return 0;
raw_spin_lock_irqsave(&vector_lock, flags);
- if (early || irqd_is_managed_and_shutdown(irqd))
+ if (reserve || irqd_is_managed_and_shutdown(irqd))
vector_assign_managed_shutdown(irqd);
else if (apicd->is_managed)
ret = activate_managed(irqd);
@@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
} else {
/* Release the vector */
apicd->can_reserve = true;
+ irqd_set_can_reserve(irqd);
clear_irq_vector(irqd);
realloc = true;
}
@@ -542,8 +554,8 @@ error:
}
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
- struct irq_data *irqd, int ind)
+static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
+ struct irq_data *irqd, int ind)
{
unsigned int cpu, vector, prev_cpu, prev_vector;
struct apic_chip_data *apicd;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 622f13ca8a94..8b04234e010b 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
.apic_id_valid = x2apic_apic_id_valid,
.apic_id_registered = x2apic_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
+ .irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 1, /* logical */
.disable_esr = 0,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8ea78275480d..76417a9aab73 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -17,6 +17,7 @@
#include <asm/sigframe.h>
#include <asm/bootparam.h>
#include <asm/suspend.h>
+#include <asm/tlbflush.h>
#ifdef CONFIG_XEN
#include <xen/interface/xen.h>
@@ -93,4 +94,13 @@ void common(void) {
BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+
+ /* TLB state for the entry code */
+ OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
+
+ /* Layout info for cpu_entry_area */
+ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+ OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+ OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
+ DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index dedf428b20b6..fa1261eefa16 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -47,13 +47,8 @@ void foo(void)
BLANK();
/* Offset from the sysenter stack to tss.sp0 */
- DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
- offsetofend(struct tss_struct, SYSENTER_stack));
-
- /* Offset from cpu_tss to SYSENTER_stack */
- OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
- /* Size of SYSENTER_stack */
- DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+ DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+ offsetofend(struct cpu_entry_area, entry_stack_page.stack));
#ifdef CONFIG_CC_STACKPROTECTOR
BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 630212fa9b9d..bf51e51d808d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,6 +23,9 @@ int main(void)
#ifdef CONFIG_PARAVIRT
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+#ifdef CONFIG_DEBUG_ENTRY
+ OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
+#endif
BLANK();
#endif
@@ -63,6 +66,7 @@ int main(void)
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+ OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
BLANK();
#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d58184b7cd44..bcb75dc97d44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x17: init_amd_zn(c); break;
}
- /* Enable workaround for FXSAVE leak */
- if (c->x86 >= 6)
+ /*
+ * Enable workaround for FXSAVE leak on CPUs
+ * without a XSaveErPtr feature
+ */
+ if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
cpu_detect_cache_sizes(c);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5a..c47de4ebf63a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
void load_percpu_segment(int cpu)
{
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
load_stack_canary_segment();
}
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
- /* On 64-bit systems, we use a read-only fixmap GDT. */
- pgprot_t prot = PAGE_KERNEL_RO;
-#else
- /*
- * On native 32-bit systems, the GDT cannot be read-only because
- * our double fault handler uses a task gate, and entering through
- * a task gate needs to change an available TSS to busy. If the GDT
- * is read-only, that will triple fault.
- *
- * On Xen PV, the GDT must be read-only because the hypervisor requires
- * it.
- */
- pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
- PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
#endif
- __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
+};
+#endif
/* Load the original GDT from the per-cpu structure */
void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
{
int i;
- for (i = 0; i < NCAPINTS; i++) {
+ for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
c->x86_capability[i] &= ~cpu_caps_cleared[i];
c->x86_capability[i] |= cpu_caps_set[i];
}
@@ -927,6 +922,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+ /* Assume for now that ALL x86 CPUs are insecure */
+ setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
@@ -1250,7 +1249,7 @@ void enable_sep_cpu(void)
return;
cpu = get_cpu();
- tss = &per_cpu(cpu_tss, cpu);
+ tss = &per_cpu(cpu_tss_rw, cpu);
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1258,7 @@ void enable_sep_cpu(void)
tss->x86_tss.ss1 = __KERNEL_CS;
wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
- wrmsr(MSR_IA32_SYSENTER_ESP,
- (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
- 0);
-
+ wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
put_cpu();
@@ -1357,25 +1352,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
- [DEBUG_STACK - 1] = DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
+ extern char _entry_trampoline[];
+ extern char entry_SYSCALL_64_trampoline[];
+
+ int cpu = smp_processor_id();
+ unsigned long SYSCALL64_entry_trampoline =
+ (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+ (entry_SYSCALL_64_trampoline - _entry_trampoline);
+
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
- wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+ if (static_cpu_has(X86_FEATURE_PTI))
+ wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+ else
+ wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
#ifdef CONFIG_IA32_EMULATION
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1378,7 @@ void syscall_init(void)
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
*/
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
#else
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1522,7 @@ void cpu_init(void)
if (cpu)
load_ucode_ap();
- t = &per_cpu(cpu_tss, cpu);
+ t = &per_cpu(cpu_tss_rw, cpu);
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
@@ -1569,7 +1561,7 @@ void cpu_init(void)
* set up and load the per-CPU TSS
*/
if (!oist->ist[0]) {
- char *estacks = per_cpu(exception_stacks, cpu);
+ char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
estacks += exception_stack_sizes[v];
@@ -1580,7 +1572,7 @@ void cpu_init(void)
}
}
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
/*
* <= is required because the CPU will access up to
@@ -1596,11 +1588,12 @@ void cpu_init(void)
enter_lazy_tlb(&init_mm, me);
/*
- * Initialize the TSS. Don't bother initializing sp0, as the initial
- * task never enters user mode.
+ * Initialize the TSS. sp0 points to the entry trampoline stack
+ * regardless of what task is running.
*/
- set_tss_desc(cpu, t);
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc();
+ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
load_mm_ldt(&init_mm);
@@ -1612,7 +1605,6 @@ void cpu_init(void)
if (is_uv_system())
uv_cpu_init();
- setup_fixmap_gdt(cpu);
load_fixmap_gdt(cpu);
}
@@ -1622,7 +1614,7 @@ void cpu_init(void)
{
int cpu = smp_processor_id();
struct task_struct *curr = current;
- struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+ struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
wait_for_master_cpu(cpu);
@@ -1657,12 +1649,12 @@ void cpu_init(void)
* Initialize the TSS. Don't bother initializing sp0, as the initial
* task never enters user mode.
*/
- set_tss_desc(cpu, t);
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc();
load_mm_ldt(&init_mm);
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
#ifdef CONFIG_DOUBLEFAULT
/* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1666,6 @@ void cpu_init(void)
fpu__init_cpu();
- setup_fixmap_gdt(cpu);
load_fixmap_gdt(cpu);
}
#endif
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c6daec4bdba5..330b8462d426 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
#define F14H_MPB_MAX_SIZE 1824
#define F15H_MPB_MAX_SIZE 4096
#define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
switch (family) {
case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
case 0x16:
max_size = F16H_MPB_MAX_SIZE;
break;
+ case 0x17:
+ max_size = F17H_MPB_MAX_SIZE;
+ break;
default:
max_size = F1XH_MPB_MAX_SIZE;
break;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf797..8ccdca6d3f9e 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
}
#else
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
- __native_flush_tlb_global_irq_disabled();
-}
-
static inline void print_ucode(struct ucode_cpu_info *uci)
{
struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (rev != mc->hdr.rev)
return -1;
-#ifdef CONFIG_X86_64
- /* Flush global tlb. This is precaution. */
- flush_tlb_early();
-#endif
uci->cpu_sig.rev = rev;
if (early)
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index 0e662c55ae90..0b8cedb20d6d 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
cpu_relax();
}
-struct tss_struct doublefault_tss __cacheline_aligned = {
- .x86_tss = {
- .sp0 = STACK_START,
- .ss0 = __KERNEL_DS,
- .ldt = 0,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
-
- .ip = (unsigned long) doublefault_fn,
- /* 0x2 bit is always set */
- .flags = X86_EFLAGS_SF | 0x2,
- .sp = STACK_START,
- .es = __USER_DS,
- .cs = __KERNEL_CS,
- .ss = __KERNEL_DS,
- .ds = __USER_DS,
- .fs = __KERNEL_PERCPU,
-
- .__cr3 = __pa_nodebug(swapper_pg_dir),
- }
+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
+ .sp0 = STACK_START,
+ .ss0 = __KERNEL_DS,
+ .ldt = 0,
+ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+
+ .ip = (unsigned long) doublefault_fn,
+ /* 0x2 bit is always set */
+ .flags = X86_EFLAGS_SF | 0x2,
+ .sp = STACK_START,
+ .es = __USER_DS,
+ .cs = __KERNEL_CS,
+ .ss = __KERNEL_DS,
+ .ds = __USER_DS,
+ .fs = __KERNEL_PERCPU,
+
+ .__cr3 = __pa_nodebug(swapper_pg_dir),
};
/* dummy for do_double_fault() call */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f13b4c00a5de..5fa110699ed2 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,6 +18,7 @@
#include <linux/nmi.h>
#include <linux/sysfs.h>
+#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
return true;
}
+bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+{
+ struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
+
+ void *begin = ss;
+ void *end = ss + 1;
+
+ if ((void *)stack < begin || (void *)stack >= end)
+ return false;
+
+ info->type = STACK_TYPE_ENTRY;
+ info->begin = begin;
+ info->end = end;
+ info->next_sp = NULL;
+
+ return true;
+}
+
static void printk_stack_address(unsigned long address, int reliable,
char *log_lvl)
{
@@ -50,6 +69,28 @@ static void printk_stack_address(unsigned long address, int reliable,
printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
}
+void show_iret_regs(struct pt_regs *regs)
+{
+ printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
+ printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
+ regs->sp, regs->flags);
+}
+
+static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
+{
+ if (on_stack(info, regs, sizeof(*regs)))
+ __show_regs(regs, 0);
+ else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+ IRET_FRAME_SIZE)) {
+ /*
+ * When an interrupt or exception occurs in entry code, the
+ * full pt_regs might not have been saved yet. In that case
+ * just print the iret frame.
+ */
+ show_iret_regs(regs);
+ }
+}
+
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl)
{
@@ -71,31 +112,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* - task stack
* - interrupt stack
* - HW exception stacks (double fault, nmi, debug, mce)
+ * - entry stack
*
- * x86-32 can have up to three stacks:
+ * x86-32 can have up to four stacks:
* - task stack
* - softirq stack
* - hardirq stack
+ * - entry stack
*/
for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
const char *stack_name;
- /*
- * If we overflowed the task stack into a guard page, jump back
- * to the bottom of the usable stack.
- */
- if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
- stack = task_stack_page(task);
-
- if (get_stack_info(stack, task, &stack_info, &visit_mask))
- break;
+ if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+ /*
+ * We weren't on a valid stack. It's possible that
+ * we overflowed a valid stack into a guard page.
+ * See if the next page up is valid so that we can
+ * generate some kind of backtrace if this happens.
+ */
+ stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
+ if (get_stack_info(stack, task, &stack_info, &visit_mask))
+ break;
+ }
stack_name = stack_type_name(stack_info.type);
if (stack_name)
printk("%s <%s>\n", log_lvl, stack_name);
- if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
- __show_regs(regs, 0);
+ if (regs)
+ show_regs_safe(&stack_info, regs);
/*
* Scan the stack, printing any text addresses we find. At the
@@ -119,7 +164,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
/*
* Don't print regs->ip again if it was already printed
- * by __show_regs() below.
+ * by show_regs_safe() below.
*/
if (regs && stack == &regs->ip)
goto next;
@@ -155,8 +200,8 @@ next:
/* if the frame has entry regs, print them */
regs = unwind_get_entry_regs(&state);
- if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
- __show_regs(regs, 0);
+ if (regs)
+ show_regs_safe(&stack_info, regs);
}
if (stack_name)
@@ -252,11 +297,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
unsigned long sp;
#endif
printk(KERN_DEFAULT
- "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+ "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
- IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "");
+ IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
+ IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
+ (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index daefae83a3aa..04170f63e3a1 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
if (type == STACK_TYPE_SOFTIRQ)
return "SOFTIRQ";
+ if (type == STACK_TYPE_ENTRY)
+ return "ENTRY_TRAMPOLINE";
+
return NULL;
}
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
if (task != current)
goto unknown;
+ if (in_entry_stack(stack, info))
+ goto recursion_check;
+
if (in_hardirq_stack(stack, info))
goto recursion_check;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 88ce2ffdb110..563e28d14f2c 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
if (type == STACK_TYPE_IRQ)
return "IRQ";
+ if (type == STACK_TYPE_ENTRY) {
+ /*
+ * On 64-bit, we have a generic entry stack that we
+ * use for all the kernel entry points, including
+ * SYSENTER.
+ */
+ return "ENTRY_TRAMPOLINE";
+ }
+
if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
return exception_stack_names[type - STACK_TYPE_EXCEPTION];
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
if (in_irq_stack(stack, info))
goto recursion_check;
+ if (in_entry_stack(stack, info))
+ goto recursion_check;
+
goto unknown;
recursion_check:
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7dca675fe78d..04a625f0fcda 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
.balign PAGE_SIZE; \
GLOBAL(name)
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Each PGD needs to be 8k long and 8k aligned. We do not
+ * ever go out to userspace with these, so we do not
+ * strictly *need* the second page, but this allows us to
+ * have a single set_pgd() implementation that does not
+ * need to worry about whether it has 4k or 8k to work
+ * with.
+ *
+ * This ensures PGDs are 8k long:
+ */
+#define PTI_USER_PGD_FILL 512
+/* This ensures they are 8k-aligned: */
+#define NEXT_PGD_PAGE(name) \
+ .balign 2 * PAGE_SIZE; \
+GLOBAL(name)
+#else
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define PTI_USER_PGD_FILL 0
+#endif
+
/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
i = 0 ; \
@@ -350,13 +371,14 @@ GLOBAL(name)
.endr
__INITDATA
-NEXT_PAGE(early_top_pgt)
+NEXT_PGD_PAGE(early_top_pgt)
.fill 511,8,0
#ifdef CONFIG_X86_5LEVEL
.quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#else
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#endif
+ .fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
.data
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+ .fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#else
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
.fill 512,8,0
+ .fill PTI_USER_PGD_FILL,8,0
#endif
#ifdef CONFIG_X86_5LEVEL
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 3feb648781c4..2f723301eb58 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* because the ->io_bitmap_max value must match the bitmap
* contents:
*/
- tss = &per_cpu(cpu_tss, get_cpu());
+ tss = &per_cpu(cpu_tss_rw, get_cpu());
if (turn_on)
bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 49cfd9fe7589..68e1867cca80 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
/* high bit used in ret_from_ code */
unsigned vector = ~regs->orig_ax;
- /*
- * NB: Unlike exception entries, IRQ entries do not reliably
- * handle context tracking in the low-level entry code. This is
- * because syscall entries execute briefly with IRQs on before
- * updating context tracking state, so we can take an IRQ from
- * kernel mode with CONTEXT_USER. The low-level entry code only
- * updates the context if we came from user mode, so we won't
- * switch to CONTEXT_KERNEL. We'll fix that once the syscall
- * code is cleaned up enough that we can cleanly defer enabling
- * IRQs.
- */
-
entering_irq();
/* entering_irq() tells RCU that we're not quiescent. Check it. */
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 020efbf5786b..d86e344f5b3d 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
return;
- WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
+ WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
current->comm, curbase, regs->sp,
irq_stack_top, irq_stack_bottom,
- estack_top, estack_bottom);
+ estack_top, estack_bottom, (void *)regs->ip);
if (sysctl_panic_on_stackoverflow)
panic("low stack detected by irq handler - check messages\n");
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 1c1eae961340..26d713ecad34 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -5,6 +5,11 @@
* Copyright (C) 2002 Andi Kleen
*
* This handles calls from both 32bit and 64bit mode.
+ *
+ * Lock order:
+ * contex.ldt_usr_sem
+ * mmap_sem
+ * context.lock
*/
#include <linux/errno.h>
@@ -19,6 +24,7 @@
#include <linux/uaccess.h>
#include <asm/ldt.h>
+#include <asm/tlb.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
@@ -42,17 +48,15 @@ static void refresh_ldt_segments(void)
#endif
}
-/* context.lock is held for us, so we don't need any locking. */
+/* context.lock is held by the task which issued the smp function call */
static void flush_ldt(void *__mm)
{
struct mm_struct *mm = __mm;
- mm_context_t *pc;
if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
return;
- pc = &mm->context;
- set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
+ load_mm_ldt(mm);
refresh_ldt_segments();
}
@@ -89,25 +93,143 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
return NULL;
}
+ /* The new LDT isn't aliased for PTI yet. */
+ new_ldt->slot = -1;
+
new_ldt->nr_entries = num_entries;
return new_ldt;
}
+/*
+ * If PTI is enabled, this maps the LDT into the kernelmode and
+ * usermode tables for the given mm.
+ *
+ * There is no corresponding unmap function. Even if the LDT is freed, we
+ * leave the PTEs around until the slot is reused or the mm is destroyed.
+ * This is harmless: the LDT is always in ordinary memory, and no one will
+ * access the freed slot.
+ *
+ * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
+ * it useful, and the flush would slow down modify_ldt().
+ */
+static int
+map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ bool is_vmalloc, had_top_level_entry;
+ unsigned long va;
+ spinlock_t *ptl;
+ pgd_t *pgd;
+ int i;
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return 0;
+
+ /*
+ * Any given ldt_struct should have map_ldt_struct() called at most
+ * once.
+ */
+ WARN_ON(ldt->slot != -1);
+
+ /*
+ * Did we already have the top level entry allocated? We can't
+ * use pgd_none() for this because it doens't do anything on
+ * 4-level page table kernels.
+ */
+ pgd = pgd_offset(mm, LDT_BASE_ADDR);
+ had_top_level_entry = (pgd->pgd != 0);
+
+ is_vmalloc = is_vmalloc_addr(ldt->entries);
+
+ for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
+ unsigned long offset = i << PAGE_SHIFT;
+ const void *src = (char *)ldt->entries + offset;
+ unsigned long pfn;
+ pte_t pte, *ptep;
+
+ va = (unsigned long)ldt_slot_va(slot) + offset;
+ pfn = is_vmalloc ? vmalloc_to_pfn(src) :
+ page_to_pfn(virt_to_page(src));
+ /*
+ * Treat the PTI LDT range as a *userspace* range.
+ * get_locked_pte() will allocate all needed pagetables
+ * and account for them in this mm.
+ */
+ ptep = get_locked_pte(mm, va, &ptl);
+ if (!ptep)
+ return -ENOMEM;
+ /*
+ * Map it RO so the easy to find address is not a primary
+ * target via some kernel interface which misses a
+ * permission check.
+ */
+ pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
+ set_pte_at(mm, va, ptep, pte);
+ pte_unmap_unlock(ptep, ptl);
+ }
+
+ if (mm->context.ldt) {
+ /*
+ * We already had an LDT. The top-level entry should already
+ * have been allocated and synchronized with the usermode
+ * tables.
+ */
+ WARN_ON(!had_top_level_entry);
+ if (static_cpu_has(X86_FEATURE_PTI))
+ WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
+ } else {
+ /*
+ * This is the first time we're mapping an LDT for this process.
+ * Sync the pgd to the usermode tables.
+ */
+ WARN_ON(had_top_level_entry);
+ if (static_cpu_has(X86_FEATURE_PTI)) {
+ WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
+ set_pgd(kernel_to_user_pgdp(pgd), *pgd);
+ }
+ }
+
+ va = (unsigned long)ldt_slot_va(slot);
+ flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
+
+ ldt->slot = slot;
+#endif
+ return 0;
+}
+
+static void free_ldt_pgtables(struct mm_struct *mm)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ struct mmu_gather tlb;
+ unsigned long start = LDT_BASE_ADDR;
+ unsigned long end = start + (1UL << PGDIR_SHIFT);
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ tlb_gather_mmu(&tlb, mm, start, end);
+ free_pgd_range(&tlb, start, end, start, end);
+ tlb_finish_mmu(&tlb, start, end);
+#endif
+}
+
/* After calling this, the LDT is immutable. */
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
}
-/* context.lock is held */
-static void install_ldt(struct mm_struct *current_mm,
- struct ldt_struct *ldt)
+static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
{
+ mutex_lock(&mm->context.lock);
+
/* Synchronizes with READ_ONCE in load_mm_ldt. */
- smp_store_release(&current_mm->context.ldt, ldt);
+ smp_store_release(&mm->context.ldt, ldt);
- /* Activate the LDT for all CPUs using current_mm. */
- on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
+ /* Activate the LDT for all CPUs using currents mm. */
+ on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
+
+ mutex_unlock(&mm->context.lock);
}
static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +246,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
}
/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
+ * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
+ * the new task is not running, so nothing can be installed.
*/
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
+int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
{
struct ldt_struct *new_ldt;
- struct mm_struct *old_mm;
int retval = 0;
- mutex_init(&mm->context.lock);
- old_mm = current->mm;
- if (!old_mm) {
- mm->context.ldt = NULL;
+ if (!old_mm)
return 0;
- }
mutex_lock(&old_mm->context.lock);
- if (!old_mm->context.ldt) {
- mm->context.ldt = NULL;
+ if (!old_mm->context.ldt)
goto out_unlock;
- }
new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
if (!new_ldt) {
@@ -156,6 +271,12 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
new_ldt->nr_entries * LDT_ENTRY_SIZE);
finalize_ldt_struct(new_ldt);
+ retval = map_ldt_struct(mm, new_ldt, 0);
+ if (retval) {
+ free_ldt_pgtables(mm);
+ free_ldt_struct(new_ldt);
+ goto out_unlock;
+ }
mm->context.ldt = new_ldt;
out_unlock:
@@ -174,13 +295,18 @@ void destroy_context_ldt(struct mm_struct *mm)
mm->context.ldt = NULL;
}
+void ldt_arch_exit_mmap(struct mm_struct *mm)
+{
+ free_ldt_pgtables(mm);
+}
+
static int read_ldt(void __user *ptr, unsigned long bytecount)
{
struct mm_struct *mm = current->mm;
unsigned long entries_size;
int retval;
- mutex_lock(&mm->context.lock);
+ down_read(&mm->context.ldt_usr_sem);
if (!mm->context.ldt) {
retval = 0;
@@ -209,7 +335,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
retval = bytecount;
out_unlock:
- mutex_unlock(&mm->context.lock);
+ up_read(&mm->context.ldt_usr_sem);
return retval;
}
@@ -269,7 +395,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
ldt.avl = 0;
}
- mutex_lock(&mm->context.lock);
+ if (down_write_killable(&mm->context.ldt_usr_sem))
+ return -EINTR;
old_ldt = mm->context.ldt;
old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -286,12 +413,31 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
new_ldt->entries[ldt_info.entry_number] = ldt;
finalize_ldt_struct(new_ldt);
+ /*
+ * If we are using PTI, map the new LDT into the userspace pagetables.
+ * If there is already an LDT, use the other slot so that other CPUs
+ * will continue to use the old LDT until install_ldt() switches
+ * them over to the new LDT.
+ */
+ error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
+ if (error) {
+ /*
+ * This only can fail for the first LDT setup. If an LDT is
+ * already installed then the PTE page is already
+ * populated. Mop up a half populated page table.
+ */
+ if (!WARN_ON_ONCE(old_ldt))
+ free_ldt_pgtables(mm);
+ free_ldt_struct(new_ldt);
+ goto out_unlock;
+ }
+
install_ldt(mm, new_ldt);
free_ldt_struct(old_ldt);
error = 0;
out_unlock:
- mutex_unlock(&mm->context.lock);
+ up_write(&mm->context.ldt_usr_sem);
out:
return error;
}
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 00bc751c861c..edfede768688 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -48,8 +48,6 @@ static void load_segments(void)
"\tmovl $"STR(__KERNEL_DS)",%%eax\n"
"\tmovl %%eax,%%ds\n"
"\tmovl %%eax,%%es\n"
- "\tmovl %%eax,%%fs\n"
- "\tmovl %%eax,%%gs\n"
"\tmovl %%eax,%%ss\n"
: : : "eax", "memory");
#undef STR
@@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image)
* The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
- set_gdt(phys_to_virt(0), 0);
idt_invalidate(phys_to_virt(0));
+ set_gdt(phys_to_virt(0), 0);
/* now call it */
image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index ac0be8283325..9edadabf04f6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
- PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd);
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 97fb3e5737f5..aed9d94bd46f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
.x86_tss = {
/*
* .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
* Poison it.
*/
.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+
+#ifdef CONFIG_X86_64
+ /*
+ * .sp1 is cpu_current_top_of_stack. The init task never
+ * runs user code, but cpu_current_top_of_stack should still
+ * be well defined before the first context switch.
+ */
+ .sp1 = TOP_OF_INIT_STACK,
+#endif
+
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
-#ifdef CONFIG_X86_32
- .SYSENTER_stack_canary = STACK_END_MAGIC,
-#endif
};
-EXPORT_PER_CPU_SYMBOL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
DEFINE_PER_CPU(bool, __tss_limit_invalid);
EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
struct fpu *fpu = &t->fpu;
if (bp) {
- struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
+ struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
t->io_bitmap_ptr = NULL;
clear_thread_flag(TIF_IO_BITMAP);
@@ -299,7 +306,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
}
if ((tifp ^ tifn) & _TIF_NOTSC)
- cr4_toggle_bits(X86_CR4_TSD);
+ cr4_toggle_bits_irqsoff(X86_CR4_TSD);
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 45bf0c5f93e1..5224c6099184 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+ struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eeeb34f85c25..c75466232016 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;
- printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
- printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
- regs->sp, regs->flags);
+ show_iret_regs(regs);
+
if (regs->orig_ax != -1)
pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
regs->r13, regs->r14, regs->r15);
+ if (!all)
+ return;
+
asm("movl %%ds,%0" : "=r" (ds));
asm("movl %%cs,%0" : "=r" (cs));
asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
rdmsrl(MSR_GS_BASE, gs);
rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
- if (!all)
- return;
-
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+ struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* Switch the PDA and FPU contexts.
*/
this_cpu_write(current_task, next_p);
+ this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
/* Reload sp0. */
update_sp0(next_p);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3d01df7d7cf6..ed556d50d7ed 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
/* Maximum number of SMT threads on any online core */
-int __max_smt_threads __read_mostly;
+int __read_mostly __max_smt_threads = 1;
/* Flag to indicate if a complete sched domain rebuild is required */
bool x86_topology_update;
@@ -126,14 +126,10 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0xa, 0xf);
spin_unlock_irqrestore(&rtc_lock, flags);
- local_flush_tlb();
- pr_debug("1.\n");
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
start_eip >> 4;
- pr_debug("2.\n");
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
start_eip & 0xf;
- pr_debug("3.\n");
}
static inline void smpboot_restore_warm_reset_vector(void)
@@ -141,11 +137,6 @@ static inline void smpboot_restore_warm_reset_vector(void)
unsigned long flags;
/*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
* Paranoid: Set warm reset code and vector here back
* to default values.
*/
@@ -237,7 +228,7 @@ static void notrace start_secondary(void *unused)
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif
-
+ load_current_idt();
cpu_init();
x86_cpuinit.early_percpu_clock_init();
preempt_disable();
@@ -932,12 +923,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp;
- /*
- * Enable the espfix hack for this CPU
- */
-#ifdef CONFIG_X86_ESPFIX64
+ /* Enable the espfix hack for this CPU */
init_espfix_ap(cpu);
-#endif
/* So we see what's up */
announce_cpu(cpu, apicid);
@@ -1304,7 +1291,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
* Today neither Intel nor AMD support heterogenous systems so
* extrapolate the boot cpu's data to all packages.
*/
- ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+ ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
pr_info("Max logical packages: %u\n", __max_logical_packages);
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 77835bc021c7..20161ef53537 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk,
{
int ret;
+ /*
+ * If the task doesn't have a stack (e.g., a zombie), the stack is
+ * "reliably" empty.
+ */
if (!try_get_task_stack(tsk))
- return -EINVAL;
+ return 0;
ret = __save_stack_trace_reliable(trace, tsk);
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 9a9c9b076955..a5b802a12212 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
cpu = get_cpu();
while (n-- > 0) {
- if (LDT_empty(info) || LDT_zero(info)) {
+ if (LDT_empty(info) || LDT_zero(info))
memset(desc, 0, sizeof(*desc));
- } else {
+ else
fill_ldt(desc, info);
-
- /*
- * Always set the accessed bit so that the CPU
- * doesn't try to write to the (read-only) GDT.
- */
- desc->type |= 1;
- }
++info;
++desc;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 989514c94a55..446c9ef8cfc3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
+#include <asm/cpu_entry_area.h>
#include <asm/mce.h>
#include <asm/fixmap.h>
#include <asm/mach_traps.h>
@@ -348,23 +349,42 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
/*
* If IRET takes a non-IST fault on the espfix64 stack, then we
- * end up promoting it to a doublefault. In that case, modify
- * the stack to make it look like we just entered the #GP
- * handler from user space, similar to bad_iret.
+ * end up promoting it to a doublefault. In that case, take
+ * advantage of the fact that we're not using the normal (TSS.sp0)
+ * stack right now. We can write a fake #GP(0) frame at TSS.sp0
+ * and then modify our own IRET frame so that, when we return,
+ * we land directly at the #GP(0) vector with the stack already
+ * set up according to its expectations.
+ *
+ * The net result is that our #GP handler will think that we
+ * entered from usermode with the bad user context.
*
* No need for ist_enter here because we don't use RCU.
*/
- if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+ if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
regs->cs == __KERNEL_CS &&
regs->ip == (unsigned long)native_irq_return_iret)
{
- struct pt_regs *normal_regs = task_pt_regs(current);
+ struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
- /* Fake a #GP(0) from userspace. */
- memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
- normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
+ /*
+ * regs->sp points to the failing IRET frame on the
+ * ESPFIX64 stack. Copy it to the entry stack. This fills
+ * in gpregs->ss through gpregs->ip.
+ *
+ */
+ memmove(&gpregs->ip, (void *)regs->sp, 5*8);
+ gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
+
+ /*
+ * Adjust our frame so that we return straight to the #GP
+ * vector with the expected RSP value. This is safe because
+ * we won't enable interupts or schedule before we invoke
+ * general_protection, so nothing will clobber the stack
+ * frame we just set up.
+ */
regs->ip = (unsigned long)general_protection;
- regs->sp = (unsigned long)&normal_regs->orig_ax;
+ regs->sp = (unsigned long)&gpregs->orig_ax;
return;
}
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
*
* Processors update CR2 whenever a page fault is detected. If a
* second page fault occurs while an earlier page fault is being
- * deliv- ered, the faulting linear address of the second fault will
+ * delivered, the faulting linear address of the second fault will
* overwrite the contents of CR2 (replacing the previous
* address). These updates to CR2 occur even if the page fault
* results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
#ifdef CONFIG_X86_64
/*
- * Help handler running on IST stack to switch off the IST stack if the
- * interrupted code was in user mode. The actual stack switch is done in
- * entry_64.S
+ * Help handler running on a per-cpu (IST or entry trampoline) stack
+ * to switch to the normal thread stack if the interrupted code was in
+ * user mode. The actual stack switch is done in entry_64.S
*/
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
{
- struct pt_regs *regs = task_pt_regs(current);
- *regs = *eregs;
+ struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
+ if (regs != eregs)
+ *regs = *eregs;
return regs;
}
NOKPROBE_SYMBOL(sync_regs);
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
/*
* This is called from entry_64.S early in handling a fault
* caused by a bad iret to user mode. To handle the fault
- * correctly, we want move our stack frame to task_pt_regs
- * and we want to pretend that the exception came from the
- * iret target.
+ * correctly, we want to move our stack frame to where it would
+ * be had we entered directly on the entry stack (rather than
+ * just below the IRET frame) and we want to pretend that the
+ * exception came from the IRET target.
*/
struct bad_iret_stack *new_stack =
- container_of(task_pt_regs(current),
- struct bad_iret_stack, regs);
+ (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
/* Copy the IRET target to the new stack. */
memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_dec();
exit:
-#if defined(CONFIG_X86_32)
- /*
- * This is the most likely code path that involves non-trivial use
- * of the SYSENTER stack. Check that we haven't overrun it.
- */
- WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
- "Overran or corrupted SYSENTER stack\n");
-#endif
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
void __init trap_init(void)
{
+ /* Init cpu_entry_area before IST entries are set up */
+ setup_cpu_entry_areas();
+
idt_setup_traps();
/*
@@ -936,8 +952,9 @@ void __init trap_init(void)
* "sidt" instruction will not leak the location of the kernel, and
* to defend the IDT against arbitrary memory write vulnerabilities.
* It will be reloaded in cpu_init() */
- __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
- idt_descr.address = fix_to_virt(FIX_RO_IDT);
+ cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
+ PAGE_KERNEL_RO);
+ idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
/*
* Should be a barrier for any external CPU state:
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index a3f973b2c97a..be86a865087a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
return NULL;
}
-static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
+static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
size_t len)
{
struct stack_info *info = &state->stack_info;
+ void *addr = (void *)_addr;
- /*
- * If the address isn't on the current stack, switch to the next one.
- *
- * We may have to traverse multiple stacks to deal with the possibility
- * that info->next_sp could point to an empty stack and the address
- * could be on a subsequent stack.
- */
- while (!on_stack(info, (void *)addr, len))
- if (get_stack_info(info->next_sp, state->task, info,
- &state->stack_mask))
- return false;
+ if (!on_stack(info, addr, len) &&
+ (get_stack_info(addr, state->task, info, &state->stack_mask)))
+ return false;
return true;
}
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
return true;
}
-#define REGS_SIZE (sizeof(struct pt_regs))
-#define SP_OFFSET (offsetof(struct pt_regs, sp))
-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
-
static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
- unsigned long *ip, unsigned long *sp, bool full)
+ unsigned long *ip, unsigned long *sp)
{
- size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
- size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
- struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
-
- if (IS_ENABLED(CONFIG_X86_64)) {
- if (!stack_access_ok(state, addr, regs_size))
- return false;
+ struct pt_regs *regs = (struct pt_regs *)addr;
- *ip = regs->ip;
- *sp = regs->sp;
+ /* x86-32 support will be more complicated due to the &regs->sp hack */
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
- return true;
- }
-
- if (!stack_access_ok(state, addr, sp_offset))
+ if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
return false;
*ip = regs->ip;
+ *sp = regs->sp;
+ return true;
+}
- if (user_mode(regs)) {
- if (!stack_access_ok(state, addr + sp_offset,
- REGS_SIZE - SP_OFFSET))
- return false;
+static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
+ unsigned long *ip, unsigned long *sp)
+{
+ struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
- *sp = regs->sp;
- } else
- *sp = (unsigned long)&regs->sp;
+ if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
+ return false;
+ *ip = regs->ip;
+ *sp = regs->sp;
return true;
}
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
- struct pt_regs *ptregs;
bool indirect = false;
if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
break;
case ORC_TYPE_REGS:
- if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
+ if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
break;
case ORC_TYPE_REGS_IRET:
- if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
+ if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference iret registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
}
- ptregs = container_of((void *)sp, struct pt_regs, ip);
- if ((unsigned long)ptregs >= prev_sp &&
- on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
- state->regs = ptregs;
- state->full_regs = false;
- } else
- state->regs = NULL;
-
+ state->regs = (void *)sp - IRET_FRAME_OFFSET;
+ state->full_regs = false;
state->signal = true;
break;
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
}
if (get_stack_info((unsigned long *)state->sp, state->task,
- &state->stack_info, &state->stack_mask))
- return;
+ &state->stack_info, &state->stack_mask)) {
+ /*
+ * We weren't on a valid stack. It's possible that
+ * we overflowed a valid stack into a guard page.
+ * See if the next page up is valid so that we can
+ * generate some kind of backtrace if this happens.
+ */
+ void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
+ if (get_stack_info(next_page, state->task, &state->stack_info,
+ &state->stack_mask))
+ return;
+ }
/*
* The caller can provide the address of the first frame directly
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a4009fb9be87..1e413a9326aa 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -61,11 +61,17 @@ jiffies_64 = jiffies;
. = ALIGN(HPAGE_SIZE); \
__end_rodata_hpage_align = .;
+#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
+#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
+
#else
#define X64_ALIGN_RODATA_BEGIN
#define X64_ALIGN_RODATA_END
+#define ALIGN_ENTRY_TEXT_BEGIN
+#define ALIGN_ENTRY_TEXT_END
+
#endif
PHDRS {
@@ -102,11 +108,22 @@ SECTIONS
CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
+ ALIGN_ENTRY_TEXT_BEGIN
ENTRY_TEXT
IRQENTRY_TEXT
+ ALIGN_ENTRY_TEXT_END
SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
+
+#ifdef CONFIG_X86_64
+ . = ALIGN(PAGE_SIZE);
+ _entry_trampoline = .;
+ *(.entry_trampoline)
+ . = ALIGN(PAGE_SIZE);
+ ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
+#endif
+
/* End of text section */
_etext = .;
} :text = 0x9090
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index cdc70a3a6583..c2cea6651279 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -44,7 +44,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},
[CPUID_1_ECX] = { 1, 0, CPUID_ECX},
[CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},
- [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX},
+ [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
[CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
[CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
[CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8079d141792a..b514b2b2845a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1046,7 +1046,6 @@ static void fetch_register_operand(struct operand *op)
static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@ -1068,13 +1067,11 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
#endif
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@ -1096,12 +1093,10 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
#endif
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@ -1113,12 +1108,10 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@ -1130,7 +1123,6 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@ -1138,9 +1130,7 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fninit");
- ctxt->ops->put_fpu(ctxt);
return X86EMUL_CONTINUE;
}
@@ -1151,9 +1141,7 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fnstcw %0": "+m"(fcw));
- ctxt->ops->put_fpu(ctxt);
ctxt->dst.val = fcw;
@@ -1167,9 +1155,7 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fnstsw %0": "+m"(fsw));
- ctxt->ops->put_fpu(ctxt);
ctxt->dst.val = fsw;
@@ -2404,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
}
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
- u64 cr0, u64 cr4)
+ u64 cr0, u64 cr3, u64 cr4)
{
int bad;
+ u64 pcid;
+
+ /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
+ pcid = 0;
+ if (cr4 & X86_CR4_PCIDE) {
+ pcid = cr3 & 0xfff;
+ cr3 &= ~0xfff;
+ }
+
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
/*
* First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2425,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
bad = ctxt->ops->set_cr(ctxt, 4, cr4);
if (bad)
return X86EMUL_UNHANDLEABLE;
+ if (pcid) {
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ }
+
}
return X86EMUL_CONTINUE;
@@ -2435,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
struct desc_struct desc;
struct desc_ptr dt;
u16 selector;
- u32 val, cr0, cr4;
+ u32 val, cr0, cr3, cr4;
int i;
cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+ cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
@@ -2481,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
- return rsm_enter_protected_mode(ctxt, cr0, cr4);
+ return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
}
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
struct desc_struct desc;
struct desc_ptr dt;
- u64 val, cr0, cr4;
+ u64 val, cr0, cr3, cr4;
u32 base3;
u16 selector;
int i, r;
@@ -2505,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
+ cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
val = GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2533,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
- r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+ r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
if (r != X86EMUL_CONTINUE)
return r;
@@ -4001,12 +4005,8 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->ops->get_fpu(ctxt);
-
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
- ctxt->ops->put_fpu(ctxt);
-
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -4014,6 +4014,26 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
fxstate_size(ctxt));
}
+/*
+ * FXRSTOR might restore XMM registers not provided by the guest. Fill
+ * in the host registers (via FXSAVE) instead, so they won't be modified.
+ * (preemption has to stay disabled until FXRSTOR).
+ *
+ * Use noinline to keep the stack for other functions called by callers small.
+ */
+static noinline int fxregs_fixup(struct fxregs_state *fx_state,
+ const size_t used_size)
+{
+ struct fxregs_state fx_tmp;
+ int rc;
+
+ rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
+ memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
+ __fxstate_size(16) - used_size);
+
+ return rc;
+}
+
static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
{
struct fxregs_state fx_state;
@@ -4024,19 +4044,17 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->ops->get_fpu(ctxt);
-
size = fxstate_size(ctxt);
+ rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
if (size < __fxstate_size(16)) {
- rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+ rc = fxregs_fixup(&fx_state, size);
if (rc != X86EMUL_CONTINUE)
goto out;
}
- rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
- if (rc != X86EMUL_CONTINUE)
- goto out;
-
if (fx_state.mxcsr >> 16) {
rc = emulate_gp(ctxt, 0);
goto out;
@@ -4046,8 +4064,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
out:
- ctxt->ops->put_fpu(ctxt);
-
return rc;
}
@@ -5000,6 +5016,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
bool op_prefix = false;
bool has_seg_override = false;
struct opcode opcode;
+ u16 dummy;
+ struct desc_struct desc;
ctxt->memop.type = OP_NONE;
ctxt->memopp = NULL;
@@ -5018,6 +5036,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
switch (mode) {
case X86EMUL_MODE_REAL:
case X86EMUL_MODE_VM86:
+ def_op_bytes = def_ad_bytes = 2;
+ ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
+ if (desc.d)
+ def_op_bytes = def_ad_bytes = 4;
+ break;
case X86EMUL_MODE_PROT16:
def_op_bytes = def_ad_bytes = 2;
break;
@@ -5290,9 +5313,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
int rc;
- ctxt->ops->get_fpu(ctxt);
rc = asm_safe("fwait");
- ctxt->ops->put_fpu(ctxt);
if (unlikely(rc != X86EMUL_CONTINUE))
return emulate_exception(ctxt, MF_VECTOR, 0, false);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index bdff437acbcb..4e822ad363f3 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -209,12 +209,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
old_irr = ioapic->irr;
ioapic->irr |= mask;
- if (edge)
+ if (edge) {
ioapic->irr_delivered &= ~mask;
- if ((edge && old_irr == ioapic->irr) ||
- (!edge && entry.fields.remote_irr)) {
- ret = 0;
- goto out;
+ if (old_irr == ioapic->irr) {
+ ret = 0;
+ goto out;
+ }
}
ret = ioapic_service(ioapic, irq, line_status);
@@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
index == RTC_GSI) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
e->fields.dest_id, e->fields.dest_mode) ||
- (e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
- kvm_apic_pending_eoi(vcpu, e->fields.vector)))
+ kvm_apic_pending_eoi(vcpu, e->fields.vector))
__set_bit(e->fields.vector,
ioapic_handled_vectors);
}
@@ -277,6 +276,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
bool mask_before, mask_after;
+ int old_remote_irr, old_delivery_status;
union kvm_ioapic_redirect_entry *e;
switch (ioapic->ioregsel) {
@@ -299,14 +299,28 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
return;
e = &ioapic->redirtbl[index];
mask_before = e->fields.mask;
+ /* Preserve read-only fields */
+ old_remote_irr = e->fields.remote_irr;
+ old_delivery_status = e->fields.delivery_status;
if (ioapic->ioregsel & 1) {
e->bits &= 0xffffffff;
e->bits |= (u64) val << 32;
} else {
e->bits &= ~0xffffffffULL;
e->bits |= (u32) val;
- e->fields.remote_irr = 0;
}
+ e->fields.remote_irr = old_remote_irr;
+ e->fields.delivery_status = old_delivery_status;
+
+ /*
+ * Some OSes (Linux, Xen) assume that Remote IRR bit will
+ * be cleared by IOAPIC hardware when the entry is configured
+ * as edge-triggered. This behavior is used to simulate an
+ * explicit EOI on IOAPICs that don't have the EOI register.
+ */
+ if (e->fields.trig_mode == IOAPIC_EDGE_TRIG)
+ e->fields.remote_irr = 0;
+
mask_after = e->fields.mask;
if (mask_before != mask_after)
kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
@@ -324,7 +338,9 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
struct kvm_lapic_irq irqe;
int ret;
- if (entry->fields.mask)
+ if (entry->fields.mask ||
+ (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG &&
+ entry->fields.remote_irr))
return -1;
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 943acbf00c69..e2c1fb8d35ce 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -266,9 +266,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
recalculate_apic_map(apic->vcpu->kvm);
}
+static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
+{
+ return ((id >> 4) << 16) | (1 << (id & 0xf));
+}
+
static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
- u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+ u32 ldr = kvm_apic_calc_x2apic_ldr(id);
WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
@@ -2245,6 +2250,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
{
if (apic_x2apic_mode(vcpu->arch.apic)) {
u32 *id = (u32 *)(s->regs + APIC_ID);
+ u32 *ldr = (u32 *)(s->regs + APIC_LDR);
if (vcpu->kvm->arch.x2apic_format) {
if (*id != vcpu->vcpu_id)
@@ -2255,6 +2261,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
else
*id <<= 24;
}
+
+ /* In x2APIC mode, the LDR is fixed and based on the id */
+ if (set)
+ *ldr = kvm_apic_calc_x2apic_ldr(*id);
}
return 0;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e5e66e5c6640..c4deb1f34faa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if(make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, 0, 0,
vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if (make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if (make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if (make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
0, ACC_ALL);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 59e13a79c2e3..eb714f1cdf7e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -361,6 +361,7 @@ static void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h;
struct nested_state *g;
+ u32 h_intercept_exceptions;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -371,9 +372,14 @@ static void recalc_intercepts(struct vcpu_svm *svm)
h = &svm->nested.hsave->control;
g = &svm->nested;
+ /* No need to intercept #UD if L1 doesn't intercept it */
+ h_intercept_exceptions =
+ h->intercept_exceptions & ~(1U << UD_VECTOR);
+
c->intercept_cr = h->intercept_cr | g->intercept_cr;
c->intercept_dr = h->intercept_dr | g->intercept_dr;
- c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
+ c->intercept_exceptions =
+ h_intercept_exceptions | g->intercept_exceptions;
c->intercept = h->intercept | g->intercept;
}
@@ -2196,7 +2202,10 @@ static int ud_interception(struct vcpu_svm *svm)
{
int er;
+ WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
if (er != EMULATE_DONE)
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 714a0673ec3c..023afa0c8887 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1887,7 +1887,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;
- eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
+ eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
(1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
@@ -1905,6 +1905,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
+ else
+ eb |= 1u << UD_VECTOR;
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@@ -2300,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
* processors. See 22.2.4.
*/
vmcs_writel(HOST_TR_BASE,
- (unsigned long)this_cpu_ptr(&cpu_tss));
+ (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
/*
@@ -5600,7 +5602,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
}
- vmcs_writel(GUEST_RFLAGS, 0x02);
+ kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -5915,11 +5917,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return 1; /* already handled by vmx_vcpu_run() */
if (is_invalid_opcode(intr_info)) {
- if (is_guest_mode(vcpu)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
+ WARN_ON_ONCE(is_guest_mode(vcpu));
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
if (er != EMULATE_DONE)
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
@@ -6602,7 +6603,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
- err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
+ err = emulate_instruction(vcpu, 0);
if (err == EMULATE_USER_EXIT) {
++vcpu->stat.mmio_exits;
@@ -6750,16 +6751,10 @@ static __init int hardware_setup(void)
goto out;
}
- vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- /*
- * Allow direct access to the PC debug port (it is often used for I/O
- * delays, but the vmexits simply slow things down).
- */
memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
- clear_bit(0x80, vmx_io_bitmap_a);
memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
@@ -7414,10 +7409,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
*/
static void free_nested(struct vcpu_vmx *vmx)
{
- if (!vmx->nested.vmxon)
+ if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
return;
vmx->nested.vmxon = false;
+ vmx->nested.smm.vmxon = false;
free_vpid(vmx->nested.vpid02);
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
@@ -9800,8 +9796,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
- /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
- cr4_fixed1_update(bit(11), ecx, bit(2));
+ cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
#undef cr4_fixed1_update
}
@@ -10875,6 +10870,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
return 1;
}
+ if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
+ (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
+ (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
+ return 1;
+
return 0;
}
@@ -11099,13 +11099,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qual;
-
- if (kvm_event_needs_reinjection(vcpu))
- return -EBUSY;
+ bool block_nested_events =
+ vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
if (vcpu->arch.exception.pending &&
nested_vmx_check_exception(vcpu, &exit_qual)) {
- if (vmx->nested.nested_run_pending)
+ if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
vcpu->arch.exception.pending = false;
@@ -11114,14 +11113,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
vmx->nested.preemption_timer_expired) {
- if (vmx->nested.nested_run_pending)
+ if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
return 0;
}
if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
- if (vmx->nested.nested_run_pending)
+ if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
NMI_VECTOR | INTR_TYPE_NMI_INTR |
@@ -11137,7 +11136,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
nested_exit_on_intr(vcpu)) {
- if (vmx->nested.nested_run_pending)
+ if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
return 0;
@@ -11324,6 +11323,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
kvm_clear_interrupt_queue(vcpu);
}
+static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ u32 entry_failure_code;
+
+ nested_ept_uninit_mmu_context(vcpu);
+
+ /*
+ * Only PDPTE load can fail as the value of cr3 was checked on entry and
+ * couldn't have changed.
+ */
+ if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
+ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
+
+ if (!enable_ept)
+ vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+}
+
/*
* A part of what we need to when the nested L2 guest exits and we want to
* run its L1 parent, is to reset L1's guest state to the host state specified
@@ -11337,7 +11354,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct kvm_segment seg;
- u32 entry_failure_code;
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -11364,17 +11380,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
vmx_set_cr4(vcpu, vmcs12->host_cr4);
- nested_ept_uninit_mmu_context(vcpu);
-
- /*
- * Only PDPTE load can fail as the value of cr3 was checked on entry and
- * couldn't have changed.
- */
- if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
- nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
-
- if (!enable_ept)
- vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+ load_vmcs12_mmu_host_state(vcpu, vmcs12);
if (enable_vpid) {
/*
@@ -11604,6 +11610,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
* accordingly.
*/
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+
+ load_vmcs12_mmu_host_state(vcpu, vmcs12);
+
/*
* The emulated instruction was already skipped in
* nested_vmx_run, but the updated RIP was never
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c85aa2e2d1..1cec2c62a0b0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
static bool __read_mostly ignore_msrs = 0;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
+static bool __read_mostly report_ignored_msrs = true;
+module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
+
unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
@@ -1795,10 +1798,13 @@ u64 get_kvmclock_ns(struct kvm *kvm)
/* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu();
- kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
- &hv_clock.tsc_shift,
- &hv_clock.tsc_to_system_mul);
- ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+ if (__this_cpu_read(cpu_tsc_khz)) {
+ kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+ &hv_clock.tsc_shift,
+ &hv_clock.tsc_to_system_mul);
+ ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+ } else
+ ret = ktime_get_boot_ns() + ka->kvmclock_offset;
put_cpu();
@@ -1830,6 +1836,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
*/
BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+ if (guest_hv_clock.version & 1)
+ ++guest_hv_clock.version; /* first time write, random junk */
+
vcpu->hv_clock.version = guest_hv_clock.version + 1;
kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
&vcpu->hv_clock,
@@ -2322,7 +2331,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
*/
- vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
+ msr, data);
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
@@ -2359,8 +2370,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr, data);
return 1;
} else {
- vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
- msr, data);
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu,
+ "ignored wrmsr: 0x%x data 0x%llx\n",
+ msr, data);
break;
}
}
@@ -2578,7 +2591,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->index);
return 1;
} else {
- vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
+ msr_info->index);
msr_info->data = 0;
}
break;
@@ -2922,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
srcu_read_unlock(&vcpu->kvm->srcu, idx);
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
- kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
}
@@ -4370,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
addr, n, v))
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
handled += n;
addr += n;
len -= n;
@@ -4629,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+ vcpu->mmio_fragments[0].gpa, val);
vcpu->mmio_read_completed = 0;
return 1;
}
@@ -4651,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
return vcpu_mmio_write(vcpu, gpa, bytes, val);
}
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
return X86EMUL_IO_NEEDED;
}
@@ -5237,17 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
emul_to_vcpu(ctxt)->arch.halt_request = 1;
}
-static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
-{
- preempt_disable();
- kvm_load_guest_fpu(emul_to_vcpu(ctxt));
-}
-
-static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
-{
- preempt_enable();
-}
-
static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
@@ -5325,8 +5328,6 @@ static const struct x86_emulate_ops emulate_ops = {
.halt = emulator_halt,
.wbinvd = emulator_wbinvd,
.fix_hypercall = emulator_fix_hypercall,
- .get_fpu = emulator_get_fpu,
- .put_fpu = emulator_put_fpu,
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
.set_nmi_mask = emulator_set_nmi_mask,
@@ -5430,7 +5431,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
- r = EMULATE_FAIL;
+ r = EMULATE_USER_EXIT;
}
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5722,6 +5723,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
emulation_type))
return EMULATE_DONE;
+ if (ctxt->have_exception && inject_emulated_exception(vcpu))
+ return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
return handle_emulation_failure(vcpu);
@@ -6761,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
kvm_x86_ops->tlb_flush(vcpu);
}
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
+{
+ unsigned long apic_address;
+
+ /*
+ * The physical address of apic access page is stored in the VMCS.
+ * Update it when it becomes invalid.
+ */
+ apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (start <= apic_address && apic_address < end)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
struct page *page = NULL;
@@ -6935,7 +6952,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
- kvm_load_guest_fpu(vcpu);
/*
* Disable IRQs before setting IN_GUEST_MODE. Posted interrupt
@@ -7248,14 +7264,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
- struct fpu *fpu = &current->thread.fpu;
int r;
- sigset_t sigsaved;
- fpu__initialize(fpu);
+ kvm_sigset_activate(vcpu);
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+ kvm_load_guest_fpu(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) {
@@ -7297,9 +7310,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
r = vcpu_run(vcpu);
out:
+ kvm_put_guest_fpu(vcpu);
post_kvm_run_save(vcpu);
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ kvm_sigset_deactivate(vcpu);
return r;
}
@@ -7367,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
#endif
kvm_rip_write(vcpu, regs->rip);
- kvm_set_rflags(vcpu, regs->rflags);
+ kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
vcpu->arch.exception.pending = false;
@@ -7481,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
+int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
+ /*
+ * When EFER.LME and CR0.PG are set, the processor is in
+ * 64-bit mode (though maybe in a 32-bit code segment).
+ * CR4.PAE and EFER.LMA must be set.
+ */
+ if (!(sregs->cr4 & X86_CR4_PAE_BIT)
+ || !(sregs->efer & EFER_LMA))
+ return -EINVAL;
+ } else {
+ /*
+ * Not in 64-bit mode: EFER.LMA is clear and the code
+ * segment cannot be 64-bit.
+ */
+ if (sregs->efer & EFER_LMA || sregs->cs.l)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
@@ -7493,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
(sregs->cr4 & X86_CR4_OSXSAVE))
return -EINVAL;
+ if (kvm_valid_sregs(vcpu, sregs))
+ return -EINVAL;
+
apic_base_msr.data = sregs->apic_base;
apic_base_msr.host_initiated = true;
if (kvm_set_apic_base(vcpu, &apic_base_msr))
@@ -7690,32 +7729,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
}
+/* Swap (qemu) user FPU context for the guest FPU context. */
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
- if (vcpu->guest_fpu_loaded)
- return;
-
- /*
- * Restore all possible states in the guest,
- * and assume host would use all available bits.
- * Guest xcr0 would be loaded later.
- */
- vcpu->guest_fpu_loaded = 1;
- __kernel_fpu_begin();
+ preempt_disable();
+ copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
~XFEATURE_MASK_PKRU);
+ preempt_enable();
trace_kvm_fpu(1);
}
+/* When vcpu_run ends, restore user space FPU context. */
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- if (!vcpu->guest_fpu_loaded)
- return;
-
- vcpu->guest_fpu_loaded = 0;
+ preempt_disable();
copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
- __kernel_fpu_end();
+ copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+ preempt_enable();
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
@@ -7832,7 +7864,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
* To avoid have the INIT path from kvm_apic_has_events() that be
* called with loaded FPU and does not let userspace fix the state.
*/
- kvm_put_guest_fpu(vcpu);
+ if (init_event)
+ kvm_put_guest_fpu(vcpu);
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
XFEATURE_MASK_BNDREGS);
if (mpx_state_buffer)
@@ -7841,6 +7874,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
XFEATURE_MASK_BNDCSR);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
+ if (init_event)
+ kvm_load_guest_fpu(vcpu);
}
if (!init_event) {
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 553f8fd23cc4..4846eff7e4c8 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
/*
- * Use cpu_tss as a cacheline-aligned, seldomly
+ * Use cpu_tss_rw as a cacheline-aligned, seldomly
* accessed per-cpu variable as the monitor target.
*/
- __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
+ __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
/*
* AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index c4d55919fac1..e0b85930dd77 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
EndTable
Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
82: INVPCID Gy,Mdq (66)
83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
EndTable
GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
EndTable
# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 8e13b8cc6bed..27e9e90a8d35 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
@@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
-obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
-obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
+obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644
index 000000000000..b9283cc27622
--- /dev/null
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+struct cpu_entry_area *get_cpu_entry_area(int cpu)
+{
+ unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+ BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
+
+ return (struct cpu_entry_area *) va;
+}
+EXPORT_SYMBOL(get_cpu_entry_area);
+
+void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
+{
+ unsigned long va = (unsigned long) cea_vaddr;
+
+ set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
+}
+
+static void __init
+cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+{
+ for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
+ cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+static void percpu_setup_debug_store(int cpu)
+{
+#ifdef CONFIG_CPU_SUP_INTEL
+ int npages;
+ void *cea;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return;
+
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
+ npages = sizeof(struct debug_store) / PAGE_SIZE;
+ BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
+ cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
+ PAGE_KERNEL);
+
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
+ /*
+ * Force the population of PMDs for not yet allocated per cpu
+ * memory like debug store buffers.
+ */
+ npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
+ for (; npages; npages--, cea += PAGE_SIZE)
+ cea_set_pte(cea, 0, PAGE_NONE);
+#endif
+}
+
+/* Setup the fixmap mappings only once per-processor */
+static void __init setup_cpu_entry_area(int cpu)
+{
+#ifdef CONFIG_X86_64
+ extern char _entry_trampoline[];
+
+ /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+ pgprot_t gdt_prot = PAGE_KERNEL_RO;
+ pgprot_t tss_prot = PAGE_KERNEL_RO;
+#else
+ /*
+ * On native 32-bit systems, the GDT cannot be read-only because
+ * our double fault handler uses a task gate, and entering through
+ * a task gate needs to change an available TSS to busy. If the
+ * GDT is read-only, that will triple fault. The TSS cannot be
+ * read-only because the CPU writes to it on task switches.
+ *
+ * On Xen PV, the GDT must be read-only because the hypervisor
+ * requires it.
+ */
+ pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+ PAGE_KERNEL_RO : PAGE_KERNEL;
+ pgprot_t tss_prot = PAGE_KERNEL;
+#endif
+
+ cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
+ gdt_prot);
+
+ cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
+ per_cpu_ptr(&entry_stack_storage, cpu), 1,
+ PAGE_KERNEL);
+
+ /*
+ * The Intel SDM says (Volume 3, 7.2.1):
+ *
+ * Avoid placing a page boundary in the part of the TSS that the
+ * processor reads during a task switch (the first 104 bytes). The
+ * processor may not correctly perform address translations if a
+ * boundary occurs in this area. During a task switch, the processor
+ * reads and writes into the first 104 bytes of each TSS (using
+ * contiguous physical addresses beginning with the physical address
+ * of the first byte of the TSS). So, after TSS access begins, if
+ * part of the 104 bytes is not physically contiguous, the processor
+ * will access incorrect information without generating a page-fault
+ * exception.
+ *
+ * There are also a lot of errata involving the TSS spanning a page
+ * boundary. Assert that we're not doing that.
+ */
+ BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+ BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+ cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
+ &per_cpu(cpu_tss_rw, cpu),
+ sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
+
+#ifdef CONFIG_X86_32
+ per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+ BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+ BUILD_BUG_ON(sizeof(exception_stacks) !=
+ sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+ cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
+ &per_cpu(exception_stacks, cpu),
+ sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
+
+ cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+ __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+ percpu_setup_debug_store(cpu);
+}
+
+static __init void setup_cpu_entry_area_ptes(void)
+{
+#ifdef CONFIG_X86_32
+ unsigned long start, end;
+
+ BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+ BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
+
+ start = CPU_ENTRY_AREA_BASE;
+ end = start + CPU_ENTRY_AREA_MAP_SIZE;
+
+ /* Careful here: start + PMD_SIZE might wrap around */
+ for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
+ populate_extra_pte(start);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+ unsigned int cpu;
+
+ setup_cpu_entry_area_ptes();
+
+ for_each_possible_cpu(cpu)
+ setup_cpu_entry_area(cpu);
+}
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index bfcffdf6c577..421f2664ffa0 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -5,7 +5,7 @@
static int ptdump_show(struct seq_file *m, void *v)
{
- ptdump_walk_pgd_level(m, NULL);
+ ptdump_walk_pgd_level_debugfs(m, NULL, false);
return 0;
}
@@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = {
.release = single_release,
};
-static struct dentry *pe;
+static int ptdump_show_curknl(struct seq_file *m, void *v)
+{
+ if (current->mm->pgd) {
+ down_read(&current->mm->mmap_sem);
+ ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
+ up_read(&current->mm->mmap_sem);
+ }
+ return 0;
+}
+
+static int ptdump_open_curknl(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show_curknl, NULL);
+}
+
+static const struct file_operations ptdump_curknl_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open_curknl,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+static struct dentry *pe_curusr;
+
+static int ptdump_show_curusr(struct seq_file *m, void *v)
+{
+ if (current->mm->pgd) {
+ down_read(&current->mm->mmap_sem);
+ ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
+ up_read(&current->mm->mmap_sem);
+ }
+ return 0;
+}
+
+static int ptdump_open_curusr(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show_curusr, NULL);
+}
+
+static const struct file_operations ptdump_curusr_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open_curusr,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
+static struct dentry *dir, *pe_knl, *pe_curknl;
static int __init pt_dump_debug_init(void)
{
- pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
- &ptdump_fops);
- if (!pe)
+ dir = debugfs_create_dir("page_tables", NULL);
+ if (!dir)
return -ENOMEM;
+ pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
+ &ptdump_fops);
+ if (!pe_knl)
+ goto err;
+
+ pe_curknl = debugfs_create_file("current_kernel", 0400,
+ dir, NULL, &ptdump_curknl_fops);
+ if (!pe_curknl)
+ goto err;
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ pe_curusr = debugfs_create_file("current_user", 0400,
+ dir, NULL, &ptdump_curusr_fops);
+ if (!pe_curusr)
+ goto err;
+#endif
return 0;
+err:
+ debugfs_remove_recursive(dir);
+ return -ENOMEM;
}
static void __exit pt_dump_debug_exit(void)
{
- debugfs_remove_recursive(pe);
+ debugfs_remove_recursive(dir);
}
module_init(pt_dump_debug_init);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 5e3ac6fe6c9e..f56902c1f04b 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -44,68 +44,97 @@ struct addr_marker {
unsigned long max_lines;
};
-/* indices for address_markers; keep sync'd w/ address_markers below */
+/* Address space markers hints */
+
+#ifdef CONFIG_X86_64
+
enum address_markers_idx {
USER_SPACE_NR = 0,
-#ifdef CONFIG_X86_64
KERNEL_SPACE_NR,
LOW_KERNEL_NR,
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
+ LDT_NR,
+#endif
VMALLOC_START_NR,
VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
KASAN_SHADOW_START_NR,
KASAN_SHADOW_END_NR,
#endif
-# ifdef CONFIG_X86_ESPFIX64
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
+ LDT_NR,
+#endif
+ CPU_ENTRY_AREA_NR,
+#ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
-# endif
+#endif
+#ifdef CONFIG_EFI
+ EFI_END_NR,
+#endif
HIGH_KERNEL_NR,
MODULES_VADDR_NR,
MODULES_END_NR,
-#else
+ FIXADDR_START_NR,
+ END_OF_SPACE_NR,
+};
+
+static struct addr_marker address_markers[] = {
+ [USER_SPACE_NR] = { 0, "User Space" },
+ [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" },
+ [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" },
+ [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
+ [VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
+#ifdef CONFIG_KASAN
+ [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
+ [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
+#endif
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
+#endif
+ [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
+#ifdef CONFIG_X86_ESPFIX64
+ [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
+#endif
+#ifdef CONFIG_EFI
+ [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
+#endif
+ [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
+ [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
+ [MODULES_END_NR] = { MODULES_END, "End Modules" },
+ [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
+ [END_OF_SPACE_NR] = { -1, NULL }
+};
+
+#else /* CONFIG_X86_64 */
+
+enum address_markers_idx {
+ USER_SPACE_NR = 0,
KERNEL_SPACE_NR,
VMALLOC_START_NR,
VMALLOC_END_NR,
-# ifdef CONFIG_HIGHMEM
+#ifdef CONFIG_HIGHMEM
PKMAP_BASE_NR,
-# endif
- FIXADDR_START_NR,
#endif
+ CPU_ENTRY_AREA_NR,
+ FIXADDR_START_NR,
+ END_OF_SPACE_NR,
};
-/* Address space markers hints */
static struct addr_marker address_markers[] = {
- { 0, "User Space" },
-#ifdef CONFIG_X86_64
- { 0x8000000000000000UL, "Kernel Space" },
- { 0/* PAGE_OFFSET */, "Low Kernel Mapping" },
- { 0/* VMALLOC_START */, "vmalloc() Area" },
- { 0/* VMEMMAP_START */, "Vmemmap" },
-#ifdef CONFIG_KASAN
- { KASAN_SHADOW_START, "KASAN shadow" },
- { KASAN_SHADOW_END, "KASAN shadow end" },
+ [USER_SPACE_NR] = { 0, "User Space" },
+ [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" },
+ [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
+ [VMALLOC_END_NR] = { 0UL, "vmalloc() End" },
+#ifdef CONFIG_HIGHMEM
+ [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
#endif
-# ifdef CONFIG_X86_ESPFIX64
- { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
-# endif
-# ifdef CONFIG_EFI
- { EFI_VA_END, "EFI Runtime Services" },
-# endif
- { __START_KERNEL_map, "High Kernel Mapping" },
- { MODULES_VADDR, "Modules" },
- { MODULES_END, "End Modules" },
-#else
- { PAGE_OFFSET, "Kernel Mapping" },
- { 0/* VMALLOC_START */, "vmalloc() Area" },
- { 0/*VMALLOC_END*/, "vmalloc() End" },
-# ifdef CONFIG_HIGHMEM
- { 0/*PKMAP_BASE*/, "Persistent kmap() Area" },
-# endif
- { 0/*FIXADDR_START*/, "Fixmap Area" },
-#endif
- { -1, NULL } /* End of list */
+ [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
+ [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
+ [END_OF_SPACE_NR] = { -1, NULL }
};
+#endif /* !CONFIG_X86_64 */
+
/* Multipliers for offsets within the PTEs */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +169,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
static const char * const level_name[] =
{ "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
- if (!pgprot_val(prot)) {
+ if (!(pr & _PAGE_PRESENT)) {
/* Not present */
pt_dump_cont_printf(m, dmsg, " ");
} else {
@@ -447,7 +476,7 @@ static inline bool is_hypervisor_range(int idx)
}
static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
- bool checkwx)
+ bool checkwx, bool dmesg)
{
#ifdef CONFIG_X86_64
pgd_t *start = (pgd_t *) &init_top_pgt;
@@ -460,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
if (pgd) {
start = pgd;
- st.to_dmesg = true;
+ st.to_dmesg = dmesg;
}
st.check_wx = checkwx;
@@ -498,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
- ptdump_walk_pgd_level_core(m, pgd, false);
+ ptdump_walk_pgd_level_core(m, pgd, false, true);
+}
+
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (user && static_cpu_has(X86_FEATURE_PTI))
+ pgd = kernel_to_user_pgdp(pgd);
+#endif
+ ptdump_walk_pgd_level_core(m, pgd, false, false);
+}
+EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
+
+static void ptdump_walk_user_pgd_level_checkwx(void)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ pgd_t *pgd = (pgd_t *) &init_top_pgt;
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ pr_info("x86/mm: Checking user space page tables\n");
+ pgd = kernel_to_user_pgdp(pgd);
+ ptdump_walk_pgd_level_core(NULL, pgd, true, false);
+#endif
}
-EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
void ptdump_walk_pgd_level_checkwx(void)
{
- ptdump_walk_pgd_level_core(NULL, NULL, true);
+ ptdump_walk_pgd_level_core(NULL, NULL, true, false);
+ ptdump_walk_user_pgd_level_checkwx();
}
static int __init pt_dump_init(void)
@@ -525,8 +578,8 @@ static int __init pt_dump_init(void)
address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
# endif
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
+ address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
#endif
-
return 0;
}
__initcall(pt_dump_init);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 3321b446b66c..9fe656c42aa5 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,6 +1,7 @@
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
+#include <xen/xen.h>
#include <asm/fpu/internal.h>
#include <asm/traps.h>
@@ -82,7 +83,7 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
return true;
}
-EXPORT_SYMBOL_GPL(ex_handler_refcount);
+EXPORT_SYMBOL(ex_handler_refcount);
/*
* Handler for when we fail to restore a task's FPU state. We should never get
@@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
* Old CPUs leave the high bits of CS on the stack
* undefined. I'm not sure which CPUs do this, but at least
* the 486 DX works this way.
+ * Xen pv domains are not using the default __KERNEL_CS.
*/
- if (regs->cs != __KERNEL_CS)
+ if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
goto fail;
/*
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 78ca9a8ee454..06fe3d51d385 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -701,7 +701,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
else
printk(KERN_CONT "paging request");
- printk(KERN_CONT " at %p\n", (void *) address);
+ printk(KERN_CONT " at %px\n", (void *) address);
printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
dump_pagetable(address);
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
if (!printk_ratelimit())
return;
- printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+ printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *)regs->ip, (void *)regs->sp, error_code);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6fdf91ef130a..8ca324d07282 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -20,6 +20,7 @@
#include <asm/kaslr.h>
#include <asm/hypervisor.h>
#include <asm/cpufeature.h>
+#include <asm/pti.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -160,6 +161,12 @@ struct map_range {
static int page_size_mask;
+static void enable_global_pages(void)
+{
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ __supported_pte_mask |= _PAGE_GLOBAL;
+}
+
static void __init probe_page_size_mask(void)
{
/*
@@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void)
cr4_set_bits_and_update_boot(X86_CR4_PSE);
/* Enable PGE if available */
+ __supported_pte_mask &= ~_PAGE_GLOBAL;
if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4_set_bits_and_update_boot(X86_CR4_PGE);
- __supported_pte_mask |= _PAGE_GLOBAL;
- } else
- __supported_pte_mask &= ~_PAGE_GLOBAL;
+ enable_global_pages();
+ }
/* Enable 1 GB linear kernel mappings if available: */
if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
@@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void)
static void setup_pcid(void)
{
-#ifdef CONFIG_X86_64
- if (boot_cpu_has(X86_FEATURE_PCID)) {
- if (boot_cpu_has(X86_FEATURE_PGE)) {
- /*
- * This can't be cr4_set_bits_and_update_boot() --
- * the trampoline code can't handle CR4.PCIDE and
- * it wouldn't do any good anyway. Despite the name,
- * cr4_set_bits_and_update_boot() doesn't actually
- * cause the bits in question to remain set all the
- * way through the secondary boot asm.
- *
- * Instead, we brute-force it and set CR4.PCIDE
- * manually in start_secondary().
- */
- cr4_set_bits(X86_CR4_PCIDE);
- } else {
- /*
- * flush_tlb_all(), as currently implemented, won't
- * work if PCID is on but PGE is not. Since that
- * combination doesn't exist on real hardware, there's
- * no reason to try to fully support it, but it's
- * polite to avoid corrupting data if we're on
- * an improperly configured VM.
- */
- setup_clear_cpu_cap(X86_FEATURE_PCID);
- }
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ if (!boot_cpu_has(X86_FEATURE_PCID))
+ return;
+
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
+ /*
+ * This can't be cr4_set_bits_and_update_boot() -- the
+ * trampoline code can't handle CR4.PCIDE and it wouldn't
+ * do any good anyway. Despite the name,
+ * cr4_set_bits_and_update_boot() doesn't actually cause
+ * the bits in question to remain set all the way through
+ * the secondary boot asm.
+ *
+ * Instead, we brute-force it and set CR4.PCIDE manually in
+ * start_secondary().
+ */
+ cr4_set_bits(X86_CR4_PCIDE);
+
+ /*
+ * INVPCID's single-context modes (2/3) only work if we set
+ * X86_CR4_PCIDE, *and* we INVPCID support. It's unusable
+ * on systems that have X86_CR4_PCIDE clear, or that have
+ * no INVPCID support at all.
+ */
+ if (boot_cpu_has(X86_FEATURE_INVPCID))
+ setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't work if
+ * PCID is on but PGE is not. Since that combination
+ * doesn't exist on real hardware, there's no reason to try
+ * to fully support it, but it's polite to avoid corrupting
+ * data if we're on an improperly configured VM.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
}
-#endif
}
#ifdef CONFIG_X86_32
@@ -622,6 +639,7 @@ void __init init_mem_mapping(void)
{
unsigned long end;
+ pti_check_boottime_disable();
probe_page_size_mask();
setup_pcid();
@@ -845,7 +863,7 @@ void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.loaded_mm = &init_mm,
.next_asid = 1,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8a64a6f2848d..135c9a7898c7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/page_types.h>
+#include <asm/cpu_entry_area.h>
#include <asm/init.h>
#include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
mem_init_print_info(NULL);
printk(KERN_INFO "virtual kernel memory layout:\n"
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
" pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
FIXADDR_START, FIXADDR_TOP,
(FIXADDR_TOP - FIXADDR_START) >> 10,
+ CPU_ENTRY_AREA_BASE,
+ CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
+ CPU_ENTRY_AREA_MAP_SIZE >> 10,
+
#ifdef CONFIG_HIGHMEM
PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
(LAST_PKMAP*PAGE_SIZE) >> 10,
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 6e4573b1da34..c45b6ec5357b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr)
return;
}
+ mmiotrace_iounmap(addr);
+
addr = (volatile void __iomem *)
(PAGE_MASK & (unsigned long __force)addr);
- mmiotrace_iounmap(addr);
-
/* Use the vm area unlocked, assuming the caller
ensures there isn't another iounmap for the same address
in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 99dfed6dfef8..47388f0c0e59 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,6 +15,7 @@
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
+#include <asm/cpu_entry_area.h>
extern struct range pfn_mapped[E820_MAX_ENTRIES];
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
void __init kasan_init(void)
{
int i;
+ void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
#ifdef CONFIG_KASAN_INLINE
register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
map_range(&pfn_mapped[i]);
}
+ shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
+ shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
+ shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
+ PAGE_SIZE);
+
+ shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
+ CPU_ENTRY_AREA_MAP_SIZE);
+ shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
+ shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
+ PAGE_SIZE);
+
kasan_populate_zero_shadow(
kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
- kasan_mem_to_shadow((void *)__START_KERNEL_map));
+ shadow_cpu_entry_begin);
+
+ kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
+ (unsigned long)shadow_cpu_entry_end, 0);
+
+ kasan_populate_zero_shadow(shadow_cpu_entry_end,
+ kasan_mem_to_shadow((void *)__START_KERNEL_map));
kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
(unsigned long)kasan_mem_to_shadow(_end),
early_pfn_to_nid(__pa(_stext)));
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
- (void *)KASAN_SHADOW_END);
+ (void *)KASAN_SHADOW_END);
load_cr3(init_top_pgt);
__flush_tlb_all();
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/error.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/error.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/opcode.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/opcode.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/pte.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/pte.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/selftest.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/selftest.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index c21c2ed04612..58477ec3d66d 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
unsigned long flags;
int ret = 0;
unsigned long size = 0;
+ unsigned long addr = p->addr & PAGE_MASK;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
unsigned int l;
pte_t *pte;
spin_lock_irqsave(&kmmio_lock, flags);
- if (get_kmmio_probe(p->addr)) {
+ if (get_kmmio_probe(addr)) {
ret = -EEXIST;
goto out;
}
- pte = lookup_address(p->addr, &l);
+ pte = lookup_address(addr, &l);
if (!pte) {
ret = -EINVAL;
goto out;
@@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
kmmio_count++;
list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) {
- if (add_kmmio_fault_page(p->addr + size))
+ if (add_kmmio_fault_page(addr + size))
pr_err("Unable to set page fault.\n");
size += page_level_size(l);
}
@@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
{
unsigned long flags;
unsigned long size = 0;
+ unsigned long addr = p->addr & PAGE_MASK;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
struct kmmio_fault_page *release_list = NULL;
struct kmmio_delayed_release *drelease;
unsigned int l;
pte_t *pte;
- pte = lookup_address(p->addr, &l);
+ pte = lookup_address(addr, &l);
if (!pte)
return;
spin_lock_irqsave(&kmmio_lock, flags);
while (size < size_lim) {
- release_kmmio_fault_page(p->addr + size, &release_list);
+ release_kmmio_fault_page(addr + size, &release_list);
size += page_level_size(l);
}
list_del_rcu(&p->list);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d9a9e9fc75dd..391b13402e40 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -405,13 +405,13 @@ bool sme_active(void)
{
return sme_me_mask && !sev_enabled;
}
-EXPORT_SYMBOL_GPL(sme_active);
+EXPORT_SYMBOL(sme_active);
bool sev_active(void)
{
return sme_me_mask && sev_enabled;
}
-EXPORT_SYMBOL_GPL(sev_active);
+EXPORT_SYMBOL(sev_active);
static const struct dma_map_ops sev_dma_ops = {
.alloc = sev_alloc,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 96d456a94b03..004abf9ebf12 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
kmem_cache_free(pgd_cache, pgd);
}
#else
+
static inline pgd_t *_pgd_alloc(void)
{
- return (pgd_t *)__get_free_page(PGALLOC_GFP);
+ return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
}
static inline void _pgd_free(pgd_t *pgd)
{
- free_page((unsigned long)pgd);
+ free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
}
#endif /* CONFIG_X86_PAE */
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6b9bf023a700..c3c5274410a9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,6 +10,7 @@
#include <linux/pagemap.h>
#include <linux/spinlock.h>
+#include <asm/cpu_entry_area.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
new file mode 100644
index 000000000000..bce8aea65606
--- /dev/null
+++ b/arch/x86/mm/pti.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This code is based in part on work published here:
+ *
+ * https://github.com/IAIK/KAISER
+ *
+ * The original work was written by and and signed off by for the Linux
+ * kernel by:
+ *
+ * Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
+ * Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
+ * Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+ * Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+ *
+ * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
+ * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
+ * Andy Lutomirsky <luto@amacapital.net>
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+
+#include <asm/cpufeature.h>
+#include <asm/hypervisor.h>
+#include <asm/vsyscall.h>
+#include <asm/cmdline.h>
+#include <asm/pti.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
+
+/* Backporting helper */
+#ifndef __GFP_NOTRACK
+#define __GFP_NOTRACK 0
+#endif
+
+static void __init pti_print_if_insecure(const char *reason)
+{
+ if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+ pr_info("%s\n", reason);
+}
+
+static void __init pti_print_if_secure(const char *reason)
+{
+ if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+ pr_info("%s\n", reason);
+}
+
+void __init pti_check_boottime_disable(void)
+{
+ char arg[5];
+ int ret;
+
+ if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
+ pti_print_if_insecure("disabled on XEN PV.");
+ return;
+ }
+
+ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+ if (ret > 0) {
+ if (ret == 3 && !strncmp(arg, "off", 3)) {
+ pti_print_if_insecure("disabled on command line.");
+ return;
+ }
+ if (ret == 2 && !strncmp(arg, "on", 2)) {
+ pti_print_if_secure("force enabled on command line.");
+ goto enable;
+ }
+ if (ret == 4 && !strncmp(arg, "auto", 4))
+ goto autosel;
+ }
+
+ if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+ pti_print_if_insecure("disabled on command line.");
+ return;
+ }
+
+autosel:
+ if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+ return;
+enable:
+ setup_force_cpu_cap(X86_FEATURE_PTI);
+}
+
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ /*
+ * Changes to the high (kernel) portion of the kernelmode page
+ * tables are not automatically propagated to the usermode tables.
+ *
+ * Users should keep in mind that, unlike the kernelmode tables,
+ * there is no vmalloc_fault equivalent for the usermode tables.
+ * Top-level entries added to init_mm's usermode pgd after boot
+ * will not be automatically propagated to other mms.
+ */
+ if (!pgdp_maps_userspace(pgdp))
+ return pgd;
+
+ /*
+ * The user page tables get the full PGD, accessible from
+ * userspace:
+ */
+ kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
+
+ /*
+ * If this is normal user memory, make it NX in the kernel
+ * pagetables so that, if we somehow screw up and return to
+ * usermode with the kernel CR3 loaded, we'll get a page fault
+ * instead of allowing user code to execute with the wrong CR3.
+ *
+ * As exceptions, we don't set NX if:
+ * - _PAGE_USER is not set. This could be an executable
+ * EFI runtime mapping or something similar, and the kernel
+ * may execute from it
+ * - we don't have NX support
+ * - we're clearing the PGD (i.e. the new pgd is not present).
+ */
+ if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
+ (__supported_pte_mask & _PAGE_NX))
+ pgd.pgd |= _PAGE_NX;
+
+ /* return the copy of the PGD we want the kernel to use: */
+ return pgd;
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a P4D on success, or NULL on failure.
+ */
+static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+{
+ pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+ if (address < PAGE_OFFSET) {
+ WARN_ONCE(1, "attempt to walk user address\n");
+ return NULL;
+ }
+
+ if (pgd_none(*pgd)) {
+ unsigned long new_p4d_page = __get_free_page(gfp);
+ if (!new_p4d_page)
+ return NULL;
+
+ if (pgd_none(*pgd)) {
+ set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+ new_p4d_page = 0;
+ }
+ if (new_p4d_page)
+ free_page(new_p4d_page);
+ }
+ BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+ return p4d_offset(pgd, address);
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a PMD on success, or NULL on failure.
+ */
+static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+{
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
+ pud_t *pud;
+
+ BUILD_BUG_ON(p4d_large(*p4d) != 0);
+ if (p4d_none(*p4d)) {
+ unsigned long new_pud_page = __get_free_page(gfp);
+ if (!new_pud_page)
+ return NULL;
+
+ if (p4d_none(*p4d)) {
+ set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
+ new_pud_page = 0;
+ }
+ if (new_pud_page)
+ free_page(new_pud_page);
+ }
+
+ pud = pud_offset(p4d, address);
+ /* The user page tables do not use large mappings: */
+ if (pud_large(*pud)) {
+ WARN_ON(1);
+ return NULL;
+ }
+ if (pud_none(*pud)) {
+ unsigned long new_pmd_page = __get_free_page(gfp);
+ if (!new_pmd_page)
+ return NULL;
+
+ if (pud_none(*pud)) {
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+ new_pmd_page = 0;
+ }
+ if (new_pmd_page)
+ free_page(new_pmd_page);
+ }
+
+ return pmd_offset(pud, address);
+}
+
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+/*
+ * Walk the shadow copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down. Does not support large pages.
+ *
+ * Note: this is only used when mapping *new* kernel data into the
+ * user/shadow page tables. It is never used for userspace data.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+{
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
+ pte_t *pte;
+
+ /* We can't do anything sensible if we hit a large mapping. */
+ if (pmd_large(*pmd)) {
+ WARN_ON(1);
+ return NULL;
+ }
+
+ if (pmd_none(*pmd)) {
+ unsigned long new_pte_page = __get_free_page(gfp);
+ if (!new_pte_page)
+ return NULL;
+
+ if (pmd_none(*pmd)) {
+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+ new_pte_page = 0;
+ }
+ if (new_pte_page)
+ free_page(new_pte_page);
+ }
+
+ pte = pte_offset_kernel(pmd, address);
+ if (pte_flags(*pte) & _PAGE_USER) {
+ WARN_ONCE(1, "attempt to walk to user pte\n");
+ return NULL;
+ }
+ return pte;
+}
+
+static void __init pti_setup_vsyscall(void)
+{
+ pte_t *pte, *target_pte;
+ unsigned int level;
+
+ pte = lookup_address(VSYSCALL_ADDR, &level);
+ if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
+ return;
+
+ target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
+ if (WARN_ON(!target_pte))
+ return;
+
+ *target_pte = *pte;
+ set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
+}
+#else
+static void __init pti_setup_vsyscall(void) { }
+#endif
+
+static void __init
+pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+{
+ unsigned long addr;
+
+ /*
+ * Clone the populated PMDs which cover start to end. These PMD areas
+ * can have holes.
+ */
+ for (addr = start; addr < end; addr += PMD_SIZE) {
+ pmd_t *pmd, *target_pmd;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+
+ pgd = pgd_offset_k(addr);
+ if (WARN_ON(pgd_none(*pgd)))
+ return;
+ p4d = p4d_offset(pgd, addr);
+ if (WARN_ON(p4d_none(*p4d)))
+ return;
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud))
+ continue;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ continue;
+
+ target_pmd = pti_user_pagetable_walk_pmd(addr);
+ if (WARN_ON(!target_pmd))
+ return;
+
+ /*
+ * Copy the PMD. That is, the kernelmode and usermode
+ * tables will share the last-level page tables of this
+ * address range
+ */
+ *target_pmd = pmd_clear_flags(*pmd, clear);
+ }
+}
+
+/*
+ * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
+ * next-level entry on 5-level systems.
+ */
+static void __init pti_clone_p4d(unsigned long addr)
+{
+ p4d_t *kernel_p4d, *user_p4d;
+ pgd_t *kernel_pgd;
+
+ user_p4d = pti_user_pagetable_walk_p4d(addr);
+ kernel_pgd = pgd_offset_k(addr);
+ kernel_p4d = p4d_offset(kernel_pgd, addr);
+ *user_p4d = *kernel_p4d;
+}
+
+/*
+ * Clone the CPU_ENTRY_AREA into the user space visible page table.
+ */
+static void __init pti_clone_user_shared(void)
+{
+ pti_clone_p4d(CPU_ENTRY_AREA_BASE);
+}
+
+/*
+ * Clone the ESPFIX P4D into the user space visinble page table
+ */
+static void __init pti_setup_espfix64(void)
+{
+#ifdef CONFIG_X86_ESPFIX64
+ pti_clone_p4d(ESPFIX_BASE_ADDR);
+#endif
+}
+
+/*
+ * Clone the populated PMDs of the entry and irqentry text and force it RO.
+ */
+static void __init pti_clone_entry_text(void)
+{
+ pti_clone_pmds((unsigned long) __entry_text_start,
+ (unsigned long) __irqentry_text_end, _PAGE_RW);
+}
+
+/*
+ * Initialize kernel page table isolation
+ */
+void __init pti_init(void)
+{
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ pr_info("enabled\n");
+
+ pti_clone_user_shared();
+ pti_clone_entry_text();
+ pti_setup_espfix64();
+ pti_setup_vsyscall();
+}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3118392cdf75..a1561957dccb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,38 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
+/*
+ * We get here when we do something requiring a TLB invalidation
+ * but could not go invalidate all of the contexts. We do the
+ * necessary invalidation by clearing out the 'ctx_id' which
+ * forces a TLB flush when the context is loaded.
+ */
+void clear_asid_other(void)
+{
+ u16 asid;
+
+ /*
+ * This is only expected to be set if we have disabled
+ * kernel _PAGE_GLOBAL pages.
+ */
+ if (!static_cpu_has(X86_FEATURE_PTI)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
+ /* Do not need to flush the current asid */
+ if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
+ continue;
+ /*
+ * Make sure the next time we go to switch to
+ * this asid, we do a flush:
+ */
+ this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
+ }
+ this_cpu_write(cpu_tlbstate.invalidate_other, false);
+}
+
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
@@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
return;
}
+ if (this_cpu_read(cpu_tlbstate.invalidate_other))
+ clear_asid_other();
+
for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
next->context.ctx_id)
@@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
*need_flush = true;
}
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+{
+ unsigned long new_mm_cr3;
+
+ if (need_flush) {
+ invalidate_user_asid(new_asid);
+ new_mm_cr3 = build_cr3(pgdir, new_asid);
+ } else {
+ new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+ }
+
+ /*
+ * Caution: many callers of this function expect
+ * that load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask writes.
+ */
+ write_cr3(new_mm_cr3);
+}
+
void leave_mm(int cpu)
{
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -128,7 +182,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* isn't free.
*/
#ifdef CONFIG_DEBUG_VM
- if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
/*
* If we were to BUG here, we'd be very likely to kill
* the system so hard that we don't see the call trace.
@@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- write_cr3(build_cr3(next, new_asid));
+ load_new_mm_cr3(next->pgd, new_asid, true);
/*
* NB: This gets called via leave_mm() in the idle path
@@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- write_cr3(build_cr3_noflush(next, new_asid));
+ load_new_mm_cr3(next->pgd, new_asid, false);
/* See above wrt _rcuidle. */
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +342,7 @@ void initialize_tlbstate_and_flush(void)
!(cr4_read_shadow() & X86_CR4_PCIDE));
/* Force ASID 0 and force a TLB flush. */
- write_cr3(build_cr3(mm, 0));
+ write_cr3(build_cr3(mm->pgd, 0));
/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +605,7 @@ static void do_kernel_range_flush(void *info)
/* flush range by one by one 'invlpg' */
for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
- __flush_tlb_single(addr);
+ __flush_tlb_one(addr);
}
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index bb461cfd01ab..526536c81ddc 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void)
* We should get host bridge information from ACPI unless the BIOS
* doesn't support it.
*/
- if (acpi_os_get_root_pointer())
+ if (!acpi_disabled && acpi_os_get_root_pointer())
return 0;
#endif
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 1e996df687a3..e663d6bf1328 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -665,6 +665,16 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
unsigned i;
u32 base, limit, high;
struct resource *res, *conflict;
+ struct pci_dev *other;
+
+ /* Check that we are the only device of that type */
+ other = pci_get_device(dev->vendor, dev->device, NULL);
+ if (other != dev ||
+ (other = pci_get_device(dev->vendor, dev->device, other))) {
+ /* This is a multi-socket system, don't touch it for now */
+ pci_dev_put(other);
+ return;
+ }
for (i = 0; i < 8; i++) {
pci_read_config_dword(dev, AMD_141b_MMIO_BASE(i), &base);
@@ -696,8 +706,13 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
res->end = 0xfd00000000ull - 1;
/* Just grab the free area behind system memory for this */
- while ((conflict = request_resource_conflict(&iomem_resource, res)))
+ while ((conflict = request_resource_conflict(&iomem_resource, res))) {
+ if (conflict->end >= res->end) {
+ kfree(res);
+ return;
+ }
res->start = conflict->end + 1;
+ }
dev_info(&dev->dev, "adding root bus resource %pR\n", res);
@@ -714,10 +729,10 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
pci_bus_add_resource(dev->bus, res, 0);
}
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
#endif
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6a151ce70e86..d87ac96e37ed 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -196,6 +196,9 @@ static pgd_t *efi_pgd;
* because we want to avoid inserting EFI region mappings (EFI_VA_END
* to EFI_VA_START) into the standard kernel page tables. Everything
* else can be shared, see efi_sync_low_kernel_mappings().
+ *
+ * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
+ * allocation.
*/
int __init efi_alloc_page_tables(void)
{
@@ -208,7 +211,7 @@ int __init efi_alloc_page_tables(void)
return 0;
gfp_mask = GFP_KERNEL | __GFP_ZERO;
- efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
+ efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
if (!efi_pgd)
return -ENOMEM;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index f44c0bc95aa2..8538a6723171 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
local_flush_tlb();
stat->d_alltlb++;
} else {
- __flush_tlb_one(msg->address);
+ __flush_tlb_single(msg->address);
stat->d_onetlb++;
}
stat->d_requestee++;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 5f6fd860820a..e4cb9f4cde8a 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
static int uv_domain_activate(struct irq_domain *domain,
- struct irq_data *irq_data, bool early)
+ struct irq_data *irq_data, bool reserve)
{
uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
return 0;
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index c34bd8233f7c..5f64f30873e2 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -905,7 +905,7 @@ static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
/*
* UV NMI handler
*/
-int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
+static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
{
struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
int cpu = smp_processor_id();
@@ -1013,7 +1013,7 @@ void uv_nmi_init(void)
}
/* Setup HUB NMI info */
-void __init uv_nmi_setup_common(bool hubbed)
+static void __init uv_nmi_setup_common(bool hubbed)
{
int size = sizeof(void *) * (1 << NODES_SHIFT);
int cpu;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 84fcfde53f8f..a7d966964c6f 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* descriptor tables
*/
-#ifdef CONFIG_X86_32
store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
- store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
+
/*
* We save it here, but restore it only in the hibernate case.
* For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* segment registers
*/
-#ifdef CONFIG_X86_32
- savesegment(es, ctxt->es);
- savesegment(fs, ctxt->fs);
+#ifdef CONFIG_X86_32_LAZY_GS
savesegment(gs, ctxt->gs);
- savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
- asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
- asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
- asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
- asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
- asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+#endif
+#ifdef CONFIG_X86_64
+ savesegment(gs, ctxt->gs);
+ savesegment(fs, ctxt->fs);
+ savesegment(ds, ctxt->ds);
+ savesegment(es, ctxt->es);
rdmsrl(MSR_FS_BASE, ctxt->fs_base);
- rdmsrl(MSR_GS_BASE, ctxt->gs_base);
- rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+ rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
mtrr_save_fixed_ranges(NULL);
rdmsrl(MSR_EFER, ctxt->efer);
@@ -160,17 +152,19 @@ static void do_fpu_end(void)
static void fix_processor_context(void)
{
int cpu = smp_processor_id();
- struct tss_struct *t = &per_cpu(cpu_tss, cpu);
#ifdef CONFIG_X86_64
struct desc_struct *desc = get_cpu_gdt_rw(cpu);
tss_desc tss;
#endif
- set_tss_desc(cpu, t); /*
- * This just modifies memory; should not be
- * necessary. But... This is necessary, because
- * 386 hardware has concept of busy TSS or some
- * similar stupidity.
- */
+
+ /*
+ * We need to reload TR, which requires that we change the
+ * GDT entry to indicate "available" first.
+ *
+ * XXX: This could probably all be replaced by a call to
+ * force_reload_TR().
+ */
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
#ifdef CONFIG_X86_64
memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -178,6 +172,9 @@ static void fix_processor_context(void)
write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
syscall_init(); /* This sets MSR_*STAR and related */
+#else
+ if (boot_cpu_has(X86_FEATURE_SEP))
+ enable_sep_cpu();
#endif
load_TR_desc(); /* This does ltr */
load_mm_ldt(current->active_mm); /* This does lldt */
@@ -190,9 +187,12 @@ static void fix_processor_context(void)
}
/**
- * __restore_processor_state - restore the contents of CPU registers saved
- * by __save_processor_state()
- * @ctxt - structure to load the registers contents from
+ * __restore_processor_state - restore the contents of CPU registers saved
+ * by __save_processor_state()
+ * @ctxt - structure to load the registers contents from
+ *
+ * The asm code that gets us here will have restored a usable GDT, although
+ * it will be pointing to the wrong alias.
*/
static void notrace __restore_processor_state(struct saved_context *ctxt)
{
@@ -215,46 +215,52 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
write_cr2(ctxt->cr2);
write_cr0(ctxt->cr0);
+ /* Restore the IDT. */
+ load_idt(&ctxt->idt);
+
/*
- * now restore the descriptor tables to their proper values
- * ltr is done i fix_processor_context().
+ * Just in case the asm code got us here with the SS, DS, or ES
+ * out of sync with the GDT, update them.
*/
-#ifdef CONFIG_X86_32
- load_idt(&ctxt->idt);
+ loadsegment(ss, __KERNEL_DS);
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+
+ /*
+ * Restore percpu access. Percpu access can happen in exception
+ * handlers or in complicated helpers like load_gs_index().
+ */
+#ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
#else
-/* CONFIG_X86_64 */
- load_idt((const struct desc_ptr *)&ctxt->idt_limit);
+ loadsegment(fs, __KERNEL_PERCPU);
+ loadsegment(gs, __KERNEL_STACK_CANARY);
#endif
+ /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
+ fix_processor_context();
+
/*
- * segment registers
+ * Now that we have descriptor tables fully restored and working
+ * exception handling, restore the usermode segments.
*/
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_64
+ loadsegment(ds, ctxt->es);
loadsegment(es, ctxt->es);
loadsegment(fs, ctxt->fs);
- loadsegment(gs, ctxt->gs);
- loadsegment(ss, ctxt->ss);
+ load_gs_index(ctxt->gs);
/*
- * sysenter MSRs
+ * Restore FSBASE and GSBASE after restoring the selectors, since
+ * restoring the selectors clobbers the bases. Keep in mind
+ * that MSR_KERNEL_GS_BASE is horribly misnamed.
*/
- if (boot_cpu_has(X86_FEATURE_SEP))
- enable_sep_cpu();
-#else
-/* CONFIG_X86_64 */
- asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
- asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
- asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
- load_gs_index(ctxt->gs);
- asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
-
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
- wrmsrl(MSR_GS_BASE, ctxt->gs_base);
- wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+#elif defined(CONFIG_X86_32_LAZY_GS)
+ loadsegment(gs, ctxt->gs);
#endif
- fix_processor_context();
-
do_fpu_end();
tsc_verify_tsc_adjust(true);
x86_platform.restore_sched_clock_state();
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 6b830d4cb4c8..de58533d3664 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -57,7 +57,7 @@ static u32 xen_apic_read(u32 reg)
return 0;
if (reg == APIC_LVR)
- return 0x10;
+ return 0x14;
#ifdef CONFIG_X86_32
if (reg == APIC_LDR)
return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d669e9d89001..c9081c6671f0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,8 +1,12 @@
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+#include <linux/bootmem.h>
+#endif
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <xen/features.h>
#include <xen/page.h>
+#include <xen/interface/memory.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
}
EXPORT_SYMBOL(xen_arch_unregister_cpu);
#endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+void __init arch_xen_balloon_init(struct resource *hostmem_resource)
+{
+ struct xen_memory_map memmap;
+ int rc;
+ unsigned int i, last_guest_ram;
+ phys_addr_t max_addr = PFN_PHYS(max_pfn);
+ struct e820_table *xen_e820_table;
+ const struct e820_entry *entry;
+ struct resource *res;
+
+ if (!xen_initial_domain())
+ return;
+
+ xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
+ if (!xen_e820_table)
+ return;
+
+ memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
+ set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
+ rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
+ if (rc) {
+ pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
+ goto out;
+ }
+
+ last_guest_ram = 0;
+ for (i = 0; i < memmap.nr_entries; i++) {
+ if (xen_e820_table->entries[i].addr >= max_addr)
+ break;
+ if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
+ last_guest_ram = i;
+ }
+
+ entry = &xen_e820_table->entries[last_guest_ram];
+ if (max_addr >= entry->addr + entry->size)
+ goto out; /* No unallocated host RAM. */
+
+ hostmem_resource->start = max_addr;
+ hostmem_resource->end = entry->addr + entry->size;
+
+ /*
+ * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
+ * as unavailable. The rest of that region can be used for hotplug-based
+ * ballooning.
+ */
+ for (; i < memmap.nr_entries; i++) {
+ entry = &xen_e820_table->entries[i];
+
+ if (entry->type == E820_TYPE_RAM)
+ continue;
+
+ if (entry->addr >= hostmem_resource->end)
+ break;
+
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ goto out;
+
+ res->name = "Unavailable host RAM";
+ res->start = entry->addr;
+ res->end = (entry->addr + entry->size < hostmem_resource->end) ?
+ entry->addr + entry->size : hostmem_resource->end;
+ rc = insert_resource(hostmem_resource, res);
+ if (rc) {
+ pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
+ __func__, res->start, res->end, rc);
+ kfree(res);
+ goto out;
+ }
+ }
+
+ out:
+ kfree(xen_e820_table);
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5b2b3f3f6531..c047f42552e1 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -88,6 +88,8 @@
#include "multicalls.h"
#include "pmu.h"
+#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
+
void *xen_initial_gdt;
static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -622,7 +624,7 @@ static struct trap_array_entry trap_array[] = {
{ simd_coprocessor_error, xen_simd_coprocessor_error, false },
};
-static bool get_trap_addr(void **addr, unsigned int ist)
+static bool __ref get_trap_addr(void **addr, unsigned int ist)
{
unsigned int nr;
bool ist_okay = false;
@@ -644,6 +646,14 @@ static bool get_trap_addr(void **addr, unsigned int ist)
}
}
+ if (nr == ARRAY_SIZE(trap_array) &&
+ *addr >= (void *)early_idt_handler_array[0] &&
+ *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
+ nr = (*addr - (void *)early_idt_handler_array[0]) /
+ EARLY_IDT_HANDLER_SIZE;
+ *addr = (void *)xen_early_idt_handler_array[nr];
+ }
+
if (WARN_ON(ist != 0 && !ist_okay))
return false;
@@ -818,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
- this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+ this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
void xen_set_iopl_mask(unsigned mask)
@@ -1250,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
/* Work out if we support NX */
+ get_cpu_cap(&boot_cpu_data);
x86_configure_nx();
/* Get mfn list */
@@ -1262,6 +1273,21 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_setup_gdt(0);
xen_init_irq_ops();
+
+ /* Let's presume PV guests always boot on vCPU with id 0. */
+ per_cpu(xen_vcpu_id, 0) = 0;
+
+ /*
+ * Setup xen_vcpu early because idt_setup_early_handler needs it for
+ * local_irq_disable(), irqs_disabled().
+ *
+ * Don't do the full vcpu_info placement stuff until we have
+ * the cpu_possible_mask and a non-dummy shared_info.
+ */
+ xen_vcpu_info_reset(0);
+
+ idt_setup_early_handler();
+
xen_init_capabilities();
#ifdef CONFIG_X86_LOCAL_APIC
@@ -1295,18 +1321,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
*/
acpi_numa = -1;
#endif
- /* Let's presume PV guests always boot on vCPU with id 0. */
- per_cpu(xen_vcpu_id, 0) = 0;
-
- /*
- * Setup xen_vcpu early because start_kernel needs it for
- * local_irq_disable(), irqs_disabled().
- *
- * Don't do the full vcpu_info placement stuff until we have
- * the cpu_possible_mask and a non-dummy shared_info.
- */
- xen_vcpu_info_reset(0);
-
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
local_irq_disable();
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index fc048ec686e7..4d62c071b166 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1902,6 +1902,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* Graft it onto L4[511][510] */
copy_page(level2_kernel_pgt, l2);
+ /*
+ * Zap execute permission from the ident map. Due to the sharing of
+ * L1 entries we need to do this in the L2.
+ */
+ if (__supported_pte_mask & _PAGE_NX) {
+ for (i = 0; i < PTRS_PER_PMD; ++i) {
+ if (pmd_none(level2_ident_pgt[i]))
+ continue;
+ level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
+ }
+ }
+
/* Copy the initial P->M table mappings if necessary. */
i = pgd_index(xen_start_info->mfn_list);
if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
switch (idx) {
case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
- case FIX_RO_IDT:
#ifdef CONFIG_X86_32
case FIX_WP_TEST:
# ifdef CONFIG_HIGHMEM
@@ -2272,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
- case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
/* All local page mappings */
pte = pfn_pte(phys, prot);
break;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c114ca767b3b..6e0d2086eacb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
addr = xen_e820_table.entries[0].addr;
size = xen_e820_table.entries[0].size;
while (i < xen_e820_table.nr_entries) {
- bool discard = false;
chunk_size = size;
type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
xen_add_extra_mem(pfn_s, n_pfns);
xen_max_p2m_pfn = pfn_s + n_pfns;
} else
- discard = true;
+ type = E820_TYPE_UNUSABLE;
}
- if (!discard)
- xen_align_and_add_e820_region(addr, chunk_size, type);
+ xen_align_and_add_e820_region(addr, chunk_size, type);
addr += chunk_size;
size -= chunk_size;
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 8a10c9a9e2b5..417b339e5c8e 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,7 @@
#include <xen/interface/xen.h>
+#include <linux/init.h>
#include <linux/linkage.h>
.macro xen_pv_trap name
@@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat
#endif
xen_pv_trap hypervisor_callback
+ __INIT
+ENTRY(xen_early_idt_handler_array)
+ i = 0
+ .rept NUM_EXCEPTION_VECTORS
+ pop %rcx
+ pop %r11
+ jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+ i = i + 1
+ .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+ .endr
+END(xen_early_idt_handler_array)
+ __FINIT
+
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
* Xen64 iret frame:
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index a5bcdfb890f1..837d4dd76785 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h