summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/uapi/asm/Kbuild2
-rw-r--r--arch/arc/include/uapi/asm/Kbuild1
-rw-r--r--arch/arm/Kconfig12
-rw-r--r--arch/arm/Kconfig.debug33
-rw-r--r--arch/arm/Makefile6
-rw-r--r--arch/arm/boot/compressed/string.c5
-rw-r--r--arch/arm/boot/compressed/vmlinux.lds.S8
-rw-r--r--arch/arm/boot/dts/am33xx.dtsi2
-rw-r--r--arch/arm/boot/dts/am4372.dtsi6
-rw-r--r--arch/arm/boot/dts/am437x-cm-t43.dts4
-rw-r--r--arch/arm/boot/dts/armada-385-db-ap.dts1
-rw-r--r--arch/arm/boot/dts/armada-385-linksys.dtsi1
-rw-r--r--arch/arm/boot/dts/armada-385-synology-ds116.dts2
-rw-r--r--arch/arm/boot/dts/armada-388-gp.dts2
-rw-r--r--arch/arm/boot/dts/bcm-nsp.dtsi4
-rw-r--r--arch/arm/boot/dts/bcm283x.dtsi1
-rw-r--r--arch/arm/boot/dts/bcm958623hr.dts4
-rw-r--r--arch/arm/boot/dts/bcm958625hr.dts4
-rw-r--r--arch/arm/boot/dts/dm814x.dtsi2
-rw-r--r--arch/arm/boot/dts/imx53.dtsi9
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts3
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv.dtsi17
-rw-r--r--arch/arm/boot/dts/meson.dtsi18
-rw-r--r--arch/arm/boot/dts/nspire.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-beagle-xm.dts1
-rw-r--r--arch/arm/boot/dts/omap3-beagle.dts1
-rw-r--r--arch/arm/boot/dts/omap3-cm-t3x.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-evm-common.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-gta04.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-igep0020-common.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-igep0030-common.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-lilly-a83x.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-overo-base.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-pandora-common.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3-tao3530.dtsi1
-rw-r--r--arch/arm/boot/dts/omap3.dtsi1
-rw-r--r--arch/arm/boot/dts/omap4-droid4-xt894.dts1
-rw-r--r--arch/arm/boot/dts/omap4-duovero.dtsi1
-rw-r--r--arch/arm/boot/dts/omap4-panda-common.dtsi1
-rw-r--r--arch/arm/boot/dts/omap4-var-som-om44.dtsi1
-rw-r--r--arch/arm/boot/dts/omap4.dtsi5
-rw-r--r--arch/arm/boot/dts/omap5-board-common.dtsi2
-rw-r--r--arch/arm/boot/dts/omap5-cm-t54.dts2
-rw-r--r--arch/arm/boot/dts/omap5.dtsi1
-rw-r--r--arch/arm/boot/dts/r8a7790.dtsi1
-rw-r--r--arch/arm/boot/dts/r8a7792.dtsi1
-rw-r--r--arch/arm/boot/dts/r8a7793.dtsi1
-rw-r--r--arch/arm/boot/dts/r8a7794.dtsi1
-rw-r--r--arch/arm/boot/dts/vf610-zii-dev-rev-c.dts6
-rw-r--r--arch/arm/common/bL_switcher_dummy_if.c4
-rw-r--r--arch/arm/common/sa1111.c328
-rw-r--r--arch/arm/include/asm/exception.h3
-rw-r--r--arch/arm/include/asm/glue-cache.h4
-rw-r--r--arch/arm/include/asm/hardware/cache-b15-rac.h10
-rw-r--r--arch/arm/include/asm/hardware/sa1111.h32
-rw-r--r--arch/arm/include/asm/kvm_arm.h3
-rw-r--r--arch/arm/include/asm/kvm_host.h5
-rw-r--r--arch/arm/include/asm/memory.h1
-rw-r--r--arch/arm/include/asm/ptdump.h43
-rw-r--r--arch/arm/include/asm/sections.h21
-rw-r--r--arch/arm/include/asm/string.h14
-rw-r--r--arch/arm/include/asm/traps.h12
-rw-r--r--arch/arm/include/asm/unified.h77
-rw-r--r--arch/arm/include/uapi/asm/Kbuild1
-rw-r--r--arch/arm/kernel/armksyms.c1
-rw-r--r--arch/arm/kernel/entry-armv.S6
-rw-r--r--arch/arm/kernel/entry-common.S1
-rw-r--r--arch/arm/kernel/head-common.S5
-rw-r--r--arch/arm/kernel/hw_breakpoint.c10
-rw-r--r--arch/arm/kernel/smp.c3
-rw-r--r--arch/arm/kernel/stacktrace.c14
-rw-r--r--arch/arm/kernel/traps.c4
-rw-r--r--arch/arm/kernel/vmlinux-xip.lds.S6
-rw-r--r--arch/arm/kernel/vmlinux.lds.S6
-rw-r--r--arch/arm/lib/Makefile2
-rw-r--r--arch/arm/lib/memzero.S137
-rw-r--r--arch/arm/mach-meson/platsmp.c2
-rw-r--r--arch/arm/mach-omap2/cm_common.c6
-rw-r--r--arch/arm/mach-omap2/omap-secure.c21
-rw-r--r--arch/arm/mach-omap2/omap-secure.h4
-rw-r--r--arch/arm/mach-omap2/omap_device.c10
-rw-r--r--arch/arm/mach-omap2/omap_hwmod_3xxx_data.c1
-rw-r--r--arch/arm/mach-omap2/pm.h4
-rw-r--r--arch/arm/mach-omap2/pm34xx.c13
-rw-r--r--arch/arm/mach-omap2/prcm-common.h1
-rw-r--r--arch/arm/mach-omap2/prm33xx.c12
-rw-r--r--arch/arm/mach-omap2/sleep34xx.S26
-rw-r--r--arch/arm/mach-sa1100/Kconfig1
-rw-r--r--arch/arm/mach-sa1100/assabet.c71
-rw-r--r--arch/arm/mach-sa1100/neponset.c159
-rw-r--r--arch/arm/mm/Kconfig8
-rw-r--r--arch/arm/mm/Makefile4
-rw-r--r--arch/arm/mm/cache-b15-rac.c356
-rw-r--r--arch/arm/mm/cache-v7.S21
-rw-r--r--arch/arm/mm/dump.c151
-rw-r--r--arch/arm/mm/fault.c5
-rw-r--r--arch/arm/mm/idmap.c4
-rw-r--r--arch/arm/mm/init.c2
-rw-r--r--arch/arm/mm/nommu.c4
-rw-r--r--arch/arm/mm/pmsa-v7.c4
-rw-r--r--arch/arm/mm/proc-v7.S6
-rw-r--r--arch/arm/mm/ptdump_debugfs.c34
-rw-r--r--arch/arm/probes/kprobes/core.c14
-rw-r--r--arch/arm64/Kconfig12
-rw-r--r--arch/arm64/boot/dts/Makefile2
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi4
-rw-r--r--arch/arm64/boot/dts/amlogic/meson-gxl.dtsi6
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts1
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts1
-rw-r--r--arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts3
-rw-r--r--arch/arm64/include/asm/assembler.h10
-rw-r--r--arch/arm64/include/asm/cpufeature.h3
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/efi.h4
-rw-r--r--arch/arm64/include/asm/kvm_arm.h3
-rw-r--r--arch/arm64/include/asm/kvm_host.h1
-rw-r--r--arch/arm64/include/asm/mmu_context.h46
-rw-r--r--arch/arm64/include/asm/perf_event.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h41
-rw-r--r--arch/arm64/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--arch/arm64/kernel/cpu-reset.S1
-rw-r--r--arch/arm64/kernel/cpufeature.c3
-rw-r--r--arch/arm64/kernel/efi-entry.S2
-rw-r--r--arch/arm64/kernel/fpsimd.c53
-rw-r--r--arch/arm64/kernel/head.S1
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c2
-rw-r--r--arch/arm64/kernel/process.c9
-rw-r--r--arch/arm64/kernel/relocate_kernel.S1
-rw-r--r--arch/arm64/kvm/debug.c21
-rw-r--r--arch/arm64/kvm/handle_exit.c57
-rw-r--r--arch/arm64/kvm/hyp-init.S1
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c3
-rw-r--r--arch/arm64/kvm/hyp/switch.c37
-rw-r--r--arch/arm64/mm/dump.c2
-rw-r--r--arch/arm64/mm/fault.c5
-rw-r--r--arch/arm64/mm/init.c3
-rw-r--r--arch/blackfin/include/uapi/asm/Kbuild1
-rw-r--r--arch/c6x/include/uapi/asm/Kbuild1
-rw-r--r--arch/cris/include/uapi/asm/Kbuild1
-rw-r--r--arch/frv/include/uapi/asm/Kbuild2
-rw-r--r--arch/h8300/include/uapi/asm/Kbuild1
-rw-r--r--arch/hexagon/include/uapi/asm/Kbuild1
-rw-r--r--arch/ia64/include/uapi/asm/Kbuild1
-rw-r--r--arch/m32r/include/uapi/asm/Kbuild1
-rw-r--r--arch/m68k/configs/stmark2_defconfig1
-rw-r--r--arch/m68k/include/uapi/asm/Kbuild1
-rw-r--r--arch/m68k/kernel/vmlinux-nommu.lds2
-rw-r--r--arch/m68k/kernel/vmlinux-std.lds2
-rw-r--r--arch/m68k/kernel/vmlinux-sun3.lds2
-rw-r--r--arch/metag/include/uapi/asm/Kbuild1
-rw-r--r--arch/microblaze/include/uapi/asm/Kbuild1
-rw-r--r--arch/mips/include/asm/Kbuild1
-rw-r--r--arch/mips/include/asm/serial.h22
-rw-r--r--arch/mips/include/uapi/asm/Kbuild1
-rw-r--r--arch/mn10300/include/uapi/asm/Kbuild1
-rw-r--r--arch/nios2/include/uapi/asm/Kbuild1
-rw-r--r--arch/openrisc/include/uapi/asm/Kbuild1
-rw-r--r--arch/parisc/boot/compressed/misc.c4
-rw-r--r--arch/parisc/include/asm/thread_info.h5
-rw-r--r--arch/parisc/include/uapi/asm/Kbuild1
-rw-r--r--arch/parisc/kernel/entry.S12
-rw-r--r--arch/parisc/kernel/hpmc.S1
-rw-r--r--arch/parisc/kernel/unwind.c1
-rw-r--r--arch/parisc/lib/delay.c2
-rw-r--r--arch/powerpc/include/asm/machdep.h1
-rw-r--r--arch/powerpc/include/asm/mmu_context.h5
-rw-r--r--arch/powerpc/include/asm/setup.h1
-rw-r--r--arch/powerpc/include/uapi/asm/Kbuild1
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S2
-rw-r--r--arch/powerpc/kernel/fadump.c22
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/setup-common.c27
-rw-r--r--arch/powerpc/kvm/book3s_xive.c7
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c6
-rw-r--r--arch/powerpc/perf/core-book3s.c12
-rw-r--r--arch/powerpc/perf/imc-pmu.c17
-rw-r--r--arch/powerpc/platforms/ps3/setup.c15
-rw-r--r--arch/powerpc/platforms/pseries/setup.c1
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c4
-rw-r--r--arch/powerpc/xmon/xmon.c10
-rw-r--r--arch/riscv/include/asm/barrier.h19
-rw-r--r--arch/riscv/include/uapi/asm/Kbuild1
-rw-r--r--arch/riscv/kernel/setup.c11
-rw-r--r--arch/riscv/kernel/sys_riscv.c2
-rw-r--r--arch/s390/Kbuild1
-rw-r--r--arch/s390/appldata/Makefile1
-rw-r--r--arch/s390/boot/compressed/vmlinux.scr1
-rw-r--r--arch/s390/crypto/sha1_s390.c7
-rw-r--r--arch/s390/hypfs/Makefile1
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/alternative.h1
-rw-r--r--arch/s390/include/asm/ap.h5
-rw-r--r--arch/s390/include/asm/bugs.h1
-rw-r--r--arch/s390/include/asm/perf_event.h1
-rw-r--r--arch/s390/include/asm/pgtable.h6
-rw-r--r--arch/s390/include/asm/ptrace.h11
-rw-r--r--arch/s390/include/asm/segment.h1
-rw-r--r--arch/s390/include/asm/switch_to.h27
-rw-r--r--arch/s390/include/asm/vga.h1
-rw-r--r--arch/s390/include/uapi/asm/Kbuild1
-rw-r--r--arch/s390/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--arch/s390/include/uapi/asm/perf_regs.h1
-rw-r--r--arch/s390/include/uapi/asm/ptrace.h125
-rw-r--r--arch/s390/include/uapi/asm/sthyi.h1
-rw-r--r--arch/s390/include/uapi/asm/virtio-ccw.h2
-rw-r--r--arch/s390/include/uapi/asm/vmcp.h1
-rw-r--r--arch/s390/kernel/alternative.c1
-rw-r--r--arch/s390/kernel/compat_linux.c1
-rw-r--r--arch/s390/kernel/perf_regs.c1
-rw-r--r--arch/s390/kernel/syscalls.S6
-rw-r--r--arch/s390/kernel/vdso64/note.S1
-rw-r--r--arch/s390/kvm/Makefile5
-rw-r--r--arch/s390/kvm/diag.c5
-rw-r--r--arch/s390/kvm/gaccess.h5
-rw-r--r--arch/s390/kvm/guestdbg.c5
-rw-r--r--arch/s390/kvm/intercept.c5
-rw-r--r--arch/s390/kvm/interrupt.c5
-rw-r--r--arch/s390/kvm/irq.h5
-rw-r--r--arch/s390/kvm/kvm-s390.c11
-rw-r--r--arch/s390/kvm/kvm-s390.h5
-rw-r--r--arch/s390/kvm/priv.c16
-rw-r--r--arch/s390/kvm/sigp.c5
-rw-r--r--arch/s390/kvm/vsie.c5
-rw-r--r--arch/s390/mm/pgalloc.c2
-rw-r--r--arch/s390/net/Makefile1
-rw-r--r--arch/s390/net/bpf_jit_comp.c11
-rw-r--r--arch/s390/numa/Makefile1
-rw-r--r--arch/s390/pci/Makefile1
-rw-r--r--arch/s390/tools/gen_opcode_table.c1
-rw-r--r--arch/score/include/uapi/asm/Kbuild1
-rw-r--r--arch/sh/include/uapi/asm/Kbuild1
-rw-r--r--arch/sparc/include/uapi/asm/Kbuild1
-rw-r--r--arch/sparc/lib/hweight.S4
-rw-r--r--arch/sparc/mm/fault_32.c2
-rw-r--r--arch/sparc/mm/fault_64.c2
-rw-r--r--arch/sparc/mm/gup.c4
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c6
-rw-r--r--arch/tile/include/uapi/asm/Kbuild1
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/um/include/asm/mmu_context.h3
-rw-r--r--arch/um/kernel/trap.c2
-rw-r--r--arch/unicore32/include/asm/mmu_context.h5
-rw-r--r--arch/unicore32/include/uapi/asm/Kbuild1
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/Kconfig.debug1
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/head_64.S16
-rw-r--r--arch/x86/boot/compressed/misc.c16
-rw-r--r--arch/x86/boot/compressed/pagetable.c3
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c28
-rw-r--r--arch/x86/boot/genimage.sh32
-rw-r--r--arch/x86/crypto/salsa20_glue.c7
-rw-r--r--arch/x86/entry/calling.h145
-rw-r--r--arch/x86/entry/entry_32.S14
-rw-r--r--arch/x86/entry/entry_64.S237
-rw-r--r--arch/x86/entry/entry_64_compat.S31
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c2
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c38
-rw-r--r--arch/x86/events/intel/core.c5
-rw-r--r--arch/x86/events/intel/ds.c130
-rw-r--r--arch/x86/events/perf_event.h23
-rw-r--r--arch/x86/include/asm/asm.h2
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h81
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h5
-rw-r--r--arch/x86/include/asm/desc.h14
-rw-r--r--arch/x86/include/asm/disabled-features.h8
-rw-r--r--arch/x86/include/asm/espfix.h7
-rw-r--r--arch/x86/include/asm/fixmap.h7
-rw-r--r--arch/x86/include/asm/hypervisor.h25
-rw-r--r--arch/x86/include/asm/intel_ds.h36
-rw-r--r--arch/x86/include/asm/invpcid.h53
-rw-r--r--arch/x86/include/asm/irqdomain.h2
-rw-r--r--arch/x86/include/asm/irqflags.h3
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/kmemcheck.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h16
-rw-r--r--arch/x86/include/asm/mmu.h4
-rw-r--r--arch/x86/include/asm/mmu_context.h113
-rw-r--r--arch/x86/include/asm/paravirt.h9
-rw-r--r--arch/x86/include/asm/pgalloc.h11
-rw-r--r--arch/x86/include/asm/pgtable.h30
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h15
-rw-r--r--arch/x86/include/asm/pgtable_64.h92
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h51
-rw-r--r--arch/x86/include/asm/processor-flags.h5
-rw-r--r--arch/x86/include/asm/processor.h82
-rw-r--r--arch/x86/include/asm/pti.h14
-rw-r--r--arch/x86/include/asm/segment.h12
-rw-r--r--arch/x86/include/asm/stacktrace.h3
-rw-r--r--arch/x86/include/asm/suspend_32.h8
-rw-r--r--arch/x86/include/asm/suspend_64.h19
-rw-r--r--arch/x86/include/asm/switch_to.h13
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/tlbflush.h347
-rw-r--r--arch/x86/include/asm/trace/irq_vectors.h16
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/include/asm/unwind.h7
-rw-r--r--arch/x86/include/asm/vsyscall.h1
-rw-r--r--arch/x86/include/uapi/asm/Kbuild1
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h7
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_noop.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c2
-rw-r--r--arch/x86/kernel/apic/msi.c8
-rw-r--r--arch/x86/kernel/apic/probe_32.c2
-rw-r--r--arch/x86/kernel/apic/vector.c24
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c10
-rw-r--r--arch/x86/kernel/asm-offsets_32.c9
-rw-r--r--arch/x86/kernel/asm-offsets_64.c4
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c103
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c13
-rw-r--r--arch/x86/kernel/doublefault.c36
-rw-r--r--arch/x86/kernel/dumpstack.c81
-rw-r--r--arch/x86/kernel/dumpstack_32.c6
-rw-r--r--arch/x86/kernel/dumpstack_64.c12
-rw-r--r--arch/x86/kernel/head_64.S30
-rw-r--r--arch/x86/kernel/ioport.c2
-rw-r--r--arch/x86/kernel/irq.c12
-rw-r--r--arch/x86/kernel/irq_64.c4
-rw-r--r--arch/x86/kernel/ldt.c198
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c2
-rw-r--r--arch/x86/kernel/process.c21
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c14
-rw-r--r--arch/x86/kernel/smpboot.c21
-rw-r--r--arch/x86/kernel/stacktrace.c6
-rw-r--r--arch/x86/kernel/tls.c11
-rw-r--r--arch/x86/kernel/traps.c77
-rw-r--r--arch/x86/kernel/unwind_orc.c88
-rw-r--r--arch/x86/kernel/vmlinux.lds.S17
-rw-r--r--arch/x86/kvm/emulate.c56
-rw-r--r--arch/x86/kvm/mmu.c8
-rw-r--r--arch/x86/kvm/vmx.c8
-rw-r--r--arch/x86/kvm/x86.c99
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/lib/x86-opcode-map.txt13
-rw-r--r--arch/x86/mm/Makefile9
-rw-r--r--arch/x86/mm/cpu_entry_area.c166
-rw-r--r--arch/x86/mm/debug_pagetables.c80
-rw-r--r--arch/x86/mm/dump_pagetables.c141
-rw-r--r--arch/x86/mm/extable.c6
-rw-r--r--arch/x86/mm/fault.c4
-rw-r--r--arch/x86/mm/init.c80
-rw-r--r--arch/x86/mm/init_32.c6
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/mm/kasan_init_64.c23
-rw-r--r--arch/x86/mm/kmemcheck/error.c1
-rw-r--r--arch/x86/mm/kmemcheck/error.h1
-rw-r--r--arch/x86/mm/kmemcheck/opcode.c1
-rw-r--r--arch/x86/mm/kmemcheck/opcode.h1
-rw-r--r--arch/x86/mm/kmemcheck/pte.c1
-rw-r--r--arch/x86/mm/kmemcheck/pte.h1
-rw-r--r--arch/x86/mm/kmemcheck/selftest.c1
-rw-r--r--arch/x86/mm/kmemcheck/selftest.h1
-rw-r--r--arch/x86/mm/kmemcheck/shadow.h1
-rw-r--r--arch/x86/mm/kmmio.c12
-rw-r--r--arch/x86/mm/mem_encrypt.c4
-rw-r--r--arch/x86/mm/pgtable.c5
-rw-r--r--arch/x86/mm/pgtable_32.c1
-rw-r--r--arch/x86/mm/pti.c387
-rw-r--r--arch/x86/mm/tlb.c64
-rw-r--r--arch/x86/pci/broadcom_bus.c2
-rw-r--r--arch/x86/pci/fixup.c27
-rw-r--r--arch/x86/platform/efi/efi_64.c5
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2
-rw-r--r--arch/x86/platform/uv/uv_irq.c2
-rw-r--r--arch/x86/platform/uv/uv_nmi.c4
-rw-r--r--arch/x86/power/cpu.c112
-rw-r--r--arch/x86/xen/apic.c2
-rw-r--r--arch/x86/xen/enlighten.c81
-rw-r--r--arch/x86/xen/enlighten_pv.c42
-rw-r--r--arch/x86/xen/mmu_pv.c14
-rw-r--r--arch/x86/xen/setup.c6
-rw-r--r--arch/x86/xen/xen-asm_64.S14
-rw-r--r--arch/xtensa/include/uapi/asm/Kbuild1
382 files changed, 4970 insertions, 2046 deletions
diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
index b15bf6bc0e94..14a2e9af97e9 100644
--- a/arch/alpha/include/uapi/asm/Kbuild
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bpf_perf_event.h
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index fa6d0ff4ff89..170b5db64afe 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 51c8df561077..fea5ae8ecd0d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -3,8 +3,8 @@ config ARM
bool
default y
select ARCH_CLOCKSOURCE_DATA
- select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID
- select ARCH_HAS_DEBUG_VIRTUAL
+ select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID && !KEXEC
+ select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_SET_MEMORY
@@ -99,6 +99,7 @@ config ARM
select OLD_SIGACTION
select OLD_SIGSUSPEND3
select PERF_USE_VMALLOC
+ select REFCOUNT_FULL
select RTC_LIB
select SYS_SUPPORTS_APM_EMULATION
# Above selects are sorted alphabetically; please add new ones
@@ -1524,12 +1525,10 @@ config THUMB2_KERNEL
bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY
depends on (CPU_V7 || CPU_V7M) && !CPU_V6 && !CPU_V6K
default y if CPU_THUMBONLY
- select ARM_ASM_UNIFIED
select ARM_UNWIND
help
By enabling this option, the kernel will be compiled in
- Thumb-2 mode. A compiler/assembler that understand the unified
- ARM-Thumb syntax is needed.
+ Thumb-2 mode.
If unsure, say N.
@@ -1564,9 +1563,6 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11
Unless you are sure your tools don't have this problem, say Y.
-config ARM_ASM_UNIFIED
- bool
-
config ARM_PATCH_IDIV
bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()"
depends on CPU_32v7 && !XIP_KERNEL
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 17685e19aed8..78a647080ebc 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -3,10 +3,14 @@ menu "Kernel hacking"
source "lib/Kconfig.debug"
-config ARM_PTDUMP
+config ARM_PTDUMP_CORE
+ def_bool n
+
+config ARM_PTDUMP_DEBUGFS
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
depends on MMU
+ select ARM_PTDUMP_CORE
select DEBUG_FS
---help---
Say Y here if you want to show the kernel pagetable layout in a
@@ -16,6 +20,33 @@ config ARM_PTDUMP
kernel.
If in doubt, say "N"
+config DEBUG_WX
+ bool "Warn on W+X mappings at boot"
+ select ARM_PTDUMP_CORE
+ ---help---
+ Generate a warning if any W+X mappings are found at boot.
+
+ This is useful for discovering cases where the kernel is leaving
+ W+X mappings after applying NX, as such mappings are a security risk.
+
+ Look for a message in dmesg output like this:
+
+ arm/mm: Checked W+X mappings: passed, no W+X pages found.
+
+ or like this, if the check failed:
+
+ arm/mm: Checked W+X mappings: FAILED, <N> W+X pages found.
+
+ Note that even if the check fails, your kernel is possibly
+ still fine, as W+X mappings are not a security hole in
+ themselves, what they do is that they make the exploitation
+ of other unfixed kernel bugs easier.
+
+ There is no runtime or memory usage effect of this option
+ once the kernel has booted up - it's a one time check.
+
+ If in doubt, say "Y".
+
# RMK wants arm kernels compiled with frame pointers or stack unwinding.
# If you know what you are doing and are willing to live without stack
# traces, you can get a slightly smaller kernel by setting this option to
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 80351e505fd5..e83f5161fdd8 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -115,9 +115,11 @@ ifeq ($(CONFIG_ARM_UNWIND),y)
CFLAGS_ABI +=-funwind-tables
endif
+# Accept old syntax despite ".syntax unified"
+AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
+
ifeq ($(CONFIG_THUMB2_KERNEL),y)
AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it)
-AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
CFLAGS_ISA :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN)
AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb
# Work around buggy relocation from gas if requested:
@@ -125,7 +127,7 @@ ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y)
KBUILD_CFLAGS_MODULE +=-fno-optimize-sibling-calls
endif
else
-CFLAGS_ISA :=$(call cc-option,-marm,)
+CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN)
AFLAGS_ISA :=$(CFLAGS_ISA)
endif
diff --git a/arch/arm/boot/compressed/string.c b/arch/arm/boot/compressed/string.c
index 309e1bbad75d..13c90abc68d6 100644
--- a/arch/arm/boot/compressed/string.c
+++ b/arch/arm/boot/compressed/string.c
@@ -130,8 +130,3 @@ void *memset(void *s, int c, size_t count)
*xs++ = c;
return s;
}
-
-void __memzero(void *s, size_t count)
-{
- memset(s, 0, count);
-}
diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S
index e6bf6774c4bb..2b963d8e76dd 100644
--- a/arch/arm/boot/compressed/vmlinux.lds.S
+++ b/arch/arm/boot/compressed/vmlinux.lds.S
@@ -56,6 +56,7 @@ SECTIONS
.rodata : {
*(.rodata)
*(.rodata.*)
+ *(.data.rel.ro)
}
.piggydata : {
*(.piggydata)
@@ -101,6 +102,12 @@ SECTIONS
* this symbol allows further debug in the near future.
*/
.image_end (NOLOAD) : {
+ /*
+ * EFI requires that the image is aligned to 512 bytes, and appended
+ * DTB requires that we know where the end of the image is. Ensure
+ * that both are satisfied by ensuring that there are no additional
+ * sections emitted into the decompressor image.
+ */
_edata_real = .;
}
@@ -128,3 +135,4 @@ SECTIONS
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
}
+ASSERT(_edata_real == _edata, "error: zImage file size is incorrect");
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi
index 1b81c4e75772..d37f95025807 100644
--- a/arch/arm/boot/dts/am33xx.dtsi
+++ b/arch/arm/boot/dts/am33xx.dtsi
@@ -630,6 +630,7 @@
reg-names = "phy";
status = "disabled";
ti,ctrl_mod = <&usb_ctrl_mod>;
+ #phy-cells = <0>;
};
usb0: usb@47401000 {
@@ -678,6 +679,7 @@
reg-names = "phy";
status = "disabled";
ti,ctrl_mod = <&usb_ctrl_mod>;
+ #phy-cells = <0>;
};
usb1: usb@47401800 {
diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index e5b061469bf8..4714a59fd86d 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -927,7 +927,8 @@
reg = <0x48038000 0x2000>,
<0x46000000 0x400000>;
reg-names = "mpu", "dat";
- interrupts = <80>, <81>;
+ interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "tx", "rx";
status = "disabled";
dmas = <&edma 8 2>,
@@ -941,7 +942,8 @@
reg = <0x4803C000 0x2000>,
<0x46400000 0x400000>;
reg-names = "mpu", "dat";
- interrupts = <82>, <83>;
+ interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "tx", "rx";
status = "disabled";
dmas = <&edma 10 2>,
diff --git a/arch/arm/boot/dts/am437x-cm-t43.dts b/arch/arm/boot/dts/am437x-cm-t43.dts
index 9e92d480576b..3b9a94c274a7 100644
--- a/arch/arm/boot/dts/am437x-cm-t43.dts
+++ b/arch/arm/boot/dts/am437x-cm-t43.dts
@@ -301,8 +301,8 @@
status = "okay";
pinctrl-names = "default";
pinctrl-0 = <&spi0_pins>;
- dmas = <&edma 16
- &edma 17>;
+ dmas = <&edma 16 0
+ &edma 17 0>;
dma-names = "tx0", "rx0";
flash: w25q64cvzpig@0 {
diff --git a/arch/arm/boot/dts/armada-385-db-ap.dts b/arch/arm/boot/dts/armada-385-db-ap.dts
index 25d2d720dc0e..678aa023335d 100644
--- a/arch/arm/boot/dts/armada-385-db-ap.dts
+++ b/arch/arm/boot/dts/armada-385-db-ap.dts
@@ -236,6 +236,7 @@
usb3_phy: usb3_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_xhci0_vbus>;
+ #phy-cells = <0>;
};
reg_xhci0_vbus: xhci0-vbus {
diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi
index e1f355ffc8f7..434dc9aaa5e4 100644
--- a/arch/arm/boot/dts/armada-385-linksys.dtsi
+++ b/arch/arm/boot/dts/armada-385-linksys.dtsi
@@ -66,6 +66,7 @@
usb3_1_phy: usb3_1-phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&usb3_1_vbus>;
+ #phy-cells = <0>;
};
usb3_1_vbus: usb3_1-vbus {
diff --git a/arch/arm/boot/dts/armada-385-synology-ds116.dts b/arch/arm/boot/dts/armada-385-synology-ds116.dts
index 36ad571e76f3..0a3552ebda3b 100644
--- a/arch/arm/boot/dts/armada-385-synology-ds116.dts
+++ b/arch/arm/boot/dts/armada-385-synology-ds116.dts
@@ -191,11 +191,13 @@
usb3_0_phy: usb3_0_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_usb3_0_vbus>;
+ #phy-cells = <0>;
};
usb3_1_phy: usb3_1_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_usb3_1_vbus>;
+ #phy-cells = <0>;
};
reg_usb3_0_vbus: usb3-vbus0 {
diff --git a/arch/arm/boot/dts/armada-388-gp.dts b/arch/arm/boot/dts/armada-388-gp.dts
index f503955dbd3b..51b4ee6df130 100644
--- a/arch/arm/boot/dts/armada-388-gp.dts
+++ b/arch/arm/boot/dts/armada-388-gp.dts
@@ -276,11 +276,13 @@
usb2_1_phy: usb2_1_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_usb2_1_vbus>;
+ #phy-cells = <0>;
};
usb3_phy: usb3_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_usb3_vbus>;
+ #phy-cells = <0>;
};
reg_usb3_vbus: usb3-vbus {
diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi
index 528b9e3bc1da..dcc55aa84583 100644
--- a/arch/arm/boot/dts/bcm-nsp.dtsi
+++ b/arch/arm/boot/dts/bcm-nsp.dtsi
@@ -85,7 +85,7 @@
timer@20200 {
compatible = "arm,cortex-a9-global-timer";
reg = <0x20200 0x100>;
- interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
clocks = <&periph_clk>;
};
@@ -93,7 +93,7 @@
compatible = "arm,cortex-a9-twd-timer";
reg = <0x20600 0x20>;
interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) |
- IRQ_TYPE_LEVEL_HIGH)>;
+ IRQ_TYPE_EDGE_RISING)>;
clocks = <&periph_clk>;
};
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 013431e3d7c3..dcde93c85c2d 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -639,5 +639,6 @@
usbphy: phy {
compatible = "usb-nop-xceiv";
+ #phy-cells = <0>;
};
};
diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts
index 3bc50849d013..b8bde13de90a 100644
--- a/arch/arm/boot/dts/bcm958623hr.dts
+++ b/arch/arm/boot/dts/bcm958623hr.dts
@@ -141,10 +141,6 @@
status = "okay";
};
-&sata {
- status = "okay";
-};
-
&qspi {
bspi-sel = <0>;
flash: m25p80@0 {
diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts
index d94d14b3c745..6a44b8021702 100644
--- a/arch/arm/boot/dts/bcm958625hr.dts
+++ b/arch/arm/boot/dts/bcm958625hr.dts
@@ -177,10 +177,6 @@
status = "okay";
};
-&sata {
- status = "okay";
-};
-
&srab {
compatible = "brcm,bcm58625-srab", "brcm,nsp-srab";
status = "okay";
diff --git a/arch/arm/boot/dts/dm814x.dtsi b/arch/arm/boot/dts/dm814x.dtsi
index 9708157f5daf..681f5487406e 100644
--- a/arch/arm/boot/dts/dm814x.dtsi
+++ b/arch/arm/boot/dts/dm814x.dtsi
@@ -75,6 +75,7 @@
reg = <0x47401300 0x100>;
reg-names = "phy";
ti,ctrl_mod = <&usb_ctrl_mod>;
+ #phy-cells = <0>;
};
usb0: usb@47401000 {
@@ -385,6 +386,7 @@
reg = <0x1b00 0x100>;
reg-names = "phy";
ti,ctrl_mod = <&usb_ctrl_mod>;
+ #phy-cells = <0>;
};
};
diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
index 589a67c5f796..84f17f7abb71 100644
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@ -433,15 +433,6 @@
clock-names = "ipg", "per";
};
- srtc: srtc@53fa4000 {
- compatible = "fsl,imx53-rtc", "fsl,imx25-rtc";
- reg = <0x53fa4000 0x4000>;
- interrupts = <24>;
- interrupt-parent = <&tzic>;
- clocks = <&clks IMX5_CLK_SRTC_GATE>;
- clock-names = "ipg";
- };
-
iomuxc: iomuxc@53fa8000 {
compatible = "fsl,imx53-iomuxc";
reg = <0x53fa8000 0x4000>;
diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
index 38faa90007d7..2fa5eb4bd402 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts
@@ -72,7 +72,8 @@
};
&gpmc {
- ranges = <1 0 0x08000000 0x1000000>; /* CS1: 16MB for LAN9221 */
+ ranges = <0 0 0x30000000 0x1000000 /* CS0: 16MB for NAND */
+ 1 0 0x2c000000 0x1000000>; /* CS1: 16MB for LAN9221 */
ethernet@gpmc {
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index 26cce4d18405..29cb804d10cc 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -33,11 +33,12 @@
hsusb2_phy: hsusb2_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; /* gpio_4 */
+ #phy-cells = <0>;
};
};
&gpmc {
- ranges = <0 0 0x00000000 0x1000000>; /* CS0: 16MB for NAND */
+ ranges = <0 0 0x30000000 0x1000000>; /* CS0: 16MB for NAND */
nand@0,0 {
compatible = "ti,omap2-nand";
@@ -121,7 +122,7 @@
&mmc3 {
interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>;
- pinctrl-0 = <&mmc3_pins>;
+ pinctrl-0 = <&mmc3_pins &wl127x_gpio>;
pinctrl-names = "default";
vmmc-supply = <&wl12xx_vmmc>;
non-removable;
@@ -132,8 +133,8 @@
wlcore: wlcore@2 {
compatible = "ti,wl1273";
reg = <2>;
- interrupt-parent = <&gpio5>;
- interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; /* gpio 152 */
+ interrupt-parent = <&gpio1>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; /* gpio 2 */
ref-clock-frequency = <26000000>;
};
};
@@ -157,8 +158,6 @@
OMAP3_CORE1_IOPAD(0x2166, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat5.sdmmc3_dat1 */
OMAP3_CORE1_IOPAD(0x2168, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat6.sdmmc3_dat2 */
OMAP3_CORE1_IOPAD(0x216a, PIN_INPUT_PULLUP | MUX_MODE3) /* sdmmc2_dat6.sdmmc3_dat3 */
- OMAP3_CORE1_IOPAD(0x2184, PIN_INPUT_PULLUP | MUX_MODE4) /* mcbsp4_clkx.gpio_152 */
- OMAP3_CORE1_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */
OMAP3_CORE1_IOPAD(0x21d0, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs1.sdmmc3_cmd */
OMAP3_CORE1_IOPAD(0x21d2, PIN_INPUT_PULLUP | MUX_MODE3) /* mcspi1_cs2.sdmmc_clk */
>;
@@ -228,6 +227,12 @@
OMAP3_WKUP_IOPAD(0x2a0e, PIN_OUTPUT | MUX_MODE4) /* sys_boot2.gpio_4 */
>;
};
+ wl127x_gpio: pinmux_wl127x_gpio_pin {
+ pinctrl-single,pins = <
+ OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4) /* sys_boot0.gpio_2 */
+ OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */
+ >;
+ };
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi
index 4926133077b3..0d9faf1a51ea 100644
--- a/arch/arm/boot/dts/meson.dtsi
+++ b/arch/arm/boot/dts/meson.dtsi
@@ -85,15 +85,6 @@
reg = <0x7c00 0x200>;
};
- gpio_intc: interrupt-controller@9880 {
- compatible = "amlogic,meson-gpio-intc";
- reg = <0xc1109880 0x10>;
- interrupt-controller;
- #interrupt-cells = <2>;
- amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>;
- status = "disabled";
- };
-
hwrng: rng@8100 {
compatible = "amlogic,meson-rng";
reg = <0x8100 0x8>;
@@ -191,6 +182,15 @@
status = "disabled";
};
+ gpio_intc: interrupt-controller@9880 {
+ compatible = "amlogic,meson-gpio-intc";
+ reg = <0x9880 0x10>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ amlogic,channel-interrupts = <64 65 66 67 68 69 70 71>;
+ status = "disabled";
+ };
+
wdt: watchdog@9900 {
compatible = "amlogic,meson6-wdt";
reg = <0x9900 0x8>;
diff --git a/arch/arm/boot/dts/nspire.dtsi b/arch/arm/boot/dts/nspire.dtsi
index ec2283b1a638..1a5ae4cd107f 100644
--- a/arch/arm/boot/dts/nspire.dtsi
+++ b/arch/arm/boot/dts/nspire.dtsi
@@ -56,6 +56,7 @@
usb_phy: usb_phy {
compatible = "usb-nop-xceiv";
+ #phy-cells = <0>;
};
vbus_reg: vbus_reg {
diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts
index 683b96a8f73e..0349fcc9dc26 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -90,6 +90,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
tfp410: encoder0 {
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index 4d2eaf843fa9..3ca8991a6c3e 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -64,6 +64,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio5 19 GPIO_ACTIVE_LOW>; /* gpio_147 */
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
sound {
diff --git a/arch/arm/boot/dts/omap3-cm-t3x.dtsi b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
index 31d5ebf38892..ab6003fe5a43 100644
--- a/arch/arm/boot/dts/omap3-cm-t3x.dtsi
+++ b/arch/arm/boot/dts/omap3-cm-t3x.dtsi
@@ -43,12 +43,14 @@
hsusb1_phy: hsusb1_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&hsusb1_power>;
+ #phy-cells = <0>;
};
/* HS USB Host PHY on PORT 2 */
hsusb2_phy: hsusb2_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
ads7846reg: ads7846-reg {
diff --git a/arch/arm/boot/dts/omap3-evm-common.dtsi b/arch/arm/boot/dts/omap3-evm-common.dtsi
index dbc3f030a16c..ee64191e41ca 100644
--- a/arch/arm/boot/dts/omap3-evm-common.dtsi
+++ b/arch/arm/boot/dts/omap3-evm-common.dtsi
@@ -29,6 +29,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio1 21 GPIO_ACTIVE_LOW>; /* gpio_21 */
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
leds {
diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index 4504908c23fe..3dc56fb156b7 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -120,6 +120,7 @@
hsusb2_phy: hsusb2_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio6 14 GPIO_ACTIVE_LOW>;
+ #phy-cells = <0>;
};
tv0: connector {
diff --git a/arch/arm/boot/dts/omap3-igep0020-common.dtsi b/arch/arm/boot/dts/omap3-igep0020-common.dtsi
index 667f96245729..ecbec23af49f 100644
--- a/arch/arm/boot/dts/omap3-igep0020-common.dtsi
+++ b/arch/arm/boot/dts/omap3-igep0020-common.dtsi
@@ -58,6 +58,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio1 24 GPIO_ACTIVE_LOW>; /* gpio_24 */
vcc-supply = <&hsusb1_power>;
+ #phy-cells = <0>;
};
tfp410: encoder {
diff --git a/arch/arm/boot/dts/omap3-igep0030-common.dtsi b/arch/arm/boot/dts/omap3-igep0030-common.dtsi
index e94d9427450c..443f71707437 100644
--- a/arch/arm/boot/dts/omap3-igep0030-common.dtsi
+++ b/arch/arm/boot/dts/omap3-igep0030-common.dtsi
@@ -37,6 +37,7 @@
hsusb2_phy: hsusb2_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio2 22 GPIO_ACTIVE_LOW>; /* gpio_54 */
+ #phy-cells = <0>;
};
};
diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
index 343a36d8031d..7ada1e93e166 100644
--- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
+++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
@@ -51,6 +51,7 @@
hsusb1_phy: hsusb1_phy {
compatible = "usb-nop-xceiv";
vcc-supply = <&reg_vcc3>;
+ #phy-cells = <0>;
};
};
diff --git a/arch/arm/boot/dts/omap3-overo-base.dtsi b/arch/arm/boot/dts/omap3-overo-base.dtsi
index f25e158e7163..ac141fcd1742 100644
--- a/arch/arm/boot/dts/omap3-overo-base.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-base.dtsi
@@ -51,6 +51,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio6 23 GPIO_ACTIVE_LOW>; /* gpio_183 */
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
/* Regulator to trigger the nPoweron signal of the Wifi module */
diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi
index 53e007abdc71..cd53dc6c0051 100644
--- a/arch/arm/boot/dts/omap3-pandora-common.dtsi
+++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi
@@ -205,6 +205,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio1 16 GPIO_ACTIVE_LOW>; /* GPIO_16 */
vcc-supply = <&vaux2>;
+ #phy-cells = <0>;
};
/* HS USB Host VBUS supply
diff --git a/arch/arm/boot/dts/omap3-tao3530.dtsi b/arch/arm/boot/dts/omap3-tao3530.dtsi
index 9a601d15247b..6f5bd027b717 100644
--- a/arch/arm/boot/dts/omap3-tao3530.dtsi
+++ b/arch/arm/boot/dts/omap3-tao3530.dtsi
@@ -46,6 +46,7 @@
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio6 2 GPIO_ACTIVE_LOW>; /* gpio_162 */
vcc-supply = <&hsusb2_power>;
+ #phy-cells = <0>;
};
sound {
diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
index 90b5c7148feb..bb33935df7b0 100644
--- a/arch/arm/boot/dts/omap3.dtsi
+++ b/arch/arm/boot/dts/omap3.dtsi
@@ -715,6 +715,7 @@
compatible = "ti,ohci-omap3";
reg = <0x48064400 0x400>;
interrupts = <76>;
+ remote-wakeup-connected;
};
usbhsehci: ehci@48064800 {
diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts
index 8b93d37310f2..24a463f8641f 100644
--- a/arch/arm/boot/dts/omap4-droid4-xt894.dts
+++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts
@@ -73,6 +73,7 @@
/* HS USB Host PHY on PORT 1 */
hsusb1_phy: hsusb1_phy {
compatible = "usb-nop-xceiv";
+ #phy-cells = <0>;
};
/* LCD regulator from sw5 source */
diff --git a/arch/arm/boot/dts/omap4-duovero.dtsi b/arch/arm/boot/dts/omap4-duovero.dtsi
index 6e6810c258eb..eb123b24c8e3 100644
--- a/arch/arm/boot/dts/omap4-duovero.dtsi
+++ b/arch/arm/boot/dts/omap4-duovero.dtsi
@@ -43,6 +43,7 @@
hsusb1_phy: hsusb1_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>; /* gpio_62 */
+ #phy-cells = <0>;
pinctrl-names = "default";
pinctrl-0 = <&hsusb1phy_pins>;
diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi
index 22c1eee9b07a..5501d1b4e6cd 100644
--- a/arch/arm/boot/dts/omap4-panda-common.dtsi
+++ b/arch/arm/boot/dts/omap4-panda-common.dtsi
@@ -89,6 +89,7 @@
hsusb1_phy: hsusb1_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio2 30 GPIO_ACTIVE_LOW>; /* gpio_62 */
+ #phy-cells = <0>;
vcc-supply = <&hsusb1_power>;
clocks = <&auxclk3_ck>;
clock-names = "main_clk";
diff --git a/arch/arm/boot/dts/omap4-var-som-om44.dtsi b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
index 6500bfc8d130..10fce28ceb5b 100644
--- a/arch/arm/boot/dts/omap4-var-som-om44.dtsi
+++ b/arch/arm/boot/dts/omap4-var-som-om44.dtsi
@@ -44,6 +44,7 @@
reset-gpios = <&gpio6 17 GPIO_ACTIVE_LOW>; /* gpio 177 */
vcc-supply = <&vbat>;
+ #phy-cells = <0>;
clocks = <&auxclk3_ck>;
clock-names = "main_clk";
diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi
index 1dc5a76b3c71..cc1a07a3620f 100644
--- a/arch/arm/boot/dts/omap4.dtsi
+++ b/arch/arm/boot/dts/omap4.dtsi
@@ -398,7 +398,7 @@
elm: elm@48078000 {
compatible = "ti,am3352-elm";
reg = <0x48078000 0x2000>;
- interrupts = <4>;
+ interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
ti,hwmods = "elm";
status = "disabled";
};
@@ -1081,14 +1081,13 @@
usbhsohci: ohci@4a064800 {
compatible = "ti,ohci-omap3";
reg = <0x4a064800 0x400>;
- interrupt-parent = <&gic>;
interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+ remote-wakeup-connected;
};
usbhsehci: ehci@4a064c00 {
compatible = "ti,ehci-omap";
reg = <0x4a064c00 0x400>;
- interrupt-parent = <&gic>;
interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
};
};
diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi
index 575ecffb0e9e..1b20838bb9a4 100644
--- a/arch/arm/boot/dts/omap5-board-common.dtsi
+++ b/arch/arm/boot/dts/omap5-board-common.dtsi
@@ -73,12 +73,14 @@
clocks = <&auxclk1_ck>;
clock-names = "main_clk";
clock-frequency = <19200000>;
+ #phy-cells = <0>;
};
/* HS USB Host PHY on PORT 3 */
hsusb3_phy: hsusb3_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio3 15 GPIO_ACTIVE_LOW>; /* gpio3_79 ETH_NRESET */
+ #phy-cells = <0>;
};
tpd12s015: encoder {
diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts
index 5b172a04b6f1..5e21fb430a65 100644
--- a/arch/arm/boot/dts/omap5-cm-t54.dts
+++ b/arch/arm/boot/dts/omap5-cm-t54.dts
@@ -63,12 +63,14 @@
hsusb2_phy: hsusb2_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio3 12 GPIO_ACTIVE_LOW>; /* gpio3_76 HUB_RESET */
+ #phy-cells = <0>;
};
/* HS USB Host PHY on PORT 3 */
hsusb3_phy: hsusb3_phy {
compatible = "usb-nop-xceiv";
reset-gpios = <&gpio3 19 GPIO_ACTIVE_LOW>; /* gpio3_83 ETH_RESET */
+ #phy-cells = <0>;
};
leds {
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index 4cd0005e462f..51a7fb3d7b9a 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -940,6 +940,7 @@
compatible = "ti,ohci-omap3";
reg = <0x4a064800 0x400>;
interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+ remote-wakeup-connected;
};
usbhsehci: ehci@4a064c00 {
diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi
index 2f017fee4009..62baabd757b6 100644
--- a/arch/arm/boot/dts/r8a7790.dtsi
+++ b/arch/arm/boot/dts/r8a7790.dtsi
@@ -1201,6 +1201,7 @@
clock-names = "extal", "usb_extal";
#clock-cells = <2>;
#power-domain-cells = <0>;
+ #reset-cells = <1>;
};
prr: chipid@ff000044 {
diff --git a/arch/arm/boot/dts/r8a7792.dtsi b/arch/arm/boot/dts/r8a7792.dtsi
index 131f65b0426e..3d080e07374c 100644
--- a/arch/arm/boot/dts/r8a7792.dtsi
+++ b/arch/arm/boot/dts/r8a7792.dtsi
@@ -829,6 +829,7 @@
clock-names = "extal";
#clock-cells = <2>;
#power-domain-cells = <0>;
+ #reset-cells = <1>;
};
};
diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi
index 58eae569b4e0..0cd1035de1a4 100644
--- a/arch/arm/boot/dts/r8a7793.dtsi
+++ b/arch/arm/boot/dts/r8a7793.dtsi
@@ -1088,6 +1088,7 @@
clock-names = "extal", "usb_extal";
#clock-cells = <2>;
#power-domain-cells = <0>;
+ #reset-cells = <1>;
};
rst: reset-controller@e6160000 {
diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi
index 905e50c9b524..5643976c1356 100644
--- a/arch/arm/boot/dts/r8a7794.dtsi
+++ b/arch/arm/boot/dts/r8a7794.dtsi
@@ -1099,6 +1099,7 @@
clock-names = "extal", "usb_extal";
#clock-cells = <2>;
#power-domain-cells = <0>;
+ #reset-cells = <1>;
};
rst: reset-controller@e6160000 {
diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
index 02a6227c717c..4b8edc8982cf 100644
--- a/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
+++ b/arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
@@ -121,7 +121,7 @@
switch0port10: port@10 {
reg = <10>;
label = "dsa";
- phy-mode = "xgmii";
+ phy-mode = "xaui";
link = <&switch1port10>;
};
};
@@ -208,7 +208,7 @@
switch1port10: port@10 {
reg = <10>;
label = "dsa";
- phy-mode = "xgmii";
+ phy-mode = "xaui";
link = <&switch0port10>;
};
};
@@ -359,7 +359,7 @@
};
&i2c1 {
- at24mac602@0 {
+ at24mac602@50 {
compatible = "atmel,24c02";
reg = <0x50>;
read-only;
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
index 4c10c6452678..f4dc1714a79e 100644
--- a/arch/arm/common/bL_switcher_dummy_if.c
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -57,3 +57,7 @@ static struct miscdevice bL_switcher_device = {
&bL_switcher_fops
};
module_misc_device(bL_switcher_device);
+
+MODULE_AUTHOR("Nicolas Pitre <nico@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("big.LITTLE switcher dummy user interface");
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 4ecd5120fce7..a2c878769eaf 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -108,6 +108,7 @@ struct sa1111 {
spinlock_t lock;
void __iomem *base;
struct sa1111_platform_data *pdata;
+ struct irq_domain *irqdomain;
struct gpio_chip gc;
#ifdef CONFIG_PM
void *saved_state;
@@ -125,7 +126,7 @@ struct sa1111_dev_info {
unsigned long skpcr_mask;
bool dma;
unsigned int devid;
- unsigned int irq[6];
+ unsigned int hwirq[6];
};
static struct sa1111_dev_info sa1111_devices[] = {
@@ -134,7 +135,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.skpcr_mask = SKPCR_UCLKEN,
.dma = true,
.devid = SA1111_DEVID_USB,
- .irq = {
+ .hwirq = {
IRQ_USBPWR,
IRQ_HCIM,
IRQ_HCIBUFFACC,
@@ -148,7 +149,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.skpcr_mask = SKPCR_I2SCLKEN | SKPCR_L3CLKEN,
.dma = true,
.devid = SA1111_DEVID_SAC,
- .irq = {
+ .hwirq = {
AUDXMTDMADONEA,
AUDXMTDMADONEB,
AUDRCVDMADONEA,
@@ -164,7 +165,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.offset = SA1111_KBD,
.skpcr_mask = SKPCR_PTCLKEN,
.devid = SA1111_DEVID_PS2_KBD,
- .irq = {
+ .hwirq = {
IRQ_TPRXINT,
IRQ_TPTXINT
},
@@ -173,7 +174,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.offset = SA1111_MSE,
.skpcr_mask = SKPCR_PMCLKEN,
.devid = SA1111_DEVID_PS2_MSE,
- .irq = {
+ .hwirq = {
IRQ_MSRXINT,
IRQ_MSTXINT
},
@@ -182,7 +183,7 @@ static struct sa1111_dev_info sa1111_devices[] = {
.offset = 0x1800,
.skpcr_mask = 0,
.devid = SA1111_DEVID_PCMCIA,
- .irq = {
+ .hwirq = {
IRQ_S0_READY_NINT,
IRQ_S0_CD_VALID,
IRQ_S0_BVD1_STSCHG,
@@ -193,6 +194,19 @@ static struct sa1111_dev_info sa1111_devices[] = {
},
};
+static int sa1111_map_irq(struct sa1111 *sachip, irq_hw_number_t hwirq)
+{
+ return irq_create_mapping(sachip->irqdomain, hwirq);
+}
+
+static void sa1111_handle_irqdomain(struct irq_domain *irqdomain, int irq)
+{
+ struct irq_desc *d = irq_to_desc(irq_linear_revmap(irqdomain, irq));
+
+ if (d)
+ generic_handle_irq_desc(d);
+}
+
/*
* SA1111 interrupt support. Since clearing an IRQ while there are
* active IRQs causes the interrupt output to pulse, the upper levels
@@ -202,49 +216,45 @@ static void sa1111_irq_handler(struct irq_desc *desc)
{
unsigned int stat0, stat1, i;
struct sa1111 *sachip = irq_desc_get_handler_data(desc);
+ struct irq_domain *irqdomain;
void __iomem *mapbase = sachip->base + SA1111_INTC;
- stat0 = sa1111_readl(mapbase + SA1111_INTSTATCLR0);
- stat1 = sa1111_readl(mapbase + SA1111_INTSTATCLR1);
+ stat0 = readl_relaxed(mapbase + SA1111_INTSTATCLR0);
+ stat1 = readl_relaxed(mapbase + SA1111_INTSTATCLR1);
- sa1111_writel(stat0, mapbase + SA1111_INTSTATCLR0);
+ writel_relaxed(stat0, mapbase + SA1111_INTSTATCLR0);
desc->irq_data.chip->irq_ack(&desc->irq_data);
- sa1111_writel(stat1, mapbase + SA1111_INTSTATCLR1);
+ writel_relaxed(stat1, mapbase + SA1111_INTSTATCLR1);
if (stat0 == 0 && stat1 == 0) {
do_bad_IRQ(desc);
return;
}
+ irqdomain = sachip->irqdomain;
+
for (i = 0; stat0; i++, stat0 >>= 1)
if (stat0 & 1)
- generic_handle_irq(i + sachip->irq_base);
+ sa1111_handle_irqdomain(irqdomain, i);
for (i = 32; stat1; i++, stat1 >>= 1)
if (stat1 & 1)
- generic_handle_irq(i + sachip->irq_base);
+ sa1111_handle_irqdomain(irqdomain, i);
/* For level-based interrupts */
desc->irq_data.chip->irq_unmask(&desc->irq_data);
}
-#define SA1111_IRQMASK_LO(x) (1 << (x - sachip->irq_base))
-#define SA1111_IRQMASK_HI(x) (1 << (x - sachip->irq_base - 32))
-
static u32 sa1111_irqmask(struct irq_data *d)
{
- struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-
- return BIT((d->irq - sachip->irq_base) & 31);
+ return BIT(irqd_to_hwirq(d) & 31);
}
static int sa1111_irqbank(struct irq_data *d)
{
- struct sa1111 *sachip = irq_data_get_irq_chip_data(d);
-
- return ((d->irq - sachip->irq_base) / 32) * 4;
+ return (irqd_to_hwirq(d) / 32) * 4;
}
static void sa1111_ack_irq(struct irq_data *d)
@@ -257,9 +267,9 @@ static void sa1111_mask_irq(struct irq_data *d)
void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
u32 ie;
- ie = sa1111_readl(mapbase + SA1111_INTEN0);
+ ie = readl_relaxed(mapbase + SA1111_INTEN0);
ie &= ~sa1111_irqmask(d);
- sa1111_writel(ie, mapbase + SA1111_INTEN0);
+ writel(ie, mapbase + SA1111_INTEN0);
}
static void sa1111_unmask_irq(struct irq_data *d)
@@ -268,9 +278,9 @@ static void sa1111_unmask_irq(struct irq_data *d)
void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
u32 ie;
- ie = sa1111_readl(mapbase + SA1111_INTEN0);
+ ie = readl_relaxed(mapbase + SA1111_INTEN0);
ie |= sa1111_irqmask(d);
- sa1111_writel(ie, mapbase + SA1111_INTEN0);
+ writel_relaxed(ie, mapbase + SA1111_INTEN0);
}
/*
@@ -287,11 +297,11 @@ static int sa1111_retrigger_irq(struct irq_data *d)
u32 ip, mask = sa1111_irqmask(d);
int i;
- ip = sa1111_readl(mapbase + SA1111_INTPOL0);
+ ip = readl_relaxed(mapbase + SA1111_INTPOL0);
for (i = 0; i < 8; i++) {
- sa1111_writel(ip ^ mask, mapbase + SA1111_INTPOL0);
- sa1111_writel(ip, mapbase + SA1111_INTPOL0);
- if (sa1111_readl(mapbase + SA1111_INTSTATCLR0) & mask)
+ writel_relaxed(ip ^ mask, mapbase + SA1111_INTPOL0);
+ writel_relaxed(ip, mapbase + SA1111_INTPOL0);
+ if (readl_relaxed(mapbase + SA1111_INTSTATCLR0) & mask)
break;
}
@@ -313,13 +323,13 @@ static int sa1111_type_irq(struct irq_data *d, unsigned int flags)
if ((!(flags & IRQ_TYPE_EDGE_RISING) ^ !(flags & IRQ_TYPE_EDGE_FALLING)) == 0)
return -EINVAL;
- ip = sa1111_readl(mapbase + SA1111_INTPOL0);
+ ip = readl_relaxed(mapbase + SA1111_INTPOL0);
if (flags & IRQ_TYPE_EDGE_RISING)
ip &= ~mask;
else
ip |= mask;
- sa1111_writel(ip, mapbase + SA1111_INTPOL0);
- sa1111_writel(ip, mapbase + SA1111_WAKEPOL0);
+ writel_relaxed(ip, mapbase + SA1111_INTPOL0);
+ writel_relaxed(ip, mapbase + SA1111_WAKEPOL0);
return 0;
}
@@ -330,12 +340,12 @@ static int sa1111_wake_irq(struct irq_data *d, unsigned int on)
void __iomem *mapbase = sachip->base + SA1111_INTC + sa1111_irqbank(d);
u32 we, mask = sa1111_irqmask(d);
- we = sa1111_readl(mapbase + SA1111_WAKEEN0);
+ we = readl_relaxed(mapbase + SA1111_WAKEEN0);
if (on)
we |= mask;
else
we &= ~mask;
- sa1111_writel(we, mapbase + SA1111_WAKEEN0);
+ writel_relaxed(we, mapbase + SA1111_WAKEEN0);
return 0;
}
@@ -350,10 +360,30 @@ static struct irq_chip sa1111_irq_chip = {
.irq_set_wake = sa1111_wake_irq,
};
+static int sa1111_irqdomain_map(struct irq_domain *d, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ struct sa1111 *sachip = d->host_data;
+
+ /* Disallow unavailable interrupts */
+ if (hwirq > SSPROR && hwirq < AUDXMTDMADONEA)
+ return -EINVAL;
+
+ irq_set_chip_data(irq, sachip);
+ irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
+ irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
+
+ return 0;
+}
+
+static const struct irq_domain_ops sa1111_irqdomain_ops = {
+ .map = sa1111_irqdomain_map,
+ .xlate = irq_domain_xlate_twocell,
+};
+
static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
{
void __iomem *irqbase = sachip->base + SA1111_INTC;
- unsigned i, irq;
int ret;
/*
@@ -373,38 +403,40 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
sachip->irq_base = ret;
/* disable all IRQs */
- sa1111_writel(0, irqbase + SA1111_INTEN0);
- sa1111_writel(0, irqbase + SA1111_INTEN1);
- sa1111_writel(0, irqbase + SA1111_WAKEEN0);
- sa1111_writel(0, irqbase + SA1111_WAKEEN1);
+ writel_relaxed(0, irqbase + SA1111_INTEN0);
+ writel_relaxed(0, irqbase + SA1111_INTEN1);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN0);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN1);
/*
* detect on rising edge. Note: Feb 2001 Errata for SA1111
* specifies that S0ReadyInt and S1ReadyInt should be '1'.
*/
- sa1111_writel(0, irqbase + SA1111_INTPOL0);
- sa1111_writel(BIT(IRQ_S0_READY_NINT & 31) |
- BIT(IRQ_S1_READY_NINT & 31),
- irqbase + SA1111_INTPOL1);
+ writel_relaxed(0, irqbase + SA1111_INTPOL0);
+ writel_relaxed(BIT(IRQ_S0_READY_NINT & 31) |
+ BIT(IRQ_S1_READY_NINT & 31),
+ irqbase + SA1111_INTPOL1);
/* clear all IRQs */
- sa1111_writel(~0, irqbase + SA1111_INTSTATCLR0);
- sa1111_writel(~0, irqbase + SA1111_INTSTATCLR1);
-
- for (i = IRQ_GPAIN0; i <= SSPROR; i++) {
- irq = sachip->irq_base + i;
- irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
- irq_set_chip_data(irq, sachip);
- irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
- }
+ writel_relaxed(~0, irqbase + SA1111_INTSTATCLR0);
+ writel_relaxed(~0, irqbase + SA1111_INTSTATCLR1);
- for (i = AUDXMTDMADONEA; i <= IRQ_S1_BVD1_STSCHG; i++) {
- irq = sachip->irq_base + i;
- irq_set_chip_and_handler(irq, &sa1111_irq_chip, handle_edge_irq);
- irq_set_chip_data(irq, sachip);
- irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
+ sachip->irqdomain = irq_domain_add_linear(NULL, SA1111_IRQ_NR,
+ &sa1111_irqdomain_ops,
+ sachip);
+ if (!sachip->irqdomain) {
+ irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
+ return -ENOMEM;
}
+ irq_domain_associate_many(sachip->irqdomain,
+ sachip->irq_base + IRQ_GPAIN0,
+ IRQ_GPAIN0, SSPROR + 1 - IRQ_GPAIN0);
+ irq_domain_associate_many(sachip->irqdomain,
+ sachip->irq_base + AUDXMTDMADONEA,
+ AUDXMTDMADONEA,
+ IRQ_S1_BVD1_STSCHG + 1 - AUDXMTDMADONEA);
+
/*
* Register SA1111 interrupt
*/
@@ -420,20 +452,22 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base)
static void sa1111_remove_irq(struct sa1111 *sachip)
{
+ struct irq_domain *domain = sachip->irqdomain;
void __iomem *irqbase = sachip->base + SA1111_INTC;
+ int i;
/* disable all IRQs */
- sa1111_writel(0, irqbase + SA1111_INTEN0);
- sa1111_writel(0, irqbase + SA1111_INTEN1);
- sa1111_writel(0, irqbase + SA1111_WAKEEN0);
- sa1111_writel(0, irqbase + SA1111_WAKEEN1);
+ writel_relaxed(0, irqbase + SA1111_INTEN0);
+ writel_relaxed(0, irqbase + SA1111_INTEN1);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN0);
+ writel_relaxed(0, irqbase + SA1111_WAKEEN1);
- if (sachip->irq != NO_IRQ) {
- irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
- irq_free_descs(sachip->irq_base, SA1111_IRQ_NR);
+ irq_set_chained_handler_and_data(sachip->irq, NULL, NULL);
+ for (i = 0; i < SA1111_IRQ_NR; i++)
+ irq_dispose_mapping(irq_find_mapping(domain, i));
+ irq_domain_remove(domain);
- release_mem_region(sachip->phys + SA1111_INTC, 512);
- }
+ release_mem_region(sachip->phys + SA1111_INTC, 512);
}
enum {
@@ -572,7 +606,7 @@ static int sa1111_gpio_to_irq(struct gpio_chip *gc, unsigned offset)
{
struct sa1111 *sachip = gc_to_sa1111(gc);
- return sachip->irq_base + offset;
+ return sa1111_map_irq(sachip, offset);
}
static int sa1111_setup_gpios(struct sa1111 *sachip)
@@ -618,11 +652,11 @@ static void sa1111_wake(struct sa1111 *sachip)
/*
* Turn VCO on, and disable PLL Bypass.
*/
- r = sa1111_readl(sachip->base + SA1111_SKCR);
+ r = readl_relaxed(sachip->base + SA1111_SKCR);
r &= ~SKCR_VCO_OFF;
- sa1111_writel(r, sachip->base + SA1111_SKCR);
+ writel_relaxed(r, sachip->base + SA1111_SKCR);
r |= SKCR_PLL_BYPASS | SKCR_OE_EN;
- sa1111_writel(r, sachip->base + SA1111_SKCR);
+ writel_relaxed(r, sachip->base + SA1111_SKCR);
/*
* Wait lock time. SA1111 manual _doesn't_
@@ -634,7 +668,7 @@ static void sa1111_wake(struct sa1111 *sachip)
* Enable RCLK. We also ensure that RDYEN is set.
*/
r |= SKCR_RCLKEN | SKCR_RDYEN;
- sa1111_writel(r, sachip->base + SA1111_SKCR);
+ writel_relaxed(r, sachip->base + SA1111_SKCR);
/*
* Wait 14 RCLK cycles for the chip to finish coming out
@@ -645,7 +679,7 @@ static void sa1111_wake(struct sa1111 *sachip)
/*
* Ensure all clocks are initially off.
*/
- sa1111_writel(0, sachip->base + SA1111_SKPCR);
+ writel_relaxed(0, sachip->base + SA1111_SKPCR);
spin_unlock_irqrestore(&sachip->lock, flags);
}
@@ -675,7 +709,7 @@ sa1111_configure_smc(struct sa1111 *sachip, int sdram, unsigned int drac,
if (cas_latency == 3)
smcr |= SMCR_CLAT;
- sa1111_writel(smcr, sachip->base + SA1111_SMCR);
+ writel_relaxed(smcr, sachip->base + SA1111_SMCR);
/*
* Now clear the bits in the DMA mask to work around the SA1111
@@ -723,8 +757,8 @@ sa1111_init_one_child(struct sa1111 *sachip, struct resource *parent,
dev->mapbase = sachip->base + info->offset;
dev->skpcr_mask = info->skpcr_mask;
- for (i = 0; i < ARRAY_SIZE(info->irq); i++)
- dev->irq[i] = sachip->irq_base + info->irq[i];
+ for (i = 0; i < ARRAY_SIZE(info->hwirq); i++)
+ dev->hwirq[i] = info->hwirq[i];
/*
* If the parent device has a DMA mask associated with it, and
@@ -814,7 +848,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
/*
* Probe for the chip. Only touch the SBI registers.
*/
- id = sa1111_readl(sachip->base + SA1111_SKID);
+ id = readl_relaxed(sachip->base + SA1111_SKID);
if ((id & SKID_ID_MASK) != SKID_SA1111_ID) {
printk(KERN_DEBUG "SA1111 not detected: ID = %08lx\n", id);
ret = -ENODEV;
@@ -833,11 +867,9 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
* The interrupt controller must be initialised before any
* other device to ensure that the interrupts are available.
*/
- if (sachip->irq != NO_IRQ) {
- ret = sa1111_setup_irq(sachip, pd->irq_base);
- if (ret)
- goto err_clk;
- }
+ ret = sa1111_setup_irq(sachip, pd->irq_base);
+ if (ret)
+ goto err_clk;
/* Setup the GPIOs - should really be done after the IRQ setup */
ret = sa1111_setup_gpios(sachip);
@@ -864,8 +896,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq)
* DMA. It can otherwise be held firmly in the off position.
* (currently, we always enable it.)
*/
- val = sa1111_readl(sachip->base + SA1111_SKPCR);
- sa1111_writel(val | SKPCR_DCLKEN, sachip->base + SA1111_SKPCR);
+ val = readl_relaxed(sachip->base + SA1111_SKPCR);
+ writel_relaxed(val | SKPCR_DCLKEN, sachip->base + SA1111_SKPCR);
/*
* Enable the SA1110 memory bus request and grant signals.
@@ -962,31 +994,31 @@ static int sa1111_suspend_noirq(struct device *dev)
* Save state.
*/
base = sachip->base;
- save->skcr = sa1111_readl(base + SA1111_SKCR);
- save->skpcr = sa1111_readl(base + SA1111_SKPCR);
- save->skcdr = sa1111_readl(base + SA1111_SKCDR);
- save->skaud = sa1111_readl(base + SA1111_SKAUD);
- save->skpwm0 = sa1111_readl(base + SA1111_SKPWM0);
- save->skpwm1 = sa1111_readl(base + SA1111_SKPWM1);
+ save->skcr = readl_relaxed(base + SA1111_SKCR);
+ save->skpcr = readl_relaxed(base + SA1111_SKPCR);
+ save->skcdr = readl_relaxed(base + SA1111_SKCDR);
+ save->skaud = readl_relaxed(base + SA1111_SKAUD);
+ save->skpwm0 = readl_relaxed(base + SA1111_SKPWM0);
+ save->skpwm1 = readl_relaxed(base + SA1111_SKPWM1);
- sa1111_writel(0, sachip->base + SA1111_SKPWM0);
- sa1111_writel(0, sachip->base + SA1111_SKPWM1);
+ writel_relaxed(0, sachip->base + SA1111_SKPWM0);
+ writel_relaxed(0, sachip->base + SA1111_SKPWM1);
base = sachip->base + SA1111_INTC;
- save->intpol0 = sa1111_readl(base + SA1111_INTPOL0);
- save->intpol1 = sa1111_readl(base + SA1111_INTPOL1);
- save->inten0 = sa1111_readl(base + SA1111_INTEN0);
- save->inten1 = sa1111_readl(base + SA1111_INTEN1);
- save->wakepol0 = sa1111_readl(base + SA1111_WAKEPOL0);
- save->wakepol1 = sa1111_readl(base + SA1111_WAKEPOL1);
- save->wakeen0 = sa1111_readl(base + SA1111_WAKEEN0);
- save->wakeen1 = sa1111_readl(base + SA1111_WAKEEN1);
+ save->intpol0 = readl_relaxed(base + SA1111_INTPOL0);
+ save->intpol1 = readl_relaxed(base + SA1111_INTPOL1);
+ save->inten0 = readl_relaxed(base + SA1111_INTEN0);
+ save->inten1 = readl_relaxed(base + SA1111_INTEN1);
+ save->wakepol0 = readl_relaxed(base + SA1111_WAKEPOL0);
+ save->wakepol1 = readl_relaxed(base + SA1111_WAKEPOL1);
+ save->wakeen0 = readl_relaxed(base + SA1111_WAKEEN0);
+ save->wakeen1 = readl_relaxed(base + SA1111_WAKEEN1);
/*
* Disable.
*/
- val = sa1111_readl(sachip->base + SA1111_SKCR);
- sa1111_writel(val | SKCR_SLEEP, sachip->base + SA1111_SKCR);
+ val = readl_relaxed(sachip->base + SA1111_SKCR);
+ writel_relaxed(val | SKCR_SLEEP, sachip->base + SA1111_SKCR);
clk_disable(sachip->clk);
@@ -1023,7 +1055,7 @@ static int sa1111_resume_noirq(struct device *dev)
* Ensure that the SA1111 is still here.
* FIXME: shouldn't do this here.
*/
- id = sa1111_readl(sachip->base + SA1111_SKID);
+ id = readl_relaxed(sachip->base + SA1111_SKID);
if ((id & SKID_ID_MASK) != SKID_SA1111_ID) {
__sa1111_remove(sachip);
dev_set_drvdata(dev, NULL);
@@ -1047,26 +1079,26 @@ static int sa1111_resume_noirq(struct device *dev)
*/
spin_lock_irqsave(&sachip->lock, flags);
- sa1111_writel(0, sachip->base + SA1111_INTC + SA1111_INTEN0);
- sa1111_writel(0, sachip->base + SA1111_INTC + SA1111_INTEN1);
+ writel_relaxed(0, sachip->base + SA1111_INTC + SA1111_INTEN0);
+ writel_relaxed(0, sachip->base + SA1111_INTC + SA1111_INTEN1);
base = sachip->base;
- sa1111_writel(save->skcr, base + SA1111_SKCR);
- sa1111_writel(save->skpcr, base + SA1111_SKPCR);
- sa1111_writel(save->skcdr, base + SA1111_SKCDR);
- sa1111_writel(save->skaud, base + SA1111_SKAUD);
- sa1111_writel(save->skpwm0, base + SA1111_SKPWM0);
- sa1111_writel(save->skpwm1, base + SA1111_SKPWM1);
+ writel_relaxed(save->skcr, base + SA1111_SKCR);
+ writel_relaxed(save->skpcr, base + SA1111_SKPCR);
+ writel_relaxed(save->skcdr, base + SA1111_SKCDR);
+ writel_relaxed(save->skaud, base + SA1111_SKAUD);
+ writel_relaxed(save->skpwm0, base + SA1111_SKPWM0);
+ writel_relaxed(save->skpwm1, base + SA1111_SKPWM1);
base = sachip->base + SA1111_INTC;
- sa1111_writel(save->intpol0, base + SA1111_INTPOL0);
- sa1111_writel(save->intpol1, base + SA1111_INTPOL1);
- sa1111_writel(save->inten0, base + SA1111_INTEN0);
- sa1111_writel(save->inten1, base + SA1111_INTEN1);
- sa1111_writel(save->wakepol0, base + SA1111_WAKEPOL0);
- sa1111_writel(save->wakepol1, base + SA1111_WAKEPOL1);
- sa1111_writel(save->wakeen0, base + SA1111_WAKEEN0);
- sa1111_writel(save->wakeen1, base + SA1111_WAKEEN1);
+ writel_relaxed(save->intpol0, base + SA1111_INTPOL0);
+ writel_relaxed(save->intpol1, base + SA1111_INTPOL1);
+ writel_relaxed(save->inten0, base + SA1111_INTEN0);
+ writel_relaxed(save->inten1, base + SA1111_INTEN1);
+ writel_relaxed(save->wakepol0, base + SA1111_WAKEPOL0);
+ writel_relaxed(save->wakepol1, base + SA1111_WAKEPOL1);
+ writel_relaxed(save->wakeen0, base + SA1111_WAKEEN0);
+ writel_relaxed(save->wakeen1, base + SA1111_WAKEEN1);
spin_unlock_irqrestore(&sachip->lock, flags);
@@ -1153,7 +1185,7 @@ static unsigned int __sa1111_pll_clock(struct sa1111 *sachip)
{
unsigned int skcdr, fbdiv, ipdiv, opdiv;
- skcdr = sa1111_readl(sachip->base + SA1111_SKCDR);
+ skcdr = readl_relaxed(sachip->base + SA1111_SKCDR);
fbdiv = (skcdr & 0x007f) + 2;
ipdiv = ((skcdr & 0x0f80) >> 7) + 2;
@@ -1195,13 +1227,13 @@ void sa1111_select_audio_mode(struct sa1111_dev *sadev, int mode)
spin_lock_irqsave(&sachip->lock, flags);
- val = sa1111_readl(sachip->base + SA1111_SKCR);
+ val = readl_relaxed(sachip->base + SA1111_SKCR);
if (mode == SA1111_AUDIO_I2S) {
val &= ~SKCR_SELAC;
} else {
val |= SKCR_SELAC;
}
- sa1111_writel(val, sachip->base + SA1111_SKCR);
+ writel_relaxed(val, sachip->base + SA1111_SKCR);
spin_unlock_irqrestore(&sachip->lock, flags);
}
@@ -1226,7 +1258,7 @@ int sa1111_set_audio_rate(struct sa1111_dev *sadev, int rate)
if (div > 128)
div = 128;
- sa1111_writel(div - 1, sachip->base + SA1111_SKAUD);
+ writel_relaxed(div - 1, sachip->base + SA1111_SKAUD);
return 0;
}
@@ -1244,7 +1276,7 @@ int sa1111_get_audio_rate(struct sa1111_dev *sadev)
if (sadev->devid != SA1111_DEVID_SAC)
return -EINVAL;
- div = sa1111_readl(sachip->base + SA1111_SKAUD) + 1;
+ div = readl_relaxed(sachip->base + SA1111_SKAUD) + 1;
return __sa1111_pll_clock(sachip) / (256 * div);
}
@@ -1261,10 +1293,10 @@ void sa1111_set_io_dir(struct sa1111_dev *sadev,
#define MODIFY_BITS(port, mask, dir) \
if (mask) { \
- val = sa1111_readl(port); \
+ val = readl_relaxed(port); \
val &= ~(mask); \
val |= (dir) & (mask); \
- sa1111_writel(val, port); \
+ writel_relaxed(val, port); \
}
spin_lock_irqsave(&sachip->lock, flags);
@@ -1329,8 +1361,8 @@ int sa1111_enable_device(struct sa1111_dev *sadev)
if (ret == 0) {
spin_lock_irqsave(&sachip->lock, flags);
- val = sa1111_readl(sachip->base + SA1111_SKPCR);
- sa1111_writel(val | sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
+ val = readl_relaxed(sachip->base + SA1111_SKPCR);
+ writel_relaxed(val | sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
spin_unlock_irqrestore(&sachip->lock, flags);
}
return ret;
@@ -1348,8 +1380,8 @@ void sa1111_disable_device(struct sa1111_dev *sadev)
unsigned int val;
spin_lock_irqsave(&sachip->lock, flags);
- val = sa1111_readl(sachip->base + SA1111_SKPCR);
- sa1111_writel(val & ~sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
+ val = readl_relaxed(sachip->base + SA1111_SKPCR);
+ writel_relaxed(val & ~sadev->skpcr_mask, sachip->base + SA1111_SKPCR);
spin_unlock_irqrestore(&sachip->lock, flags);
if (sachip->pdata && sachip->pdata->disable)
@@ -1359,9 +1391,10 @@ EXPORT_SYMBOL(sa1111_disable_device);
int sa1111_get_irq(struct sa1111_dev *sadev, unsigned num)
{
- if (num >= ARRAY_SIZE(sadev->irq))
+ struct sa1111 *sachip = sa1111_chip_driver(sadev);
+ if (num >= ARRAY_SIZE(sadev->hwirq))
return -EINVAL;
- return sadev->irq[num];
+ return sa1111_map_irq(sachip, sadev->hwirq[num]);
}
EXPORT_SYMBOL_GPL(sa1111_get_irq);
@@ -1379,36 +1412,6 @@ static int sa1111_match(struct device *_dev, struct device_driver *_drv)
return !!(dev->devid & drv->devid);
}
-static int sa1111_bus_suspend(struct device *dev, pm_message_t state)
-{
- struct sa1111_dev *sadev = to_sa1111_device(dev);
- struct sa1111_driver *drv = SA1111_DRV(dev->driver);
- int ret = 0;
-
- if (drv && drv->suspend)
- ret = drv->suspend(sadev, state);
- return ret;
-}
-
-static int sa1111_bus_resume(struct device *dev)
-{
- struct sa1111_dev *sadev = to_sa1111_device(dev);
- struct sa1111_driver *drv = SA1111_DRV(dev->driver);
- int ret = 0;
-
- if (drv && drv->resume)
- ret = drv->resume(sadev);
- return ret;
-}
-
-static void sa1111_bus_shutdown(struct device *dev)
-{
- struct sa1111_driver *drv = SA1111_DRV(dev->driver);
-
- if (drv && drv->shutdown)
- drv->shutdown(to_sa1111_device(dev));
-}
-
static int sa1111_bus_probe(struct device *dev)
{
struct sa1111_dev *sadev = to_sa1111_device(dev);
@@ -1436,9 +1439,6 @@ struct bus_type sa1111_bus_type = {
.match = sa1111_match,
.probe = sa1111_bus_probe,
.remove = sa1111_bus_remove,
- .suspend = sa1111_bus_suspend,
- .resume = sa1111_bus_resume,
- .shutdown = sa1111_bus_shutdown,
};
EXPORT_SYMBOL(sa1111_bus_type);
diff --git a/arch/arm/include/asm/exception.h b/arch/arm/include/asm/exception.h
index a7273ad9587a..58e039a851af 100644
--- a/arch/arm/include/asm/exception.h
+++ b/arch/arm/include/asm/exception.h
@@ -10,11 +10,10 @@
#include <linux/interrupt.h>
-#define __exception __attribute__((section(".exception.text")))
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define __exception_irq_entry __irq_entry
#else
-#define __exception_irq_entry __exception
+#define __exception_irq_entry
#endif
#endif /* __ASM_ARM_EXCEPTION_H */
diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h
index 01c3d92624e5..8d1f498e5dd8 100644
--- a/arch/arm/include/asm/glue-cache.h
+++ b/arch/arm/include/asm/glue-cache.h
@@ -117,6 +117,10 @@
# endif
#endif
+#if defined(CONFIG_CACHE_B15_RAC)
+# define MULTI_CACHE 1
+#endif
+
#if defined(CONFIG_CPU_V7M)
# define MULTI_CACHE 1
#endif
diff --git a/arch/arm/include/asm/hardware/cache-b15-rac.h b/arch/arm/include/asm/hardware/cache-b15-rac.h
new file mode 100644
index 000000000000..3d43ec06fd35
--- /dev/null
+++ b/arch/arm/include/asm/hardware/cache-b15-rac.h
@@ -0,0 +1,10 @@
+#ifndef __ASM_ARM_HARDWARE_CACHE_B15_RAC_H
+#define __ASM_ARM_HARDWARE_CACHE_B15_RAC_H
+
+#ifndef __ASSEMBLY__
+
+void b15_flush_kern_cache_all(void);
+
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/hardware/sa1111.h b/arch/arm/include/asm/hardware/sa1111.h
index 0bbf163d1ed3..798e520e8a49 100644
--- a/arch/arm/include/asm/hardware/sa1111.h
+++ b/arch/arm/include/asm/hardware/sa1111.h
@@ -16,33 +16,6 @@
#include <mach/bitfield.h>
/*
- * The SA1111 is always located at virtual 0xf4000000, and is always
- * "native" endian.
- */
-
-#define SA1111_VBASE 0xf4000000
-
-/* Don't use these! */
-#define SA1111_p2v( x ) ((x) - SA1111_BASE + SA1111_VBASE)
-#define SA1111_v2p( x ) ((x) - SA1111_VBASE + SA1111_BASE)
-
-#ifndef __ASSEMBLY__
-#define _SA1111(x) ((x) + sa1111->resource.start)
-#endif
-
-#define sa1111_writel(val,addr) __raw_writel(val, addr)
-#define sa1111_readl(addr) __raw_readl(addr)
-
-/*
- * 26 bits of the SA-1110 address bus are available to the SA-1111.
- * Use these when feeding target addresses to the DMA engines.
- */
-
-#define SA1111_ADDR_WIDTH (26)
-#define SA1111_ADDR_MASK ((1<<SA1111_ADDR_WIDTH)-1)
-#define SA1111_DMA_ADDR(x) ((x)&SA1111_ADDR_MASK)
-
-/*
* Don't ask the (SAC) DMA engines to move less than this amount.
*/
@@ -417,7 +390,7 @@ struct sa1111_dev {
struct resource res;
void __iomem *mapbase;
unsigned int skpcr_mask;
- unsigned int irq[6];
+ unsigned int hwirq[6];
u64 dma_mask;
};
@@ -431,9 +404,6 @@ struct sa1111_driver {
unsigned int devid;
int (*probe)(struct sa1111_dev *);
int (*remove)(struct sa1111_dev *);
- int (*suspend)(struct sa1111_dev *, pm_message_t);
- int (*resume)(struct sa1111_dev *);
- void (*shutdown)(struct sa1111_dev *);
};
#define SA1111_DRV(_d) container_of((_d), struct sa1111_driver, drv)
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index c8781450905b..3ab8b3781bfe 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,8 +161,7 @@
#else
#define VTTBR_X (5 - KVM_T0SZ)
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT _AC(48, ULL)
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 242151ea6908..a9f7d3f47134 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -285,6 +285,11 @@ static inline void kvm_arm_init_debug(void) {}
static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
+static inline bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu,
+ struct kvm_run *run)
+{
+ return false;
+}
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 1f54e4e98c1e..496667703693 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -88,6 +88,7 @@
#else /* CONFIG_MMU */
#ifndef __ASSEMBLY__
+extern unsigned long setup_vectors_base(void);
extern unsigned long vectors_base;
#define VECTORS_BASE vectors_base
#endif
diff --git a/arch/arm/include/asm/ptdump.h b/arch/arm/include/asm/ptdump.h
new file mode 100644
index 000000000000..3ebf9718288d
--- /dev/null
+++ b/arch/arm/include/asm/ptdump.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2014 ARM Ltd. */
+#ifndef __ASM_PTDUMP_H
+#define __ASM_PTDUMP_H
+
+#ifdef CONFIG_ARM_PTDUMP_CORE
+
+#include <linux/mm_types.h>
+#include <linux/seq_file.h>
+
+struct addr_marker {
+ unsigned long start_address;
+ char *name;
+};
+
+struct ptdump_info {
+ struct mm_struct *mm;
+ const struct addr_marker *markers;
+ unsigned long base_addr;
+};
+
+void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info);
+#ifdef CONFIG_ARM_PTDUMP_DEBUGFS
+int ptdump_debugfs_register(struct ptdump_info *info, const char *name);
+#else
+static inline int ptdump_debugfs_register(struct ptdump_info *info,
+ const char *name)
+{
+ return 0;
+}
+#endif /* CONFIG_ARM_PTDUMP_DEBUGFS */
+
+void ptdump_check_wx(void);
+
+#endif /* CONFIG_ARM_PTDUMP_CORE */
+
+#ifdef CONFIG_DEBUG_WX
+#define debug_checkwx() ptdump_check_wx()
+#else
+#define debug_checkwx() do { } while (0)
+#endif
+
+#endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm/include/asm/sections.h b/arch/arm/include/asm/sections.h
index 63dfe1f10335..4ceb4f757d4d 100644
--- a/arch/arm/include/asm/sections.h
+++ b/arch/arm/include/asm/sections.h
@@ -6,4 +6,25 @@
extern char _exiprom[];
+extern char __idmap_text_start[];
+extern char __idmap_text_end[];
+extern char __entry_text_start[];
+extern char __entry_text_end[];
+extern char __hyp_idmap_text_start[];
+extern char __hyp_idmap_text_end[];
+
+static inline bool in_entry_text(unsigned long addr)
+{
+ return memory_contains(__entry_text_start, __entry_text_end,
+ (void *)addr, 1);
+}
+
+static inline bool in_idmap_text(unsigned long addr)
+{
+ void *a = (void *)addr;
+ return memory_contains(__idmap_text_start, __idmap_text_end, a, 1) ||
+ memory_contains(__hyp_idmap_text_start, __hyp_idmap_text_end,
+ a, 1);
+}
+
#endif /* _ASM_ARM_SECTIONS_H */
diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index f54a3136aac6..111a1d8a41dd 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -39,18 +39,4 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
return __memset64(p, v, n * 8, v >> 32);
}
-extern void __memzero(void *ptr, __kernel_size_t n);
-
-#define memset(p,v,n) \
- ({ \
- void *__p = (p); size_t __n = n; \
- if ((__n) != 0) { \
- if (__builtin_constant_p((v)) && (v) == 0) \
- __memzero((__p),(__n)); \
- else \
- memset((__p),(v),(__n)); \
- } \
- (__p); \
- })
-
#endif
diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h
index f9a6c5fc3fd1..a00288d75ee6 100644
--- a/arch/arm/include/asm/traps.h
+++ b/arch/arm/include/asm/traps.h
@@ -28,18 +28,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
ptr < (unsigned long)&__irqentry_text_end;
}
-static inline int in_exception_text(unsigned long ptr)
-{
- extern char __exception_text_start[];
- extern char __exception_text_end[];
- int in;
-
- in = ptr >= (unsigned long)&__exception_text_start &&
- ptr < (unsigned long)&__exception_text_end;
-
- return in ? : __in_irqentry_text(ptr);
-}
-
extern void __init early_trap_init(void *);
extern void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame);
extern void ptrace_break(struct task_struct *tsk, struct pt_regs *regs);
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index a91ae499614c..2c3b952be63e 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -20,8 +20,10 @@
#ifndef __ASM_UNIFIED_H
#define __ASM_UNIFIED_H
-#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED)
+#if defined(__ASSEMBLY__)
.syntax unified
+#else
+__asm__(".syntax unified");
#endif
#ifdef CONFIG_CPU_V7M
@@ -64,77 +66,4 @@
#endif /* CONFIG_THUMB2_KERNEL */
-#ifndef CONFIG_ARM_ASM_UNIFIED
-
-/*
- * If the unified assembly syntax isn't used (in ARM mode), these
- * macros expand to an empty string
- */
-#ifdef __ASSEMBLY__
- .macro it, cond
- .endm
- .macro itt, cond
- .endm
- .macro ite, cond
- .endm
- .macro ittt, cond
- .endm
- .macro itte, cond
- .endm
- .macro itet, cond
- .endm
- .macro itee, cond
- .endm
- .macro itttt, cond
- .endm
- .macro ittte, cond
- .endm
- .macro ittet, cond
- .endm
- .macro ittee, cond
- .endm
- .macro itett, cond
- .endm
- .macro itete, cond
- .endm
- .macro iteet, cond
- .endm
- .macro iteee, cond
- .endm
-#else /* !__ASSEMBLY__ */
-__asm__(
-" .macro it, cond\n"
-" .endm\n"
-" .macro itt, cond\n"
-" .endm\n"
-" .macro ite, cond\n"
-" .endm\n"
-" .macro ittt, cond\n"
-" .endm\n"
-" .macro itte, cond\n"
-" .endm\n"
-" .macro itet, cond\n"
-" .endm\n"
-" .macro itee, cond\n"
-" .endm\n"
-" .macro itttt, cond\n"
-" .endm\n"
-" .macro ittte, cond\n"
-" .endm\n"
-" .macro ittet, cond\n"
-" .endm\n"
-" .macro ittee, cond\n"
-" .endm\n"
-" .macro itett, cond\n"
-" .endm\n"
-" .macro itete, cond\n"
-" .endm\n"
-" .macro iteet, cond\n"
-" .endm\n"
-" .macro iteee, cond\n"
-" .endm\n");
-#endif /* __ASSEMBLY__ */
-
-#endif /* CONFIG_ARM_ASM_UNIFIED */
-
#endif /* !__ASM_UNIFIED_H */
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 4d53de308ee0..4d1cc1847edf 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -7,6 +7,7 @@ generated-y += unistd-oabi.h
generated-y += unistd-eabi.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += ioctl.h
generic-y += ipcbuf.h
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 5266fd9ad6b4..783fbb4de5f9 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -92,7 +92,6 @@ EXPORT_SYMBOL(__memset64);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memchr);
-EXPORT_SYMBOL(__memzero);
EXPORT_SYMBOL(mmioset);
EXPORT_SYMBOL(mmiocpy);
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index fbc707626b3e..1752033b0070 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -82,11 +82,7 @@
#endif
.endm
-#ifdef CONFIG_KPROBES
- .section .kprobes.text,"ax",%progbits
-#else
- .text
-#endif
+ .section .entry.text,"ax",%progbits
/*
* Invalid mode handlers
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index e655dcd0a933..3c4f88701f22 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -37,6 +37,7 @@ saved_pc .req lr
#define TRACE(x...)
#endif
+ .section .entry.text,"ax",%progbits
.align 5
#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
/*
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 21dde771a7dd..6e0375e7db05 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -105,8 +105,9 @@ __mmap_switched:
ARM( ldmia r4!, {r0, r1, sp} )
THUMB( ldmia r4!, {r0, r1, r3} )
THUMB( mov sp, r3 )
- sub r1, r1, r0
- bl __memzero @ clear .bss
+ sub r2, r1, r0
+ mov r1, #0
+ bl memset @ clear .bss
ldmia r4, {r0, r1, r2, r3}
str r9, [r0] @ Save processor ID
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index af2a7f1e3103..629e25152c0d 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -44,17 +44,17 @@ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
/* Number of BRP/WRP registers on this CPU. */
-static int core_num_brps;
-static int core_num_wrps;
+static int core_num_brps __ro_after_init;
+static int core_num_wrps __ro_after_init;
/* Debug architecture version. */
-static u8 debug_arch;
+static u8 debug_arch __ro_after_init;
/* Does debug architecture support OS Save and Restore? */
-static bool has_ossr;
+static bool has_ossr __ro_after_init;
/* Maximum supported watchpoint length. */
-static u8 max_watchpoint_len;
+static u8 max_watchpoint_len __ro_after_init;
#define READ_WB_REG_CASE(OP2, M, VAL) \
case ((OP2 << 4) + M): \
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index b4fbf00ee4ad..2da087926ebe 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -379,6 +379,9 @@ asmlinkage void secondary_start_kernel(void)
cpu_init();
+#ifndef CONFIG_MMU
+ setup_vectors_base();
+#endif
pr_debug("CPU%u: Booted secondary processor\n", cpu);
preempt_disable();
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index 65228bf4c6df..a56e7c856ab5 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -3,6 +3,7 @@
#include <linux/sched/debug.h>
#include <linux/stacktrace.h>
+#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/traps.h>
@@ -63,7 +64,6 @@ EXPORT_SYMBOL(walk_stackframe);
#ifdef CONFIG_STACKTRACE
struct stack_trace_data {
struct stack_trace *trace;
- unsigned long last_pc;
unsigned int no_sched_functions;
unsigned int skip;
};
@@ -87,16 +87,7 @@ static int save_trace(struct stackframe *frame, void *d)
if (trace->nr_entries >= trace->max_entries)
return 1;
- /*
- * in_exception_text() is designed to test if the PC is one of
- * the functions which has an exception stack above it, but
- * unfortunately what is in frame->pc is the return LR value,
- * not the saved PC value. So, we need to track the previous
- * frame PC value when doing this.
- */
- addr = data->last_pc;
- data->last_pc = frame->pc;
- if (!in_exception_text(addr))
+ if (!in_entry_text(frame->pc))
return 0;
regs = (struct pt_regs *)frame->sp;
@@ -114,7 +105,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
struct stackframe frame;
data.trace = trace;
- data.last_pc = ULONG_MAX;
data.skip = trace->skip;
data.no_sched_functions = nosched;
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 5cf04888c581..e344bdd2e5ac 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -72,7 +72,7 @@ void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long
printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from);
#endif
- if (in_exception_text(where))
+ if (in_entry_text(from))
dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs));
}
@@ -433,7 +433,7 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
return fn ? fn(regs, instr) : 1;
}
-asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+asmlinkage void do_undefinstr(struct pt_regs *regs)
{
unsigned int instr;
siginfo_t info;
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index ec4b3f94ad80..12b87591eb7c 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -96,9 +96,9 @@ SECTIONS
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
IDMAP_TEXT
- __exception_text_start = .;
- *(.exception.text)
- __exception_text_end = .;
+ __entry_text_start = .;
+ *(.entry.text)
+ __entry_text_end = .;
IRQENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index ee53f6518872..84a1ae3ce46e 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -105,9 +105,9 @@ SECTIONS
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
IDMAP_TEXT
- __exception_text_start = .;
- *(.exception.text)
- __exception_text_end = .;
+ __entry_text_start = .;
+ *(.entry.text)
+ __entry_text_end = .;
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
TEXT_TEXT
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 4cb0b9624d8f..ad25fd1872c7 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -8,7 +8,7 @@
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
delay.o delay-loop.o findbit.o memchr.o memcpy.o \
- memmove.o memset.o memzero.o setbit.o \
+ memmove.o memset.o setbit.o \
strchr.o strrchr.o \
testchangebit.o testclearbit.o testsetbit.o \
ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
deleted file mode 100644
index 0eded952e089..000000000000
--- a/arch/arm/lib/memzero.S
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * linux/arch/arm/lib/memzero.S
- *
- * Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/unwind.h>
-
- .text
- .align 5
- .word 0
-/*
- * Align the pointer in r0. r3 contains the number of bytes that we are
- * mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we
- * don't bother; we use byte stores instead.
- */
-UNWIND( .fnstart )
-1: subs r1, r1, #4 @ 1 do we have enough
- blt 5f @ 1 bytes to align with?
- cmp r3, #2 @ 1
- strltb r2, [r0], #1 @ 1
- strleb r2, [r0], #1 @ 1
- strb r2, [r0], #1 @ 1
- add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted. Try doing the
- * memzero again.
- */
-
-ENTRY(__memzero)
- mov r2, #0 @ 1
- ands r3, r0, #3 @ 1 unaligned?
- bne 1b @ 1
-/*
- * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary.
- */
- cmp r1, #16 @ 1 we can skip this chunk if we
- blt 4f @ 1 have < 16 bytes
-
-#if ! CALGN(1)+0
-
-/*
- * We need an extra register for this loop - save the return address and
- * use the LR
- */
- str lr, [sp, #-4]! @ 1
-UNWIND( .fnend )
-UNWIND( .fnstart )
-UNWIND( .save {lr} )
- mov ip, r2 @ 1
- mov lr, r2 @ 1
-
-3: subs r1, r1, #64 @ 1 write 32 bytes out per loop
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- bgt 3b @ 1
- ldmeqfd sp!, {pc} @ 1/2 quick exit
-/*
- * No need to correct the count; we're only testing bits from now on
- */
- tst r1, #32 @ 1
- stmneia r0!, {r2, r3, ip, lr} @ 4
- stmneia r0!, {r2, r3, ip, lr} @ 4
- tst r1, #16 @ 1 16 bytes or more?
- stmneia r0!, {r2, r3, ip, lr} @ 4
- ldr lr, [sp], #4 @ 1
-UNWIND( .fnend )
-
-#else
-
-/*
- * This version aligns the destination pointer in order to write
- * whole cache lines at once.
- */
-
- stmfd sp!, {r4-r7, lr}
-UNWIND( .fnend )
-UNWIND( .fnstart )
-UNWIND( .save {r4-r7, lr} )
- mov r4, r2
- mov r5, r2
- mov r6, r2
- mov r7, r2
- mov ip, r2
- mov lr, r2
-
- cmp r1, #96
- andgts ip, r0, #31
- ble 3f
-
- rsb ip, ip, #32
- sub r1, r1, ip
- movs ip, ip, lsl #(32 - 4)
- stmcsia r0!, {r4, r5, r6, r7}
- stmmiia r0!, {r4, r5}
- movs ip, ip, lsl #2
- strcs r2, [r0], #4
-
-3: subs r1, r1, #64
- stmgeia r0!, {r2-r7, ip, lr}
- stmgeia r0!, {r2-r7, ip, lr}
- bgt 3b
- ldmeqfd sp!, {r4-r7, pc}
-
- tst r1, #32
- stmneia r0!, {r2-r7, ip, lr}
- tst r1, #16
- stmneia r0!, {r4-r7}
- ldmfd sp!, {r4-r7, lr}
-UNWIND( .fnend )
-
-#endif
-
-UNWIND( .fnstart )
-4: tst r1, #8 @ 1 8 bytes or more?
- stmneia r0!, {r2, r3} @ 2
- tst r1, #4 @ 1 4 bytes or more?
- strne r2, [r0], #4 @ 1
-/*
- * When we get here, we've got less than 4 bytes to zero. We
- * may have an unaligned pointer as well.
- */
-5: tst r1, #2 @ 1 2 bytes or more?
- strneb r2, [r0], #1 @ 1
- strneb r2, [r0], #1 @ 1
- tst r1, #1 @ 1 a byte left over
- strneb r2, [r0], #1 @ 1
- ret lr @ 1
-UNWIND( .fnend )
-ENDPROC(__memzero)
diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c
index 2555f9056a33..cad7ee8f0d6b 100644
--- a/arch/arm/mach-meson/platsmp.c
+++ b/arch/arm/mach-meson/platsmp.c
@@ -102,7 +102,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible,
scu_base = of_iomap(node, 0);
if (!scu_base) {
- pr_err("Couln't map SCU registers\n");
+ pr_err("Couldn't map SCU registers\n");
return;
}
diff --git a/arch/arm/mach-omap2/cm_common.c b/arch/arm/mach-omap2/cm_common.c
index d555791cf349..83c6fa74cc31 100644
--- a/arch/arm/mach-omap2/cm_common.c
+++ b/arch/arm/mach-omap2/cm_common.c
@@ -68,14 +68,17 @@ void __init omap2_set_globals_cm(void __iomem *cm, void __iomem *cm2)
int cm_split_idlest_reg(struct clk_omap_reg *idlest_reg, s16 *prcm_inst,
u8 *idlest_reg_id)
{
+ int ret;
if (!cm_ll_data->split_idlest_reg) {
WARN_ONCE(1, "cm: %s: no low-level function defined\n",
__func__);
return -EINVAL;
}
- return cm_ll_data->split_idlest_reg(idlest_reg, prcm_inst,
+ ret = cm_ll_data->split_idlest_reg(idlest_reg, prcm_inst,
idlest_reg_id);
+ *prcm_inst -= cm_base.offset;
+ return ret;
}
/**
@@ -337,6 +340,7 @@ int __init omap2_cm_base_init(void)
if (mem) {
mem->pa = res.start + data->offset;
mem->va = data->mem + data->offset;
+ mem->offset = data->offset;
}
data->np = np;
diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c
index 5ac122e88f67..fa7f308c9027 100644
--- a/arch/arm/mach-omap2/omap-secure.c
+++ b/arch/arm/mach-omap2/omap-secure.c
@@ -73,6 +73,27 @@ phys_addr_t omap_secure_ram_mempool_base(void)
return omap_secure_memblock_base;
}
+#if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_PM)
+u32 omap3_save_secure_ram(void __iomem *addr, int size)
+{
+ u32 ret;
+ u32 param[5];
+
+ if (size != OMAP3_SAVE_SECURE_RAM_SZ)
+ return OMAP3_SAVE_SECURE_RAM_SZ;
+
+ param[0] = 4; /* Number of arguments */
+ param[1] = __pa(addr); /* Physical address for saving */
+ param[2] = 0;
+ param[3] = 1;
+ param[4] = 1;
+
+ ret = save_secure_ram_context(__pa(param));
+
+ return ret;
+}
+#endif
+
/**
* rx51_secure_dispatcher: Routine to dispatch secure PPA API calls
* @idx: The PPA API index
diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h
index bae263fba640..c509cde71f93 100644
--- a/arch/arm/mach-omap2/omap-secure.h
+++ b/arch/arm/mach-omap2/omap-secure.h
@@ -31,6 +31,8 @@
/* Maximum Secure memory storage size */
#define OMAP_SECURE_RAM_STORAGE (88 * SZ_1K)
+#define OMAP3_SAVE_SECURE_RAM_SZ 0x803F
+
/* Secure low power HAL API index */
#define OMAP4_HAL_SAVESECURERAM_INDEX 0x1a
#define OMAP4_HAL_SAVEHW_INDEX 0x1b
@@ -65,6 +67,8 @@ extern u32 omap_smc2(u32 id, u32 falg, u32 pargs);
extern u32 omap_smc3(u32 id, u32 process, u32 flag, u32 pargs);
extern phys_addr_t omap_secure_ram_mempool_base(void);
extern int omap_secure_ram_reserve_memblock(void);
+extern u32 save_secure_ram_context(u32 args_pa);
+extern u32 omap3_save_secure_ram(void __iomem *save_regs, int size);
extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs,
u32 arg1, u32 arg2, u32 arg3, u32 arg4);
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index d45cbfdb4be6..f0388058b7da 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -391,10 +391,8 @@ omap_device_copy_resources(struct omap_hwmod *oh,
const char *name;
int error, irq = 0;
- if (!oh || !oh->od || !oh->od->pdev) {
- error = -EINVAL;
- goto error;
- }
+ if (!oh || !oh->od || !oh->od->pdev)
+ return -EINVAL;
np = oh->od->pdev->dev.of_node;
if (!np) {
@@ -516,8 +514,10 @@ struct platform_device __init *omap_device_build(const char *pdev_name,
goto odbs_exit1;
od = omap_device_alloc(pdev, &oh, 1);
- if (IS_ERR(od))
+ if (IS_ERR(od)) {
+ ret = PTR_ERR(od);
goto odbs_exit1;
+ }
ret = platform_device_add_data(pdev, pdata, pdata_len);
if (ret)
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index d2106ae4410a..52c9d585b44d 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -1646,6 +1646,7 @@ static struct omap_hwmod omap3xxx_mmc3_hwmod = {
.main_clk = "mmchs3_fck",
.prcm = {
.omap2 = {
+ .module_offs = CORE_MOD,
.prcm_reg_id = 1,
.module_bit = OMAP3430_EN_MMC3_SHIFT,
.idlest_reg_id = 1,
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h
index b668719b9b25..8e30772cfe32 100644
--- a/arch/arm/mach-omap2/pm.h
+++ b/arch/arm/mach-omap2/pm.h
@@ -81,10 +81,6 @@ extern unsigned int omap3_do_wfi_sz;
/* ... and its pointer from SRAM after copy */
extern void (*omap3_do_wfi_sram)(void);
-/* save_secure_ram_context function pointer and size, for copy to SRAM */
-extern int save_secure_ram_context(u32 *addr);
-extern unsigned int save_secure_ram_context_sz;
-
extern void omap3_save_scratchpad_contents(void);
#define PM_RTA_ERRATUM_i608 (1 << 0)
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 841ba19d64a6..36c55547137c 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -48,6 +48,7 @@
#include "prm3xxx.h"
#include "pm.h"
#include "sdrc.h"
+#include "omap-secure.h"
#include "sram.h"
#include "control.h"
#include "vc.h"
@@ -66,7 +67,6 @@ struct power_state {
static LIST_HEAD(pwrst_list);
-static int (*_omap_save_secure_sram)(u32 *addr);
void (*omap3_do_wfi_sram)(void);
static struct powerdomain *mpu_pwrdm, *neon_pwrdm;
@@ -121,8 +121,8 @@ static void omap3_save_secure_ram_context(void)
* will hang the system.
*/
pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON);
- ret = _omap_save_secure_sram((u32 *)(unsigned long)
- __pa(omap3_secure_ram_storage));
+ ret = omap3_save_secure_ram(omap3_secure_ram_storage,
+ OMAP3_SAVE_SECURE_RAM_SZ);
pwrdm_set_next_pwrst(mpu_pwrdm, mpu_next_state);
/* Following is for error tracking, it should not happen */
if (ret) {
@@ -434,15 +434,10 @@ static int __init pwrdms_setup(struct powerdomain *pwrdm, void *unused)
*
* The minimum set of functions is pushed to SRAM for execution:
* - omap3_do_wfi for erratum i581 WA,
- * - save_secure_ram_context for security extensions.
*/
void omap_push_sram_idle(void)
{
omap3_do_wfi_sram = omap_sram_push(omap3_do_wfi, omap3_do_wfi_sz);
-
- if (omap_type() != OMAP2_DEVICE_TYPE_GP)
- _omap_save_secure_sram = omap_sram_push(save_secure_ram_context,
- save_secure_ram_context_sz);
}
static void __init pm_errata_configure(void)
@@ -553,7 +548,7 @@ int __init omap3_pm_init(void)
clkdm_add_wkdep(neon_clkdm, mpu_clkdm);
if (omap_type() != OMAP2_DEVICE_TYPE_GP) {
omap3_secure_ram_storage =
- kmalloc(0x803F, GFP_KERNEL);
+ kmalloc(OMAP3_SAVE_SECURE_RAM_SZ, GFP_KERNEL);
if (!omap3_secure_ram_storage)
pr_err("Memory allocation failed when allocating for secure sram context\n");
diff --git a/arch/arm/mach-omap2/prcm-common.h b/arch/arm/mach-omap2/prcm-common.h
index 0592b23902c6..0977da0dab76 100644
--- a/arch/arm/mach-omap2/prcm-common.h
+++ b/arch/arm/mach-omap2/prcm-common.h
@@ -528,6 +528,7 @@ struct omap_prcm_irq_setup {
struct omap_domain_base {
u32 pa;
void __iomem *va;
+ s16 offset;
};
/**
diff --git a/arch/arm/mach-omap2/prm33xx.c b/arch/arm/mach-omap2/prm33xx.c
index d2c5bcabdbeb..ebaf80d72a10 100644
--- a/arch/arm/mach-omap2/prm33xx.c
+++ b/arch/arm/mach-omap2/prm33xx.c
@@ -176,17 +176,6 @@ static int am33xx_pwrdm_read_pwrst(struct powerdomain *pwrdm)
return v;
}
-static int am33xx_pwrdm_read_prev_pwrst(struct powerdomain *pwrdm)
-{
- u32 v;
-
- v = am33xx_prm_read_reg(pwrdm->prcm_offs, pwrdm->pwrstst_offs);
- v &= AM33XX_LASTPOWERSTATEENTERED_MASK;
- v >>= AM33XX_LASTPOWERSTATEENTERED_SHIFT;
-
- return v;
-}
-
static int am33xx_pwrdm_set_lowpwrstchange(struct powerdomain *pwrdm)
{
am33xx_prm_rmw_reg_bits(AM33XX_LOWPOWERSTATECHANGE_MASK,
@@ -357,7 +346,6 @@ struct pwrdm_ops am33xx_pwrdm_operations = {
.pwrdm_set_next_pwrst = am33xx_pwrdm_set_next_pwrst,
.pwrdm_read_next_pwrst = am33xx_pwrdm_read_next_pwrst,
.pwrdm_read_pwrst = am33xx_pwrdm_read_pwrst,
- .pwrdm_read_prev_pwrst = am33xx_pwrdm_read_prev_pwrst,
.pwrdm_set_logic_retst = am33xx_pwrdm_set_logic_retst,
.pwrdm_read_logic_pwrst = am33xx_pwrdm_read_logic_pwrst,
.pwrdm_read_logic_retst = am33xx_pwrdm_read_logic_retst,
diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S
index fa5fd24f524c..22daf4efed68 100644
--- a/arch/arm/mach-omap2/sleep34xx.S
+++ b/arch/arm/mach-omap2/sleep34xx.S
@@ -93,20 +93,13 @@ ENTRY(enable_omap3630_toggle_l2_on_restore)
ENDPROC(enable_omap3630_toggle_l2_on_restore)
/*
- * Function to call rom code to save secure ram context. This gets
- * relocated to SRAM, so it can be all in .data section. Otherwise
- * we need to initialize api_params separately.
+ * Function to call rom code to save secure ram context.
+ *
+ * r0 = physical address of the parameters
*/
- .data
- .align 3
ENTRY(save_secure_ram_context)
stmfd sp!, {r4 - r11, lr} @ save registers on stack
- adr r3, api_params @ r3 points to parameters
- str r0, [r3,#0x4] @ r0 has sdram address
- ldr r12, high_mask
- and r3, r3, r12
- ldr r12, sram_phy_addr_mask
- orr r3, r3, r12
+ mov r3, r0 @ physical address of parameters
mov r0, #25 @ set service ID for PPA
mov r12, r0 @ copy secure service ID in r12
mov r1, #0 @ set task id for ROM code in r1
@@ -120,18 +113,7 @@ ENTRY(save_secure_ram_context)
nop
nop
ldmfd sp!, {r4 - r11, pc}
- .align
-sram_phy_addr_mask:
- .word SRAM_BASE_P
-high_mask:
- .word 0xffff
-api_params:
- .word 0x4, 0x0, 0x0, 0x1, 0x1
ENDPROC(save_secure_ram_context)
-ENTRY(save_secure_ram_context_sz)
- .word . - save_secure_ram_context
-
- .text
/*
* ======================
diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig
index 36e3c79f4973..07df3a59b13f 100644
--- a/arch/arm/mach-sa1100/Kconfig
+++ b/arch/arm/mach-sa1100/Kconfig
@@ -5,6 +5,7 @@ menu "SA11x0 Implementations"
config SA1100_ASSABET
bool "Assabet"
select ARM_SA1110_CPUFREQ
+ select GPIO_REG
help
Say Y here if you are using the Intel(R) StrongARM(R) SA-1110
Microprocessor Development Board (also known as the Assabet).
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index d28ecb9ef172..f68241d995f2 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
+#include <linux/gpio/gpio-reg.h>
#include <linux/ioport.h>
#include <linux/platform_data/sa11x0-serial.h>
#include <linux/serial_core.h>
@@ -61,20 +62,45 @@
unsigned long SCR_value = ASSABET_SCR_INIT;
EXPORT_SYMBOL(SCR_value);
-static unsigned long BCR_value = ASSABET_BCR_DB1110;
+static struct gpio_chip *assabet_bcr_gc;
+
+static const char *assabet_names[] = {
+ "cf_pwr", "cf_gfx_reset", "nsoft_reset", "irda_fsel",
+ "irda_md0", "irda_md1", "stereo_loopback", "ncf_bus_on",
+ "audio_pwr_on", "light_pwr_on", "lcd16data", "lcd_pwr_on",
+ "rs232_on", "nred_led", "ngreen_led", "vib_on",
+ "com_dtr", "com_rts", "radio_wake_mod", "i2c_enab",
+ "tvir_enab", "qmute", "radio_pwr_on", "spkr_off",
+ "rs232_valid", "com_dcd", "com_cts", "com_dsr",
+ "radio_cts", "radio_dsr", "radio_dcd", "radio_ri",
+};
+/* The old deprecated interface */
void ASSABET_BCR_frob(unsigned int mask, unsigned int val)
{
- unsigned long flags;
+ unsigned long m = mask, v = val;
- local_irq_save(flags);
- BCR_value = (BCR_value & ~mask) | val;
- ASSABET_BCR = BCR_value;
- local_irq_restore(flags);
+ assabet_bcr_gc->set_multiple(assabet_bcr_gc, &m, &v);
}
-
EXPORT_SYMBOL(ASSABET_BCR_frob);
+static int __init assabet_init_gpio(void __iomem *reg, u32 def_val)
+{
+ struct gpio_chip *gc;
+
+ writel_relaxed(def_val, reg);
+
+ gc = gpio_reg_init(NULL, reg, -1, 32, "assabet", 0xff000000, def_val,
+ assabet_names, NULL, NULL);
+
+ if (IS_ERR(gc))
+ return PTR_ERR(gc);
+
+ assabet_bcr_gc = gc;
+
+ return gc->base;
+}
+
/*
* The codec reset goes to three devices, so we need to release
* the rest when any one of these requests it. However, that
@@ -146,7 +172,7 @@ static void adv7171_write(unsigned reg, unsigned val)
unsigned gpdr = GPDR;
unsigned gplr = GPLR;
- ASSABET_BCR = BCR_value | ASSABET_BCR_AUDIO_ON;
+ ASSABET_BCR_frob(ASSABET_BCR_AUDIO_ON, ASSABET_BCR_AUDIO_ON);
udelay(100);
GPCR = SDA | SCK | MOD; /* clear L3 mode to ensure UDA1341 doesn't respond */
@@ -457,14 +483,6 @@ static void __init assabet_init(void)
sa11x0_ppc_configure_mcp();
if (machine_has_neponset()) {
- /*
- * Angel sets this, but other bootloaders may not.
- *
- * This must precede any driver calls to BCR_set()
- * or BCR_clear().
- */
- ASSABET_BCR = BCR_value = ASSABET_BCR_DB1111;
-
#ifndef CONFIG_ASSABET_NEPONSET
printk( "Warning: Neponset detected but full support "
"hasn't been configured in the kernel\n" );
@@ -748,12 +766,31 @@ static int __init assabet_leds_init(void)
fs_initcall(assabet_leds_init);
#endif
+void __init assabet_init_irq(void)
+{
+ u32 def_val;
+
+ sa1100_init_irq();
+
+ if (machine_has_neponset())
+ def_val = ASSABET_BCR_DB1111;
+ else
+ def_val = ASSABET_BCR_DB1110;
+
+ /*
+ * Angel sets this, but other bootloaders may not.
+ *
+ * This must precede any driver calls to BCR_set() or BCR_clear().
+ */
+ assabet_init_gpio((void *)&ASSABET_BCR, def_val);
+}
+
MACHINE_START(ASSABET, "Intel-Assabet")
.atag_offset = 0x100,
.fixup = fixup_assabet,
.map_io = assabet_map_io,
.nr_irqs = SA1100_NR_IRQS,
- .init_irq = sa1100_init_irq,
+ .init_irq = assabet_init_irq,
.init_time = sa1100_timer_init,
.init_machine = assabet_init,
.init_late = sa11x0_init_late,
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index a61a2432766b..b1823f445358 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -3,6 +3,8 @@
* linux/arch/arm/mach-sa1100/neponset.c
*/
#include <linux/err.h>
+#include <linux/gpio/driver.h>
+#include <linux/gpio/gpio-reg.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/irq.h>
@@ -45,10 +47,13 @@
#define IRR_USAR (1 << 1)
#define IRR_SA1111 (1 << 2)
+#define NCR_NGPIO 7
+
#define MDM_CTL0_RTS1 (1 << 0)
#define MDM_CTL0_DTR1 (1 << 1)
#define MDM_CTL0_RTS2 (1 << 2)
#define MDM_CTL0_DTR2 (1 << 3)
+#define MDM_CTL0_NGPIO 4
#define MDM_CTL1_CTS1 (1 << 0)
#define MDM_CTL1_DSR1 (1 << 1)
@@ -56,80 +61,87 @@
#define MDM_CTL1_CTS2 (1 << 3)
#define MDM_CTL1_DSR2 (1 << 4)
#define MDM_CTL1_DCD2 (1 << 5)
+#define MDM_CTL1_NGPIO 6
#define AUD_SEL_1341 (1 << 0)
#define AUD_MUTE_1341 (1 << 1)
+#define AUD_NGPIO 2
extern void sa1110_mb_disable(void);
+#define to_neponset_gpio_chip(x) container_of(x, struct neponset_gpio_chip, gc)
+
+static const char *neponset_ncr_names[] = {
+ "gp01_off", "tp_power", "ms_power", "enet_osc",
+ "spi_kb_wk_up", "a0vpp", "a1vpp"
+};
+
+static const char *neponset_mdmctl0_names[] = {
+ "rts3", "dtr3", "rts1", "dtr1",
+};
+
+static const char *neponset_mdmctl1_names[] = {
+ "cts3", "dsr3", "dcd3", "cts1", "dsr1", "dcd1"
+};
+
+static const char *neponset_aud_names[] = {
+ "sel_1341", "mute_1341",
+};
+
struct neponset_drvdata {
void __iomem *base;
struct platform_device *sa1111;
struct platform_device *smc91x;
unsigned irq_base;
-#ifdef CONFIG_PM_SLEEP
- u32 ncr0;
- u32 mdm_ctl_0;
-#endif
+ struct gpio_chip *gpio[4];
};
-static void __iomem *nep_base;
+static struct neponset_drvdata *nep;
void neponset_ncr_frob(unsigned int mask, unsigned int val)
{
- void __iomem *base = nep_base;
-
- if (base) {
- unsigned long flags;
- unsigned v;
-
- local_irq_save(flags);
- v = readb_relaxed(base + NCR_0);
- writeb_relaxed((v & ~mask) | val, base + NCR_0);
- local_irq_restore(flags);
- } else {
- WARN(1, "nep_base unset\n");
- }
+ struct neponset_drvdata *n = nep;
+ unsigned long m = mask, v = val;
+
+ if (nep)
+ n->gpio[0]->set_multiple(n->gpio[0], &m, &v);
+ else
+ WARN(1, "nep unset\n");
}
EXPORT_SYMBOL(neponset_ncr_frob);
static void neponset_set_mctrl(struct uart_port *port, u_int mctrl)
{
- void __iomem *base = nep_base;
- u_int mdm_ctl0;
+ struct neponset_drvdata *n = nep;
+ unsigned long mask, val = 0;
- if (!base)
+ if (!n)
return;
- mdm_ctl0 = readb_relaxed(base + MDM_CTL_0);
if (port->mapbase == _Ser1UTCR0) {
- if (mctrl & TIOCM_RTS)
- mdm_ctl0 &= ~MDM_CTL0_RTS2;
- else
- mdm_ctl0 |= MDM_CTL0_RTS2;
-
- if (mctrl & TIOCM_DTR)
- mdm_ctl0 &= ~MDM_CTL0_DTR2;
- else
- mdm_ctl0 |= MDM_CTL0_DTR2;
+ mask = MDM_CTL0_RTS2 | MDM_CTL0_DTR2;
+
+ if (!(mctrl & TIOCM_RTS))
+ val |= MDM_CTL0_RTS2;
+
+ if (!(mctrl & TIOCM_DTR))
+ val |= MDM_CTL0_DTR2;
} else if (port->mapbase == _Ser3UTCR0) {
- if (mctrl & TIOCM_RTS)
- mdm_ctl0 &= ~MDM_CTL0_RTS1;
- else
- mdm_ctl0 |= MDM_CTL0_RTS1;
-
- if (mctrl & TIOCM_DTR)
- mdm_ctl0 &= ~MDM_CTL0_DTR1;
- else
- mdm_ctl0 |= MDM_CTL0_DTR1;
+ mask = MDM_CTL0_RTS1 | MDM_CTL0_DTR1;
+
+ if (!(mctrl & TIOCM_RTS))
+ val |= MDM_CTL0_RTS1;
+
+ if (!(mctrl & TIOCM_DTR))
+ val |= MDM_CTL0_DTR1;
}
- writeb_relaxed(mdm_ctl0, base + MDM_CTL_0);
+ n->gpio[1]->set_multiple(n->gpio[1], &mask, &val);
}
static u_int neponset_get_mctrl(struct uart_port *port)
{
- void __iomem *base = nep_base;
+ void __iomem *base = nep->base;
u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR;
u_int mdm_ctl1;
@@ -231,6 +243,22 @@ static struct irq_chip nochip = {
.irq_unmask = nochip_noop,
};
+static int neponset_init_gpio(struct gpio_chip **gcp,
+ struct device *dev, const char *label, void __iomem *reg,
+ unsigned num, bool in, const char *const * names)
+{
+ struct gpio_chip *gc;
+
+ gc = gpio_reg_init(dev, reg, -1, num, label, in ? 0xffffffff : 0,
+ readl_relaxed(reg), names, NULL, NULL);
+ if (IS_ERR(gc))
+ return PTR_ERR(gc);
+
+ *gcp = gc;
+
+ return 0;
+}
+
static struct sa1111_platform_data sa1111_info = {
.disable_devs = SA1111_DEVID_PS2_MSE,
};
@@ -274,7 +302,7 @@ static int neponset_probe(struct platform_device *dev)
};
int ret, irq;
- if (nep_base)
+ if (nep)
return -EBUSY;
irq = ret = platform_get_irq(dev, 0);
@@ -330,6 +358,22 @@ static int neponset_probe(struct platform_device *dev)
irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
irq_set_chained_handler_and_data(irq, neponset_irq_handler, d);
+ /* Disable GPIO 0/1 drivers so the buttons work on the Assabet */
+ writeb_relaxed(NCR_GP01_OFF, d->base + NCR_0);
+
+ neponset_init_gpio(&d->gpio[0], &dev->dev, "neponset-ncr",
+ d->base + NCR_0, NCR_NGPIO, false,
+ neponset_ncr_names);
+ neponset_init_gpio(&d->gpio[1], &dev->dev, "neponset-mdm-ctl0",
+ d->base + MDM_CTL_0, MDM_CTL0_NGPIO, false,
+ neponset_mdmctl0_names);
+ neponset_init_gpio(&d->gpio[2], &dev->dev, "neponset-mdm-ctl1",
+ d->base + MDM_CTL_1, MDM_CTL1_NGPIO, true,
+ neponset_mdmctl1_names);
+ neponset_init_gpio(&d->gpio[3], &dev->dev, "neponset-aud-ctl",
+ d->base + AUD_CTL, AUD_NGPIO, false,
+ neponset_aud_names);
+
/*
* We would set IRQ_GPIO25 to be a wake-up IRQ, but unfortunately
* something on the Neponset activates this IRQ on sleep (eth?)
@@ -340,16 +384,13 @@ static int neponset_probe(struct platform_device *dev)
dev_info(&dev->dev, "Neponset daughter board, providing IRQ%u-%u\n",
d->irq_base, d->irq_base + NEP_IRQ_NR - 1);
- nep_base = d->base;
+ nep = d;
sa1100_register_uart_fns(&neponset_port_fns);
/* Ensure that the memory bus request/grant signals are setup */
sa1110_mb_disable();
- /* Disable GPIO 0/1 drivers so the buttons work on the Assabet */
- writeb_relaxed(NCR_GP01_OFF, d->base + NCR_0);
-
sa1111_resources[0].parent = sa1111_res;
sa1111_resources[1].start = d->irq_base + NEP_IRQ_SA1111;
sa1111_resources[1].end = d->irq_base + NEP_IRQ_SA1111;
@@ -385,7 +426,7 @@ static int neponset_remove(struct platform_device *dev)
platform_device_unregister(d->smc91x);
irq_set_chained_handler(irq, NULL);
irq_free_descs(d->irq_base, NEP_IRQ_NR);
- nep_base = NULL;
+ nep = NULL;
iounmap(d->base);
kfree(d);
@@ -393,30 +434,22 @@ static int neponset_remove(struct platform_device *dev)
}
#ifdef CONFIG_PM_SLEEP
-static int neponset_suspend(struct device *dev)
-{
- struct neponset_drvdata *d = dev_get_drvdata(dev);
-
- d->ncr0 = readb_relaxed(d->base + NCR_0);
- d->mdm_ctl_0 = readb_relaxed(d->base + MDM_CTL_0);
-
- return 0;
-}
-
static int neponset_resume(struct device *dev)
{
struct neponset_drvdata *d = dev_get_drvdata(dev);
+ int i, ret = 0;
- writeb_relaxed(d->ncr0, d->base + NCR_0);
- writeb_relaxed(d->mdm_ctl_0, d->base + MDM_CTL_0);
+ for (i = 0; i < ARRAY_SIZE(d->gpio); i++) {
+ ret = gpio_reg_resume(d->gpio[i]);
+ if (ret)
+ break;
+ }
- return 0;
+ return ret;
}
static const struct dev_pm_ops neponset_pm_ops = {
- .suspend_noirq = neponset_suspend,
.resume_noirq = neponset_resume,
- .freeze_noirq = neponset_suspend,
.restore_noirq = neponset_resume,
};
#define PM_OPS &neponset_pm_ops
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index fd9077a74fce..7f14acf67caf 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -909,6 +909,14 @@ config OUTER_CACHE_SYNC
The outer cache has a outer_cache_fns.sync function pointer
that can be used to drain the write buffer of the outer cache.
+config CACHE_B15_RAC
+ bool "Enable the Broadcom Brahma-B15 read-ahead cache controller"
+ depends on ARCH_BRCMSTB
+ default y
+ help
+ This option enables the Broadcom Brahma-B15 read-ahead cache
+ controller. If disabled, the read-ahead cache remains off.
+
config CACHE_FEROCEON_L2
bool "Enable the Feroceon L2 cache controller"
depends on ARCH_MV78XX0 || ARCH_MVEBU
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 01bcc33f59e3..9dbb84923e12 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -13,7 +13,8 @@ obj-y += nommu.o
obj-$(CONFIG_ARM_MPU) += pmsa-v7.o
endif
-obj-$(CONFIG_ARM_PTDUMP) += dump.o
+obj-$(CONFIG_ARM_PTDUMP_CORE) += dump.o
+obj-$(CONFIG_ARM_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_MODULES) += proc-syms.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
@@ -103,6 +104,7 @@ AFLAGS_proc-v6.o :=-Wa,-march=armv6
AFLAGS_proc-v7.o :=-Wa,-march=armv7-a
obj-$(CONFIG_OUTER_CACHE) += l2c-common.o
+obj-$(CONFIG_CACHE_B15_RAC) += cache-b15-rac.o
obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o
obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o
obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o
diff --git a/arch/arm/mm/cache-b15-rac.c b/arch/arm/mm/cache-b15-rac.c
new file mode 100644
index 000000000000..d9586ba2e63c
--- /dev/null
+++ b/arch/arm/mm/cache-b15-rac.c
@@ -0,0 +1,356 @@
+/*
+ * Broadcom Brahma-B15 CPU read-ahead cache management functions
+ *
+ * Copyright (C) 2015-2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/bitops.h>
+#include <linux/of_address.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/syscore_ops.h>
+#include <linux/reboot.h>
+
+#include <asm/cacheflush.h>
+#include <asm/hardware/cache-b15-rac.h>
+
+extern void v7_flush_kern_cache_all(void);
+
+/* RAC register offsets, relative to the HIF_CPU_BIUCTRL register base */
+#define RAC_CONFIG0_REG (0x78)
+#define RACENPREF_MASK (0x3)
+#define RACPREFINST_SHIFT (0)
+#define RACENINST_SHIFT (2)
+#define RACPREFDATA_SHIFT (4)
+#define RACENDATA_SHIFT (6)
+#define RAC_CPU_SHIFT (8)
+#define RACCFG_MASK (0xff)
+#define RAC_CONFIG1_REG (0x7c)
+#define RAC_FLUSH_REG (0x80)
+#define FLUSH_RAC (1 << 0)
+
+/* Bitmask to enable instruction and data prefetching with a 256-bytes stride */
+#define RAC_DATA_INST_EN_MASK (1 << RACPREFINST_SHIFT | \
+ RACENPREF_MASK << RACENINST_SHIFT | \
+ 1 << RACPREFDATA_SHIFT | \
+ RACENPREF_MASK << RACENDATA_SHIFT)
+
+#define RAC_ENABLED 0
+/* Special state where we want to bypass the spinlock and call directly
+ * into the v7 cache maintenance operations during suspend/resume
+ */
+#define RAC_SUSPENDED 1
+
+static void __iomem *b15_rac_base;
+static DEFINE_SPINLOCK(rac_lock);
+
+static u32 rac_config0_reg;
+
+/* Initialization flag to avoid checking for b15_rac_base, and to prevent
+ * multi-platform kernels from crashing here as well.
+ */
+static unsigned long b15_rac_flags;
+
+static inline u32 __b15_rac_disable(void)
+{
+ u32 val = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
+ __raw_writel(0, b15_rac_base + RAC_CONFIG0_REG);
+ dmb();
+ return val;
+}
+
+static inline void __b15_rac_flush(void)
+{
+ u32 reg;
+
+ __raw_writel(FLUSH_RAC, b15_rac_base + RAC_FLUSH_REG);
+ do {
+ /* This dmb() is required to force the Bus Interface Unit
+ * to clean oustanding writes, and forces an idle cycle
+ * to be inserted.
+ */
+ dmb();
+ reg = __raw_readl(b15_rac_base + RAC_FLUSH_REG);
+ } while (reg & FLUSH_RAC);
+}
+
+static inline u32 b15_rac_disable_and_flush(void)
+{
+ u32 reg;
+
+ reg = __b15_rac_disable();
+ __b15_rac_flush();
+ return reg;
+}
+
+static inline void __b15_rac_enable(u32 val)
+{
+ __raw_writel(val, b15_rac_base + RAC_CONFIG0_REG);
+ /* dsb() is required here to be consistent with __flush_icache_all() */
+ dsb();
+}
+
+#define BUILD_RAC_CACHE_OP(name, bar) \
+void b15_flush_##name(void) \
+{ \
+ unsigned int do_flush; \
+ u32 val = 0; \
+ \
+ if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) { \
+ v7_flush_##name(); \
+ bar; \
+ return; \
+ } \
+ \
+ spin_lock(&rac_lock); \
+ do_flush = test_bit(RAC_ENABLED, &b15_rac_flags); \
+ if (do_flush) \
+ val = b15_rac_disable_and_flush(); \
+ v7_flush_##name(); \
+ if (!do_flush) \
+ bar; \
+ else \
+ __b15_rac_enable(val); \
+ spin_unlock(&rac_lock); \
+}
+
+#define nobarrier
+
+/* The readahead cache present in the Brahma-B15 CPU is a special piece of
+ * hardware after the integrated L2 cache of the B15 CPU complex whose purpose
+ * is to prefetch instruction and/or data with a line size of either 64 bytes
+ * or 256 bytes. The rationale is that the data-bus of the CPU interface is
+ * optimized for 256-bytes transactions, and enabling the readahead cache
+ * provides a significant performance boost we want it enabled (typically
+ * twice the performance for a memcpy benchmark application).
+ *
+ * The readahead cache is transparent for Modified Virtual Addresses
+ * cache maintenance operations: ICIMVAU, DCIMVAC, DCCMVAC, DCCMVAU and
+ * DCCIMVAC.
+ *
+ * It is however not transparent for the following cache maintenance
+ * operations: DCISW, DCCSW, DCCISW, ICIALLUIS and ICIALLU which is precisely
+ * what we are patching here with our BUILD_RAC_CACHE_OP here.
+ */
+BUILD_RAC_CACHE_OP(kern_cache_all, nobarrier);
+
+static void b15_rac_enable(void)
+{
+ unsigned int cpu;
+ u32 enable = 0;
+
+ for_each_possible_cpu(cpu)
+ enable |= (RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT));
+
+ b15_rac_disable_and_flush();
+ __b15_rac_enable(enable);
+}
+
+static int b15_rac_reboot_notifier(struct notifier_block *nb,
+ unsigned long action,
+ void *data)
+{
+ /* During kexec, we are not yet migrated on the boot CPU, so we need to
+ * make sure we are SMP safe here. Once the RAC is disabled, flag it as
+ * suspended such that the hotplug notifier returns early.
+ */
+ if (action == SYS_RESTART) {
+ spin_lock(&rac_lock);
+ b15_rac_disable_and_flush();
+ clear_bit(RAC_ENABLED, &b15_rac_flags);
+ set_bit(RAC_SUSPENDED, &b15_rac_flags);
+ spin_unlock(&rac_lock);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block b15_rac_reboot_nb = {
+ .notifier_call = b15_rac_reboot_notifier,
+};
+
+/* The CPU hotplug case is the most interesting one, we basically need to make
+ * sure that the RAC is disabled for the entire system prior to having a CPU
+ * die, in particular prior to this dying CPU having exited the coherency
+ * domain.
+ *
+ * Once this CPU is marked dead, we can safely re-enable the RAC for the
+ * remaining CPUs in the system which are still online.
+ *
+ * Offlining a CPU is the problematic case, onlining a CPU is not much of an
+ * issue since the CPU and its cache-level hierarchy will start filling with
+ * the RAC disabled, so L1 and L2 only.
+ *
+ * In this function, we should NOT have to verify any unsafe setting/condition
+ * b15_rac_base:
+ *
+ * It is protected by the RAC_ENABLED flag which is cleared by default, and
+ * being cleared when initial procedure is done. b15_rac_base had been set at
+ * that time.
+ *
+ * RAC_ENABLED:
+ * There is a small timing windows, in b15_rac_init(), between
+ * cpuhp_setup_state_*()
+ * ...
+ * set RAC_ENABLED
+ * However, there is no hotplug activity based on the Linux booting procedure.
+ *
+ * Since we have to disable RAC for all cores, we keep RAC on as long as as
+ * possible (disable it as late as possible) to gain the cache benefit.
+ *
+ * Thus, dying/dead states are chosen here
+ *
+ * We are choosing not do disable the RAC on a per-CPU basis, here, if we did
+ * we would want to consider disabling it as early as possible to benefit the
+ * other active CPUs.
+ */
+
+/* Running on the dying CPU */
+static int b15_rac_dying_cpu(unsigned int cpu)
+{
+ /* During kexec/reboot, the RAC is disabled via the reboot notifier
+ * return early here.
+ */
+ if (test_bit(RAC_SUSPENDED, &b15_rac_flags))
+ return 0;
+
+ spin_lock(&rac_lock);
+
+ /* Indicate that we are starting a hotplug procedure */
+ __clear_bit(RAC_ENABLED, &b15_rac_flags);
+
+ /* Disable the readahead cache and save its value to a global */
+ rac_config0_reg = b15_rac_disable_and_flush();
+
+ spin_unlock(&rac_lock);
+
+ return 0;
+}
+
+/* Running on a non-dying CPU */
+static int b15_rac_dead_cpu(unsigned int cpu)
+{
+ /* During kexec/reboot, the RAC is disabled via the reboot notifier
+ * return early here.
+ */
+ if (test_bit(RAC_SUSPENDED, &b15_rac_flags))
+ return 0;
+
+ spin_lock(&rac_lock);
+
+ /* And enable it */
+ __b15_rac_enable(rac_config0_reg);
+ __set_bit(RAC_ENABLED, &b15_rac_flags);
+
+ spin_unlock(&rac_lock);
+
+ return 0;
+}
+
+static int b15_rac_suspend(void)
+{
+ /* Suspend the read-ahead cache oeprations, forcing our cache
+ * implementation to fallback to the regular ARMv7 calls.
+ *
+ * We are guaranteed to be running on the boot CPU at this point and
+ * with every other CPU quiesced, so setting RAC_SUSPENDED is not racy
+ * here.
+ */
+ rac_config0_reg = b15_rac_disable_and_flush();
+ set_bit(RAC_SUSPENDED, &b15_rac_flags);
+
+ return 0;
+}
+
+static void b15_rac_resume(void)
+{
+ /* Coming out of a S3 suspend/resume cycle, the read-ahead cache
+ * register RAC_CONFIG0_REG will be restored to its default value, make
+ * sure we re-enable it and set the enable flag, we are also guaranteed
+ * to run on the boot CPU, so not racy again.
+ */
+ __b15_rac_enable(rac_config0_reg);
+ clear_bit(RAC_SUSPENDED, &b15_rac_flags);
+}
+
+static struct syscore_ops b15_rac_syscore_ops = {
+ .suspend = b15_rac_suspend,
+ .resume = b15_rac_resume,
+};
+
+static int __init b15_rac_init(void)
+{
+ struct device_node *dn;
+ int ret = 0, cpu;
+ u32 reg, en_mask = 0;
+
+ dn = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl");
+ if (!dn)
+ return -ENODEV;
+
+ if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n"))
+ goto out;
+
+ b15_rac_base = of_iomap(dn, 0);
+ if (!b15_rac_base) {
+ pr_err("failed to remap BIU control base\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = register_reboot_notifier(&b15_rac_reboot_nb);
+ if (ret) {
+ pr_err("failed to register reboot notifier\n");
+ iounmap(b15_rac_base);
+ goto out;
+ }
+
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
+ "arm/cache-b15-rac:dead",
+ NULL, b15_rac_dead_cpu);
+ if (ret)
+ goto out_unmap;
+
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
+ "arm/cache-b15-rac:dying",
+ NULL, b15_rac_dying_cpu);
+ if (ret)
+ goto out_cpu_dead;
+ }
+
+ if (IS_ENABLED(CONFIG_PM_SLEEP))
+ register_syscore_ops(&b15_rac_syscore_ops);
+
+ spin_lock(&rac_lock);
+ reg = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
+ for_each_possible_cpu(cpu)
+ en_mask |= ((1 << RACPREFDATA_SHIFT) << (cpu * RAC_CPU_SHIFT));
+ WARN(reg & en_mask, "Read-ahead cache not previously disabled\n");
+
+ b15_rac_enable();
+ set_bit(RAC_ENABLED, &b15_rac_flags);
+ spin_unlock(&rac_lock);
+
+ pr_info("Broadcom Brahma-B15 readahead cache at: 0x%p\n",
+ b15_rac_base + RAC_CONFIG0_REG);
+
+ goto out;
+
+out_cpu_dead:
+ cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING);
+out_unmap:
+ unregister_reboot_notifier(&b15_rac_reboot_nb);
+ iounmap(b15_rac_base);
+out:
+ of_node_put(dn);
+ return ret;
+}
+arch_initcall(b15_rac_init);
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index de78109d002d..215df435bfb9 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -15,6 +15,7 @@
#include <asm/assembler.h>
#include <asm/errno.h>
#include <asm/unwind.h>
+#include <asm/hardware/cache-b15-rac.h>
#include "proc-macros.S"
@@ -446,3 +447,23 @@ ENDPROC(v7_dma_unmap_area)
@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions v7
+
+ /* The Broadcom Brahma-B15 read-ahead cache requires some modifications
+ * to the v7_cache_fns, we only override the ones we need
+ */
+#ifndef CONFIG_CACHE_B15_RAC
+ globl_equ b15_flush_kern_cache_all, v7_flush_kern_cache_all
+#endif
+ globl_equ b15_flush_icache_all, v7_flush_icache_all
+ globl_equ b15_flush_kern_cache_louis, v7_flush_kern_cache_louis
+ globl_equ b15_flush_user_cache_all, v7_flush_user_cache_all
+ globl_equ b15_flush_user_cache_range, v7_flush_user_cache_range
+ globl_equ b15_coherent_kern_range, v7_coherent_kern_range
+ globl_equ b15_coherent_user_range, v7_coherent_user_range
+ globl_equ b15_flush_kern_dcache_area, v7_flush_kern_dcache_area
+
+ globl_equ b15_dma_map_area, v7_dma_map_area
+ globl_equ b15_dma_unmap_area, v7_dma_unmap_area
+ globl_equ b15_dma_flush_range, v7_dma_flush_range
+
+ define_cache_functions b15
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index fc3b44028cfb..084779c5c893 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -21,11 +21,7 @@
#include <asm/fixmap.h>
#include <asm/memory.h>
#include <asm/pgtable.h>
-
-struct addr_marker {
- unsigned long start_address;
- const char *name;
-};
+#include <asm/ptdump.h>
static struct addr_marker address_markers[] = {
{ MODULES_VADDR, "Modules" },
@@ -38,12 +34,26 @@ static struct addr_marker address_markers[] = {
{ -1, NULL },
};
+#define pt_dump_seq_printf(m, fmt, args...) \
+({ \
+ if (m) \
+ seq_printf(m, fmt, ##args); \
+})
+
+#define pt_dump_seq_puts(m, fmt) \
+({ \
+ if (m) \
+ seq_printf(m, fmt); \
+})
+
struct pg_state {
struct seq_file *seq;
const struct addr_marker *marker;
unsigned long start_address;
unsigned level;
u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
const char *current_domain;
};
@@ -52,6 +62,8 @@ struct prot_bits {
u64 val;
const char *set;
const char *clear;
+ bool ro_bit;
+ bool nx_bit;
};
static const struct prot_bits pte_bits[] = {
@@ -65,11 +77,13 @@ static const struct prot_bits pte_bits[] = {
.val = L_PTE_RDONLY,
.set = "ro",
.clear = "RW",
+ .ro_bit = true,
}, {
.mask = L_PTE_XN,
.val = L_PTE_XN,
.set = "NX",
.clear = "x ",
+ .nx_bit = true,
}, {
.mask = L_PTE_SHARED,
.val = L_PTE_SHARED,
@@ -133,11 +147,13 @@ static const struct prot_bits section_bits[] = {
.val = L_PMD_SECT_RDONLY | PMD_SECT_AP2,
.set = "ro",
.clear = "RW",
+ .ro_bit = true,
#elif __LINUX_ARM_ARCH__ >= 6
{
.mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
.val = PMD_SECT_APX | PMD_SECT_AP_WRITE,
.set = " ro",
+ .ro_bit = true,
}, {
.mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
.val = PMD_SECT_AP_WRITE,
@@ -156,6 +172,7 @@ static const struct prot_bits section_bits[] = {
.mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
.val = 0,
.set = " ro",
+ .ro_bit = true,
}, {
.mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
.val = PMD_SECT_AP_WRITE,
@@ -174,6 +191,7 @@ static const struct prot_bits section_bits[] = {
.val = PMD_SECT_XN,
.set = "NX",
.clear = "x ",
+ .nx_bit = true,
}, {
.mask = PMD_SECT_S,
.val = PMD_SECT_S,
@@ -186,6 +204,8 @@ struct pg_level {
const struct prot_bits *bits;
size_t num;
u64 mask;
+ const struct prot_bits *ro_bit;
+ const struct prot_bits *nx_bit;
};
static struct pg_level pg_level[] = {
@@ -214,10 +234,27 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t
s = bits->clear;
if (s)
- seq_printf(st->seq, " %s", s);
+ pt_dump_seq_printf(st->seq, " %s", s);
}
}
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+ if (!st->check_wx)
+ return;
+ if ((st->current_prot & pg_level[st->level].ro_bit->mask) ==
+ pg_level[st->level].ro_bit->val)
+ return;
+ if ((st->current_prot & pg_level[st->level].nx_bit->mask) ==
+ pg_level[st->level].nx_bit->val)
+ return;
+
+ WARN_ONCE(1, "arm/mm: Found insecure W+X mapping at address %pS\n",
+ (void *)st->start_address);
+
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
static void note_page(struct pg_state *st, unsigned long addr,
unsigned int level, u64 val, const char *domain)
{
@@ -228,7 +265,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
st->level = level;
st->current_prot = prot;
st->current_domain = domain;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
} else if (prot != st->current_prot || level != st->level ||
domain != st->current_domain ||
addr >= st->marker[1].start_address) {
@@ -236,7 +273,8 @@ static void note_page(struct pg_state *st, unsigned long addr,
unsigned long delta;
if (st->current_prot) {
- seq_printf(st->seq, "0x%08lx-0x%08lx ",
+ note_prot_wx(st, addr);
+ pt_dump_seq_printf(st->seq, "0x%08lx-0x%08lx ",
st->start_address, addr);
delta = (addr - st->start_address) >> 10;
@@ -244,17 +282,19 @@ static void note_page(struct pg_state *st, unsigned long addr,
delta >>= 10;
unit++;
}
- seq_printf(st->seq, "%9lu%c", delta, *unit);
+ pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit);
if (st->current_domain)
- seq_printf(st->seq, " %s", st->current_domain);
+ pt_dump_seq_printf(st->seq, " %s",
+ st->current_domain);
if (pg_level[st->level].bits)
dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num);
- seq_printf(st->seq, "\n");
+ pt_dump_seq_printf(st->seq, "\n");
}
if (addr >= st->marker[1].start_address) {
st->marker++;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n",
+ st->marker->name);
}
st->start_address = addr;
st->current_prot = prot;
@@ -335,61 +375,82 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
}
}
-static void walk_pgd(struct seq_file *m)
+static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
+ unsigned long start)
{
- pgd_t *pgd = swapper_pg_dir;
- struct pg_state st;
- unsigned long addr;
+ pgd_t *pgd = pgd_offset(mm, 0UL);
unsigned i;
-
- memset(&st, 0, sizeof(st));
- st.seq = m;
- st.marker = address_markers;
+ unsigned long addr;
for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
- addr = i * PGDIR_SIZE;
+ addr = start + i * PGDIR_SIZE;
if (!pgd_none(*pgd)) {
- walk_pud(&st, pgd, addr);
+ walk_pud(st, pgd, addr);
} else {
- note_page(&st, addr, 1, pgd_val(*pgd), NULL);
+ note_page(st, addr, 1, pgd_val(*pgd), NULL);
}
}
-
- note_page(&st, 0, 0, 0, NULL);
}
-static int ptdump_show(struct seq_file *m, void *v)
+void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info)
{
- walk_pgd(m);
- return 0;
-}
+ struct pg_state st = {
+ .seq = m,
+ .marker = info->markers,
+ .check_wx = false,
+ };
-static int ptdump_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ptdump_show, NULL);
+ walk_pgd(&st, info->mm, info->base_addr);
+ note_page(&st, 0, 0, 0, NULL);
}
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int ptdump_init(void)
+static void ptdump_initialize(void)
{
- struct dentry *pe;
unsigned i, j;
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
if (pg_level[i].bits)
- for (j = 0; j < pg_level[i].num; j++)
+ for (j = 0; j < pg_level[i].num; j++) {
pg_level[i].mask |= pg_level[i].bits[j].mask;
+ if (pg_level[i].bits[j].ro_bit)
+ pg_level[i].ro_bit = &pg_level[i].bits[j];
+ if (pg_level[i].bits[j].nx_bit)
+ pg_level[i].nx_bit = &pg_level[i].bits[j];
+ }
address_markers[2].start_address = VMALLOC_START;
+}
+
+static struct ptdump_info kernel_ptdump_info = {
+ .mm = &init_mm,
+ .markers = address_markers,
+ .base_addr = 0,
+};
- pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
- &ptdump_fops);
- return pe ? 0 : -ENOMEM;
+void ptdump_check_wx(void)
+{
+ struct pg_state st = {
+ .seq = NULL,
+ .marker = (struct addr_marker[]) {
+ { 0, NULL},
+ { -1, NULL},
+ },
+ .check_wx = true,
+ };
+
+ walk_pgd(&st, &init_mm, 0);
+ note_page(&st, 0, 0, 0, NULL);
+ if (st.wx_pages)
+ pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
+ st.wx_pages);
+ else
+ pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+}
+
+static int ptdump_init(void)
+{
+ ptdump_initialize();
+ return ptdump_debugfs_register(&kernel_ptdump_info,
+ "kernel_page_tables");
}
__initcall(ptdump_init);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 42f585379e19..b75eada23d0a 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -21,7 +21,6 @@
#include <linux/highmem.h>
#include <linux/perf_event.h>
-#include <asm/exception.h>
#include <asm/pgtable.h>
#include <asm/system_misc.h>
#include <asm/system_info.h>
@@ -545,7 +544,7 @@ hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *)
/*
* Dispatch a data abort to the relevant handler.
*/
-asmlinkage void __exception
+asmlinkage void
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
@@ -578,7 +577,7 @@ hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *
ifsr_info[nr].name = name;
}
-asmlinkage void __exception
+asmlinkage void
do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
{
const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 10bfba85eb96..1d1edd064199 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -16,8 +16,8 @@
* are not supported on any CPU using the idmap tables as its current
* page tables.
*/
-pgd_t *idmap_pgd;
-long long arch_phys_to_idmap_offset;
+pgd_t *idmap_pgd __ro_after_init;
+long long arch_phys_to_idmap_offset __ro_after_init;
#ifdef CONFIG_ARM_LPAE
static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index a1f11a7ee81b..bd6f4513539a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -36,6 +36,7 @@
#include <asm/system_info.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
+#include <asm/ptdump.h>
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
@@ -738,6 +739,7 @@ static int __mark_rodata_ro(void *unused)
void mark_rodata_ro(void)
{
stop_machine(__mark_rodata_ro, NULL, NULL);
+ debug_checkwx();
}
void set_kernel_text_rw(void)
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index e4370810f4f1..7c087961b7ce 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -31,7 +31,7 @@ struct mpu_rgn_info mpu_rgn_info;
#ifdef CONFIG_CPU_CP15
#ifdef CONFIG_CPU_HIGH_VECTOR
-static unsigned long __init setup_vectors_base(void)
+unsigned long setup_vectors_base(void)
{
unsigned long reg = get_cr();
@@ -57,7 +57,7 @@ static inline bool security_extensions_enabled(void)
return 0;
}
-static unsigned long __init setup_vectors_base(void)
+unsigned long setup_vectors_base(void)
{
unsigned long base = 0, reg = get_cr();
diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c
index 976df60ac426..e2853bfff74e 100644
--- a/arch/arm/mm/pmsa-v7.c
+++ b/arch/arm/mm/pmsa-v7.c
@@ -6,6 +6,7 @@
#include <linux/bitops.h>
#include <linux/memblock.h>
+#include <linux/string.h>
#include <asm/cacheflush.h>
#include <asm/cp15.h>
@@ -296,6 +297,7 @@ void __init adjust_lowmem_bounds_mpu(void)
}
}
+ memset(mem, 0, sizeof(mem));
num = allocate_region(mem_start, specified_mem_size, mem_max_regions, mem);
for (i = 0; i < num; i++) {
@@ -433,7 +435,7 @@ void __init mpu_setup(void)
/* Background */
err |= mpu_setup_region(region++, 0, 32,
- MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA,
+ MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0RW,
0, false);
#ifdef CONFIG_XIP_KERNEL
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 01d64c0b2563..d55d493f9a1e 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -567,7 +567,7 @@ __v7_setup_stack:
/*
* Standard v7 proc info content
*/
-.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions
+.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions, cache_fns = v7_cache_fns
ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags)
ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
@@ -583,7 +583,7 @@ __v7_setup_stack:
.long \proc_fns
.long v7wbi_tlb_fns
.long v6_user_fns
- .long v7_cache_fns
+ .long \cache_fns
.endm
#ifndef CONFIG_ARM_LPAE
@@ -678,7 +678,7 @@ __v7_ca15mp_proc_info:
__v7_b15mp_proc_info:
.long 0x420f00f0
.long 0xff0ffff0
- __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup
+ __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, cache_fns = b15_cache_fns
.size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info
/*
diff --git a/arch/arm/mm/ptdump_debugfs.c b/arch/arm/mm/ptdump_debugfs.c
new file mode 100644
index 000000000000..be8d87be4b93
--- /dev/null
+++ b/arch/arm/mm/ptdump_debugfs.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <asm/ptdump.h>
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ struct ptdump_info *info = m->private;
+
+ ptdump_walk_pgd(m, info);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ptdump_show, inode->i_private);
+}
+
+static const struct file_operations ptdump_fops = {
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int ptdump_debugfs_register(struct ptdump_info *info, const char *name)
+{
+ struct dentry *pe;
+
+ pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
+ return pe ? 0 : -ENOMEM;
+
+}
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 52d1cd14fda4..e90cc8a08186 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -32,6 +32,7 @@
#include <linux/percpu.h>
#include <linux/bug.h>
#include <asm/patch.h>
+#include <asm/sections.h>
#include "../decode-arm.h"
#include "../decode-thumb.h"
@@ -64,9 +65,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
int is;
const struct decode_checker **checkers;
- if (in_exception_text(addr))
- return -EINVAL;
-
#ifdef CONFIG_THUMB2_KERNEL
thumb = true;
addr &= ~1; /* Bit 0 would normally be set to indicate Thumb code */
@@ -680,3 +678,13 @@ int __init arch_init_kprobes()
#endif
return 0;
}
+
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+ void *a = (void *)addr;
+
+ return __in_irqentry_text(addr) ||
+ in_entry_text(addr) ||
+ in_idmap_text(addr) ||
+ memory_contains(__kprobes_text_start, __kprobes_text_end, a, 1);
+}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a93339f5178f..c9a7e9e1414f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065
If unsure, say Y.
-
config SOCIONEXT_SYNQUACER_PREITS
bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
default y
@@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802
a 128kB offset to be applied to the target address in this commands.
If unsure, say Y.
+
+config QCOM_FALKOR_ERRATUM_E1041
+ bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
+ default y
+ help
+ Falkor CPU may speculatively fetch instructions from an improper
+ memory location when MMU translation is changed from SCTLR_ELn[M]=1
+ to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem.
+
+ If unsure, say Y.
+
endmenu
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index d7c22d51bc50..4aa50b9b26bc 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -12,6 +12,7 @@ subdir-y += cavium
subdir-y += exynos
subdir-y += freescale
subdir-y += hisilicon
+subdir-y += lg
subdir-y += marvell
subdir-y += mediatek
subdir-y += nvidia
@@ -22,5 +23,4 @@ subdir-y += rockchip
subdir-y += socionext
subdir-y += sprd
subdir-y += xilinx
-subdir-y += lg
subdir-y += zte
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
index ead895a4e9a5..1fb8b9d6cb4e 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
@@ -753,12 +753,12 @@
&uart_B {
clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>;
- clock-names = "xtal", "core", "baud";
+ clock-names = "xtal", "pclk", "baud";
};
&uart_C {
clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>;
- clock-names = "xtal", "core", "baud";
+ clock-names = "xtal", "pclk", "baud";
};
&vpu {
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
index 8ed981f59e5a..6524b89e7115 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi
@@ -688,7 +688,7 @@
&uart_A {
clocks = <&xtal>, <&clkc CLKID_UART0>, <&xtal>;
- clock-names = "xtal", "core", "baud";
+ clock-names = "xtal", "pclk", "baud";
};
&uart_AO {
@@ -703,12 +703,12 @@
&uart_B {
clocks = <&xtal>, <&clkc CLKID_UART1>, <&xtal>;
- clock-names = "xtal", "core", "baud";
+ clock-names = "xtal", "pclk", "baud";
};
&uart_C {
clocks = <&xtal>, <&clkc CLKID_UART2>, <&xtal>;
- clock-names = "xtal", "core", "baud";
+ clock-names = "xtal", "pclk", "baud";
};
&vpu {
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
index dd7193acc7df..6bdefb26b329 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld11-ref.dts
@@ -40,7 +40,6 @@
};
&ethsc {
- interrupt-parent = <&gpio>;
interrupts = <0 8>;
};
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
index d99e3731358c..254d6795c67e 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts
@@ -40,7 +40,6 @@
};
&ethsc {
- interrupt-parent = <&gpio>;
interrupts = <0 8>;
};
diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
index 864feeb35180..f9f06fcfb94a 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
+++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3-ref.dts
@@ -38,8 +38,7 @@
};
&ethsc {
- interrupt-parent = <&gpio>;
- interrupts = <0 8>;
+ interrupts = <4 8>;
};
&serial0 {
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index aef72d886677..8b168280976f 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -512,4 +512,14 @@ alternative_else_nop_endif
#endif
.endm
+/**
+ * Errata workaround prior to disable MMU. Insert an ISB immediately prior
+ * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
+ */
+ .macro pre_disable_mmu_workaround
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
+ isb
+#endif
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index ac67cfc2585a..060e3a4008ab 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -60,6 +60,9 @@ enum ftr_type {
#define FTR_VISIBLE true /* Feature visible to the user space */
#define FTR_HIDDEN false /* Feature is hidden from the user */
+#define FTR_VISIBLE_IF_IS_ENABLED(config) \
+ (IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)
+
struct arm64_ftr_bits {
bool sign; /* Value is signed ? */
bool visible;
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 235e77d98261..cbf08d7cbf30 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -91,6 +91,7 @@
#define BRCM_CPU_PART_VULCAN 0x516
#define QCOM_CPU_PART_FALKOR_V1 0x800
+#define QCOM_CPU_PART_FALKOR 0xC00
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
@@ -99,6 +100,7 @@
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
+#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 650344d01124..c4cd5081d78b 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -132,11 +132,9 @@ static inline void efi_set_pgd(struct mm_struct *mm)
* Defer the switch to the current thread's TTBR0_EL1
* until uaccess_enable(). Restore the current
* thread's saved ttbr0 corresponding to its active_mm
- * (if different from init_mm).
*/
cpu_set_reserved_ttbr0();
- if (current->active_mm != &init_mm)
- update_saved_ttbr0(current, current->active_mm);
+ update_saved_ttbr0(current, current->active_mm);
}
}
}
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 7f069ff37f06..715d395ef45b 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -170,8 +170,7 @@
#define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
#define VTTBR_X (VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 674912d7a571..ea6cb5b24258 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -370,6 +370,7 @@ void kvm_arm_init_debug(void);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 3257895a9b5e..9d155fa9a507 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -156,29 +156,21 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; })
-/*
- * This is called when "tsk" is about to enter lazy TLB mode.
- *
- * mm: describes the currently active mm context
- * tsk: task which is entering lazy tlb
- * cpu: cpu number which is entering lazy tlb
- *
- * tsk->mm will be NULL
- */
-static inline void
-enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-}
-
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void update_saved_ttbr0(struct task_struct *tsk,
struct mm_struct *mm)
{
- if (system_uses_ttbr0_pan()) {
- BUG_ON(mm->pgd == swapper_pg_dir);
- task_thread_info(tsk)->ttbr0 =
- virt_to_phys(mm->pgd) | ASID(mm) << 48;
- }
+ u64 ttbr;
+
+ if (!system_uses_ttbr0_pan())
+ return;
+
+ if (mm == &init_mm)
+ ttbr = __pa_symbol(empty_zero_page);
+ else
+ ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
+
+ task_thread_info(tsk)->ttbr0 = ttbr;
}
#else
static inline void update_saved_ttbr0(struct task_struct *tsk,
@@ -187,6 +179,16 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
}
#endif
+static inline void
+enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+ /*
+ * We don't actually care about the ttbr0 mapping, so point it at the
+ * zero page.
+ */
+ update_saved_ttbr0(tsk, &init_mm);
+}
+
static inline void __switch_mm(struct mm_struct *next)
{
unsigned int cpu = smp_processor_id();
@@ -214,11 +216,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
* Update the saved TTBR0_EL1 of the scheduled-in task as the previous
* value may have not been initialised yet (activate_mm caller) or the
* ASID has changed since the last run (following the context switch
- * of another thread of the same process). Avoid setting the reserved
- * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit).
+ * of another thread of the same process).
*/
- if (next != &init_mm)
- update_saved_ttbr0(tsk, next);
+ update_saved_ttbr0(tsk, next);
}
#define deactivate_mm(tsk,mm) do { } while (0)
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 8d5cbec17d80..f9ccc36d3dc3 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -18,6 +18,7 @@
#define __ASM_PERF_EVENT_H
#include <asm/stack_pointer.h>
+#include <asm/ptrace.h>
#define ARMV8_PMU_MAX_COUNTERS 32
#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
@@ -79,6 +80,7 @@ struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
#endif
#define perf_arch_fetch_caller_regs(regs, __ip) { \
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 149d05fb9421..bdcc7f1c9d06 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -42,6 +42,8 @@
#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
#include <linux/mmdebug.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
extern void __pte_error(const char *file, int line, unsigned long val);
extern void __pmd_error(const char *file, int line, unsigned long val);
@@ -149,12 +151,20 @@ static inline pte_t pte_mkwrite(pte_t pte)
static inline pte_t pte_mkclean(pte_t pte)
{
- return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+ pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+ pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+ return pte;
}
static inline pte_t pte_mkdirty(pte_t pte)
{
- return set_pte_bit(pte, __pgprot(PTE_DIRTY));
+ pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+ if (pte_write(pte))
+ pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+ return pte;
}
static inline pte_t pte_mkold(pte_t pte)
@@ -207,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
}
}
-struct mm_struct;
-struct vm_area_struct;
-
extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
/*
@@ -238,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
* hardware updates of the pte (ptep_set_access_flags safely changes
* valid ptes without going through an invalid entry).
*/
- if (pte_valid(*ptep) && pte_valid(pte)) {
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+ (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
VM_WARN_ONCE(!pte_young(pte),
"%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(*ptep), pte_val(pte));
@@ -641,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
- * ptep_set_wrprotect - mark read-only while preserving the hardware update of
- * the Access Flag.
+ * ptep_set_wrprotect - mark read-only while trasferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
*/
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
pte_t old_pte, pte;
- /*
- * ptep_set_wrprotect() is only called on CoW mappings which are
- * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
- * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
- * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
- * protection_map[]. There is no race with the hardware update of the
- * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
- * is set.
- */
- VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
- "%s: potential race with hardware DBM", __func__);
pte = READ_ONCE(*ptep);
do {
old_pte = pte;
+ /*
+ * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+ * clear), set the PTE_DIRTY bit.
+ */
+ if (pte_hw_dirty(pte))
+ pte = pte_mkdirty(pte);
pte = pte_wrprotect(pte);
pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
pte_val(old_pte), pte_val(pte));
diff --git a/arch/arm64/include/uapi/asm/bpf_perf_event.h b/arch/arm64/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..b551b741653d
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 65f42d257414..2a752cb2a0f3 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart)
mrs x12, sctlr_el1
ldr x13, =SCTLR_ELx_FLAGS
bic x12, x12, x13
+ pre_disable_mmu_workaround
msr sctlr_el1, x12
isb
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c5ba0097887f..a73a5928f09b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 4e6ad355bd05..6b9736c3fb56 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -96,6 +96,7 @@ ENTRY(entry)
mrs x0, sctlr_el2
bic x0, x0, #1 << 0 // clear SCTLR.M
bic x0, x0, #1 << 2 // clear SCTLR.C
+ pre_disable_mmu_workaround
msr sctlr_el2, x0
isb
b 2f
@@ -103,6 +104,7 @@ ENTRY(entry)
mrs x0, sctlr_el1
bic x0, x0, #1 << 0 // clear SCTLR.M
bic x0, x0, #1 << 2 // clear SCTLR.C
+ pre_disable_mmu_workaround
msr sctlr_el1, x0
isb
2:
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5084e699447a..fae81f7964b4 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -114,7 +114,12 @@
* returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
*/
-static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state);
+struct fpsimd_last_state_struct {
+ struct fpsimd_state *st;
+ bool sve_in_use;
+};
+
+static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
/* Default VL for tasks that don't set it explicitly: */
static int sve_default_vl = -1;
@@ -905,7 +910,7 @@ void fpsimd_thread_switch(struct task_struct *next)
*/
struct fpsimd_state *st = &next->thread.fpsimd_state;
- if (__this_cpu_read(fpsimd_last_state) == st
+ if (__this_cpu_read(fpsimd_last_state.st) == st
&& st->cpu == smp_processor_id())
clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
else
@@ -992,6 +997,21 @@ void fpsimd_signal_preserve_current_state(void)
}
/*
+ * Associate current's FPSIMD context with this cpu
+ * Preemption must be disabled when calling this function.
+ */
+static void fpsimd_bind_to_cpu(void)
+{
+ struct fpsimd_last_state_struct *last =
+ this_cpu_ptr(&fpsimd_last_state);
+ struct fpsimd_state *st = &current->thread.fpsimd_state;
+
+ last->st = st;
+ last->sve_in_use = test_thread_flag(TIF_SVE);
+ st->cpu = smp_processor_id();
+}
+
+/*
* Load the userland FPSIMD state of 'current' from memory, but only if the
* FPSIMD state already held in the registers is /not/ the most recent FPSIMD
* state of 'current'
@@ -1004,11 +1024,8 @@ void fpsimd_restore_current_state(void)
local_bh_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
- struct fpsimd_state *st = &current->thread.fpsimd_state;
-
task_fpsimd_load();
- __this_cpu_write(fpsimd_last_state, st);
- st->cpu = smp_processor_id();
+ fpsimd_bind_to_cpu();
}
local_bh_enable();
@@ -1026,18 +1043,14 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
local_bh_disable();
- current->thread.fpsimd_state = *state;
+ current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
if (system_supports_sve() && test_thread_flag(TIF_SVE))
fpsimd_to_sve(current);
task_fpsimd_load();
- if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
- struct fpsimd_state *st = &current->thread.fpsimd_state;
-
- __this_cpu_write(fpsimd_last_state, st);
- st->cpu = smp_processor_id();
- }
+ if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
+ fpsimd_bind_to_cpu();
local_bh_enable();
}
@@ -1052,7 +1065,7 @@ void fpsimd_flush_task_state(struct task_struct *t)
static inline void fpsimd_flush_cpu_state(void)
{
- __this_cpu_write(fpsimd_last_state, NULL);
+ __this_cpu_write(fpsimd_last_state.st, NULL);
}
/*
@@ -1065,14 +1078,10 @@ static inline void fpsimd_flush_cpu_state(void)
#ifdef CONFIG_ARM64_SVE
void sve_flush_cpu_state(void)
{
- struct fpsimd_state *const fpstate = __this_cpu_read(fpsimd_last_state);
- struct task_struct *tsk;
-
- if (!fpstate)
- return;
+ struct fpsimd_last_state_struct const *last =
+ this_cpu_ptr(&fpsimd_last_state);
- tsk = container_of(fpstate, struct task_struct, thread.fpsimd_state);
- if (test_tsk_thread_flag(tsk, TIF_SVE))
+ if (last->st && last->sve_in_use)
fpsimd_flush_cpu_state();
}
#endif /* CONFIG_ARM64_SVE */
@@ -1267,7 +1276,7 @@ static inline void fpsimd_pm_init(void) { }
#ifdef CONFIG_HOTPLUG_CPU
static int fpsimd_cpu_dead(unsigned int cpu)
{
- per_cpu(fpsimd_last_state, cpu) = NULL;
+ per_cpu(fpsimd_last_state.st, cpu) = NULL;
return 0;
}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 67e86a0f57ac..e3cb9fbf96b6 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -750,6 +750,7 @@ __primary_switch:
* to take into account by discarding the current kernel mapping and
* creating a new one.
*/
+ pre_disable_mmu_workaround
msr sctlr_el1, x20 // disable the MMU
isb
bl __create_page_tables // recreate kernel mapping
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 749f81779420..74bb56f656ef 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -28,6 +28,7 @@
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/smp.h>
+#include <linux/uaccess.h>
#include <asm/compat.h>
#include <asm/current.h>
@@ -36,7 +37,6 @@
#include <asm/traps.h>
#include <asm/cputype.h>
#include <asm/system_misc.h>
-#include <asm/uaccess.h>
/* Breakpoint currently in use for each BRP. */
static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b2adcce7bc18..6b7dcf4310ac 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -314,6 +314,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
clear_tsk_thread_flag(p, TIF_SVE);
p->thread.sve_state = NULL;
+ /*
+ * In case p was allocated the same task_struct pointer as some
+ * other recently-exited task, make sure p is disassociated from
+ * any cpu that may have run that now-exited task recently.
+ * Otherwise we could erroneously skip reloading the FPSIMD
+ * registers for p.
+ */
+ fpsimd_flush_task_state(p);
+
if (likely(!(p->flags & PF_KTHREAD))) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index ce704a4aeadd..f407e422a720 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel)
mrs x0, sctlr_el2
ldr x1, =SCTLR_ELx_FLAGS
bic x0, x0, x1
+ pre_disable_mmu_workaround
msr sctlr_el2, x0
isb
1:
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index dbadfaf850a7..fa63b28c65e0 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -221,3 +221,24 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
}
}
}
+
+
+/*
+ * After successfully emulating an instruction, we might want to
+ * return to user space with a KVM_EXIT_DEBUG. We can only do this
+ * once the emulation is complete, though, so for userspace emulations
+ * we have to wait until we have re-entered KVM before calling this
+ * helper.
+ *
+ * Return true (and set exit_reason) to return to userspace or false
+ * if no further action is required.
+ */
+bool kvm_arm_handle_step_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.hsr = ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT;
+ return true;
+ }
+ return false;
+}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index b71247995469..304203fa9e33 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -28,6 +28,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_psci.h>
+#include <asm/debug-monitors.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -187,14 +188,46 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
}
/*
+ * We may be single-stepping an emulated instruction. If the emulation
+ * has been completed in the kernel, we can return to userspace with a
+ * KVM_EXIT_DEBUG, otherwise userspace needs to complete its
+ * emulation first.
+ */
+static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int handled;
+
+ /*
+ * See ARM ARM B1.14.1: "Hyp traps on instructions
+ * that fail their condition code check"
+ */
+ if (!kvm_condition_valid(vcpu)) {
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ handled = 1;
+ } else {
+ exit_handle_fn exit_handler;
+
+ exit_handler = kvm_get_exit_handler(vcpu);
+ handled = exit_handler(vcpu, run);
+ }
+
+ /*
+ * kvm_arm_handle_step_debug() sets the exit_reason on the kvm_run
+ * structure if we need to return to userspace.
+ */
+ if (handled > 0 && kvm_arm_handle_step_debug(vcpu, run))
+ handled = 0;
+
+ return handled;
+}
+
+/*
* Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
* proper exit to userspace.
*/
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index)
{
- exit_handle_fn exit_handler;
-
if (ARM_SERROR_PENDING(exception_index)) {
u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu));
@@ -220,20 +253,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
return 1;
case ARM_EXCEPTION_EL1_SERROR:
kvm_inject_vabt(vcpu);
- return 1;
- case ARM_EXCEPTION_TRAP:
- /*
- * See ARM ARM B1.14.1: "Hyp traps on instructions
- * that fail their condition code check"
- */
- if (!kvm_condition_valid(vcpu)) {
- kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ /* We may still need to return for single-step */
+ if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
+ && kvm_arm_handle_step_debug(vcpu, run))
+ return 0;
+ else
return 1;
- }
-
- exit_handler = kvm_get_exit_handler(vcpu);
-
- return exit_handler(vcpu, run);
+ case ARM_EXCEPTION_TRAP:
+ return handle_trap_exceptions(vcpu, run);
case ARM_EXCEPTION_HYP_GONE:
/*
* EL2 has been reset to the hyp-stub. This happens when a guest
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 3f9615582377..870828c364c5 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -151,6 +151,7 @@ reset:
mrs x5, sctlr_el2
ldr x6, =SCTLR_ELx_FLAGS
bic x5, x5, x6 // Clear SCTL_M and etc
+ pre_disable_mmu_workaround
msr sctlr_el2, x5
isb
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index 321c9c05dd9e..f4363d40e2cd 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
{
u64 reg;
+ /* Clear pmscr in case of early return */
+ *pmscr_el1 = 0;
+
/* SPE present on this CPU? */
if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
ID_AA64DFR0_PMSVER_SHIFT))
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 525c01f48867..f7c651f3a8c0 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -22,6 +22,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
static bool __hyp_text __fpsimd_enabled_nvhe(void)
{
@@ -269,7 +270,11 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
return true;
}
-static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
+/* Skip an instruction which has been emulated. Returns true if
+ * execution can continue or false if we need to exit hyp mode because
+ * single-step was in effect.
+ */
+static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
{
*vcpu_pc(vcpu) = read_sysreg_el2(elr);
@@ -282,6 +287,14 @@ static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
}
write_sysreg_el2(*vcpu_pc(vcpu), elr);
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ vcpu->arch.fault.esr_el2 =
+ (ESR_ELx_EC_SOFTSTP_LOW << ESR_ELx_EC_SHIFT) | 0x22;
+ return false;
+ } else {
+ return true;
+ }
}
int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
@@ -342,13 +355,21 @@ again:
int ret = __vgic_v2_perform_cpuif_access(vcpu);
if (ret == 1) {
- __skip_instr(vcpu);
- goto again;
+ if (__skip_instr(vcpu))
+ goto again;
+ else
+ exit_code = ARM_EXCEPTION_TRAP;
}
if (ret == -1) {
- /* Promote an illegal access to an SError */
- __skip_instr(vcpu);
+ /* Promote an illegal access to an
+ * SError. If we would be returning
+ * due to single-step clear the SS
+ * bit so handle_exit knows what to
+ * do after dealing with the error.
+ */
+ if (!__skip_instr(vcpu))
+ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
exit_code = ARM_EXCEPTION_EL1_SERROR;
}
@@ -363,8 +384,10 @@ again:
int ret = __vgic_v3_perform_cpuif_access(vcpu);
if (ret == 1) {
- __skip_instr(vcpu);
- goto again;
+ if (__skip_instr(vcpu))
+ goto again;
+ else
+ exit_code = ARM_EXCEPTION_TRAP;
}
/* 0 falls through to be handled out of EL2 */
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index ca74a2aace42..7b60d62ac593 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -389,7 +389,7 @@ void ptdump_check_wx(void)
.check_wx = true,
};
- walk_pgd(&st, &init_mm, 0);
+ walk_pgd(&st, &init_mm, VA_START);
note_page(&st, 0, 0, 0);
if (st.wx_pages || st.uxn_pages)
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 22168cd0dde7..9b7f89df49db 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct siginfo info;
const struct fault_info *inf;
- int ret = 0;
inf = esr_to_fault_info(esr);
pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
@@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
if (interrupts_enabled(regs))
nmi_enter();
- ret = ghes_notify_sea();
+ ghes_notify_sea();
if (interrupts_enabled(regs))
nmi_exit();
@@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
info.si_addr = (void __user *)addr;
arm64_notify_die("", regs, &info, esr);
- return ret;
+ return 0;
}
static const struct fault_info fault_info[] = {
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 5960bef0170d..00e7b900ca41 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -476,6 +476,8 @@ void __init arm64_memblock_init(void)
reserve_elfcorehdr();
+ high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
+
dma_contiguous_reserve(arm64_dma_phys_limit);
memblock_allow_resize();
@@ -502,7 +504,6 @@ void __init bootmem_init(void)
sparse_init();
zone_sizes_init(min, max);
- high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
memblock_dump_all();
}
diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
index aa624b4ab655..2240b38c2915 100644
--- a/arch/blackfin/include/uapi/asm/Kbuild
+++ b/arch/blackfin/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += ioctl.h
generic-y += ipcbuf.h
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 67ee896a76a7..26644e15d854 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index 3687b54bb18e..3470c6e9c7b9 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild
index b15bf6bc0e94..14a2e9af97e9 100644
--- a/arch/frv/include/uapi/asm/Kbuild
+++ b/arch/frv/include/uapi/asm/Kbuild
@@ -1,2 +1,4 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bpf_perf_event.h
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index 187aed820e71..2f65f78792cb 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index cb5df3aad3a8..41a176dbb53e 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index 13a97aa2285f..f5c6967a93bb 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += kvm_para.h
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
index 1c44d3b3eba0..451bf6071c6e 100644
--- a/arch/m32r/include/uapi/asm/Kbuild
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -1,5 +1,6 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += kvm_para.h
generic-y += siginfo.h
diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig
index 55e55dbc2fb6..3d07b1de7eb0 100644
--- a/arch/m68k/configs/stmark2_defconfig
+++ b/arch/m68k/configs/stmark2_defconfig
@@ -5,7 +5,6 @@ CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_NAMESPACES=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../uClinux-dist/romfs"
# CONFIG_RD_BZIP2 is not set
# CONFIG_RD_LZMA is not set
# CONFIG_RD_XZ is not set
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 3717b64a620d..c2e26a44c482 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += ioctl.h
generic-y += ipcbuf.h
diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds
index 3aa571a513b5..cf6edda38971 100644
--- a/arch/m68k/kernel/vmlinux-nommu.lds
+++ b/arch/m68k/kernel/vmlinux-nommu.lds
@@ -45,6 +45,8 @@ SECTIONS {
.text : {
HEAD_TEXT
TEXT_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 89172b8974b9..625a5785804f 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -16,6 +16,8 @@ SECTIONS
.text : {
HEAD_TEXT
TEXT_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index 293990efc917..9868270b0984 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -16,6 +16,8 @@ SECTIONS
.text : {
HEAD_TEXT
TEXT_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
index 6ac763d9a3e3..f9eaf07d29f8 100644
--- a/arch/metag/include/uapi/asm/Kbuild
+++ b/arch/metag/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 06609ca36115..2c6a6bffea32 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 7c8aab23bce8..b1f66699677d 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += sections.h
generic-y += segment.h
-generic-y += serial.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += user.h
diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h
new file mode 100644
index 000000000000..1d830c6666c2
--- /dev/null
+++ b/arch/mips/include/asm/serial.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2017 MIPS Tech, LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#ifndef __ASM__SERIAL_H
+#define __ASM__SERIAL_H
+
+#ifdef CONFIG_MIPS_GENERIC
+/*
+ * Generic kernels cannot know a correct value for all platforms at
+ * compile time. Set it to 0 to prevent 8250_early using it
+ */
+#define BASE_BAUD 0
+#else
+#include <asm-generic/serial.h>
+#endif
+
+#endif /* __ASM__SERIAL_H */
diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild
index a0266feba9e6..7a4becd8963a 100644
--- a/arch/mips/include/uapi/asm/Kbuild
+++ b/arch/mips/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += ipcbuf.h
diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild
index c94ee54210bc..81271d3af47c 100644
--- a/arch/mn10300/include/uapi/asm/Kbuild
+++ b/arch/mn10300/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += siginfo.h
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index ffca24da7647..13a3d77b4d7b 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 62286dbeb904..130c16ccba0a 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 9345b44b86f0..f57118e1f6b4 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -123,8 +123,8 @@ int puts(const char *s)
while ((nuline = strchr(s, '\n')) != NULL) {
if (nuline != s)
pdc_iodc_print(s, nuline - s);
- pdc_iodc_print("\r\n", 2);
- s = nuline + 1;
+ pdc_iodc_print("\r\n", 2);
+ s = nuline + 1;
}
if (*s != '\0')
pdc_iodc_print(s, strlen(s));
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index c980a02a52bc..598c8d60fa5e 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -35,7 +35,12 @@ struct thread_info {
/* thread information allocation */
+#ifdef CONFIG_IRQSTACKS
+#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */
+#else
#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */
+#endif
+
/* Be sure to hunt all references to this down when you change the size of
* the kernel stack */
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 196d2a4efb31..286ef5a5904b 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
+generic-y += bpf_perf_event.h
generic-y += kvm_para.h
generic-y += param.h
generic-y += poll.h
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index a4fd296c958e..f3cecf5117cf 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
STREG %r19,PT_SR7(%r16)
intr_return:
- /* NOTE: Need to enable interrupts incase we schedule. */
- ssm PSW_SM_I, %r0
-
/* check for reschedule */
mfctl %cr30,%r1
LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
LDREG PT_IASQ1(%r16), %r20
cmpib,COND(=),n 0,%r20,intr_restore /* backward */
+ /* NOTE: We need to enable interrupts if we have to deliver
+ * signals. We used to do this earlier but it caused kernel
+ * stack overflows. */
+ ssm PSW_SM_I, %r0
+
copy %r0, %r25 /* long in_syscall = 0 */
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
cmpib,COND(=) 0, %r20, intr_do_preempt
nop
+ /* NOTE: We need to enable interrupts if we schedule. We used
+ * to do this earlier but it caused kernel stack overflows. */
+ ssm PSW_SM_I, %r0
+
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#endif
diff --git a/arch/parisc/kernel/hpmc.S b/arch/parisc/kernel/hpmc.S
index e3a8e5e4d5de..8d072c44f300 100644
--- a/arch/parisc/kernel/hpmc.S
+++ b/arch/parisc/kernel/hpmc.S
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
__INITRODATA
+ .align 4
.export os_hpmc_size
os_hpmc_size:
.word .os_hpmc_end-.os_hpmc
diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c
index 5a657986ebbf..143f90e2f9f3 100644
--- a/arch/parisc/kernel/unwind.c
+++ b/arch/parisc/kernel/unwind.c
@@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/kallsyms.h>
#include <linux/sort.h>
-#include <linux/sched.h>
#include <linux/uaccess.h>
#include <asm/assembly.h>
diff --git a/arch/parisc/lib/delay.c b/arch/parisc/lib/delay.c
index 7eab4bb8abe6..66e506520505 100644
--- a/arch/parisc/lib/delay.c
+++ b/arch/parisc/lib/delay.c
@@ -16,9 +16,7 @@
#include <linux/preempt.h>
#include <linux/init.h>
-#include <asm/processor.h>
#include <asm/delay.h>
-
#include <asm/special_insns.h> /* for mfctl() */
#include <asm/processor.h> /* for boot_cpu_data */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 73b92017b6d7..cd2fc1cc1cc7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -76,6 +76,7 @@ struct machdep_calls {
void __noreturn (*restart)(char *cmd);
void __noreturn (*halt)(void);
+ void (*panic)(char *str);
void (*cpu_die)(void);
long (*time_init)(void); /* Optional, may be NULL */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6177d43f0ce8..e2a2b8400490 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
#endif
}
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
{
+ return 0;
}
#ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 257d23dbf55d..cf00ec26303a 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -24,6 +24,7 @@ extern void reloc_got2(unsigned long);
void check_for_initrd(void);
void initmem_init(void);
+void setup_panic(void);
#define ARCH_PANIC_TIMEOUT 180
#ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 0d960ef78a9a..1a6ed5919ffd 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += param.h
generic-y += poll.h
generic-y += resource.h
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 610955fe8b81..679bbe714e85 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -102,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
li r0,0
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PID,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
@@ -126,6 +127,7 @@ _GLOBAL(__restore_cpu_power9)
li r0,0
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PID,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 04ea5c04fd24..3c2c2688918f 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1462,25 +1462,6 @@ static void fadump_init_files(void)
return;
}
-static int fadump_panic_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- /*
- * If firmware-assisted dump has been registered then trigger
- * firmware-assisted dump and let firmware handle everything
- * else. If this returns, then fadump was not registered, so
- * go through the rest of the panic path.
- */
- crash_fadump(NULL, ptr);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block fadump_panic_block = {
- .notifier_call = fadump_panic_event,
- .priority = INT_MIN /* may not return; must be done last */
-};
-
/*
* Prepare for firmware-assisted dump.
*/
@@ -1513,9 +1494,6 @@ int __init setup_fadump(void)
init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
fadump_init_files();
- atomic_notifier_chain_register(&panic_notifier_list,
- &fadump_panic_block);
-
return 1;
}
subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5acb5a176dbe..72be0c32e902 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
regs->nip, regs->link, regs->ctr);
- printk("REGS: %p TRAP: %04lx %s (%s)\n",
+ printk("REGS: %px TRAP: %04lx %s (%s)\n",
regs, regs->trap, print_tainted(), init_utsname()->release);
printk("MSR: "REG" ", regs->msr);
print_msr_bits(regs->msr);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2075322cd225..9d213542a48b 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -704,6 +704,30 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
+static int ppc_panic_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ /*
+ * If firmware-assisted dump has been registered then trigger
+ * firmware-assisted dump and let firmware handle everything else.
+ */
+ crash_fadump(NULL, ptr);
+ ppc_md.panic(ptr); /* May not return */
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_panic_block = {
+ .notifier_call = ppc_panic_event,
+ .priority = INT_MIN /* may not return; must be done last */
+};
+
+void __init setup_panic(void)
+{
+ if (!ppc_md.panic)
+ return;
+ atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
+}
+
#ifdef CONFIG_CHECK_CACHE_COHERENCY
/*
* For platforms that have configurable cache-coherency. This function
@@ -848,6 +872,9 @@ void __init setup_arch(char **cmdline_p)
/* Probe the machine type, establish ppc_md. */
probe_machine();
+ /* Setup panic notifier if requested by the platform. */
+ setup_panic();
+
/*
* Configure ppc_md.power_save (ppc32 only, 64-bit machines do
* it from their respective probe() function.
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index bf457843e032..0d750d274c4e 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
/* Return the per-cpu state for state saving/migration */
return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
- (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+ (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+ (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
}
int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
/*
* Restore P and Q. If the interrupt was pending, we
- * force both P and Q, which will trigger a resend.
+ * force Q and !P, which will trigger a resend.
*
* That means that a guest that had both an interrupt
* pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
* is perfectly fine as coalescing interrupts that haven't
* been presented yet is always allowed.
*/
- if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+ if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
state->old_p = true;
if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
state->old_q = true;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 46d74e81aff1..d183b4801bdb 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -763,7 +763,8 @@ emit_clear:
func = (u8 *) __bpf_call_base + imm;
/* Save skb pointer if we need to re-cache skb data */
- if (bpf_helper_changes_pkt_data(func))
+ if ((ctx->seen & SEEN_SKB) &&
+ bpf_helper_changes_pkt_data(func))
PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
PPC_MR(b2p[BPF_REG_0], 3);
/* refresh skb cache */
- if (bpf_helper_changes_pkt_data(func)) {
+ if ((ctx->seen & SEEN_SKB) &&
+ bpf_helper_changes_pkt_data(func)) {
/* reload skb pointer to r3 */
PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 9e3da168d54c..fce545774d50 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
int ret;
__u64 target;
- if (is_kernel_addr(addr))
- return branch_target((unsigned int *)addr);
+ if (is_kernel_addr(addr)) {
+ if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+ return 0;
+
+ return branch_target(&instr);
+ }
/* Userspace: need copy instruction here then translate it */
pagefault_disable();
@@ -1415,7 +1419,7 @@ static int collect_events(struct perf_event *group, int max_count,
int n = 0;
struct perf_event *event;
- if (!is_software_event(group)) {
+ if (group->pmu->task_ctx_nr == perf_hw_context) {
if (n >= max_count)
return -1;
ctrs[n] = group;
@@ -1423,7 +1427,7 @@ static int collect_events(struct perf_event *group, int max_count,
events[n++] = group->hw.config;
}
list_for_each_entry(event, &group->sibling_list, group_entry) {
- if (!is_software_event(event) &&
+ if (event->pmu->task_ctx_nr == perf_hw_context &&
event->state != PERF_EVENT_STATE_OFF) {
if (n >= max_count)
return -1;
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 0ead3cd73caa..be4e7f84f70a 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -310,6 +310,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
return 0;
/*
+ * Check whether nest_imc is registered. We could end up here if the
+ * cpuhotplug callback registration fails. i.e, callback invokes the
+ * offline path for all successfully registered nodes. At this stage,
+ * nest_imc pmu will not be registered and we should return here.
+ *
+ * We return with a zero since this is not an offline failure. And
+ * cpuhp_setup_state() returns the actual failure reason to the caller,
+ * which in turn will call the cleanup routine.
+ */
+ if (!nest_pmus)
+ return 0;
+
+ /*
* Now that this cpu is one of the designated,
* find a next cpu a) which is online and b) in same chip.
*/
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
if (nest_pmus == 1) {
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
kfree(nest_imc_refc);
+ kfree(per_nest_pmu_arr);
}
if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
kfree(pmu_ptr);
- kfree(per_nest_pmu_arr);
return;
}
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
ret = nest_pmu_cpumask_init();
if (ret) {
mutex_unlock(&nest_init_lock);
+ kfree(nest_imc_refc);
+ kfree(per_nest_pmu_arr);
goto err_free;
}
}
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 9dabea6e1443..6244bc849469 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -104,6 +104,20 @@ static void __noreturn ps3_halt(void)
ps3_sys_manager_halt(); /* never returns */
}
+static void ps3_panic(char *str)
+{
+ DBG("%s:%d %s\n", __func__, __LINE__, str);
+
+ smp_send_stop();
+ printk("\n");
+ printk(" System does not reboot automatically.\n");
+ printk(" Please press POWER button.\n");
+ printk("\n");
+
+ while(1)
+ lv1_pause(1);
+}
+
#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \
defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
static void __init prealloc(struct ps3_prealloc *p)
@@ -255,6 +269,7 @@ define_machine(ps3) {
.probe = ps3_probe,
.setup_arch = ps3_setup_arch,
.init_IRQ = ps3_init_IRQ,
+ .panic = ps3_panic,
.get_boot_time = ps3_get_boot_time,
.set_dabr = ps3_set_dabr,
.calibrate_decr = ps3_calibrate_decr,
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 5f1beb8367ac..a8531e012658 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -726,6 +726,7 @@ define_machine(pseries) {
.pcibios_fixup = pSeries_final_fixup,
.restart = rtas_restart,
.halt = rtas_halt,
+ .panic = rtas_os_term,
.get_boot_time = rtas_get_boot_time,
.get_rtc_time = rtas_get_rtc_time,
.set_rtc_time = rtas_set_rtc_time,
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 44cbf4c12ea1..df95102e732c 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
}
static struct lock_class_key fsl_msi_irq_class;
+static struct lock_class_key fsl_msi_irq_request_class;
static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
int offset, int irq_index)
@@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
dev_err(&dev->dev, "No memory for MSI cascade data\n");
return -ENOMEM;
}
- irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class);
+ irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
+ &fsl_msi_irq_request_class);
cascade_data->index = offset;
cascade_data->msi_data = msi;
cascade_data->virq = virt_msir;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1b2d8cb49abb..cab24f549e7c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1590,7 +1590,7 @@ static void print_bug_trap(struct pt_regs *regs)
printf("kernel BUG at %s:%u!\n",
bug->file, bug->line);
#else
- printf("kernel BUG at %p!\n", (void *)bug->bug_addr);
+ printf("kernel BUG at %px!\n", (void *)bug->bug_addr);
#endif
#endif /* CONFIG_BUG */
}
@@ -2329,7 +2329,7 @@ static void dump_one_paca(int cpu)
p = &paca[cpu];
- printf("paca for cpu 0x%x @ %p:\n", cpu, p);
+ printf("paca for cpu 0x%x @ %px:\n", cpu, p);
printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : "no");
printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no");
@@ -2945,7 +2945,7 @@ static void show_task(struct task_struct *tsk)
(tsk->exit_state & EXIT_DEAD) ? 'E' :
(tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
- printf("%p %016lx %6d %6d %c %2d %s\n", tsk,
+ printf("%px %016lx %6d %6d %c %2d %s\n", tsk,
tsk->thread.ksp,
tsk->pid, tsk->parent->pid,
state, task_thread_info(tsk)->cpu,
@@ -2988,7 +2988,7 @@ static void show_pte(unsigned long addr)
if (setjmp(bus_error_jmp) != 0) {
catch_memory_errors = 0;
- printf("*** Error dumping pte for task %p\n", tsk);
+ printf("*** Error dumping pte for task %px\n", tsk);
return;
}
@@ -3074,7 +3074,7 @@ static void show_tasks(void)
if (setjmp(bus_error_jmp) != 0) {
catch_memory_errors = 0;
- printf("*** Error dumping task %p\n", tsk);
+ printf("*** Error dumping task %px\n", tsk);
return;
}
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 773c4e039cd7..c0319cbf1eec 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -38,6 +38,25 @@
#define smp_rmb() RISCV_FENCE(r,r)
#define smp_wmb() RISCV_FENCE(w,w)
+/*
+ * This is a very specific barrier: it's currently only used in two places in
+ * the kernel, both in the scheduler. See include/linux/spinlock.h for the two
+ * orderings it guarantees, but the "critical section is RCsc" guarantee
+ * mandates a barrier on RISC-V. The sequence looks like:
+ *
+ * lr.aq lock
+ * sc lock <= LOCKED
+ * smp_mb__after_spinlock()
+ * // critical section
+ * lr lock
+ * sc.rl lock <= UNLOCKED
+ *
+ * The AQ/RL pair provides a RCpc critical section, but there's not really any
+ * way we can take advantage of that here because the ordering is only enforced
+ * on that one lock. Thus, we're just doing a full fence.
+ */
+#define smp_mb__after_spinlock() RISCV_FENCE(rw,rw)
+
#include <asm-generic/barrier.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/uapi/asm/Kbuild b/arch/riscv/include/uapi/asm/Kbuild
index 5ded96b06352..7e91f4850475 100644
--- a/arch/riscv/include/uapi/asm/Kbuild
+++ b/arch/riscv/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += setup.h
generic-y += unistd.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 8fbb6749910d..cb7b0c63014e 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -38,10 +38,6 @@
#include <asm/tlbflush.h>
#include <asm/thread_info.h>
-#ifdef CONFIG_HVC_RISCV_SBI
-#include <asm/hvc_riscv_sbi.h>
-#endif
-
#ifdef CONFIG_DUMMY_CONSOLE
struct screen_info screen_info = {
.orig_video_lines = 30,
@@ -212,13 +208,6 @@ static void __init setup_bootmem(void)
void __init setup_arch(char **cmdline_p)
{
-#if defined(CONFIG_HVC_RISCV_SBI)
- if (likely(early_console == NULL)) {
- early_console = &riscv_sbi_early_console_dev;
- register_console(early_console);
- }
-#endif
-
#ifdef CONFIG_CMDLINE_BOOL
#ifdef CONFIG_CMDLINE_OVERRIDE
strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index a2ae936a093e..79c78668258e 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -70,7 +70,7 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
/* Check the reserved flags. */
- if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL))
+ if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
return -EINVAL;
flush_icache_mm(mm, local);
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index eae2c64cf69d..9fdff3fe1a42 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-y += kernel/
obj-y += mm/
obj-$(CONFIG_KVM) += kvm/
diff --git a/arch/s390/appldata/Makefile b/arch/s390/appldata/Makefile
index 99f1cf071304..b06def4a4f2f 100644
--- a/arch/s390/appldata/Makefile
+++ b/arch/s390/appldata/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux - z/VM Monitor Stream.
#
diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr
index f02382ae5c48..42a242597f34 100644
--- a/arch/s390/boot/compressed/vmlinux.scr
+++ b/arch/s390/boot/compressed/vmlinux.scr
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
SECTIONS
{
.rodata.compressed : {
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index c7de53d8da75..a00c17f761c1 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* Cryptographic API.
*
@@ -16,12 +17,6 @@
* Copyright (c) Alan Smithee.
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
index 2ee25ba252d6..06f601509ce9 100644
--- a/arch/s390/hypfs/Makefile
+++ b/arch/s390/hypfs/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux hypfs filesystem routines.
#
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 41c211a4d8b1..048450869328 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
generic-y += asm-offsets.h
generic-y += cacheflush.h
generic-y += clkdev.h
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index a72002056b54..c2cf7bcdef9b 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_ALTERNATIVE_H
#define _ASM_S390_ALTERNATIVE_H
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index c02f4aba88a6..cfce6835b109 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Adjunct processor (AP) interfaces
*
* Copyright IBM Corp. 2017
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Tony Krowiak <akrowia@linux.vnet.ibm.com>
* Martin Schwidefsky <schwidefsky@de.ibm.com>
* Harald Freudenberger <freude@de.ibm.com>
diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h
index 0f5bd894f4dc..aa42a179be33 100644
--- a/arch/s390/include/asm/bugs.h
+++ b/arch/s390/include/asm/bugs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* S390 version
* Copyright IBM Corp. 1999
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index d6c9d1e0dc2d..b9c0e361748b 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -40,6 +40,7 @@ struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
/* Perf pt_regs extension for sample-data-entry indicators */
struct perf_sf_sde_regs {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57d7bc92e0b8..0a6b0286c32e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
return pud;
}
-#define pud_write pud_write
-static inline int pud_write(pud_t pud)
-{
- return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
-}
-
static inline pud_t pud_mkclean(pud_t pud)
{
if (pud_large(pud)) {
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index a3788dafc0e1..6f70d81c40f2 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -74,9 +74,14 @@ enum {
*/
struct pt_regs
{
- unsigned long args[1];
- psw_t psw;
- unsigned long gprs[NUM_GPRS];
+ union {
+ user_pt_regs user_regs;
+ struct {
+ unsigned long args[1];
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+ };
+ };
unsigned long orig_gpr2;
unsigned int int_code;
unsigned int int_parm;
diff --git a/arch/s390/include/asm/segment.h b/arch/s390/include/asm/segment.h
index 8bfce3475b1c..97a0582b8d0f 100644
--- a/arch/s390/include/asm/segment.h
+++ b/arch/s390/include/asm/segment.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index ec7b476c1ac5..c61b2cc1a8a8 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -30,21 +30,20 @@ static inline void restore_access_regs(unsigned int *acrs)
asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
}
-#define switch_to(prev,next,last) do { \
- if (prev->mm) { \
- save_fpu_regs(); \
- save_access_regs(&prev->thread.acrs[0]); \
- save_ri_cb(prev->thread.ri_cb); \
- save_gs_cb(prev->thread.gs_cb); \
- } \
+#define switch_to(prev, next, last) do { \
+ /* save_fpu_regs() sets the CIF_FPU flag, which enforces \
+ * a restore of the floating point / vector registers as \
+ * soon as the next task returns to user space \
+ */ \
+ save_fpu_regs(); \
+ save_access_regs(&prev->thread.acrs[0]); \
+ save_ri_cb(prev->thread.ri_cb); \
+ save_gs_cb(prev->thread.gs_cb); \
update_cr_regs(next); \
- if (next->mm) { \
- set_cpu_flag(CIF_FPU); \
- restore_access_regs(&next->thread.acrs[0]); \
- restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
- restore_gs_cb(next->thread.gs_cb); \
- } \
- prev = __switch_to(prev,next); \
+ restore_access_regs(&next->thread.acrs[0]); \
+ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
+ restore_gs_cb(next->thread.gs_cb); \
+ prev = __switch_to(prev, next); \
} while (0)
#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h
index d375526c261f..605dc46bac5e 100644
--- a/arch/s390/include/asm/vga.h
+++ b/arch/s390/include/asm/vga.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_VGA_H
#define _ASM_S390_VGA_H
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 098f28778a13..92b7c9b3e641 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
diff --git a/arch/s390/include/uapi/asm/bpf_perf_event.h b/arch/s390/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..cefe7c7cd4f6
--- /dev/null
+++ b/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h
index 7c8564f98205..d17dd9e5d516 100644
--- a/arch/s390/include/uapi/asm/perf_regs.h
+++ b/arch/s390/include/uapi/asm/perf_regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_S390_PERF_REGS_H
#define _ASM_S390_PERF_REGS_H
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index 0d23c8ff2900..543dd70e12c8 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -162,7 +162,7 @@
#define GPR_SIZE 8
#define CR_SIZE 8
-#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
+#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
#endif /* __s390x__ */
@@ -179,17 +179,16 @@
#define ACR_SIZE 4
-#define PTRACE_OLDSETOPTIONS 21
+#define PTRACE_OLDSETOPTIONS 21
#ifndef __ASSEMBLY__
#include <linux/stddef.h>
#include <linux/types.h>
-typedef union
-{
- float f;
- double d;
- __u64 ui;
+typedef union {
+ float f;
+ double d;
+ __u64 ui;
struct
{
__u32 hi;
@@ -197,23 +196,21 @@ typedef union
} fp;
} freg_t;
-typedef struct
-{
- __u32 fpc;
+typedef struct {
+ __u32 fpc;
__u32 pad;
- freg_t fprs[NUM_FPRS];
+ freg_t fprs[NUM_FPRS];
} s390_fp_regs;
-#define FPC_EXCEPTION_MASK 0xF8000000
-#define FPC_FLAGS_MASK 0x00F80000
-#define FPC_DXC_MASK 0x0000FF00
-#define FPC_RM_MASK 0x00000003
+#define FPC_EXCEPTION_MASK 0xF8000000
+#define FPC_FLAGS_MASK 0x00F80000
+#define FPC_DXC_MASK 0x0000FF00
+#define FPC_RM_MASK 0x00000003
/* this typedef defines how a Program Status Word looks like */
-typedef struct
-{
- unsigned long mask;
- unsigned long addr;
+typedef struct {
+ unsigned long mask;
+ unsigned long addr;
} __attribute__ ((aligned(8))) psw_t;
#ifndef __s390x__
@@ -282,8 +279,7 @@ typedef struct
/*
* The s390_regs structure is used to define the elf_gregset_t.
*/
-typedef struct
-{
+typedef struct {
psw_t psw;
unsigned long gprs[NUM_GPRS];
unsigned int acrs[NUM_ACRS];
@@ -291,24 +287,32 @@ typedef struct
} s390_regs;
/*
+ * The user_pt_regs structure exports the beginning of
+ * the in-kernel pt_regs structure to user space.
+ */
+typedef struct {
+ unsigned long args[1];
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+} user_pt_regs;
+
+/*
* Now for the user space program event recording (trace) definitions.
* The following structures are used only for the ptrace interface, don't
* touch or even look at it if you don't want to modify the user-space
* ptrace interface. In particular stay away from it for in-kernel PER.
*/
-typedef struct
-{
+typedef struct {
unsigned long cr[NUM_CR_WORDS];
} per_cr_words;
#define PER_EM_MASK 0xE8000000UL
-typedef struct
-{
+typedef struct {
#ifdef __s390x__
- unsigned : 32;
+ unsigned : 32;
#endif /* __s390x__ */
- unsigned em_branching : 1;
+ unsigned em_branching : 1;
unsigned em_instruction_fetch : 1;
/*
* Switching on storage alteration automatically fixes
@@ -317,44 +321,41 @@ typedef struct
unsigned em_storage_alteration : 1;
unsigned em_gpr_alt_unused : 1;
unsigned em_store_real_address : 1;
- unsigned : 3;
+ unsigned : 3;
unsigned branch_addr_ctl : 1;
- unsigned : 1;
+ unsigned : 1;
unsigned storage_alt_space_ctl : 1;
- unsigned : 21;
+ unsigned : 21;
unsigned long starting_addr;
unsigned long ending_addr;
} per_cr_bits;
-typedef struct
-{
+typedef struct {
unsigned short perc_atmid;
unsigned long address;
unsigned char access_id;
} per_lowcore_words;
-typedef struct
-{
- unsigned perc_branching : 1;
+typedef struct {
+ unsigned perc_branching : 1;
unsigned perc_instruction_fetch : 1;
unsigned perc_storage_alteration : 1;
- unsigned perc_gpr_alt_unused : 1;
+ unsigned perc_gpr_alt_unused : 1;
unsigned perc_store_real_address : 1;
- unsigned : 3;
- unsigned atmid_psw_bit_31 : 1;
- unsigned atmid_validity_bit : 1;
- unsigned atmid_psw_bit_32 : 1;
- unsigned atmid_psw_bit_5 : 1;
- unsigned atmid_psw_bit_16 : 1;
- unsigned atmid_psw_bit_17 : 1;
- unsigned si : 2;
+ unsigned : 3;
+ unsigned atmid_psw_bit_31 : 1;
+ unsigned atmid_validity_bit : 1;
+ unsigned atmid_psw_bit_32 : 1;
+ unsigned atmid_psw_bit_5 : 1;
+ unsigned atmid_psw_bit_16 : 1;
+ unsigned atmid_psw_bit_17 : 1;
+ unsigned si : 2;
unsigned long address;
- unsigned : 4;
- unsigned access_id : 4;
+ unsigned : 4;
+ unsigned access_id : 4;
} per_lowcore_bits;
-typedef struct
-{
+typedef struct {
union {
per_cr_words words;
per_cr_bits bits;
@@ -364,9 +365,9 @@ typedef struct
* the kernel always sets them to zero. To enable single
* stepping use ptrace(PTRACE_SINGLESTEP) instead.
*/
- unsigned single_step : 1;
+ unsigned single_step : 1;
unsigned instruction_fetch : 1;
- unsigned : 30;
+ unsigned : 30;
/*
* These addresses are copied into cr10 & cr11 if single
* stepping is switched off
@@ -376,11 +377,10 @@ typedef struct
union {
per_lowcore_words words;
per_lowcore_bits bits;
- } lowcore;
+ } lowcore;
} per_struct;
-typedef struct
-{
+typedef struct {
unsigned int len;
unsigned long kernel_addr;
unsigned long process_addr;
@@ -390,12 +390,12 @@ typedef struct
* S/390 specific non posix ptrace requests. I chose unusual values so
* they are unlikely to clash with future ptrace definitions.
*/
-#define PTRACE_PEEKUSR_AREA 0x5000
-#define PTRACE_POKEUSR_AREA 0x5001
+#define PTRACE_PEEKUSR_AREA 0x5000
+#define PTRACE_POKEUSR_AREA 0x5001
#define PTRACE_PEEKTEXT_AREA 0x5002
#define PTRACE_PEEKDATA_AREA 0x5003
#define PTRACE_POKETEXT_AREA 0x5004
-#define PTRACE_POKEDATA_AREA 0x5005
+#define PTRACE_POKEDATA_AREA 0x5005
#define PTRACE_GET_LAST_BREAK 0x5006
#define PTRACE_PEEK_SYSTEM_CALL 0x5007
#define PTRACE_POKE_SYSTEM_CALL 0x5008
@@ -413,21 +413,19 @@ typedef struct
* PT_PROT definition is loosely based on hppa bsd definition in
* gdb/hppab-nat.c
*/
-#define PTRACE_PROT 21
+#define PTRACE_PROT 21
-typedef enum
-{
+typedef enum {
ptprot_set_access_watchpoint,
ptprot_set_write_watchpoint,
ptprot_disable_watchpoint
} ptprot_flags;
-typedef struct
-{
+typedef struct {
unsigned long lowaddr;
unsigned long hiaddr;
ptprot_flags prot;
-} ptprot_area;
+} ptprot_area;
/* Sequence of bytes for breakpoint illegal instruction. */
#define S390_BREAKPOINT {0x0,0x1}
@@ -439,8 +437,7 @@ typedef struct
* The user_regs_struct defines the way the user registers are
* store on the stack for signal handling.
*/
-struct user_regs_struct
-{
+struct user_regs_struct {
psw_t psw;
unsigned long gprs[NUM_GPRS];
unsigned int acrs[NUM_ACRS];
diff --git a/arch/s390/include/uapi/asm/sthyi.h b/arch/s390/include/uapi/asm/sthyi.h
index ec113db4eb7e..b1b022316983 100644
--- a/arch/s390/include/uapi/asm/sthyi.h
+++ b/arch/s390/include/uapi/asm/sthyi.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_ASM_STHYI_H
#define _UAPI_ASM_STHYI_H
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h
index 3a77833c74dc..2b605f7e8483 100644
--- a/arch/s390/include/uapi/asm/virtio-ccw.h
+++ b/arch/s390/include/uapi/asm/virtio-ccw.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
* Definitions for virtio-ccw devices.
*
diff --git a/arch/s390/include/uapi/asm/vmcp.h b/arch/s390/include/uapi/asm/vmcp.h
index 4caf71714a55..aeaaa030030e 100644
--- a/arch/s390/include/uapi/asm/vmcp.h
+++ b/arch/s390/include/uapi/asm/vmcp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright IBM Corp. 2004, 2005
* Interface implementation for communication with the z/VM control program
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 315986a06cf5..574e77622c04 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <asm/alternative.h>
#include <asm/facility.h>
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index f04db3779b34..59eea9c65d3e 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index f8603ebed669..54e2d634b849 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/perf_event.h>
#include <linux/perf_regs.h>
#include <linux/kernel.h>
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 308a7b63348b..f7fc63385553 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -370,10 +370,10 @@ SYSCALL(sys_recvmmsg,compat_sys_recvmmsg)
SYSCALL(sys_sendmmsg,compat_sys_sendmmsg)
SYSCALL(sys_socket,sys_socket)
SYSCALL(sys_socketpair,compat_sys_socketpair) /* 360 */
-SYSCALL(sys_bind,sys_bind)
-SYSCALL(sys_connect,sys_connect)
+SYSCALL(sys_bind,compat_sys_bind)
+SYSCALL(sys_connect,compat_sys_connect)
SYSCALL(sys_listen,sys_listen)
-SYSCALL(sys_accept4,sys_accept4)
+SYSCALL(sys_accept4,compat_sys_accept4)
SYSCALL(sys_getsockopt,compat_sys_getsockopt) /* 365 */
SYSCALL(sys_setsockopt,compat_sys_setsockopt)
SYSCALL(sys_getsockname,compat_sys_getsockname)
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
index 79a071e4357e..db19d0680a0a 100644
--- a/arch/s390/kernel/vdso64/note.S
+++ b/arch/s390/kernel/vdso64/note.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
* Here we can supply some information useful to userland.
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 6048b1c6e580..05ee90a5ea08 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -1,10 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
# Makefile for kernel virtual machines on s390
#
# Copyright IBM Corp. 2008
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License (version 2 only)
-# as published by the Free Software Foundation.
KVM := ../../../virt/kvm
common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index d93a2c0474bf..89aa114a2cba 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* handling diagnose instructions
*
* Copyright IBM Corp. 2008, 2011
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
*/
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index bec42b852246..f4c51756c462 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* access guest memory
*
* Copyright IBM Corp. 2008, 2014
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index bcbd86621d01..b5f3e82006d0 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* kvm guest debug support
*
* Copyright IBM Corp. 2014
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
*/
#include <linux/kvm_host.h>
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 8fe034beb623..9c7d70715862 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* in-kernel handling for sie intercepts
*
* Copyright IBM Corp. 2008, 2014
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
*/
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fa557372d600..024ad8bcc516 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* handling kvm guest interrupts
*
* Copyright IBM Corp. 2008, 2015
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
index d98e4159643d..484608c71dd0 100644
--- a/arch/s390/kvm/irq.h
+++ b/arch/s390/kvm/irq.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* s390 irqchip routines
*
* Copyright IBM Corp. 2014
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
*/
#ifndef __KVM_IRQ_H
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9614aea5839b..ec8b68e97d3c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * hosting zSeries kernel virtual machines
+ * hosting IBM Z kernel virtual machines (s390x)
*
- * Copyright IBM Corp. 2008, 2009
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
+ * Copyright IBM Corp. 2008, 2017
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -3808,6 +3805,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
break;
}
+ /* do not use irq_state.flags, it will break old QEMUs */
r = kvm_s390_set_irq_state(vcpu,
(void __user *) irq_state.buf,
irq_state.len);
@@ -3823,6 +3821,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
break;
}
+ /* do not use irq_state.flags, it will break old QEMUs */
r = kvm_s390_get_irq_state(vcpu,
(__u8 __user *) irq_state.buf,
irq_state.len);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 10d65dfbc306..5e46ba429bcb 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* definition for kvm on s390
*
* Copyright IBM Corp. 2008, 2009
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index c954ac49eee4..572496c688cc 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* handling privileged instructions
*
* Copyright IBM Corp. 2008, 2013
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
*/
@@ -235,8 +232,6 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
return -EAGAIN;
}
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
- return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
return 0;
}
@@ -247,6 +242,9 @@ static int handle_iske(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
rc = try_handle_skey(vcpu);
if (rc)
return rc != -EAGAIN ? rc : 0;
@@ -276,6 +274,9 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
rc = try_handle_skey(vcpu);
if (rc)
return rc != -EAGAIN ? rc : 0;
@@ -311,6 +312,9 @@ static int handle_sske(struct kvm_vcpu *vcpu)
int reg1, reg2;
int rc;
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
rc = try_handle_skey(vcpu);
if (rc)
return rc != -EAGAIN ? rc : 0;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 9d592ef4104b..c1f5cde2c878 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* handling interprocessor communication
*
* Copyright IBM Corp. 2008, 2013
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index a311938b63b3..5d6ae0326d9e 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* kvm nested virtualization support for s390x
*
* Copyright IBM Corp. 2016
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- *
* Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
*/
#include <linux/vmalloc.h>
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 434a9564917b..cb364153c43c 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -83,8 +83,6 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
- if (end >= TASK_SIZE_MAX)
- return -ENOMEM;
rc = 0;
notify = 0;
while (mm->context.asce_limit < end) {
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
index 90568c33ddb0..e0d5f245e42b 100644
--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Arch-specific network modules
#
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e81c16838b90..9557d8b516df 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -55,8 +55,7 @@ struct bpf_jit {
#define SEEN_LITERAL 8 /* code uses literals */
#define SEEN_FUNC 16 /* calls C functions */
#define SEEN_TAIL_CALL 32 /* code uses tail calls */
-#define SEEN_SKB_CHANGE 64 /* code changes skb data */
-#define SEEN_REG_AX 128 /* code uses constant blinding */
+#define SEEN_REG_AX 64 /* code uses constant blinding */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
/*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
}
- if (jit->seen & SEEN_SKB)
+ if (jit->seen & SEEN_SKB) {
emit_load_skb_data_hlen(jit);
- if (jit->seen & SEEN_SKB_CHANGE)
/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
STK_OFF_SKBP);
+ }
}
/*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT2(0x0d00, REG_14, REG_W1);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
- if (bpf_helper_changes_pkt_data((void *)func)) {
- jit->seen |= SEEN_SKB_CHANGE;
+ if ((jit->seen & SEEN_SKB) &&
+ bpf_helper_changes_pkt_data((void *)func)) {
/* lg %b1,ST_OFF_SKBP(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
REG_15, STK_OFF_SKBP);
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
index f94ecaffa71b..66c2dff74895 100644
--- a/arch/s390/numa/Makefile
+++ b/arch/s390/numa/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-y += numa.o
obj-y += toptree.o
obj-$(CONFIG_NUMA_EMU) += mode_emu.o
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 805d8b29193a..22d0871291ee 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the s390 PCI subsystem.
#
diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c
index 01d4c5a4bfe9..357d42681cef 100644
--- a/arch/s390/tools/gen_opcode_table.c
+++ b/arch/s390/tools/gen_opcode_table.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Generate opcode table initializers for the in-kernel disassembler.
*
diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild
index c94ee54210bc..81271d3af47c 100644
--- a/arch/score/include/uapi/asm/Kbuild
+++ b/arch/score/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += siginfo.h
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index e28531333efa..ba4d39cb321d 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
index 2178c78c7c1a..4680ba246b55 100644
--- a/arch/sparc/include/uapi/asm/Kbuild
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += types.h
diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S
index e5547b22cd18..0ddbbb031822 100644
--- a/arch/sparc/lib/hweight.S
+++ b/arch/sparc/lib/hweight.S
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
.previous
ENTRY(__arch_hweight64)
- sethi %hi(__sw_hweight16), %g1
- jmpl %g1 + %lo(__sw_hweight16), %g0
+ sethi %hi(__sw_hweight64), %g1
+ jmpl %g1 + %lo(__sw_hweight64), %g0
nop
ENDPROC(__arch_hweight64)
EXPORT_SYMBOL(__arch_hweight64)
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index be3136f142a9..a8103a84b4ac 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
if (!printk_ratelimit())
return;
- printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+ printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *)regs->pc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 815c03d7a765..41363f46797b 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
if (!printk_ratelimit())
return;
- printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+ printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 33c0f8bb0f33..5335ba3c850e 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
if (!(pmd_val(pmd) & _PAGE_VALID))
return 0;
- if (!pmd_access_permitted(pmd, write))
+ if (write && !pmd_write(pmd))
return 0;
refs = 0;
@@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
if (!(pud_val(pud) & _PAGE_VALID))
return 0;
- if (!pud_access_permitted(pud, write))
+ if (write && !pud_write(pud))
return 0;
refs = 0;
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 5765e7e711f7..ff5f9cb3039a 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
u8 *func = ((u8 *)__bpf_call_base) + imm;
ctx->saw_call = true;
+ if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+ emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
emit_call((u32 *)func, ctx);
emit_nop(ctx);
emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
- if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
- load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
+ if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+ load_skb_regs(ctx, L7);
break;
}
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
index 5711de0a1b5e..cc439612bcd5 100644
--- a/arch/tile/include/uapi/asm/Kbuild
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 50a32c33d729..73c57f614c9e 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,4 +1,5 @@
generic-y += barrier.h
+generic-y += bpf_perf_event.h
generic-y += bug.h
generic-y += clkdev.h
generic-y += current.h
diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
index b668e351fd6c..fca34b2177e2 100644
--- a/arch/um/include/asm/mmu_context.h
+++ b/arch/um/include/asm/mmu_context.h
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
/*
* Needed since we do not use the asm-generic/mm_hooks.h:
*/
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
uml_setup_stubs(mm);
+ return 0;
}
extern void arch_exit_mmap(struct mm_struct *mm);
static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 4e6fcb32620f..428644175956 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
if (!printk_ratelimit())
return;
- printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+ printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
(void *)UPT_IP(regs), (void *)UPT_SP(regs),
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index 59b06b48f27d..5c205a9cb5a6 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -81,9 +81,10 @@ do { \
} \
} while (0)
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
{
+ return 0;
}
static inline void arch_unmap(struct mm_struct *mm,
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 759a71411169..8611ef980554 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -3,6 +3,7 @@ include include/uapi/asm-generic/Kbuild.asm
generic-y += auxvec.h
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f94bfc7..d4fc98c50378 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -926,7 +926,8 @@ config MAXSMP
config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
range 2 8 if SMP && X86_32 && !X86_BIGSMP
- range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+ range 2 64 if SMP && X86_32 && X86_BIGSMP
+ range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
default "1" if !SMP
default "8192" if MAXSMP
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 6293a8768a91..672441c008c7 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
+ depends on !STACKDEPOT
---help---
This option enables the "guess" unwinder for unwinding kernel stack
traces. It scans the stack and reports every kernel text address it
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 1e9c322e973a..f25e1530e064 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
vmlinux-objs-y += $(obj)/mem_encrypt.o
+ vmlinux-objs-y += $(obj)/pgtable_64.o
endif
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 20919b4f3133..fc313e29fe2c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -305,10 +305,18 @@ ENTRY(startup_64)
leaq boot_stack_end(%rbx), %rsp
#ifdef CONFIG_X86_5LEVEL
- /* Check if 5-level paging has already enabled */
- movq %cr4, %rax
- testl $X86_CR4_LA57, %eax
- jnz lvl5
+ /*
+ * Check if we need to enable 5-level paging.
+ * RSI holds real mode data and need to be preserved across
+ * a function call.
+ */
+ pushq %rsi
+ call l5_paging_required
+ popq %rsi
+
+ /* If l5_paging_required() returned zero, we're done here. */
+ cmpq $0, %rax
+ je lvl5
/*
* At this point we are in long mode with 4-level paging enabled,
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b50c42455e25..98761a1576ce 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
}
}
+static bool l5_supported(void)
+{
+ /* Check if leaf 7 is supported. */
+ if (native_cpuid_eax(0) < 7)
+ return 0;
+
+ /* Check if la57 is supported. */
+ return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
+}
+
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
console_init();
debug_putstr("early console in extract_kernel\n");
+ if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
+ error("This linux kernel as configured requires 5-level paging\n"
+ "This CPU does not support the required 'cr4.la57' feature\n"
+ "Unable to boot - please use a kernel appropriate for your CPU\n");
+ }
+
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index d5364ca2e3f9..b5e5e02f8cde 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -23,6 +23,9 @@
*/
#undef CONFIG_AMD_MEM_ENCRYPT
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
#include "misc.h"
/* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644
index 000000000000..b4469a37e9a1
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -0,0 +1,28 @@
+#include <asm/processor.h>
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+int l5_paging_required(void)
+{
+ /* Check if leaf 7 is supported. */
+
+ if (native_cpuid_eax(0) < 7)
+ return 0;
+
+ /* Check if la57 is supported. */
+ if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ return 0;
+
+ /* Check if 5-level paging has already been enabled. */
+ if (native_read_cr4() & X86_CR4_LA57)
+ return 0;
+
+ return 1;
+}
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index 49f4970f693b..6a10d52a4145 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -44,9 +44,9 @@ FDINITRD=$6
# Make sure the files actually exist
verify "$FBZIMAGE"
-verify "$MTOOLSRC"
genbzdisk() {
+ verify "$MTOOLSRC"
mformat a:
syslinux $FIMAGE
echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
}
genfdimage144() {
+ verify "$MTOOLSRC"
dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
mformat v:
syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
}
genfdimage288() {
+ verify "$MTOOLSRC"
dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
mformat w:
syslinux $FIMAGE
@@ -78,39 +80,43 @@ genfdimage288() {
mcopy $FBZIMAGE w:linux
}
-genisoimage() {
+geniso() {
tmp_dir=`dirname $FIMAGE`/isoimage
rm -rf $tmp_dir
mkdir $tmp_dir
- for i in lib lib64 share end ; do
+ for i in lib lib64 share ; do
for j in syslinux ISOLINUX ; do
if [ -f /usr/$i/$j/isolinux.bin ] ; then
isolinux=/usr/$i/$j/isolinux.bin
- cp $isolinux $tmp_dir
fi
done
for j in syslinux syslinux/modules/bios ; do
if [ -f /usr/$i/$j/ldlinux.c32 ]; then
ldlinux=/usr/$i/$j/ldlinux.c32
- cp $ldlinux $tmp_dir
fi
done
if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
break
fi
- if [ $i = end -a -z "$isolinux" ] ; then
- echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
- exit 1
- fi
done
+ if [ -z "$isolinux" ] ; then
+ echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
+ exit 1
+ fi
+ if [ -z "$ldlinux" ] ; then
+ echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
+ exit 1
+ fi
+ cp $isolinux $tmp_dir
+ cp $ldlinux $tmp_dir
cp $FBZIMAGE $tmp_dir/linux
echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
if [ -f "$FDINITRD" ] ; then
cp "$FDINITRD" $tmp_dir/initrd.img
fi
- mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \
- -c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \
- $tmp_dir
+ genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
+ -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
+ -boot-info-table $tmp_dir
isohybrid $FIMAGE 2>/dev/null || true
rm -rf $tmp_dir
}
@@ -119,6 +125,6 @@ case $1 in
bzdisk) genbzdisk;;
fdimage144) genfdimage144;;
fdimage288) genfdimage288;;
- isoimage) genisoimage;;
+ isoimage) geniso;;
*) echo 'Unknown image format'; exit 1;
esac
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index 399a29d067d6..cb91a64a99e7 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,
salsa20_ivsetup(ctx, walk.iv);
- if (likely(walk.nbytes == nbytes))
- {
- salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
- walk.dst.virt.addr, nbytes);
- return blkcipher_walk_done(desc, &walk, 0);
- }
-
while (walk.nbytes >= 64) {
salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
walk.dst.virt.addr,
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3fd8bc560fae..45a63e00a6af 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,6 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
+#include <asm/cpufeatures.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
/*
@@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with
#endif
.endm
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
+ * halves:
+ */
+#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT)
+#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+
+.macro SET_NOFLUSH_BIT reg:req
+ bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+.macro ADJUST_KERNEL_CR3 reg:req
+ ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
+ /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
+ andq $(~PTI_SWITCH_MASK), \reg
+.endm
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+ mov %cr3, \scratch_reg
+ ADJUST_KERNEL_CR3 \scratch_reg
+ mov \scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+#define THIS_CPU_user_pcid_flush_mask \
+ PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+ mov %cr3, \scratch_reg
+
+ ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+ /*
+ * Test if the ASID needs a flush.
+ */
+ movq \scratch_reg, \scratch_reg2
+ andq $(0x7FF), \scratch_reg /* mask ASID */
+ bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
+ jnc .Lnoflush_\@
+
+ /* Flush needed, clear the bit */
+ btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
+ movq \scratch_reg2, \scratch_reg
+ jmp .Lwrcr3_\@
+
+.Lnoflush_\@:
+ movq \scratch_reg2, \scratch_reg
+ SET_NOFLUSH_BIT \scratch_reg
+
+.Lwrcr3_\@:
+ /* Flip the PGD and ASID to the user version */
+ orq $(PTI_SWITCH_MASK), \scratch_reg
+ mov \scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+ pushq %rax
+ SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+ popq %rax
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+ ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+ movq %cr3, \scratch_reg
+ movq \scratch_reg, \save_reg
+ /*
+ * Is the "switch mask" all zero? That means that both of
+ * these are zero:
+ *
+ * 1. The user/kernel PCID bit, and
+ * 2. The user/kernel "bit" that points CR3 to the
+ * bottom half of the 8k PGD
+ *
+ * That indicates a kernel CR3 value, not a user CR3.
+ */
+ testq $(PTI_SWITCH_MASK), \scratch_reg
+ jz .Ldone_\@
+
+ ADJUST_KERNEL_CR3 \scratch_reg
+ movq \scratch_reg, %cr3
+
+.Ldone_\@:
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+ ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+ /*
+ * KERNEL pages can always resume with NOFLUSH as we do
+ * explicit flushes.
+ */
+ bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
+ jnc .Lnoflush_\@
+
+ /*
+ * Check if there's a pending flush for the user ASID we're
+ * about to set.
+ */
+ movq \save_reg, \scratch_reg
+ andq $(0x7FF), \scratch_reg
+ bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
+ jnc .Lnoflush_\@
+
+ btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
+ jmp .Lwrcr3_\@
+
+.Lnoflush_\@:
+ SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
+ /*
+ * The CR3 write could be avoided when not changing its value,
+ * but would require a CR3 read *and* a scratch register.
+ */
+ movq \save_reg, %cr3
+.Lend_\@:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+.endm
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+.endm
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.endm
+
+#endif
+
#endif /* CONFIG_X86_64 */
/*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4838037f97f6..ace8f321a5a1 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -941,9 +941,10 @@ ENTRY(debug)
movl %esp, %eax # pt_regs pointer
/* Are we currently on the SYSENTER stack? */
- PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
- subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
- cmpl $SIZEOF_SYSENTER_stack, %ecx
+ movl PER_CPU_VAR(cpu_entry_area), %ecx
+ addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+ subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
+ cmpl $SIZEOF_entry_stack, %ecx
jb .Ldebug_from_sysenter_stack
TRACE_IRQS_OFF
@@ -984,9 +985,10 @@ ENTRY(nmi)
movl %esp, %eax # pt_regs pointer
/* Are we currently on the SYSENTER stack? */
- PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
- subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
- cmpl $SIZEOF_SYSENTER_stack, %ecx
+ movl PER_CPU_VAR(cpu_entry_area), %ecx
+ addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+ subl %eax, %ecx /* ecx = (end of entry_stack) - esp */
+ cmpl $SIZEOF_entry_stack, %ecx
jb .Lnmi_from_sysenter_stack
/* Not on SYSENTER stack. */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f81d50d7ceac..f048e384ff54 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -23,7 +23,6 @@
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
-#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
@@ -40,6 +39,8 @@
#include <asm/frame.h>
#include <linux/err.h>
+#include "calling.h"
+
.code64
.section .entry.text, "ax"
@@ -140,6 +141,67 @@ END(native_usergs_sysret64)
* with them due to bugs in both AMD and Intel CPUs.
*/
+ .pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline. This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address). So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped) entry
+ * trampoline. We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+ _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH CPU_ENTRY_AREA_entry_stack + \
+ SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+ UNWIND_HINT_EMPTY
+ swapgs
+
+ /* Stash the user RSP. */
+ movq %rsp, RSP_SCRATCH
+
+ /* Note: using %rsp as a scratch reg. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+ /* Load the top of the task stack into RSP */
+ movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+ /* Start building the simulated IRET frame. */
+ pushq $__USER_DS /* pt_regs->ss */
+ pushq RSP_SCRATCH /* pt_regs->sp */
+ pushq %r11 /* pt_regs->flags */
+ pushq $__USER_CS /* pt_regs->cs */
+ pushq %rcx /* pt_regs->ip */
+
+ /*
+ * x86 lacks a near absolute jump, and we can't jump to the real
+ * entry text with a relative jump. We could push the target
+ * address and then use retq, but this destroys the pipeline on
+ * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
+ * spill RDI and restore it in a second-stage trampoline.
+ */
+ pushq %rdi
+ movq $entry_SYSCALL_64_stage2, %rdi
+ jmp *%rdi
+END(entry_SYSCALL_64_trampoline)
+
+ .popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+ UNWIND_HINT_EMPTY
+ popq %rdi
+ jmp entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
ENTRY(entry_SYSCALL_64)
UNWIND_HINT_EMPTY
/*
@@ -149,6 +211,10 @@ ENTRY(entry_SYSCALL_64)
*/
swapgs
+ /*
+ * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+ * is not required to switch CR3.
+ */
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -330,8 +396,25 @@ syscall_return_via_sysret:
popq %rsi /* skip rcx */
popq %rdx
popq %rsi
+
+ /*
+ * Now all regs are restored except RSP and RDI.
+ * Save old stack pointer and switch to trampoline stack.
+ */
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+ pushq RSP-RDI(%rdi) /* RSP */
+ pushq (%rdi) /* RDI */
+
+ /*
+ * We are on the trampoline stack. All regs except RDI are live.
+ * We can do future final exit work right here.
+ */
+ SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
popq %rdi
- movq RSP-ORIG_RAX(%rsp), %rsp
+ popq %rsp
USERGS_SYSRET64
END(entry_SYSCALL_64)
@@ -466,12 +549,13 @@ END(irq_entries_start)
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
- pushfq
- testl $X86_EFLAGS_IF, (%rsp)
+ pushq %rax
+ SAVE_FLAGS(CLBR_RAX)
+ testl $X86_EFLAGS_IF, %eax
jz .Lokay_\@
ud2
.Lokay_\@:
- addq $8, %rsp
+ popq %rax
#endif
.endm
@@ -563,6 +647,13 @@ END(irq_entries_start)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
cld
+
+ testb $3, CS-ORIG_RAX(%rsp)
+ jz 1f
+ SWAPGS
+ call switch_to_thread_stack
+1:
+
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
@@ -572,12 +663,8 @@ END(irq_entries_start)
jz 1f
/*
- * IRQ from user mode. Switch to kernel gsbase and inform context
- * tracking that we're in kernel mode.
- */
- SWAPGS
-
- /*
+ * IRQ from user mode.
+ *
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
* (which can take locks). Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +717,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
ud2
1:
#endif
- SWAPGS
POP_EXTRA_REGS
- POP_C_REGS
- addq $8, %rsp /* skip regs->orig_ax */
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+
+ /*
+ * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+ * Save old stack pointer and switch to trampoline stack.
+ */
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+ /* Copy the IRET frame to the trampoline stack. */
+ pushq 6*8(%rdi) /* SS */
+ pushq 5*8(%rdi) /* RSP */
+ pushq 4*8(%rdi) /* EFLAGS */
+ pushq 3*8(%rdi) /* CS */
+ pushq 2*8(%rdi) /* RIP */
+
+ /* Push user RDI on the trampoline stack. */
+ pushq (%rdi)
+
+ /*
+ * We are on the trampoline stack. All regs except RDI are live.
+ * We can do future final exit work right here.
+ */
+
+ SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
+ /* Restore RDI. */
+ popq %rdi
+ SWAPGS
INTERRUPT_RETURN
@@ -713,7 +833,9 @@ native_irq_return_ldt:
*/
pushq %rdi /* Stash user RDI */
- SWAPGS
+ SWAPGS /* to kernel GS */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
+
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
movq (1*8)(%rsp), %rax /* user RIP */
@@ -729,7 +851,6 @@ native_irq_return_ldt:
/* Now RAX == RSP. */
andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
- popq %rdi /* Restore user RDI */
/*
* espfix_stack[31:16] == 0. The page tables are set up such that
@@ -740,7 +861,11 @@ native_irq_return_ldt:
* still points to an RO alias of the ESPFIX stack.
*/
orq PER_CPU_VAR(espfix_stack), %rax
- SWAPGS
+
+ SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+ SWAPGS /* to user GS */
+ popq %rdi /* Restore user RDI */
+
movq %rax, %rsp
UNWIND_HINT_IRET_REGS offset=8
@@ -829,7 +954,35 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
/*
* Exception entry points.
*/
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack. This is called with the IRET frame and
+ * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+ UNWIND_HINT_FUNC
+
+ pushq %rdi
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+ pushq 7*8(%rdi) /* regs->ss */
+ pushq 6*8(%rdi) /* regs->rsp */
+ pushq 5*8(%rdi) /* regs->eflags */
+ pushq 4*8(%rdi) /* regs->cs */
+ pushq 3*8(%rdi) /* regs->ip */
+ pushq 2*8(%rdi) /* regs->orig_ax */
+ pushq 8(%rdi) /* return address */
+ UNWIND_HINT_FUNC
+
+ movq (%rdi), %rdi
+ ret
+END(switch_to_thread_stack)
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
@@ -848,11 +1001,12 @@ ENTRY(\sym)
ALLOC_PT_GPREGS_ON_STACK
- .if \paranoid
- .if \paranoid == 1
+ .if \paranoid < 2
testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
- jnz 1f
+ jnz .Lfrom_usermode_switch_stack_\@
.endif
+
+ .if \paranoid
call paranoid_entry
.else
call error_entry
@@ -894,20 +1048,15 @@ ENTRY(\sym)
jmp error_exit
.endif
- .if \paranoid == 1
+ .if \paranoid < 2
/*
- * Paranoid entry from userspace. Switch stacks and treat it
+ * Entry from userspace. Switch stacks and treat it
* as a normal entry. This means that paranoid handlers
* run in real process context if user_mode(regs).
*/
-1:
+.Lfrom_usermode_switch_stack_\@:
call error_entry
-
- movq %rsp, %rdi /* pt_regs pointer */
- call sync_regs
- movq %rax, %rsp /* switch stack */
-
movq %rsp, %rdi /* pt_regs pointer */
.if \has_error_code
@@ -1119,7 +1268,11 @@ ENTRY(paranoid_entry)
js 1f /* negative -> in kernel */
SWAPGS
xorl %ebx, %ebx
-1: ret
+
+1:
+ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
+ ret
END(paranoid_entry)
/*
@@ -1141,6 +1294,7 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
+ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
@@ -1168,8 +1322,18 @@ ENTRY(error_entry)
* from user mode due to an IRET fault.
*/
SWAPGS
+ /* We have user CR3. Change to kernel CR3. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
.Lerror_entry_from_usermode_after_swapgs:
+ /* Put us onto the real thread stack. */
+ popq %r12 /* save return addr in %12 */
+ movq %rsp, %rdi /* arg0 = pt_regs pointer */
+ call sync_regs
+ movq %rax, %rsp /* switch stack */
+ ENCODE_FRAME_POINTER
+ pushq %r12
+
/*
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1370,7 @@ ENTRY(error_entry)
* .Lgs_change's error handler with kernel gsbase.
*/
SWAPGS
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
jmp .Lerror_entry_done
.Lbstep_iret:
@@ -1215,10 +1380,11 @@ ENTRY(error_entry)
.Lerror_bad_iret:
/*
- * We came from an IRET to user mode, so we have user gsbase.
- * Switch to kernel gsbase:
+ * We came from an IRET to user mode, so we have user
+ * gsbase and CR3. Switch to kernel gsbase and CR3:
*/
SWAPGS
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
/*
* Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1416,10 @@ END(error_exit)
/*
* Runs on exception stack. Xen PV does not go through this path at all,
* so we can use real assembly here.
+ *
+ * Registers:
+ * %r14: Used to save/restore the CR3 of the interrupted context
+ * when PAGE_TABLE_ISOLATION is in use. Do not clobber.
*/
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
@@ -1313,6 +1483,7 @@ ENTRY(nmi)
swapgs
cld
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1736,8 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
+ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
testl %ebx, %ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 568e130d932c..40f17009ec20 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -48,7 +48,11 @@
*/
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
- SWAPGS_UNSAFE_STACK
+ SWAPGS
+
+ /* We are about to clobber %rsp anyway, clobbering here is OK */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/*
@@ -216,6 +220,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq $0 /* pt_regs->r15 = 0 */
/*
+ * We just saved %rdi so it is safe to clobber. It is not
+ * preserved during the C calls inside TRACE_IRQS_OFF anyway.
+ */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+
+ /*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
*/
@@ -256,10 +266,22 @@ sysret32_from_system_call:
* when the system call started, which is already known to user
* code. We zero R8-R10 to avoid info leaks.
*/
+ movq RSP-ORIG_RAX(%rsp), %rsp
+
+ /*
+ * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
+ * on the process stack which is not mapped to userspace and
+ * not readable after we SWITCH_TO_USER_CR3. Delay the CR3
+ * switch until after after the last reference to the process
+ * stack.
+ *
+ * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+ */
+ SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+
xorq %r8, %r8
xorq %r9, %r9
xorq %r10, %r10
- movq RSP-ORIG_RAX(%rsp), %rsp
swapgs
sysretl
END(entry_SYSCALL_compat)
@@ -306,8 +328,11 @@ ENTRY(entry_INT80_compat)
*/
movl %eax, %eax
- /* Construct struct pt_regs on stack (iret frame is already on stack) */
pushq %rax /* pt_regs->orig_ax */
+
+ /* switch to thread stack expects orig_ax to be pushed */
+ call switch_to_thread_stack
+
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 11b13c4b43d5..f19856d95c60 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -324,5 +324,5 @@ notrace time_t __vdso_time(time_t *t)
*t = result;
return result;
}
-int time(time_t *t)
+time_t time(time_t *t)
__attribute__((weak, alias("__vdso_time")));
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index f279ba2643dc..577fa8adb785 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -37,6 +37,7 @@
#include <asm/unistd.h>
#include <asm/fixmap.h>
#include <asm/traps.h>
+#include <asm/paravirt.h>
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
WARN_ON_ONCE(address != regs->ip);
+ /* This should be unreachable in NATIVE mode. */
+ if (WARN_ON(vsyscall_mode == NATIVE))
+ return false;
+
if (vsyscall_mode == NONE) {
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
}
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range. Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present. If so, we skip calling
+ * this.
+ */
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
+ set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+ p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+ p4d->p4d |= _PAGE_USER;
+#endif
+ pud = pud_offset(p4d, VSYSCALL_ADDR);
+ set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+ pmd = pmd_offset(pud, VSYSCALL_ADDR);
+ set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
void __init map_vsyscall(void)
{
extern char __vsyscall_page;
unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
- if (vsyscall_mode != NONE)
+ if (vsyscall_mode != NONE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
vsyscall_mode == NATIVE
? PAGE_KERNEL_VSYSCALL
: PAGE_KERNEL_VVAR);
+ set_vsyscall_pgtable_user_bits(swapper_pg_dir);
+ }
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 09c26a4f139c..731153a4681e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
__init int intel_pmu_init(void)
{
+ struct attribute **extra_attr = NULL;
+ struct attribute **to_free = NULL;
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
@@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
unsigned int unused;
struct extra_reg *er;
int version, i;
- struct attribute **extra_attr = NULL;
char *name;
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
extra_attr = merge_attr(extra_attr, skl_format_attr);
+ to_free = extra_attr;
x86_pmu.cpu_events = get_hsw_events_attrs();
intel_pmu_pebs_data_source_skl(
boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
pr_cont("full-width counters, ");
}
+ kfree(to_free);
return 0;
}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3674a4b6f8bd..8f0aace08b87 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -3,16 +3,18 @@
#include <linux/types.h>
#include <linux/slab.h>
+#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include "../perf_event.h"
+/* Waste a full page so it can be mapped into the cpu_entry_area */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
-#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_FIXUP_SIZE PAGE_SIZE
/*
@@ -279,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu)
static DEFINE_PER_CPU(void *, insn_buffer);
-static int alloc_pebs_buffer(int cpu)
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ phys_addr_t pa;
+ size_t msz = 0;
+
+ pa = virt_to_phys(addr);
+ for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+ cea_set_pte(cea, pa, prot);
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+ size_t msz = 0;
+
+ for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+ cea_set_pte(cea, 0, PAGE_NONE);
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+ unsigned int order = get_order(size);
int node = cpu_to_node(cpu);
- int max;
- void *buffer, *ibuffer;
+ struct page *page;
+
+ page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+ return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+ if (buffer)
+ free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+ struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+ struct debug_store *ds = hwev->ds;
+ size_t bsiz = x86_pmu.pebs_buffer_size;
+ int max, node = cpu_to_node(cpu);
+ void *buffer, *ibuffer, *cea;
if (!x86_pmu.pebs)
return 0;
- buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+ buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
if (unlikely(!buffer))
return -ENOMEM;
@@ -300,25 +337,27 @@ static int alloc_pebs_buffer(int cpu)
if (x86_pmu.intel_cap.pebs_format < 2) {
ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
if (!ibuffer) {
- kfree(buffer);
+ dsfree_pages(buffer, bsiz);
return -ENOMEM;
}
per_cpu(insn_buffer, cpu) = ibuffer;
}
-
- max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
- ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+ hwev->ds_pebs_vaddr = buffer;
+ /* Update the cpu entry area mapping */
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+ ds->pebs_buffer_base = (unsigned long) cea;
+ ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
ds->pebs_index = ds->pebs_buffer_base;
- ds->pebs_absolute_maximum = ds->pebs_buffer_base +
- max * x86_pmu.pebs_record_size;
-
+ max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+ ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
return 0;
}
static void release_pebs_buffer(int cpu)
{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+ struct debug_store *ds = hwev->ds;
+ void *cea;
if (!ds || !x86_pmu.pebs)
return;
@@ -326,73 +365,70 @@ static void release_pebs_buffer(int cpu)
kfree(per_cpu(insn_buffer, cpu));
per_cpu(insn_buffer, cpu) = NULL;
- kfree((void *)(unsigned long)ds->pebs_buffer_base);
+ /* Clear the fixmap */
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+ ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
ds->pebs_buffer_base = 0;
+ dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+ hwev->ds_pebs_vaddr = NULL;
}
static int alloc_bts_buffer(int cpu)
{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
- int node = cpu_to_node(cpu);
- int max, thresh;
- void *buffer;
+ struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+ struct debug_store *ds = hwev->ds;
+ void *buffer, *cea;
+ int max;
if (!x86_pmu.bts)
return 0;
- buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+ buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
if (unlikely(!buffer)) {
WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
return -ENOMEM;
}
-
- max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
- thresh = max / 16;
-
- ds->bts_buffer_base = (u64)(unsigned long)buffer;
+ hwev->ds_bts_vaddr = buffer;
+ /* Update the fixmap */
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+ ds->bts_buffer_base = (unsigned long) cea;
+ ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
ds->bts_index = ds->bts_buffer_base;
- ds->bts_absolute_maximum = ds->bts_buffer_base +
- max * BTS_RECORD_SIZE;
- ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
- thresh * BTS_RECORD_SIZE;
-
+ max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+ ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+ ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
return 0;
}
static void release_bts_buffer(int cpu)
{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+ struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+ struct debug_store *ds = hwev->ds;
+ void *cea;
if (!ds || !x86_pmu.bts)
return;
- kfree((void *)(unsigned long)ds->bts_buffer_base);
+ /* Clear the fixmap */
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+ ds_clear_cea(cea, BTS_BUFFER_SIZE);
ds->bts_buffer_base = 0;
+ dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+ hwev->ds_bts_vaddr = NULL;
}
static int alloc_ds_buffer(int cpu)
{
- int node = cpu_to_node(cpu);
- struct debug_store *ds;
-
- ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
- if (unlikely(!ds))
- return -ENOMEM;
+ struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
+ memset(ds, 0, sizeof(*ds));
per_cpu(cpu_hw_events, cpu).ds = ds;
-
return 0;
}
static void release_ds_buffer(int cpu)
{
- struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
- if (!ds)
- return;
-
per_cpu(cpu_hw_events, cpu).ds = NULL;
- kfree(ds);
}
void release_ds_buffers(void)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index f7aaadf9331f..8e4ea143ed96 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -14,6 +14,8 @@
#include <linux/perf_event.h>
+#include <asm/intel_ds.h>
+
/* To enable MSR tracing please use the generic trace points. */
/*
@@ -77,8 +79,6 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS 8
#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
/*
@@ -95,23 +95,6 @@ struct amd_nb {
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
- u64 bts_buffer_base;
- u64 bts_index;
- u64 bts_absolute_maximum;
- u64 bts_interrupt_threshold;
- u64 pebs_buffer_base;
- u64 pebs_index;
- u64 pebs_absolute_maximum;
- u64 pebs_interrupt_threshold;
- u64 pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
#define PEBS_REGS \
(PERF_REG_X86_AX | \
PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
* Intel DebugStore bits
*/
struct debug_store *ds;
+ void *ds_pebs_vaddr;
+ void *ds_bts_vaddr;
u64 pebs_enabled;
int n_pebs;
int n_large_pebs;
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 219faaec51df..386a6900e206 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -136,6 +136,7 @@
#endif
#ifndef __ASSEMBLY__
+#ifndef __BPF__
/*
* This output constraint should be used for any inline asm which has a "call"
* instruction. Otherwise the asm may be inserted before the frame pointer
@@ -145,5 +146,6 @@
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
#endif
+#endif
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644
index 000000000000..4a7884b8dca5
--- /dev/null
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+#include <asm/intel_ds.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code. Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+ char gdt[PAGE_SIZE];
+
+ /*
+ * The GDT is just below entry_stack and thus serves (on x86_64) as
+ * a a read-only guard page.
+ */
+ struct entry_stack_page entry_stack_page;
+
+ /*
+ * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
+ * we need task switches to work, and task switches write to the TSS.
+ */
+ struct tss_struct tss;
+
+ char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+ /*
+ * Exception stacks used for IST entries.
+ *
+ * In the future, this should have a separate slot for each stack
+ * with guard pages between them.
+ */
+ char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+ /*
+ * Per CPU debug store for Intel performance monitoring. Wastes a
+ * full page at the moment.
+ */
+ struct debug_store cpu_debug_store;
+ /*
+ * The actual PEBS/BTS buffers must be mapped to user space
+ * Reserve enough fixmap PTEs.
+ */
+ struct debug_store_buffers cpu_debug_buffers;
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
+#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE \
+ (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+
+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+static inline struct entry_stack *cpu_entry_stack(int cpu)
+{
+ return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+}
+
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bf6a76202a77..ea9a7dde62e5 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c0b0e9e8aa66..07cdd1715705 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -197,11 +197,12 @@
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
-
+#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -266,6 +267,7 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -339,5 +341,6 @@
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4011cb03ef08..13c5ee878a47 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -7,6 +7,7 @@
#include <asm/mmu.h>
#include <asm/fixmap.h>
#include <asm/irq_vectors.h>
+#include <asm/cpu_entry_area.h>
#include <linux/smp.h>
#include <linux/percpu.h>
@@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
desc->type = (info->read_exec_only ^ 1) << 1;
desc->type |= info->contents << 2;
+ /* Set the ACCESS bit so it can be mapped RO */
+ desc->type |= 1;
desc->s = 1;
desc->dpl = 0x3;
@@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
return this_cpu_ptr(&gdt_page)->gdt;
}
-/* Get the fixmap index for a specific processor */
-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
-{
- return FIX_GDT_REMAP_BEGIN + cpu;
-}
-
/* Provide the fixmap address of the remapped GDT */
static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
{
- unsigned int idx = get_cpu_gdt_ro_index(cpu);
- return (struct desc_struct *)__fix_to_virt(idx);
+ return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
}
/* Provide the current read-only GDT */
@@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
#endif
}
-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
{
struct desc_struct *d = get_cpu_gdt_rw(cpu);
tss_desc tss;
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 14d6d5007314..b027633e7300 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -50,6 +50,12 @@
# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
#endif
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI 0
+#else
+# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -60,7 +66,7 @@
#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
-#define DISABLED_MASK7 0
+#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 0
#define DISABLED_MASK9 (DISABLE_MPX)
#define DISABLED_MASK10 0
diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h
index 0211029076ea..6777480d8a42 100644
--- a/arch/x86/include/asm/espfix.h
+++ b/arch/x86/include/asm/espfix.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_ESPFIX_H
#define _ASM_X86_ESPFIX_H
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
#include <asm/percpu.h>
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
extern void init_espfix_bsp(void);
extern void init_espfix_ap(int cpu);
-
-#endif /* CONFIG_X86_64 */
+#else
+static inline void init_espfix_ap(int cpu) { }
+#endif
#endif /* _ASM_X86_ESPFIX_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index b0c505fe9a95..64c4a30e0d39 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
PAGE_SIZE)
#endif
-
/*
* Here we define all the compile-time 'special' virtual
* addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
#endif
- FIX_RO_IDT, /* Virtual mapping for read-only IDT */
#ifdef CONFIG_X86_32
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
- /* Fixmap entries to remap the GDTs, one per processor. */
- FIX_GDT_REMAP_BEGIN,
- FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
#ifdef CONFIG_ACPI_APEI_GHES
/* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
extern void reserve_top_address(unsigned long reserve);
#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
extern int fixmaps_set;
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 1b0a5abcd8ae..96aa6b9884dc 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,16 +20,7 @@
#ifndef _ASM_X86_HYPERVISOR_H
#define _ASM_X86_HYPERVISOR_H
-#ifdef CONFIG_HYPERVISOR_GUEST
-
-#include <asm/kvm_para.h>
-#include <asm/x86_init.h>
-#include <asm/xen/hypervisor.h>
-
-/*
- * x86 hypervisor information
- */
-
+/* x86 hypervisor types */
enum x86_hypervisor_type {
X86_HYPER_NATIVE = 0,
X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
X86_HYPER_KVM,
};
+#ifdef CONFIG_HYPERVISOR_GUEST
+
+#include <asm/kvm_para.h>
+#include <asm/x86_init.h>
+#include <asm/xen/hypervisor.h>
+
struct hypervisor_x86 {
/* Hypervisor name */
const char *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
extern enum x86_hypervisor_type x86_hyper_type;
extern void init_hypervisor_platform(void);
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+ return x86_hyper_type == type;
+}
#else
static inline void init_hypervisor_platform(void) { }
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+ return type == X86_HYPER_NATIVE;
+}
#endif /* CONFIG_HYPERVISOR_GUEST */
#endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
new file mode 100644
index 000000000000..62a9f4966b42
--- /dev/null
+++ b/arch/x86/include/asm/intel_ds.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_INTEL_DS_H
+#define _ASM_INTEL_DS_H
+
+#include <linux/percpu-defs.h>
+
+#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS 8
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+ u64 bts_buffer_base;
+ u64 bts_index;
+ u64 bts_absolute_maximum;
+ u64 bts_interrupt_threshold;
+ u64 pebs_buffer_base;
+ u64 pebs_index;
+ u64 pebs_absolute_maximum;
+ u64 pebs_interrupt_threshold;
+ u64 pebs_event_reset[MAX_PEBS_EVENTS];
+} __aligned(PAGE_SIZE);
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
+struct debug_store_buffers {
+ char bts_buffer[BTS_BUFFER_SIZE];
+ char pebs_buffer[PEBS_BUFFER_SIZE];
+};
+
+#endif
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644
index 000000000000..989cfa86de85
--- /dev/null
+++ b/arch/x86/include/asm/invpcid.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVPCID
+#define _ASM_X86_INVPCID
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+{
+ struct { u64 d[2]; } desc = { { pcid, addr } };
+
+ /*
+ * The memory clobber is because the whole point is to invalidate
+ * stale TLB entries and, especially if we're flushing global
+ * mappings, we don't want the compiler to reorder any subsequent
+ * memory accesses before the TLB flush.
+ *
+ * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+ * invpcid (%rcx), %rax in long mode.
+ */
+ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+ : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR 0
+#define INVPCID_TYPE_SINGLE_CTXT 1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
+#define INVPCID_TYPE_ALL_NON_GLOBAL 3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+ unsigned long addr)
+{
+ __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+ __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+ __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
+#endif /* _ASM_X86_INVPCID */
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index 139feef467f7..c066ffae222b 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs);
extern int mp_irqdomain_activate(struct irq_domain *domain,
- struct irq_data *irq_data, bool early);
+ struct irq_data *irq_data, bool reserve);
extern void mp_irqdomain_deactivate(struct irq_domain *domain,
struct irq_data *irq_data);
extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index c8ef23f2c28f..89f08955fff7 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
swapgs; \
sysretl
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(x) pushfq; popq %rax
+#endif
#else
#define INTERRUPT_RETURN iret
#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index f86a8caa561e..395c9631e000 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
extern void __show_regs(struct pt_regs *regs, int all);
+extern void show_iret_regs(struct pt_regs *regs);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/include/asm/kmemcheck.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 034caa1a084e..b24b1c8b3979 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -214,8 +214,6 @@ struct x86_emulate_ops {
void (*halt)(struct x86_emulate_ctxt *ctxt);
void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
- void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
- void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
int (*intercept)(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 977de5fb968b..516798431328 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -536,7 +536,20 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
+ /*
+ * QEMU userspace and the guest each have their own FPU state.
+ * In vcpu_run, we switch between the user and guest FPU contexts.
+ * While running a VCPU, the VCPU thread will have the guest FPU
+ * context.
+ *
+ * Note that while the PKRU state lives inside the fpu registers,
+ * it is switched out separately at VMENTER and VMEXIT time. The
+ * "guest_fpu" state here contains the guest FPU context, with the
+ * host PRKU bits.
+ */
+ struct fpu user_fpu;
struct fpu guest_fpu;
+
u64 xcr0;
u64 guest_supported_xcr0;
u32 guest_xstate_size;
@@ -1435,4 +1448,7 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 9ea26f167497..5ff3e8af2c20 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -3,6 +3,7 @@
#define _ASM_X86_MMU_H
#include <linux/spinlock.h>
+#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/atomic.h>
@@ -27,7 +28,8 @@ typedef struct {
atomic64_t tlb_gen;
#ifdef CONFIG_MODIFY_LDT_SYSCALL
- struct ldt_struct *ldt;
+ struct rw_semaphore ldt_usr_sem;
+ struct ldt_struct *ldt;
#endif
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 6d16d15d09a0..c931b88982a0 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -50,22 +50,53 @@ struct ldt_struct {
* call gates. On native, we could merge the ldt_struct and LDT
* allocations, but it's not worth trying to optimize.
*/
- struct desc_struct *entries;
- unsigned int nr_entries;
+ struct desc_struct *entries;
+ unsigned int nr_entries;
+
+ /*
+ * If PTI is in use, then the entries array is not mapped while we're
+ * in user mode. The whole array will be aliased at the addressed
+ * given by ldt_slot_va(slot). We use two slots so that we can allocate
+ * and map, and enable a new LDT without invalidating the mapping
+ * of an older, still-in-use LDT.
+ *
+ * slot will be -1 if this LDT doesn't have an alias mapping.
+ */
+ int slot;
};
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+#ifdef CONFIG_X86_64
+ return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+#else
+ BUG();
+#endif
+}
+
/*
* Used for LDT copy/destruction.
*/
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+static inline void init_new_context_ldt(struct mm_struct *mm)
+{
+ mm->context.ldt = NULL;
+ init_rwsem(&mm->context.ldt_usr_sem);
+}
+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
+void ldt_arch_exit_mmap(struct mm_struct *mm);
#else /* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context_ldt(struct task_struct *tsk,
- struct mm_struct *mm)
+static inline void init_new_context_ldt(struct mm_struct *mm) { }
+static inline int ldt_dup_context(struct mm_struct *oldmm,
+ struct mm_struct *mm)
{
return 0;
}
-static inline void destroy_context_ldt(struct mm_struct *mm) {}
+static inline void destroy_context_ldt(struct mm_struct *mm) { }
+static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
static inline void load_mm_ldt(struct mm_struct *mm)
@@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
* that we can see.
*/
- if (unlikely(ldt))
- set_ldt(ldt->entries, ldt->nr_entries);
- else
+ if (unlikely(ldt)) {
+ if (static_cpu_has(X86_FEATURE_PTI)) {
+ if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+ /*
+ * Whoops -- either the new LDT isn't mapped
+ * (if slot == -1) or is mapped into a bogus
+ * slot (if slot > 1).
+ */
+ clear_LDT();
+ return;
+ }
+
+ /*
+ * If page table isolation is enabled, ldt->entries
+ * will not be mapped in the userspace pagetables.
+ * Tell the CPU to access the LDT through the alias
+ * at ldt_slot_va(ldt->slot).
+ */
+ set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+ } else {
+ set_ldt(ldt->entries, ldt->nr_entries);
+ }
+ } else {
clear_LDT();
+ }
#else
clear_LDT();
#endif
@@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ mutex_init(&mm->context.lock);
+
mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
atomic64_set(&mm->context.tlb_gen, 0);
- #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
/* pkey 0 is the default and always allocated */
mm->context.pkey_allocation_map = 0x1;
/* -1 means unallocated or invalid */
mm->context.execute_only_pkey = -1;
}
- #endif
- return init_new_context_ldt(tsk, mm);
+#endif
+ init_new_context_ldt(mm);
+ return 0;
}
static inline void destroy_context(struct mm_struct *mm)
{
@@ -176,15 +231,16 @@ do { \
} while (0)
#endif
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
paravirt_arch_dup_mmap(oldmm, mm);
+ return ldt_dup_context(oldmm, mm);
}
static inline void arch_exit_mmap(struct mm_struct *mm)
{
paravirt_arch_exit_mmap(mm);
+ ldt_arch_exit_mmap(mm);
}
#ifdef CONFIG_X86_64
@@ -282,33 +338,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
}
/*
- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
- * bits. This serves two purposes. It prevents a nasty situation in
- * which PCID-unaware code saves CR3, loads some other value (with PCID
- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
- * the saved ASID was nonzero. It also means that any bugs involving
- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
- * deterministically.
- */
-
-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
-{
- if (static_cpu_has(X86_FEATURE_PCID)) {
- VM_WARN_ON_ONCE(asid > 4094);
- return __sme_pa(mm->pgd) | (asid + 1);
- } else {
- VM_WARN_ON_ONCE(asid != 0);
- return __sme_pa(mm->pgd);
- }
-}
-
-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-{
- VM_WARN_ON_ONCE(asid > 4094);
- return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
-}
-
-/*
* This can be used from process context to figure out what the value of
* CR3 is without needing to do a (slow) __read_cr3().
*
@@ -317,7 +346,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
*/
static inline unsigned long __get_current_cr3_fast(void)
{
- unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+ unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
this_cpu_read(cpu_tlbstate.loaded_mm_asid));
/* For now, be very restrictive about when this can be called. */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 283efcaac8af..892df375b615 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -927,6 +927,15 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(clobbers) \
+ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
+ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif
+
#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 4b5e1eafada7..aff42e1da6ee 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
*/
extern gfp_t __userpte_alloc_gfp;
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Instead of one PGD, we acquire two PGDs. Being order-1, it is
+ * both 8k in size and 8k-aligned. That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#define PGD_ALLOCATION_ORDER 1
+#else
+#define PGD_ALLOCATION_ORDER 0
+#endif
+
/*
* Allocate and free page tables.
*/
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 95e2dfd75521..e42b8943cb1a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
void ptdump_walk_pgd_level_checkwx(void);
#ifdef CONFIG_DEBUG_WX
@@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
static inline int p4d_bad(p4d_t p4d)
{
- return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+ unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
+
+ if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ ignore_flags |= _PAGE_NX;
+
+ return (p4d_flags(p4d) & ~ignore_flags) != 0;
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
@@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
static inline int pgd_bad(pgd_t pgd)
{
- return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ unsigned long ignore_flags = _PAGE_USER;
+
+ if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ ignore_flags |= _PAGE_NX;
+
+ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
}
static inline int pgd_none(pgd_t pgd)
@@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
* pgd_offset() returns a (pgd_t *)
* pgd_index() is used get the offset into the pgd page's array of pgd_t's;
*/
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
/*
* a shortcut which implies the use of the kernel's pgd, instead
* of a process's
@@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud)
*/
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
- memcpy(dst, src, count * sizeof(pgd_t));
+ memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+ /* Clone the user space pgd as well */
+ memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
+ count * sizeof(pgd_t));
+#endif
}
#define PTE_SHIFT ilog2(PTRS_PER_PTE)
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index f2ca9b28fd68..ce245b0cdfca 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
#define LAST_PKMAP 1024
#endif
-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
- & PMD_MASK)
+/*
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
+
+#define CPU_ENTRY_AREA_BASE \
+ ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+
+#define PKMAP_BASE \
+ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
#else
-# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
+# define VMALLOC_END (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
#endif
#define MODULES_VADDR VMALLOC_START
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e9f05331e732..81462e9a34f6 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
#endif
}
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
+ * the user one is in the last 4k. To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+ unsigned long __ptr = (unsigned long)ptr;
+
+ __ptr |= BIT(bit);
+ return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+ unsigned long __ptr = (unsigned long)ptr;
+
+ __ptr &= ~BIT(bit);
+ return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+ return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+ return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+ return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+ return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Page table pages are page-aligned. The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+ unsigned long ptr = (unsigned long)__ptr;
+
+ return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return pgd;
+ return __pti_set_user_pgd(pgdp, pgd);
+}
+#else
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ return pgd;
+}
+#endif
+
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+ p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
*p4dp = p4d;
+#endif
}
static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ *pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
*pgdp = pgd;
+#endif
}
static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6d5f45dcd4a1..b97a539bcdee 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -76,32 +76,45 @@ typedef struct { pteval_t pte; } pte_t;
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+
#ifdef CONFIG_X86_5LEVEL
-#define VMALLOC_SIZE_TB _AC(16384, UL)
-#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+# define VMALLOC_SIZE_TB _AC(12800, UL)
+# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
+# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+# define LDT_PGD_ENTRY _AC(-112, UL)
+# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#else
-#define VMALLOC_SIZE_TB _AC(32, UL)
-#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+# define VMALLOC_SIZE_TB _AC(32, UL)
+# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
+# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+# define LDT_PGD_ENTRY _AC(-4, UL)
+# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif
+
#ifdef CONFIG_RANDOMIZE_MEMORY
-#define VMALLOC_START vmalloc_base
-#define VMEMMAP_START vmemmap_base
+# define VMALLOC_START vmalloc_base
+# define VMEMMAP_START vmemmap_base
#else
-#define VMALLOC_START __VMALLOC_BASE
-#define VMEMMAP_START __VMEMMAP_BASE
+# define VMALLOC_START __VMALLOC_BASE
+# define VMEMMAP_START __VMEMMAP_BASE
#endif /* CONFIG_RANDOMIZE_MEMORY */
-#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+
+#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+
+#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
-#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
-#define MODULES_LEN (MODULES_END - MODULES_VADDR)
-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
-#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
+#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_LEN (MODULES_END - MODULES_VADDR)
+
+#define ESPFIX_PGD_ENTRY _AC(-2, UL)
+#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
+
+#define CPU_ENTRY_AREA_PGD _AC(-3, UL)
+#define CPU_ENTRY_AREA_BASE (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+
+#define EFI_VA_START ( -4 * (_AC(1, UL) << 30))
+#define EFI_VA_END (-68 * (_AC(1, UL) << 30))
#define EARLY_DYNAMIC_PAGE_TABLES 64
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 43212a43ee69..6a60fea90b9d 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -38,6 +38,11 @@
#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
#define CR3_PCID_MASK 0xFFFull
#define CR3_NOFLUSH BIT_ULL(63)
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define X86_CR3_PTI_SWITCH_BIT 11
+#endif
+
#else
/*
* CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cc16fa882e3e..d3a67fba200a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
-extern struct tss_struct doublefault_tss;
-extern __u32 cpu_caps_cleared[NCAPINTS];
-extern __u32 cpu_caps_set[NCAPINTS];
+extern struct x86_hw_tss doublefault_tss;
+extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
write_cr3(__sme_pa(pgdir));
}
+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
#ifdef CONFIG_X86_32
/* This is the TSS defined by the hardware. */
struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
struct x86_hw_tss {
u32 reserved1;
u64 sp0;
+
+ /*
+ * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+ * Linux does not use ring 1, so sp1 is not otherwise needed.
+ */
u64 sp1;
+
u64 sp2;
u64 reserved2;
u64 ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
#define IO_BITMAP_BITS 65536
#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
#define INVALID_IO_BITMAP_OFFSET 0x8000
+struct entry_stack {
+ unsigned long words[64];
+};
+
+struct entry_stack_page {
+ struct entry_stack stack;
+} __aligned(PAGE_SIZE);
+
struct tss_struct {
/*
- * The hardware state:
+ * The fixed hardware portion. This must not cross a page boundary
+ * at risk of violating the SDM's advice and potentially triggering
+ * errata.
*/
struct x86_hw_tss x86_tss;
@@ -339,18 +360,9 @@ struct tss_struct {
* be within the limit.
*/
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+} __aligned(PAGE_SIZE);
-#ifdef CONFIG_X86_32
- /*
- * Space for the temporary SYSENTER stack.
- */
- unsigned long SYSENTER_stack_canary;
- unsigned long SYSENTER_stack[64];
-#endif
-
-} ____cacheline_aligned;
-
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
/*
* sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
#endif
/*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
static inline void
native_load_sp0(unsigned long sp0)
{
- this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+ this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
static inline unsigned long current_top_of_stack(void)
{
-#ifdef CONFIG_X86_64
- return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
- /* sp0 on x86_32 is special in and around vm86 mode. */
+ /*
+ * We can't read directly from tss.sp0: sp0 on x86_32 is special in
+ * and around vm86 mode and sp0 on x86_64 is special because of the
+ * entry trampoline.
+ */
return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
}
static inline bool on_thread_stack(void)
@@ -837,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x)
#else
/*
- * User space process size. 47bits minus one guard page. The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously. We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
+ * User space process size. This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen. This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
*/
#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
new file mode 100644
index 000000000000..0b5ef05b2d2d
--- /dev/null
+++ b/arch/x86/include/asm/pti.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _ASM_X86_PTI_H
+#define _ASM_X86_PTI_H
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern void pti_init(void);
+extern void pti_check_boottime_disable(void);
+#else
+static inline void pti_check_boottime_disable(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PTI_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index b20f9d623f9c..8f09012b92e7 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -236,11 +236,23 @@
*/
#define EARLY_IDT_HANDLER_SIZE 9
+/*
+ * xen_early_idt_handler_array is for Xen pv guests: for each entry in
+ * early_idt_handler_array it contains a prequel in the form of
+ * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to
+ * max 8 bytes.
+ */
+#define XEN_EARLY_IDT_HANDLER_SIZE 8
+
#ifndef __ASSEMBLY__
extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
extern void early_ignore_irq(void);
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+extern const char xen_early_idt_handler_array[NUM_EXCEPTION_VECTORS][XEN_EARLY_IDT_HANDLER_SIZE];
+#endif
+
/*
* Load a segment. Fall back on loading the zero segment if something goes
* wrong. This variant assumes that loading zero fully clears the segment.
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 8da111b3c342..f73706878772 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -16,6 +16,7 @@ enum stack_type {
STACK_TYPE_TASK,
STACK_TYPE_IRQ,
STACK_TYPE_SOFTIRQ,
+ STACK_TYPE_ENTRY,
STACK_TYPE_EXCEPTION,
STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
};
@@ -28,6 +29,8 @@ struct stack_info {
bool in_task_stack(unsigned long *stack, struct task_struct *task,
struct stack_info *info);
+bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask);
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 982c325dad33..8be6afb58471 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,7 +12,13 @@
/* image of the saved processor state */
struct saved_context {
- u16 es, fs, gs, ss;
+ /*
+ * On x86_32, all segment registers, with the possible exception of
+ * gs, are saved at kernel entry in pt_regs.
+ */
+#ifdef CONFIG_X86_32_LAZY_GS
+ u16 gs;
+#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index 7306e911faee..a7af9f53c0cb 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -20,8 +20,20 @@
*/
struct saved_context {
struct pt_regs regs;
- u16 ds, es, fs, gs, ss;
- unsigned long gs_base, gs_kernel_base, fs_base;
+
+ /*
+ * User CS and SS are saved in current_pt_regs(). The rest of the
+ * segment selectors need to be saved and restored here.
+ */
+ u16 ds, es, fs, gs;
+
+ /*
+ * Usermode FSBASE and GSBASE may not match the fs and gs selectors,
+ * so we save them separately. We save the kernelmode GSBASE to
+ * restore percpu access after resume.
+ */
+ unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
+
unsigned long cr0, cr2, cr3, cr4, cr8;
u64 misc_enable;
bool misc_enable_saved;
@@ -30,8 +42,7 @@ struct saved_context {
u16 gdt_pad; /* Unused */
struct desc_ptr gdt_desc;
u16 idt_pad;
- u16 idt_limit;
- unsigned long idt_base;
+ struct desc_ptr idt;
u16 ldt;
u16 tss;
unsigned long tr;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8c6bd6863db9..eb5f7999a893 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss);
/* This runs runs on the previous thread's stack. */
-static inline void prepare_switch_to(struct task_struct *prev,
- struct task_struct *next)
+static inline void prepare_switch_to(struct task_struct *next)
{
#ifdef CONFIG_VMAP_STACK
/*
@@ -70,7 +69,7 @@ struct fork_frame {
#define switch_to(prev, next, last) \
do { \
- prepare_switch_to(prev, next); \
+ prepare_switch_to(next); \
\
((last) = __switch_to_asm((prev), (next))); \
} while (0)
@@ -79,10 +78,10 @@ do { \
static inline void refresh_sysenter_cs(struct thread_struct *thread)
{
/* Only happens when SEP is enabled, no need to test "SEP"arately: */
- if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+ if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
return;
- this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+ this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
}
#endif
@@ -90,10 +89,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
/* This is used when switching tasks or entering/exiting vm86 mode. */
static inline void update_sp0(struct task_struct *task)
{
+ /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
#ifdef CONFIG_X86_32
load_sp0(task->thread.sp0);
#else
- load_sp0(task_top_of_stack(task));
+ if (static_cpu_has(X86_FEATURE_XENPV))
+ load_sp0(task_top_of_stack(task));
#endif
}
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 70f425947dc5..00223333821a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
#else /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
#endif
#endif
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 509046cfa5ce..4a08dd2ab32a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -9,70 +9,130 @@
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#include <asm/smp.h>
+#include <asm/invpcid.h>
+#include <asm/pti.h>
+#include <asm/processor-flags.h>
-static inline void __invpcid(unsigned long pcid, unsigned long addr,
- unsigned long type)
-{
- struct { u64 d[2]; } desc = { { pcid, addr } };
+/*
+ * The x86 feature is called PCID (Process Context IDentifier). It is similar
+ * to what is traditionally called ASID on the RISC processors.
+ *
+ * We don't use the traditional ASID implementation, where each process/mm gets
+ * its own ASID and flush/restart when we run out of ASID space.
+ *
+ * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
+ * that came by on this CPU, allowing cheaper switch_mm between processes on
+ * this CPU.
+ *
+ * We end up with different spaces for different things. To avoid confusion we
+ * use different names for each of them:
+ *
+ * ASID - [0, TLB_NR_DYN_ASIDS-1]
+ * the canonical identifier for an mm
+ *
+ * kPCID - [1, TLB_NR_DYN_ASIDS]
+ * the value we write into the PCID part of CR3; corresponds to the
+ * ASID+1, because PCID 0 is special.
+ *
+ * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ * for KPTI each mm has two address spaces and thus needs two
+ * PCID values, but we can still do with a single ASID denomination
+ * for each mm. Corresponds to kPCID + 2048.
+ *
+ */
- /*
- * The memory clobber is because the whole point is to invalidate
- * stale TLB entries and, especially if we're flushing global
- * mappings, we don't want the compiler to reorder any subsequent
- * memory accesses before the TLB flush.
- *
- * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
- * invpcid (%rcx), %rax in long mode.
- */
- asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
- : : "m" (desc), "a" (type), "c" (&desc) : "memory");
-}
+/* There are 12 bits of space for ASIDS in CR3 */
+#define CR3_HW_ASID_BITS 12
-#define INVPCID_TYPE_INDIV_ADDR 0
-#define INVPCID_TYPE_SINGLE_CTXT 1
-#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
-#define INVPCID_TYPE_ALL_NON_GLOBAL 3
+/*
+ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+ * user/kernel switches
+ */
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define PTI_CONSUMED_PCID_BITS 1
+#else
+# define PTI_CONSUMED_PCID_BITS 0
+#endif
-/* Flush all mappings for a given pcid and addr, not including globals. */
-static inline void invpcid_flush_one(unsigned long pcid,
- unsigned long addr)
-{
- __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
-}
+#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+
+/*
+ * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
+ * for them being zero-based. Another -1 is because PCID 0 is reserved for
+ * use by non-PCID-aware users.
+ */
+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
-/* Flush all mappings for a given PCID, not including globals. */
-static inline void invpcid_flush_single_context(unsigned long pcid)
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in two cache
+ * lines.
+ */
+#define TLB_NR_DYN_ASIDS 6
+
+/*
+ * Given @asid, compute kPCID
+ */
+static inline u16 kern_pcid(u16 asid)
{
- __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+ VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Make sure that the dynamic ASID space does not confict with the
+ * bit we are using to switch between user and kernel ASIDs.
+ */
+ BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
+
+ /*
+ * The ASID being passed in here should have respected the
+ * MAX_ASID_AVAILABLE and thus never have the switch bit set.
+ */
+ VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
+#endif
+ /*
+ * The dynamically-assigned ASIDs that get passed in are small
+ * (<TLB_NR_DYN_ASIDS). They never have the high switch bit set,
+ * so do not bother to clear it.
+ *
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the
+ * PCID bits. This serves two purposes. It prevents a nasty
+ * situation in which PCID-unaware code saves CR3, loads some other
+ * value (with PCID == 0), and then restores CR3, thus corrupting
+ * the TLB for ASID 0 if the saved ASID was nonzero. It also means
+ * that any bugs involving loading a PCID-enabled CR3 with
+ * CR4.PCIDE off will trigger deterministically.
+ */
+ return asid + 1;
}
-/* Flush all mappings, including globals, for all PCIDs. */
-static inline void invpcid_flush_all(void)
+/*
+ * Given @asid, compute uPCID
+ */
+static inline u16 user_pcid(u16 asid)
{
- __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+ u16 ret = kern_pcid(asid);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+#endif
+ return ret;
}
-/* Flush all mappings for all PCIDs except globals. */
-static inline void invpcid_flush_all_nonglobals(void)
+struct pgd_t;
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{
- __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+ return __sme_pa(pgd) | kern_pcid(asid);
+ } else {
+ VM_WARN_ON_ONCE(asid != 0);
+ return __sme_pa(pgd);
+ }
}
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
{
- u64 new_tlb_gen;
-
- /*
- * Bump the generation count. This also serves as a full barrier
- * that synchronizes with switch_mm(): callers are required to order
- * their read of mm_cpumask after their writes to the paging
- * structures.
- */
- smp_mb__before_atomic();
- new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
- smp_mb__after_atomic();
-
- return new_tlb_gen;
+ VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+ VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+ return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
}
#ifdef CONFIG_PARAVIRT
@@ -99,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
return !static_cpu_has(X86_FEATURE_PCID);
}
-/*
- * 6 because 6 should be plenty and struct tlb_state will fit in
- * two cache lines.
- */
-#define TLB_NR_DYN_ASIDS 6
-
struct tlb_context {
u64 ctx_id;
u64 tlb_gen;
@@ -139,6 +193,24 @@ struct tlb_state {
bool is_lazy;
/*
+ * If set we changed the page tables in such a way that we
+ * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
+ * This tells us to go invalidate all the non-loaded ctxs[]
+ * on the next context switch.
+ *
+ * The current ctx was kept up-to-date as it ran and does not
+ * need to be invalidated.
+ */
+ bool invalidate_other;
+
+ /*
+ * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
+ * the corresponding user PCID needs a flush next time we
+ * switch to it; see SWITCH_TO_USER_CR3.
+ */
+ unsigned short user_pcid_flush_mask;
+
+ /*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
*/
@@ -173,40 +245,43 @@ static inline void cr4_init_shadow(void)
this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}
+static inline void __cr4_set(unsigned long cr4)
+{
+ lockdep_assert_irqs_disabled();
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+}
+
/* Set in this cpu's CR4. */
static inline void cr4_set_bits(unsigned long mask)
{
- unsigned long cr4;
+ unsigned long cr4, flags;
+ local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- if ((cr4 | mask) != cr4) {
- cr4 |= mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
- }
+ if ((cr4 | mask) != cr4)
+ __cr4_set(cr4 | mask);
+ local_irq_restore(flags);
}
/* Clear in this cpu's CR4. */
static inline void cr4_clear_bits(unsigned long mask)
{
- unsigned long cr4;
+ unsigned long cr4, flags;
+ local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- if ((cr4 & ~mask) != cr4) {
- cr4 &= ~mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
- }
+ if ((cr4 & ~mask) != cr4)
+ __cr4_set(cr4 & ~mask);
+ local_irq_restore(flags);
}
-static inline void cr4_toggle_bits(unsigned long mask)
+static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
{
unsigned long cr4;
cr4 = this_cpu_read(cpu_tlbstate.cr4);
- cr4 ^= mask;
- this_cpu_write(cpu_tlbstate.cr4, cr4);
- __write_cr4(cr4);
+ __cr4_set(cr4 ^ mask);
}
/* Read the CR4 shadow. */
@@ -216,6 +291,14 @@ static inline unsigned long cr4_read_shadow(void)
}
/*
+ * Mark all other ASIDs as invalid, preserves the current.
+ */
+static inline void invalidate_other_asid(void)
+{
+ this_cpu_write(cpu_tlbstate.invalidate_other, true);
+}
+
+/*
* Save some of cr4 feature set we're using (e.g. Pentium 4MB
* enable and PPro Global page enable), so that any CPU's that boot
* up after us can get the correct flags. This should only be used
@@ -234,37 +317,63 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
extern void initialize_tlbstate_and_flush(void);
-static inline void __native_flush_tlb(void)
+/*
+ * Given an ASID, flush the corresponding user ASID. We can delay this
+ * until the next time we switch to it.
+ *
+ * See SWITCH_TO_USER_CR3.
+ */
+static inline void invalidate_user_asid(u16 asid)
{
+ /* There is no user ASID if address space separation is off */
+ if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+ return;
+
/*
- * If current->mm == NULL then we borrow a mm which may change during a
- * task switch and therefore we must not be preempted while we write CR3
- * back:
+ * We only have a single ASID if PCID is off and the CR3
+ * write will have flushed it.
*/
- preempt_disable();
- native_write_cr3(__native_read_cr3());
- preempt_enable();
+ if (!cpu_feature_enabled(X86_FEATURE_PCID))
+ return;
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ __set_bit(kern_pcid(asid),
+ (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}
-static inline void __native_flush_tlb_global_irq_disabled(void)
+/*
+ * flush the entire current user mapping
+ */
+static inline void __native_flush_tlb(void)
{
- unsigned long cr4;
+ /*
+ * Preemption or interrupts must be disabled to protect the access
+ * to the per CPU variable and to prevent being preempted between
+ * read_cr3() and write_cr3().
+ */
+ WARN_ON_ONCE(preemptible());
- cr4 = this_cpu_read(cpu_tlbstate.cr4);
- /* clear PGE */
- native_write_cr4(cr4 & ~X86_CR4_PGE);
- /* write old PGE again and flush TLBs */
- native_write_cr4(cr4);
+ invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+
+ /* If current->mm == NULL then the read_cr3() "borrows" an mm */
+ native_write_cr3(__native_read_cr3());
}
+/*
+ * flush everything
+ */
static inline void __native_flush_tlb_global(void)
{
- unsigned long flags;
+ unsigned long cr4, flags;
if (static_cpu_has(X86_FEATURE_INVPCID)) {
/*
* Using INVPCID is considerably faster than a pair of writes
* to CR4 sandwiched inside an IRQ flag save/restore.
+ *
+ * Note, this works with CR4.PCIDE=0 or 1.
*/
invpcid_flush_all();
return;
@@ -277,36 +386,69 @@ static inline void __native_flush_tlb_global(void)
*/
raw_local_irq_save(flags);
- __native_flush_tlb_global_irq_disabled();
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+ /* toggle PGE */
+ native_write_cr4(cr4 ^ X86_CR4_PGE);
+ /* write old PGE again and flush TLBs */
+ native_write_cr4(cr4);
raw_local_irq_restore(flags);
}
+/*
+ * flush one page in the user mapping
+ */
static inline void __native_flush_tlb_single(unsigned long addr)
{
+ u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ /*
+ * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
+ * Just use invalidate_user_asid() in case we are called early.
+ */
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
+ invalidate_user_asid(loaded_mm_asid);
+ else
+ invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
}
+/*
+ * flush everything
+ */
static inline void __flush_tlb_all(void)
{
- if (boot_cpu_has(X86_FEATURE_PGE))
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
__flush_tlb_global();
- else
+ } else {
+ /*
+ * !PGE -> !PCID (setup_pcid()), thus every flush is total.
+ */
__flush_tlb();
-
- /*
- * Note: if we somehow had PCID but not PGE, then this wouldn't work --
- * we'd end up flushing kernel translations for the current ASID but
- * we might fail to flush kernel translations for other cached ASIDs.
- *
- * To avoid this issue, we force PCID off if PGE is off.
- */
+ }
}
+/*
+ * flush one page in the kernel mapping
+ */
static inline void __flush_tlb_one(unsigned long addr)
{
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
__flush_tlb_single(addr);
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ /*
+ * __flush_tlb_single() will have cleared the TLB entry for this ASID,
+ * but since kernel space is replicated across all, we must also
+ * invalidate all others.
+ */
+ invalidate_other_asid();
}
#define TLB_FLUSH_ALL -1UL
@@ -367,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
void native_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+ /*
+ * Bump the generation count. This also serves as a full barrier
+ * that synchronizes with switch_mm(): callers are required to order
+ * their read of mm_cpumask after their writes to the paging
+ * structures.
+ */
+ return atomic64_inc_return(&mm->context.tlb_gen);
+}
+
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm)
{
diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h
index 84b9ec0c1bc0..22647a642e98 100644
--- a/arch/x86/include/asm/trace/irq_vectors.h
+++ b/arch/x86/include/asm/trace/irq_vectors.h
@@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed,
DECLARE_EVENT_CLASS(vector_activate,
TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
- bool early),
+ bool reserve),
- TP_ARGS(irq, is_managed, can_reserve, early),
+ TP_ARGS(irq, is_managed, can_reserve, reserve),
TP_STRUCT__entry(
__field( unsigned int, irq )
__field( bool, is_managed )
__field( bool, can_reserve )
- __field( bool, early )
+ __field( bool, reserve )
),
TP_fast_assign(
__entry->irq = irq;
__entry->is_managed = is_managed;
__entry->can_reserve = can_reserve;
- __entry->early = early;
+ __entry->reserve = reserve;
),
- TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d",
+ TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d",
__entry->irq, __entry->is_managed, __entry->can_reserve,
- __entry->early)
+ __entry->reserve)
);
#define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name) \
DEFINE_EVENT_FN(vector_activate, name, \
TP_PROTO(unsigned int irq, bool is_managed, \
- bool can_reserve, bool early), \
- TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL); \
+ bool can_reserve, bool reserve), \
+ TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL); \
DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1fadd310ff68..31051f35cbb7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
dotraplinkage void do_stack_segment(struct pt_regs *, long);
#ifdef CONFIG_X86_64
dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
#endif
dotraplinkage void do_general_protection(struct pt_regs *, long);
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9cc6fe1fc6f..c1688c2d0a12 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -7,6 +7,9 @@
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
+#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
+#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
+
struct unwind_state {
struct stack_info stack_info;
unsigned long stack_mask;
@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
}
#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
+/*
+ * WARNING: The entire pt_regs may not be safe to dereference. In some cases,
+ * only the iret frame registers are accessible. Use with caution!
+ */
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
if (unwind_done(state))
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d9a7c659009c..b986b2ca688a 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -7,6 +7,7 @@
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
/*
* Called on instruction fetch fault in vsyscall page.
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index da1489cb64dc..1e901e421f2d 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -1,6 +1,7 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generic-y += bpf_perf_event.h
generated-y += unistd_32.h
generated-y += unistd_64.h
generated-y += unistd_x32.h
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 7e1e730396ae..bcba3c643e63 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -78,7 +78,12 @@
#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
+
+#define X86_CR3_PCID_BITS 12
+#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
+
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
/*
* Intel CPU features in CR4
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 6e272f3ea984..880441f24146 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg)
apic_verbosity = APIC_DEBUG;
else if (strcmp("verbose", arg) == 0)
apic_verbosity = APIC_VERBOSE;
+#ifdef CONFIG_X86_64
else {
pr_warning("APIC Verbosity level %s not recognised"
" use apic=verbose or apic=debug\n", arg);
return -EINVAL;
}
+#endif
return 0;
}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index aa85690e9b64..25a87028cb3f 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = flat_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
+ .irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 1, /* logical */
.disable_esr = 0,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 7b659c4480c9..5078b5ce63a7 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = noop_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
+ .irq_delivery_mode = dest_Fixed,
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 201579dc5242..8a7963421460 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
}
int mp_irqdomain_activate(struct irq_domain *domain,
- struct irq_data *irq_data, bool early)
+ struct irq_data *irq_data, bool reserve)
{
unsigned long flags;
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 9b18be764422..ce503c99f5c4 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
((apic->irq_dest_mode == 0) ?
MSI_ADDR_DEST_MODE_PHYSICAL :
MSI_ADDR_DEST_MODE_LOGICAL) |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU :
- MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_REDIRECTION_CPU |
MSI_ADDR_DEST_ID(cfg->dest_apicid);
msg->data =
MSI_DATA_TRIGGER_EDGE |
MSI_DATA_LEVEL_ASSERT |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- MSI_DATA_DELIVERY_FIXED :
- MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_DELIVERY_FIXED |
MSI_DATA_VECTOR(cfg->vector);
}
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index fa22017de806..02e8acb134f8 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = {
.apic_id_valid = default_apic_id_valid,
.apic_id_registered = default_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
+ .irq_delivery_mode = dest_Fixed,
/* logical delivery broadcast to all CPUs: */
.irq_dest_mode = 1,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6a823a25eaff..f8b03bb8e725 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd)
irq_matrix_reserve(vector_matrix);
apicd->can_reserve = true;
apicd->has_reserved = true;
+ irqd_set_can_reserve(irqd);
trace_vector_reserve(irqd->irq, 0);
vector_assign_managed_shutdown(irqd);
}
@@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd)
int ret;
ret = assign_irq_vector_any_locked(irqd);
- if (!ret)
+ if (!ret) {
apicd->has_reserved = false;
+ /*
+ * Core might have disabled reservation mode after
+ * allocating the irq descriptor. Ideally this should
+ * happen before allocation time, but that would require
+ * completely convoluted ways of transporting that
+ * information.
+ */
+ if (!irqd_can_reserve(irqd))
+ apicd->can_reserve = false;
+ }
return ret;
}
@@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd)
}
static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
- bool early)
+ bool reserve)
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
unsigned long flags;
int ret = 0;
trace_vector_activate(irqd->irq, apicd->is_managed,
- apicd->can_reserve, early);
+ apicd->can_reserve, reserve);
/* Nothing to do for fixed assigned vectors */
if (!apicd->can_reserve && !apicd->is_managed)
return 0;
raw_spin_lock_irqsave(&vector_lock, flags);
- if (early || irqd_is_managed_and_shutdown(irqd))
+ if (reserve || irqd_is_managed_and_shutdown(irqd))
vector_assign_managed_shutdown(irqd);
else if (apicd->is_managed)
ret = activate_managed(irqd);
@@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
} else {
/* Release the vector */
apicd->can_reserve = true;
+ irqd_set_can_reserve(irqd);
clear_irq_vector(irqd);
realloc = true;
}
@@ -542,8 +554,8 @@ error:
}
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
- struct irq_data *irqd, int ind)
+static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
+ struct irq_data *irqd, int ind)
{
unsigned int cpu, vector, prev_cpu, prev_vector;
struct apic_chip_data *apicd;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 622f13ca8a94..8b04234e010b 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
.apic_id_valid = x2apic_apic_id_valid,
.apic_id_registered = x2apic_apic_id_registered,
- .irq_delivery_mode = dest_LowestPrio,
+ .irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 1, /* logical */
.disable_esr = 0,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8ea78275480d..76417a9aab73 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -17,6 +17,7 @@
#include <asm/sigframe.h>
#include <asm/bootparam.h>
#include <asm/suspend.h>
+#include <asm/tlbflush.h>
#ifdef CONFIG_XEN
#include <xen/interface/xen.h>
@@ -93,4 +94,13 @@ void common(void) {
BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+
+ /* TLB state for the entry code */
+ OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
+
+ /* Layout info for cpu_entry_area */
+ OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+ OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+ OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
+ DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index dedf428b20b6..fa1261eefa16 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -47,13 +47,8 @@ void foo(void)
BLANK();
/* Offset from the sysenter stack to tss.sp0 */
- DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
- offsetofend(struct tss_struct, SYSENTER_stack));
-
- /* Offset from cpu_tss to SYSENTER_stack */
- OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
- /* Size of SYSENTER_stack */
- DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+ DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+ offsetofend(struct cpu_entry_area, entry_stack_page.stack));
#ifdef CONFIG_CC_STACKPROTECTOR
BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 630212fa9b9d..bf51e51d808d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -23,6 +23,9 @@ int main(void)
#ifdef CONFIG_PARAVIRT
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+#ifdef CONFIG_DEBUG_ENTRY
+ OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
+#endif
BLANK();
#endif
@@ -63,6 +66,7 @@ int main(void)
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+ OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
BLANK();
#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d58184b7cd44..bcb75dc97d44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x17: init_amd_zn(c); break;
}
- /* Enable workaround for FXSAVE leak */
- if (c->x86 >= 6)
+ /*
+ * Enable workaround for FXSAVE leak on CPUs
+ * without a XSaveErPtr feature
+ */
+ if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
cpu_detect_cache_sizes(c);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fa998ca8aa5a..c47de4ebf63a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
void load_percpu_segment(int cpu)
{
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
load_stack_canary_segment();
}
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
- /* On 64-bit systems, we use a read-only fixmap GDT. */
- pgprot_t prot = PAGE_KERNEL_RO;
-#else
- /*
- * On native 32-bit systems, the GDT cannot be read-only because
- * our double fault handler uses a task gate, and entering through
- * a task gate needs to change an available TSS to busy. If the GDT
- * is read-only, that will triple fault.
- *
- * On Xen PV, the GDT must be read-only because the hypervisor requires
- * it.
- */
- pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
- PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
#endif
- __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
+};
+#endif
/* Load the original GDT from the per-cpu structure */
void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
{
int i;
- for (i = 0; i < NCAPINTS; i++) {
+ for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
c->x86_capability[i] &= ~cpu_caps_cleared[i];
c->x86_capability[i] |= cpu_caps_set[i];
}
@@ -927,6 +922,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+ /* Assume for now that ALL x86 CPUs are insecure */
+ setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
@@ -1250,7 +1249,7 @@ void enable_sep_cpu(void)
return;
cpu = get_cpu();
- tss = &per_cpu(cpu_tss, cpu);
+ tss = &per_cpu(cpu_tss_rw, cpu);
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1258,7 @@ void enable_sep_cpu(void)
tss->x86_tss.ss1 = __KERNEL_CS;
wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
- wrmsr(MSR_IA32_SYSENTER_ESP,
- (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
- 0);
-
+ wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
put_cpu();
@@ -1357,25 +1352,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
- [DEBUG_STACK - 1] = DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
+ extern char _entry_trampoline[];
+ extern char entry_SYSCALL_64_trampoline[];
+
+ int cpu = smp_processor_id();
+ unsigned long SYSCALL64_entry_trampoline =
+ (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+ (entry_SYSCALL_64_trampoline - _entry_trampoline);
+
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
- wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+ if (static_cpu_has(X86_FEATURE_PTI))
+ wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+ else
+ wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
#ifdef CONFIG_IA32_EMULATION
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1378,7 @@ void syscall_init(void)
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
*/
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
#else
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1522,7 @@ void cpu_init(void)
if (cpu)
load_ucode_ap();
- t = &per_cpu(cpu_tss, cpu);
+ t = &per_cpu(cpu_tss_rw, cpu);
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
@@ -1569,7 +1561,7 @@ void cpu_init(void)
* set up and load the per-CPU TSS
*/
if (!oist->ist[0]) {
- char *estacks = per_cpu(exception_stacks, cpu);
+ char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
estacks += exception_stack_sizes[v];
@@ -1580,7 +1572,7 @@ void cpu_init(void)
}
}
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
/*
* <= is required because the CPU will access up to
@@ -1596,11 +1588,12 @@ void cpu_init(void)
enter_lazy_tlb(&init_mm, me);
/*
- * Initialize the TSS. Don't bother initializing sp0, as the initial
- * task never enters user mode.
+ * Initialize the TSS. sp0 points to the entry trampoline stack
+ * regardless of what task is running.
*/
- set_tss_desc(cpu, t);
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc();
+ load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
load_mm_ldt(&init_mm);
@@ -1612,7 +1605,6 @@ void cpu_init(void)
if (is_uv_system())
uv_cpu_init();
- setup_fixmap_gdt(cpu);
load_fixmap_gdt(cpu);
}
@@ -1622,7 +1614,7 @@ void cpu_init(void)
{
int cpu = smp_processor_id();
struct task_struct *curr = current;
- struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+ struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
wait_for_master_cpu(cpu);
@@ -1657,12 +1649,12 @@ void cpu_init(void)
* Initialize the TSS. Don't bother initializing sp0, as the initial
* task never enters user mode.
*/
- set_tss_desc(cpu, t);
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc();
load_mm_ldt(&init_mm);
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+ t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
#ifdef CONFIG_DOUBLEFAULT
/* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1666,6 @@ void cpu_init(void)
fpu__init_cpu();
- setup_fixmap_gdt(cpu);
load_fixmap_gdt(cpu);
}
#endif
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index c6daec4bdba5..330b8462d426 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
#define F14H_MPB_MAX_SIZE 1824
#define F15H_MPB_MAX_SIZE 4096
#define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
switch (family) {
case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
case 0x16:
max_size = F16H_MPB_MAX_SIZE;
break;
+ case 0x17:
+ max_size = F17H_MPB_MAX_SIZE;
+ break;
default:
max_size = F1XH_MPB_MAX_SIZE;
break;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 7dbcb7adf797..8ccdca6d3f9e 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
}
#else
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
- __native_flush_tlb_global_irq_disabled();
-}
-
static inline void print_ucode(struct ucode_cpu_info *uci)
{
struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (rev != mc->hdr.rev)
return -1;
-#ifdef CONFIG_X86_64
- /* Flush global tlb. This is precaution. */
- flush_tlb_early();
-#endif
uci->cpu_sig.rev = rev;
if (early)
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index 0e662c55ae90..0b8cedb20d6d 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
cpu_relax();
}
-struct tss_struct doublefault_tss __cacheline_aligned = {
- .x86_tss = {
- .sp0 = STACK_START,
- .ss0 = __KERNEL_DS,
- .ldt = 0,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
-
- .ip = (unsigned long) doublefault_fn,
- /* 0x2 bit is always set */
- .flags = X86_EFLAGS_SF | 0x2,
- .sp = STACK_START,
- .es = __USER_DS,
- .cs = __KERNEL_CS,
- .ss = __KERNEL_DS,
- .ds = __USER_DS,
- .fs = __KERNEL_PERCPU,
-
- .__cr3 = __pa_nodebug(swapper_pg_dir),
- }
+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
+ .sp0 = STACK_START,
+ .ss0 = __KERNEL_DS,
+ .ldt = 0,
+ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+
+ .ip = (unsigned long) doublefault_fn,
+ /* 0x2 bit is always set */
+ .flags = X86_EFLAGS_SF | 0x2,
+ .sp = STACK_START,
+ .es = __USER_DS,
+ .cs = __KERNEL_CS,
+ .ss = __KERNEL_DS,
+ .ds = __USER_DS,
+ .fs = __KERNEL_PERCPU,
+
+ .__cr3 = __pa_nodebug(swapper_pg_dir),
};
/* dummy for do_double_fault() call */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f13b4c00a5de..5fa110699ed2 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,6 +18,7 @@
#include <linux/nmi.h>
#include <linux/sysfs.h>
+#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
return true;
}
+bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+{
+ struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
+
+ void *begin = ss;
+ void *end = ss + 1;
+
+ if ((void *)stack < begin || (void *)stack >= end)
+ return false;
+
+ info->type = STACK_TYPE_ENTRY;
+ info->begin = begin;
+ info->end = end;
+ info->next_sp = NULL;
+
+ return true;
+}
+
static void printk_stack_address(unsigned long address, int reliable,
char *log_lvl)
{
@@ -50,6 +69,28 @@ static void printk_stack_address(unsigned long address, int reliable,
printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
}
+void show_iret_regs(struct pt_regs *regs)
+{
+ printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
+ printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
+ regs->sp, regs->flags);
+}
+
+static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
+{
+ if (on_stack(info, regs, sizeof(*regs)))
+ __show_regs(regs, 0);
+ else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+ IRET_FRAME_SIZE)) {
+ /*
+ * When an interrupt or exception occurs in entry code, the
+ * full pt_regs might not have been saved yet. In that case
+ * just print the iret frame.
+ */
+ show_iret_regs(regs);
+ }
+}
+
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl)
{
@@ -71,31 +112,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* - task stack
* - interrupt stack
* - HW exception stacks (double fault, nmi, debug, mce)
+ * - entry stack
*
- * x86-32 can have up to three stacks:
+ * x86-32 can have up to four stacks:
* - task stack
* - softirq stack
* - hardirq stack
+ * - entry stack
*/
for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
const char *stack_name;
- /*
- * If we overflowed the task stack into a guard page, jump back
- * to the bottom of the usable stack.
- */
- if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
- stack = task_stack_page(task);
-
- if (get_stack_info(stack, task, &stack_info, &visit_mask))
- break;
+ if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+ /*
+ * We weren't on a valid stack. It's possible that
+ * we overflowed a valid stack into a guard page.
+ * See if the next page up is valid so that we can
+ * generate some kind of backtrace if this happens.
+ */
+ stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
+ if (get_stack_info(stack, task, &stack_info, &visit_mask))
+ break;
+ }
stack_name = stack_type_name(stack_info.type);
if (stack_name)
printk("%s <%s>\n", log_lvl, stack_name);
- if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
- __show_regs(regs, 0);
+ if (regs)
+ show_regs_safe(&stack_info, regs);
/*
* Scan the stack, printing any text addresses we find. At the
@@ -119,7 +164,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
/*
* Don't print regs->ip again if it was already printed
- * by __show_regs() below.
+ * by show_regs_safe() below.
*/
if (regs && stack == &regs->ip)
goto next;
@@ -155,8 +200,8 @@ next:
/* if the frame has entry regs, print them */
regs = unwind_get_entry_regs(&state);
- if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
- __show_regs(regs, 0);
+ if (regs)
+ show_regs_safe(&stack_info, regs);
}
if (stack_name)
@@ -252,11 +297,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
unsigned long sp;
#endif
printk(KERN_DEFAULT
- "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+ "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
- IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "");
+ IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
+ IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
+ (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index daefae83a3aa..04170f63e3a1 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
if (type == STACK_TYPE_SOFTIRQ)
return "SOFTIRQ";
+ if (type == STACK_TYPE_ENTRY)
+ return "ENTRY_TRAMPOLINE";
+
return NULL;
}
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
if (task != current)
goto unknown;
+ if (in_entry_stack(stack, info))
+ goto recursion_check;
+
if (in_hardirq_stack(stack, info))
goto recursion_check;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 88ce2ffdb110..563e28d14f2c 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
if (type == STACK_TYPE_IRQ)
return "IRQ";
+ if (type == STACK_TYPE_ENTRY) {
+ /*
+ * On 64-bit, we have a generic entry stack that we
+ * use for all the kernel entry points, including
+ * SYSENTER.
+ */
+ return "ENTRY_TRAMPOLINE";
+ }
+
if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
return exception_stack_names[type - STACK_TYPE_EXCEPTION];
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
if (in_irq_stack(stack, info))
goto recursion_check;
+ if (in_entry_stack(stack, info))
+ goto recursion_check;
+
goto unknown;
recursion_check:
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7dca675fe78d..04a625f0fcda 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
.balign PAGE_SIZE; \
GLOBAL(name)
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Each PGD needs to be 8k long and 8k aligned. We do not
+ * ever go out to userspace with these, so we do not
+ * strictly *need* the second page, but this allows us to
+ * have a single set_pgd() implementation that does not
+ * need to worry about whether it has 4k or 8k to work
+ * with.
+ *
+ * This ensures PGDs are 8k long:
+ */
+#define PTI_USER_PGD_FILL 512
+/* This ensures they are 8k-aligned: */
+#define NEXT_PGD_PAGE(name) \
+ .balign 2 * PAGE_SIZE; \
+GLOBAL(name)
+#else
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define PTI_USER_PGD_FILL 0
+#endif
+
/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
i = 0 ; \
@@ -350,13 +371,14 @@ GLOBAL(name)
.endr
__INITDATA
-NEXT_PAGE(early_top_pgt)
+NEXT_PGD_PAGE(early_top_pgt)
.fill 511,8,0
#ifdef CONFIG_X86_5LEVEL
.quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#else
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#endif
+ .fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
.data
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+ .fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#else
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
.fill 512,8,0
+ .fill PTI_USER_PGD_FILL,8,0
#endif
#ifdef CONFIG_X86_5LEVEL
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 3feb648781c4..2f723301eb58 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* because the ->io_bitmap_max value must match the bitmap
* contents:
*/
- tss = &per_cpu(cpu_tss, get_cpu());
+ tss = &per_cpu(cpu_tss_rw, get_cpu());
if (turn_on)
bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 49cfd9fe7589..68e1867cca80 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
/* high bit used in ret_from_ code */
unsigned vector = ~regs->orig_ax;
- /*
- * NB: Unlike exception entries, IRQ entries do not reliably
- * handle context tracking in the low-level entry code. This is
- * because syscall entries execute briefly with IRQs on before
- * updating context tracking state, so we can take an IRQ from
- * kernel mode with CONTEXT_USER. The low-level entry code only
- * updates the context if we came from user mode, so we won't
- * switch to CONTEXT_KERNEL. We'll fix that once the syscall
- * code is cleaned up enough that we can cleanly defer enabling
- * IRQs.
- */
-
entering_irq();
/* entering_irq() tells RCU that we're not quiescent. Check it. */
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 020efbf5786b..d86e344f5b3d 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
return;
- WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
+ WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
current->comm, curbase, regs->sp,
irq_stack_top, irq_stack_bottom,
- estack_top, estack_bottom);
+ estack_top, estack_bottom, (void *)regs->ip);
if (sysctl_panic_on_stackoverflow)
panic("low stack detected by irq handler - check messages\n");
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 1c1eae961340..26d713ecad34 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -5,6 +5,11 @@
* Copyright (C) 2002 Andi Kleen
*
* This handles calls from both 32bit and 64bit mode.
+ *
+ * Lock order:
+ * contex.ldt_usr_sem
+ * mmap_sem
+ * context.lock
*/
#include <linux/errno.h>
@@ -19,6 +24,7 @@
#include <linux/uaccess.h>
#include <asm/ldt.h>
+#include <asm/tlb.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
@@ -42,17 +48,15 @@ static void refresh_ldt_segments(void)
#endif
}
-/* context.lock is held for us, so we don't need any locking. */
+/* context.lock is held by the task which issued the smp function call */
static void flush_ldt(void *__mm)
{
struct mm_struct *mm = __mm;
- mm_context_t *pc;
if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
return;
- pc = &mm->context;
- set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
+ load_mm_ldt(mm);
refresh_ldt_segments();
}
@@ -89,25 +93,143 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
return NULL;
}
+ /* The new LDT isn't aliased for PTI yet. */
+ new_ldt->slot = -1;
+
new_ldt->nr_entries = num_entries;
return new_ldt;
}
+/*
+ * If PTI is enabled, this maps the LDT into the kernelmode and
+ * usermode tables for the given mm.
+ *
+ * There is no corresponding unmap function. Even if the LDT is freed, we
+ * leave the PTEs around until the slot is reused or the mm is destroyed.
+ * This is harmless: the LDT is always in ordinary memory, and no one will
+ * access the freed slot.
+ *
+ * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
+ * it useful, and the flush would slow down modify_ldt().
+ */
+static int
+map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ bool is_vmalloc, had_top_level_entry;
+ unsigned long va;
+ spinlock_t *ptl;
+ pgd_t *pgd;
+ int i;
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return 0;
+
+ /*
+ * Any given ldt_struct should have map_ldt_struct() called at most
+ * once.
+ */
+ WARN_ON(ldt->slot != -1);
+
+ /*
+ * Did we already have the top level entry allocated? We can't
+ * use pgd_none() for this because it doens't do anything on
+ * 4-level page table kernels.
+ */
+ pgd = pgd_offset(mm, LDT_BASE_ADDR);
+ had_top_level_entry = (pgd->pgd != 0);
+
+ is_vmalloc = is_vmalloc_addr(ldt->entries);
+
+ for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
+ unsigned long offset = i << PAGE_SHIFT;
+ const void *src = (char *)ldt->entries + offset;
+ unsigned long pfn;
+ pte_t pte, *ptep;
+
+ va = (unsigned long)ldt_slot_va(slot) + offset;
+ pfn = is_vmalloc ? vmalloc_to_pfn(src) :
+ page_to_pfn(virt_to_page(src));
+ /*
+ * Treat the PTI LDT range as a *userspace* range.
+ * get_locked_pte() will allocate all needed pagetables
+ * and account for them in this mm.
+ */
+ ptep = get_locked_pte(mm, va, &ptl);
+ if (!ptep)
+ return -ENOMEM;
+ /*
+ * Map it RO so the easy to find address is not a primary
+ * target via some kernel interface which misses a
+ * permission check.
+ */
+ pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
+ set_pte_at(mm, va, ptep, pte);
+ pte_unmap_unlock(ptep, ptl);
+ }
+
+ if (mm->context.ldt) {
+ /*
+ * We already had an LDT. The top-level entry should already
+ * have been allocated and synchronized with the usermode
+ * tables.
+ */
+ WARN_ON(!had_top_level_entry);
+ if (static_cpu_has(X86_FEATURE_PTI))
+ WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
+ } else {
+ /*
+ * This is the first time we're mapping an LDT for this process.
+ * Sync the pgd to the usermode tables.
+ */
+ WARN_ON(had_top_level_entry);
+ if (static_cpu_has(X86_FEATURE_PTI)) {
+ WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
+ set_pgd(kernel_to_user_pgdp(pgd), *pgd);
+ }
+ }
+
+ va = (unsigned long)ldt_slot_va(slot);
+ flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
+
+ ldt->slot = slot;
+#endif
+ return 0;
+}
+
+static void free_ldt_pgtables(struct mm_struct *mm)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ struct mmu_gather tlb;
+ unsigned long start = LDT_BASE_ADDR;
+ unsigned long end = start + (1UL << PGDIR_SHIFT);
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ tlb_gather_mmu(&tlb, mm, start, end);
+ free_pgd_range(&tlb, start, end, start, end);
+ tlb_finish_mmu(&tlb, start, end);
+#endif
+}
+
/* After calling this, the LDT is immutable. */
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
}
-/* context.lock is held */
-static void install_ldt(struct mm_struct *current_mm,
- struct ldt_struct *ldt)
+static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
{
+ mutex_lock(&mm->context.lock);
+
/* Synchronizes with READ_ONCE in load_mm_ldt. */
- smp_store_release(&current_mm->context.ldt, ldt);
+ smp_store_release(&mm->context.ldt, ldt);
- /* Activate the LDT for all CPUs using current_mm. */
- on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
+ /* Activate the LDT for all CPUs using currents mm. */
+ on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
+
+ mutex_unlock(&mm->context.lock);
}
static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +246,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
}
/*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
+ * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
+ * the new task is not running, so nothing can be installed.
*/
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
+int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
{
struct ldt_struct *new_ldt;
- struct mm_struct *old_mm;
int retval = 0;
- mutex_init(&mm->context.lock);
- old_mm = current->mm;
- if (!old_mm) {
- mm->context.ldt = NULL;
+ if (!old_mm)
return 0;
- }
mutex_lock(&old_mm->context.lock);
- if (!old_mm->context.ldt) {
- mm->context.ldt = NULL;
+ if (!old_mm->context.ldt)
goto out_unlock;
- }
new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
if (!new_ldt) {
@@ -156,6 +271,12 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
new_ldt->nr_entries * LDT_ENTRY_SIZE);
finalize_ldt_struct(new_ldt);
+ retval = map_ldt_struct(mm, new_ldt, 0);
+ if (retval) {
+ free_ldt_pgtables(mm);
+ free_ldt_struct(new_ldt);
+ goto out_unlock;
+ }
mm->context.ldt = new_ldt;
out_unlock:
@@ -174,13 +295,18 @@ void destroy_context_ldt(struct mm_struct *mm)
mm->context.ldt = NULL;
}
+void ldt_arch_exit_mmap(struct mm_struct *mm)
+{
+ free_ldt_pgtables(mm);
+}
+
static int read_ldt(void __user *ptr, unsigned long bytecount)
{
struct mm_struct *mm = current->mm;
unsigned long entries_size;
int retval;
- mutex_lock(&mm->context.lock);
+ down_read(&mm->context.ldt_usr_sem);
if (!mm->context.ldt) {
retval = 0;
@@ -209,7 +335,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
retval = bytecount;
out_unlock:
- mutex_unlock(&mm->context.lock);
+ up_read(&mm->context.ldt_usr_sem);
return retval;
}
@@ -269,7 +395,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
ldt.avl = 0;
}
- mutex_lock(&mm->context.lock);
+ if (down_write_killable(&mm->context.ldt_usr_sem))
+ return -EINTR;
old_ldt = mm->context.ldt;
old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -286,12 +413,31 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
new_ldt->entries[ldt_info.entry_number] = ldt;
finalize_ldt_struct(new_ldt);
+ /*
+ * If we are using PTI, map the new LDT into the userspace pagetables.
+ * If there is already an LDT, use the other slot so that other CPUs
+ * will continue to use the old LDT until install_ldt() switches
+ * them over to the new LDT.
+ */
+ error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
+ if (error) {
+ /*
+ * This only can fail for the first LDT setup. If an LDT is
+ * already installed then the PTE page is already
+ * populated. Mop up a half populated page table.
+ */
+ if (!WARN_ON_ONCE(old_ldt))
+ free_ldt_pgtables(mm);
+ free_ldt_struct(new_ldt);
+ goto out_unlock;
+ }
+
install_ldt(mm, new_ldt);
free_ldt_struct(old_ldt);
error = 0;
out_unlock:
- mutex_unlock(&mm->context.lock);
+ up_write(&mm->context.ldt_usr_sem);
out:
return error;
}
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 00bc751c861c..edfede768688 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -48,8 +48,6 @@ static void load_segments(void)
"\tmovl $"STR(__KERNEL_DS)",%%eax\n"
"\tmovl %%eax,%%ds\n"
"\tmovl %%eax,%%es\n"
- "\tmovl %%eax,%%fs\n"
- "\tmovl %%eax,%%gs\n"
"\tmovl %%eax,%%ss\n"
: : : "eax", "memory");
#undef STR
@@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image)
* The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
- set_gdt(phys_to_virt(0), 0);
idt_invalidate(phys_to_virt(0));
+ set_gdt(phys_to_virt(0), 0);
/* now call it */
image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index ac0be8283325..9edadabf04f6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
- PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd);
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 97fb3e5737f5..aed9d94bd46f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -47,7 +47,7 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
.x86_tss = {
/*
* .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
* Poison it.
*/
.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+
+#ifdef CONFIG_X86_64
+ /*
+ * .sp1 is cpu_current_top_of_stack. The init task never
+ * runs user code, but cpu_current_top_of_stack should still
+ * be well defined before the first context switch.
+ */
+ .sp1 = TOP_OF_INIT_STACK,
+#endif
+
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
-#ifdef CONFIG_X86_32
- .SYSENTER_stack_canary = STACK_END_MAGIC,
-#endif
};
-EXPORT_PER_CPU_SYMBOL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
DEFINE_PER_CPU(bool, __tss_limit_invalid);
EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
struct fpu *fpu = &t->fpu;
if (bp) {
- struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
+ struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
t->io_bitmap_ptr = NULL;
clear_thread_flag(TIF_IO_BITMAP);
@@ -299,7 +306,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
}
if ((tifp ^ tifn) & _TIF_NOTSC)
- cr4_toggle_bits(X86_CR4_TSD);
+ cr4_toggle_bits_irqsoff(X86_CR4_TSD);
if ((tifp ^ tifn) & _TIF_NOCPUID)
set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 45bf0c5f93e1..5224c6099184 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+ struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eeeb34f85c25..c75466232016 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;
- printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
- printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
- regs->sp, regs->flags);
+ show_iret_regs(regs);
+
if (regs->orig_ax != -1)
pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
regs->r13, regs->r14, regs->r15);
+ if (!all)
+ return;
+
asm("movl %%ds,%0" : "=r" (ds));
asm("movl %%cs,%0" : "=r" (cs));
asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
rdmsrl(MSR_GS_BASE, gs);
rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
- if (!all)
- return;
-
cr0 = read_cr0();
cr2 = read_cr2();
cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+ struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* Switch the PDA and FPU contexts.
*/
this_cpu_write(current_task, next_p);
+ this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
/* Reload sp0. */
update_sp0(next_p);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3d01df7d7cf6..ed556d50d7ed 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
/* Maximum number of SMT threads on any online core */
-int __max_smt_threads __read_mostly;
+int __read_mostly __max_smt_threads = 1;
/* Flag to indicate if a complete sched domain rebuild is required */
bool x86_topology_update;
@@ -126,14 +126,10 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0xa, 0xf);
spin_unlock_irqrestore(&rtc_lock, flags);
- local_flush_tlb();
- pr_debug("1.\n");
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
start_eip >> 4;
- pr_debug("2.\n");
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
start_eip & 0xf;
- pr_debug("3.\n");
}
static inline void smpboot_restore_warm_reset_vector(void)
@@ -141,11 +137,6 @@ static inline void smpboot_restore_warm_reset_vector(void)
unsigned long flags;
/*
- * Install writable page 0 entry to set BIOS data area.
- */
- local_flush_tlb();
-
- /*
* Paranoid: Set warm reset code and vector here back
* to default values.
*/
@@ -237,7 +228,7 @@ static void notrace start_secondary(void *unused)
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif
-
+ load_current_idt();
cpu_init();
x86_cpuinit.early_percpu_clock_init();
preempt_disable();
@@ -932,12 +923,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp;
- /*
- * Enable the espfix hack for this CPU
- */
-#ifdef CONFIG_X86_ESPFIX64
+ /* Enable the espfix hack for this CPU */
init_espfix_ap(cpu);
-#endif
/* So we see what's up */
announce_cpu(cpu, apicid);
@@ -1304,7 +1291,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
* Today neither Intel nor AMD support heterogenous systems so
* extrapolate the boot cpu's data to all packages.
*/
- ncpus = cpu_data(0).booted_cores * smp_num_siblings;
+ ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
pr_info("Max logical packages: %u\n", __max_logical_packages);
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 77835bc021c7..20161ef53537 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk,
{
int ret;
+ /*
+ * If the task doesn't have a stack (e.g., a zombie), the stack is
+ * "reliably" empty.
+ */
if (!try_get_task_stack(tsk))
- return -EINVAL;
+ return 0;
ret = __save_stack_trace_reliable(trace, tsk);
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 9a9c9b076955..a5b802a12212 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
cpu = get_cpu();
while (n-- > 0) {
- if (LDT_empty(info) || LDT_zero(info)) {
+ if (LDT_empty(info) || LDT_zero(info))
memset(desc, 0, sizeof(*desc));
- } else {
+ else
fill_ldt(desc, info);
-
- /*
- * Always set the accessed bit so that the CPU
- * doesn't try to write to the (read-only) GDT.
- */
- desc->type |= 1;
- }
++info;
++desc;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 989514c94a55..446c9ef8cfc3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/fpu/internal.h>
+#include <asm/cpu_entry_area.h>
#include <asm/mce.h>
#include <asm/fixmap.h>
#include <asm/mach_traps.h>
@@ -348,23 +349,42 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
/*
* If IRET takes a non-IST fault on the espfix64 stack, then we
- * end up promoting it to a doublefault. In that case, modify
- * the stack to make it look like we just entered the #GP
- * handler from user space, similar to bad_iret.
+ * end up promoting it to a doublefault. In that case, take
+ * advantage of the fact that we're not using the normal (TSS.sp0)
+ * stack right now. We can write a fake #GP(0) frame at TSS.sp0
+ * and then modify our own IRET frame so that, when we return,
+ * we land directly at the #GP(0) vector with the stack already
+ * set up according to its expectations.
+ *
+ * The net result is that our #GP handler will think that we
+ * entered from usermode with the bad user context.
*
* No need for ist_enter here because we don't use RCU.
*/
- if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+ if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
regs->cs == __KERNEL_CS &&
regs->ip == (unsigned long)native_irq_return_iret)
{
- struct pt_regs *normal_regs = task_pt_regs(current);
+ struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
- /* Fake a #GP(0) from userspace. */
- memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
- normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
+ /*
+ * regs->sp points to the failing IRET frame on the
+ * ESPFIX64 stack. Copy it to the entry stack. This fills
+ * in gpregs->ss through gpregs->ip.
+ *
+ */
+ memmove(&gpregs->ip, (void *)regs->sp, 5*8);
+ gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
+
+ /*
+ * Adjust our frame so that we return straight to the #GP
+ * vector with the expected RSP value. This is safe because
+ * we won't enable interupts or schedule before we invoke
+ * general_protection, so nothing will clobber the stack
+ * frame we just set up.
+ */
regs->ip = (unsigned long)general_protection;
- regs->sp = (unsigned long)&normal_regs->orig_ax;
+ regs->sp = (unsigned long)&gpregs->orig_ax;
return;
}
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
*
* Processors update CR2 whenever a page fault is detected. If a
* second page fault occurs while an earlier page fault is being
- * deliv- ered, the faulting linear address of the second fault will
+ * delivered, the faulting linear address of the second fault will
* overwrite the contents of CR2 (replacing the previous
* address). These updates to CR2 occur even if the page fault
* results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
#ifdef CONFIG_X86_64
/*
- * Help handler running on IST stack to switch off the IST stack if the
- * interrupted code was in user mode. The actual stack switch is done in
- * entry_64.S
+ * Help handler running on a per-cpu (IST or entry trampoline) stack
+ * to switch to the normal thread stack if the interrupted code was in
+ * user mode. The actual stack switch is done in entry_64.S
*/
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
{
- struct pt_regs *regs = task_pt_regs(current);
- *regs = *eregs;
+ struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
+ if (regs != eregs)
+ *regs = *eregs;
return regs;
}
NOKPROBE_SYMBOL(sync_regs);
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
/*
* This is called from entry_64.S early in handling a fault
* caused by a bad iret to user mode. To handle the fault
- * correctly, we want move our stack frame to task_pt_regs
- * and we want to pretend that the exception came from the
- * iret target.
+ * correctly, we want to move our stack frame to where it would
+ * be had we entered directly on the entry stack (rather than
+ * just below the IRET frame) and we want to pretend that the
+ * exception came from the IRET target.
*/
struct bad_iret_stack *new_stack =
- container_of(task_pt_regs(current),
- struct bad_iret_stack, regs);
+ (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
/* Copy the IRET target to the new stack. */
memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_dec();
exit:
-#if defined(CONFIG_X86_32)
- /*
- * This is the most likely code path that involves non-trivial use
- * of the SYSENTER stack. Check that we haven't overrun it.
- */
- WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
- "Overran or corrupted SYSENTER stack\n");
-#endif
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
void __init trap_init(void)
{
+ /* Init cpu_entry_area before IST entries are set up */
+ setup_cpu_entry_areas();
+
idt_setup_traps();
/*
@@ -936,8 +952,9 @@ void __init trap_init(void)
* "sidt" instruction will not leak the location of the kernel, and
* to defend the IDT against arbitrary memory write vulnerabilities.
* It will be reloaded in cpu_init() */
- __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
- idt_descr.address = fix_to_virt(FIX_RO_IDT);
+ cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
+ PAGE_KERNEL_RO);
+ idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
/*
* Should be a barrier for any external CPU state:
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index a3f973b2c97a..be86a865087a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
return NULL;
}
-static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
+static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
size_t len)
{
struct stack_info *info = &state->stack_info;
+ void *addr = (void *)_addr;
- /*
- * If the address isn't on the current stack, switch to the next one.
- *
- * We may have to traverse multiple stacks to deal with the possibility
- * that info->next_sp could point to an empty stack and the address
- * could be on a subsequent stack.
- */
- while (!on_stack(info, (void *)addr, len))
- if (get_stack_info(info->next_sp, state->task, info,
- &state->stack_mask))
- return false;
+ if (!on_stack(info, addr, len) &&
+ (get_stack_info(addr, state->task, info, &state->stack_mask)))
+ return false;
return true;
}
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
return true;
}
-#define REGS_SIZE (sizeof(struct pt_regs))
-#define SP_OFFSET (offsetof(struct pt_regs, sp))
-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
-
static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
- unsigned long *ip, unsigned long *sp, bool full)
+ unsigned long *ip, unsigned long *sp)
{
- size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
- size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
- struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
-
- if (IS_ENABLED(CONFIG_X86_64)) {
- if (!stack_access_ok(state, addr, regs_size))
- return false;
+ struct pt_regs *regs = (struct pt_regs *)addr;
- *ip = regs->ip;
- *sp = regs->sp;
+ /* x86-32 support will be more complicated due to the &regs->sp hack */
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
- return true;
- }
-
- if (!stack_access_ok(state, addr, sp_offset))
+ if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
return false;
*ip = regs->ip;
+ *sp = regs->sp;
+ return true;
+}
- if (user_mode(regs)) {
- if (!stack_access_ok(state, addr + sp_offset,
- REGS_SIZE - SP_OFFSET))
- return false;
+static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
+ unsigned long *ip, unsigned long *sp)
+{
+ struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
- *sp = regs->sp;
- } else
- *sp = (unsigned long)&regs->sp;
+ if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
+ return false;
+ *ip = regs->ip;
+ *sp = regs->sp;
return true;
}
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
- struct pt_regs *ptregs;
bool indirect = false;
if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
break;
case ORC_TYPE_REGS:
- if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
+ if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
break;
case ORC_TYPE_REGS_IRET:
- if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
+ if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference iret registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
}
- ptregs = container_of((void *)sp, struct pt_regs, ip);
- if ((unsigned long)ptregs >= prev_sp &&
- on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
- state->regs = ptregs;
- state->full_regs = false;
- } else
- state->regs = NULL;
-
+ state->regs = (void *)sp - IRET_FRAME_OFFSET;
+ state->full_regs = false;
state->signal = true;
break;
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
}
if (get_stack_info((unsigned long *)state->sp, state->task,
- &state->stack_info, &state->stack_mask))
- return;
+ &state->stack_info, &state->stack_mask)) {
+ /*
+ * We weren't on a valid stack. It's possible that
+ * we overflowed a valid stack into a guard page.
+ * See if the next page up is valid so that we can
+ * generate some kind of backtrace if this happens.
+ */
+ void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
+ if (get_stack_info(next_page, state->task, &state->stack_info,
+ &state->stack_mask))
+ return;
+ }
/*
* The caller can provide the address of the first frame directly
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a4009fb9be87..1e413a9326aa 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -61,11 +61,17 @@ jiffies_64 = jiffies;
. = ALIGN(HPAGE_SIZE); \
__end_rodata_hpage_align = .;
+#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
+#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
+
#else
#define X64_ALIGN_RODATA_BEGIN
#define X64_ALIGN_RODATA_END
+#define ALIGN_ENTRY_TEXT_BEGIN
+#define ALIGN_ENTRY_TEXT_END
+
#endif
PHDRS {
@@ -102,11 +108,22 @@ SECTIONS
CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
+ ALIGN_ENTRY_TEXT_BEGIN
ENTRY_TEXT
IRQENTRY_TEXT
+ ALIGN_ENTRY_TEXT_END
SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
+
+#ifdef CONFIG_X86_64
+ . = ALIGN(PAGE_SIZE);
+ _entry_trampoline = .;
+ *(.entry_trampoline)
+ . = ALIGN(PAGE_SIZE);
+ ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
+#endif
+
/* End of text section */
_etext = .;
} :text = 0x9090
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e7d04d0c8008..b514b2b2845a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1046,7 +1046,6 @@ static void fetch_register_operand(struct operand *op)
static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@ -1068,13 +1067,11 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
#endif
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@ -1096,12 +1093,10 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
#endif
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@ -1113,12 +1108,10 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
- ctxt->ops->get_fpu(ctxt);
switch (reg) {
case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@ -1130,7 +1123,6 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
default: BUG();
}
- ctxt->ops->put_fpu(ctxt);
}
static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@ -1138,9 +1130,7 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fninit");
- ctxt->ops->put_fpu(ctxt);
return X86EMUL_CONTINUE;
}
@@ -1151,9 +1141,7 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fnstcw %0": "+m"(fcw));
- ctxt->ops->put_fpu(ctxt);
ctxt->dst.val = fcw;
@@ -1167,9 +1155,7 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
return emulate_nm(ctxt);
- ctxt->ops->get_fpu(ctxt);
asm volatile("fnstsw %0": "+m"(fsw));
- ctxt->ops->put_fpu(ctxt);
ctxt->dst.val = fsw;
@@ -2404,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
}
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
- u64 cr0, u64 cr4)
+ u64 cr0, u64 cr3, u64 cr4)
{
int bad;
+ u64 pcid;
+
+ /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
+ pcid = 0;
+ if (cr4 & X86_CR4_PCIDE) {
+ pcid = cr3 & 0xfff;
+ cr3 &= ~0xfff;
+ }
+
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
/*
* First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2425,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
bad = ctxt->ops->set_cr(ctxt, 4, cr4);
if (bad)
return X86EMUL_UNHANDLEABLE;
+ if (pcid) {
+ bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ }
+
}
return X86EMUL_CONTINUE;
@@ -2435,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
struct desc_struct desc;
struct desc_ptr dt;
u16 selector;
- u32 val, cr0, cr4;
+ u32 val, cr0, cr3, cr4;
int i;
cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+ cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
@@ -2481,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
- return rsm_enter_protected_mode(ctxt, cr0, cr4);
+ return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
}
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
struct desc_struct desc;
struct desc_ptr dt;
- u64 val, cr0, cr4;
+ u64 val, cr0, cr3, cr4;
u32 base3;
u16 selector;
int i, r;
@@ -2505,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
- ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
+ cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
val = GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2533,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
- r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+ r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
if (r != X86EMUL_CONTINUE)
return r;
@@ -4001,12 +4005,8 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->ops->get_fpu(ctxt);
-
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
- ctxt->ops->put_fpu(ctxt);
-
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -4049,8 +4049,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->ops->get_fpu(ctxt);
-
if (size < __fxstate_size(16)) {
rc = fxregs_fixup(&fx_state, size);
if (rc != X86EMUL_CONTINUE)
@@ -4066,8 +4064,6 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
out:
- ctxt->ops->put_fpu(ctxt);
-
return rc;
}
@@ -5317,9 +5313,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
int rc;
- ctxt->ops->get_fpu(ctxt);
rc = asm_safe("fwait");
- ctxt->ops->put_fpu(ctxt);
if (unlikely(rc != X86EMUL_CONTINUE))
return emulate_exception(ctxt, MF_VECTOR, 0, false);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e5e66e5c6640..c4deb1f34faa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if(make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, 0, 0,
vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if (make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if (make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->kvm->mmu_lock);
if (make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
- return 1;
+ return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
0, ACC_ALL);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4704aaf6d19e..023afa0c8887 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2302,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
* processors. See 22.2.4.
*/
vmcs_writel(HOST_TR_BASE,
- (unsigned long)this_cpu_ptr(&cpu_tss));
+ (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
/*
@@ -6751,16 +6751,10 @@ static __init int hardware_setup(void)
goto out;
}
- vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
- /*
- * Allow direct access to the PC debug port (it is often used for I/O
- * delays, but the vmexits simply slow things down).
- */
memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
- clear_bit(0x80, vmx_io_bitmap_a);
memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eee8e7faf1af..1cec2c62a0b0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2937,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
srcu_read_unlock(&vcpu->kvm->srcu, idx);
pagefault_enable();
kvm_x86_ops->vcpu_put(vcpu);
- kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
}
@@ -4385,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
addr, n, v))
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
handled += n;
addr += n;
len -= n;
@@ -4644,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
{
if (vcpu->mmio_read_completed) {
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
- vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+ vcpu->mmio_fragments[0].gpa, val);
vcpu->mmio_read_completed = 0;
return 1;
}
@@ -4666,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
return vcpu_mmio_write(vcpu, gpa, bytes, val);
}
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
void *val, int bytes)
{
- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
return X86EMUL_IO_NEEDED;
}
@@ -5252,17 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt)
emul_to_vcpu(ctxt)->arch.halt_request = 1;
}
-static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
-{
- preempt_disable();
- kvm_load_guest_fpu(emul_to_vcpu(ctxt));
-}
-
-static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
-{
- preempt_enable();
-}
-
static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
@@ -5340,8 +5328,6 @@ static const struct x86_emulate_ops emulate_ops = {
.halt = emulator_halt,
.wbinvd = emulator_wbinvd,
.fix_hypercall = emulator_fix_hypercall,
- .get_fpu = emulator_get_fpu,
- .put_fpu = emulator_put_fpu,
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
.set_nmi_mask = emulator_set_nmi_mask,
@@ -6778,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
kvm_x86_ops->tlb_flush(vcpu);
}
+void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end)
+{
+ unsigned long apic_address;
+
+ /*
+ * The physical address of apic access page is stored in the VMCS.
+ * Update it when it becomes invalid.
+ */
+ apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (start <= apic_address && apic_address < end)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+}
+
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
{
struct page *page = NULL;
@@ -6952,7 +6952,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
preempt_disable();
kvm_x86_ops->prepare_guest_switch(vcpu);
- kvm_load_guest_fpu(vcpu);
/*
* Disable IRQs before setting IN_GUEST_MODE. Posted interrupt
@@ -7265,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
- struct fpu *fpu = &current->thread.fpu;
int r;
- fpu__initialize(fpu);
-
kvm_sigset_activate(vcpu);
+ kvm_load_guest_fpu(vcpu);
+
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) {
r = -EINTR;
@@ -7312,6 +7310,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
r = vcpu_run(vcpu);
out:
+ kvm_put_guest_fpu(vcpu);
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -7381,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
#endif
kvm_rip_write(vcpu, regs->rip);
- kvm_set_rflags(vcpu, regs->rflags);
+ kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
vcpu->arch.exception.pending = false;
@@ -7495,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
+int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
+ /*
+ * When EFER.LME and CR0.PG are set, the processor is in
+ * 64-bit mode (though maybe in a 32-bit code segment).
+ * CR4.PAE and EFER.LMA must be set.
+ */
+ if (!(sregs->cr4 & X86_CR4_PAE_BIT)
+ || !(sregs->efer & EFER_LMA))
+ return -EINVAL;
+ } else {
+ /*
+ * Not in 64-bit mode: EFER.LMA is clear and the code
+ * segment cannot be 64-bit.
+ */
+ if (sregs->efer & EFER_LMA || sregs->cs.l)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
@@ -7507,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
(sregs->cr4 & X86_CR4_OSXSAVE))
return -EINVAL;
+ if (kvm_valid_sregs(vcpu, sregs))
+ return -EINVAL;
+
apic_base_msr.data = sregs->apic_base;
apic_base_msr.host_initiated = true;
if (kvm_set_apic_base(vcpu, &apic_base_msr))
@@ -7704,32 +7729,25 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
}
+/* Swap (qemu) user FPU context for the guest FPU context. */
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
- if (vcpu->guest_fpu_loaded)
- return;
-
- /*
- * Restore all possible states in the guest,
- * and assume host would use all available bits.
- * Guest xcr0 would be loaded later.
- */
- vcpu->guest_fpu_loaded = 1;
- __kernel_fpu_begin();
+ preempt_disable();
+ copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
~XFEATURE_MASK_PKRU);
+ preempt_enable();
trace_kvm_fpu(1);
}
+/* When vcpu_run ends, restore user space FPU context. */
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- if (!vcpu->guest_fpu_loaded)
- return;
-
- vcpu->guest_fpu_loaded = 0;
+ preempt_disable();
copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
- __kernel_fpu_end();
+ copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+ preempt_enable();
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
@@ -7846,7 +7864,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
* To avoid have the INIT path from kvm_apic_has_events() that be
* called with loaded FPU and does not let userspace fix the state.
*/
- kvm_put_guest_fpu(vcpu);
+ if (init_event)
+ kvm_put_guest_fpu(vcpu);
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
XFEATURE_MASK_BNDREGS);
if (mpx_state_buffer)
@@ -7855,6 +7874,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
XFEATURE_MASK_BNDCSR);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
+ if (init_event)
+ kvm_load_guest_fpu(vcpu);
}
if (!init_event) {
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 553f8fd23cc4..4846eff7e4c8 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
/*
- * Use cpu_tss as a cacheline-aligned, seldomly
+ * Use cpu_tss_rw as a cacheline-aligned, seldomly
* accessed per-cpu variable as the monitor target.
*/
- __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
+ __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
/*
* AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index c4d55919fac1..e0b85930dd77 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
-ff:
+ff: UD0
EndTable
Table: 3-byte opcode 1 (0x0f 0x38)
@@ -717,7 +717,7 @@ AVXcode: 2
7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
80: INVEPT Gy,Mdq (66)
-81: INVPID Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
82: INVPCID Gy,Mdq (66)
83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
88: vexpandps/d Vpd,Wpd (66),(ev)
@@ -970,6 +970,15 @@ GrpTable: Grp9
EndTable
GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
EndTable
# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 8e13b8cc6bed..27e9e90a8d35 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
@@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
-obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
-obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
+obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644
index 000000000000..b9283cc27622
--- /dev/null
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+struct cpu_entry_area *get_cpu_entry_area(int cpu)
+{
+ unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+ BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
+
+ return (struct cpu_entry_area *) va;
+}
+EXPORT_SYMBOL(get_cpu_entry_area);
+
+void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
+{
+ unsigned long va = (unsigned long) cea_vaddr;
+
+ set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
+}
+
+static void __init
+cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+{
+ for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
+ cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+static void percpu_setup_debug_store(int cpu)
+{
+#ifdef CONFIG_CPU_SUP_INTEL
+ int npages;
+ void *cea;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return;
+
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
+ npages = sizeof(struct debug_store) / PAGE_SIZE;
+ BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
+ cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
+ PAGE_KERNEL);
+
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
+ /*
+ * Force the population of PMDs for not yet allocated per cpu
+ * memory like debug store buffers.
+ */
+ npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
+ for (; npages; npages--, cea += PAGE_SIZE)
+ cea_set_pte(cea, 0, PAGE_NONE);
+#endif
+}
+
+/* Setup the fixmap mappings only once per-processor */
+static void __init setup_cpu_entry_area(int cpu)
+{
+#ifdef CONFIG_X86_64
+ extern char _entry_trampoline[];
+
+ /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+ pgprot_t gdt_prot = PAGE_KERNEL_RO;
+ pgprot_t tss_prot = PAGE_KERNEL_RO;
+#else
+ /*
+ * On native 32-bit systems, the GDT cannot be read-only because
+ * our double fault handler uses a task gate, and entering through
+ * a task gate needs to change an available TSS to busy. If the
+ * GDT is read-only, that will triple fault. The TSS cannot be
+ * read-only because the CPU writes to it on task switches.
+ *
+ * On Xen PV, the GDT must be read-only because the hypervisor
+ * requires it.
+ */
+ pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+ PAGE_KERNEL_RO : PAGE_KERNEL;
+ pgprot_t tss_prot = PAGE_KERNEL;
+#endif
+
+ cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
+ gdt_prot);
+
+ cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
+ per_cpu_ptr(&entry_stack_storage, cpu), 1,
+ PAGE_KERNEL);
+
+ /*
+ * The Intel SDM says (Volume 3, 7.2.1):
+ *
+ * Avoid placing a page boundary in the part of the TSS that the
+ * processor reads during a task switch (the first 104 bytes). The
+ * processor may not correctly perform address translations if a
+ * boundary occurs in this area. During a task switch, the processor
+ * reads and writes into the first 104 bytes of each TSS (using
+ * contiguous physical addresses beginning with the physical address
+ * of the first byte of the TSS). So, after TSS access begins, if
+ * part of the 104 bytes is not physically contiguous, the processor
+ * will access incorrect information without generating a page-fault
+ * exception.
+ *
+ * There are also a lot of errata involving the TSS spanning a page
+ * boundary. Assert that we're not doing that.
+ */
+ BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+ BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+ cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
+ &per_cpu(cpu_tss_rw, cpu),
+ sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
+
+#ifdef CONFIG_X86_32
+ per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+ BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+ BUILD_BUG_ON(sizeof(exception_stacks) !=
+ sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+ cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
+ &per_cpu(exception_stacks, cpu),
+ sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
+
+ cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+ __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+ percpu_setup_debug_store(cpu);
+}
+
+static __init void setup_cpu_entry_area_ptes(void)
+{
+#ifdef CONFIG_X86_32
+ unsigned long start, end;
+
+ BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+ BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
+
+ start = CPU_ENTRY_AREA_BASE;
+ end = start + CPU_ENTRY_AREA_MAP_SIZE;
+
+ /* Careful here: start + PMD_SIZE might wrap around */
+ for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
+ populate_extra_pte(start);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+ unsigned int cpu;
+
+ setup_cpu_entry_area_ptes();
+
+ for_each_possible_cpu(cpu)
+ setup_cpu_entry_area(cpu);
+}
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index bfcffdf6c577..421f2664ffa0 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -5,7 +5,7 @@
static int ptdump_show(struct seq_file *m, void *v)
{
- ptdump_walk_pgd_level(m, NULL);
+ ptdump_walk_pgd_level_debugfs(m, NULL, false);
return 0;
}
@@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = {
.release = single_release,
};
-static struct dentry *pe;
+static int ptdump_show_curknl(struct seq_file *m, void *v)
+{
+ if (current->mm->pgd) {
+ down_read(&current->mm->mmap_sem);
+ ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
+ up_read(&current->mm->mmap_sem);
+ }
+ return 0;
+}
+
+static int ptdump_open_curknl(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show_curknl, NULL);
+}
+
+static const struct file_operations ptdump_curknl_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open_curknl,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+static struct dentry *pe_curusr;
+
+static int ptdump_show_curusr(struct seq_file *m, void *v)
+{
+ if (current->mm->pgd) {
+ down_read(&current->mm->mmap_sem);
+ ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
+ up_read(&current->mm->mmap_sem);
+ }
+ return 0;
+}
+
+static int ptdump_open_curusr(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show_curusr, NULL);
+}
+
+static const struct file_operations ptdump_curusr_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open_curusr,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
+static struct dentry *dir, *pe_knl, *pe_curknl;
static int __init pt_dump_debug_init(void)
{
- pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
- &ptdump_fops);
- if (!pe)
+ dir = debugfs_create_dir("page_tables", NULL);
+ if (!dir)
return -ENOMEM;
+ pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
+ &ptdump_fops);
+ if (!pe_knl)
+ goto err;
+
+ pe_curknl = debugfs_create_file("current_kernel", 0400,
+ dir, NULL, &ptdump_curknl_fops);
+ if (!pe_curknl)
+ goto err;
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ pe_curusr = debugfs_create_file("current_user", 0400,
+ dir, NULL, &ptdump_curusr_fops);
+ if (!pe_curusr)
+ goto err;
+#endif
return 0;
+err:
+ debugfs_remove_recursive(dir);
+ return -ENOMEM;
}
static void __exit pt_dump_debug_exit(void)
{
- debugfs_remove_recursive(pe);
+ debugfs_remove_recursive(dir);
}
module_init(pt_dump_debug_init);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 5e3ac6fe6c9e..f56902c1f04b 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -44,68 +44,97 @@ struct addr_marker {
unsigned long max_lines;
};
-/* indices for address_markers; keep sync'd w/ address_markers below */
+/* Address space markers hints */
+
+#ifdef CONFIG_X86_64
+
enum address_markers_idx {
USER_SPACE_NR = 0,
-#ifdef CONFIG_X86_64
KERNEL_SPACE_NR,
LOW_KERNEL_NR,
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
+ LDT_NR,
+#endif
VMALLOC_START_NR,
VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
KASAN_SHADOW_START_NR,
KASAN_SHADOW_END_NR,
#endif
-# ifdef CONFIG_X86_ESPFIX64
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
+ LDT_NR,
+#endif
+ CPU_ENTRY_AREA_NR,
+#ifdef CONFIG_X86_ESPFIX64
ESPFIX_START_NR,
-# endif
+#endif
+#ifdef CONFIG_EFI
+ EFI_END_NR,
+#endif
HIGH_KERNEL_NR,
MODULES_VADDR_NR,
MODULES_END_NR,
-#else
+ FIXADDR_START_NR,
+ END_OF_SPACE_NR,
+};
+
+static struct addr_marker address_markers[] = {
+ [USER_SPACE_NR] = { 0, "User Space" },
+ [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" },
+ [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" },
+ [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
+ [VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
+#ifdef CONFIG_KASAN
+ [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
+ [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
+#endif
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
+#endif
+ [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
+#ifdef CONFIG_X86_ESPFIX64
+ [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
+#endif
+#ifdef CONFIG_EFI
+ [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
+#endif
+ [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
+ [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
+ [MODULES_END_NR] = { MODULES_END, "End Modules" },
+ [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
+ [END_OF_SPACE_NR] = { -1, NULL }
+};
+
+#else /* CONFIG_X86_64 */
+
+enum address_markers_idx {
+ USER_SPACE_NR = 0,
KERNEL_SPACE_NR,
VMALLOC_START_NR,
VMALLOC_END_NR,
-# ifdef CONFIG_HIGHMEM
+#ifdef CONFIG_HIGHMEM
PKMAP_BASE_NR,
-# endif
- FIXADDR_START_NR,
#endif
+ CPU_ENTRY_AREA_NR,
+ FIXADDR_START_NR,
+ END_OF_SPACE_NR,
};
-/* Address space markers hints */
static struct addr_marker address_markers[] = {
- { 0, "User Space" },
-#ifdef CONFIG_X86_64
- { 0x8000000000000000UL, "Kernel Space" },
- { 0/* PAGE_OFFSET */, "Low Kernel Mapping" },
- { 0/* VMALLOC_START */, "vmalloc() Area" },
- { 0/* VMEMMAP_START */, "Vmemmap" },
-#ifdef CONFIG_KASAN
- { KASAN_SHADOW_START, "KASAN shadow" },
- { KASAN_SHADOW_END, "KASAN shadow end" },
+ [USER_SPACE_NR] = { 0, "User Space" },
+ [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" },
+ [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
+ [VMALLOC_END_NR] = { 0UL, "vmalloc() End" },
+#ifdef CONFIG_HIGHMEM
+ [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" },
#endif
-# ifdef CONFIG_X86_ESPFIX64
- { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
-# endif
-# ifdef CONFIG_EFI
- { EFI_VA_END, "EFI Runtime Services" },
-# endif
- { __START_KERNEL_map, "High Kernel Mapping" },
- { MODULES_VADDR, "Modules" },
- { MODULES_END, "End Modules" },
-#else
- { PAGE_OFFSET, "Kernel Mapping" },
- { 0/* VMALLOC_START */, "vmalloc() Area" },
- { 0/*VMALLOC_END*/, "vmalloc() End" },
-# ifdef CONFIG_HIGHMEM
- { 0/*PKMAP_BASE*/, "Persistent kmap() Area" },
-# endif
- { 0/*FIXADDR_START*/, "Fixmap Area" },
-#endif
- { -1, NULL } /* End of list */
+ [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" },
+ [FIXADDR_START_NR] = { 0UL, "Fixmap area" },
+ [END_OF_SPACE_NR] = { -1, NULL }
};
+#endif /* !CONFIG_X86_64 */
+
/* Multipliers for offsets within the PTEs */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +169,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
static const char * const level_name[] =
{ "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
- if (!pgprot_val(prot)) {
+ if (!(pr & _PAGE_PRESENT)) {
/* Not present */
pt_dump_cont_printf(m, dmsg, " ");
} else {
@@ -447,7 +476,7 @@ static inline bool is_hypervisor_range(int idx)
}
static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
- bool checkwx)
+ bool checkwx, bool dmesg)
{
#ifdef CONFIG_X86_64
pgd_t *start = (pgd_t *) &init_top_pgt;
@@ -460,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
if (pgd) {
start = pgd;
- st.to_dmesg = true;
+ st.to_dmesg = dmesg;
}
st.check_wx = checkwx;
@@ -498,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
- ptdump_walk_pgd_level_core(m, pgd, false);
+ ptdump_walk_pgd_level_core(m, pgd, false, true);
+}
+
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (user && static_cpu_has(X86_FEATURE_PTI))
+ pgd = kernel_to_user_pgdp(pgd);
+#endif
+ ptdump_walk_pgd_level_core(m, pgd, false, false);
+}
+EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
+
+static void ptdump_walk_user_pgd_level_checkwx(void)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ pgd_t *pgd = (pgd_t *) &init_top_pgt;
+
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ pr_info("x86/mm: Checking user space page tables\n");
+ pgd = kernel_to_user_pgdp(pgd);
+ ptdump_walk_pgd_level_core(NULL, pgd, true, false);
+#endif
}
-EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
void ptdump_walk_pgd_level_checkwx(void)
{
- ptdump_walk_pgd_level_core(NULL, NULL, true);
+ ptdump_walk_pgd_level_core(NULL, NULL, true, false);
+ ptdump_walk_user_pgd_level_checkwx();
}
static int __init pt_dump_init(void)
@@ -525,8 +578,8 @@ static int __init pt_dump_init(void)
address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
# endif
address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
+ address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
#endif
-
return 0;
}
__initcall(pt_dump_init);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 3321b446b66c..9fe656c42aa5 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,6 +1,7 @@
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
+#include <xen/xen.h>
#include <asm/fpu/internal.h>
#include <asm/traps.h>
@@ -82,7 +83,7 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
return true;
}
-EXPORT_SYMBOL_GPL(ex_handler_refcount);
+EXPORT_SYMBOL(ex_handler_refcount);
/*
* Handler for when we fail to restore a task's FPU state. We should never get
@@ -212,8 +213,9 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
* Old CPUs leave the high bits of CS on the stack
* undefined. I'm not sure which CPUs do this, but at least
* the 486 DX works this way.
+ * Xen pv domains are not using the default __KERNEL_CS.
*/
- if (regs->cs != __KERNEL_CS)
+ if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
goto fail;
/*
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 78ca9a8ee454..06fe3d51d385 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -701,7 +701,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
else
printk(KERN_CONT "paging request");
- printk(KERN_CONT " at %p\n", (void *) address);
+ printk(KERN_CONT " at %px\n", (void *) address);
printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
dump_pagetable(address);
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
if (!printk_ratelimit())
return;
- printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+ printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *)regs->ip, (void *)regs->sp, error_code);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6fdf91ef130a..8ca324d07282 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -20,6 +20,7 @@
#include <asm/kaslr.h>
#include <asm/hypervisor.h>
#include <asm/cpufeature.h>
+#include <asm/pti.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -160,6 +161,12 @@ struct map_range {
static int page_size_mask;
+static void enable_global_pages(void)
+{
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ __supported_pte_mask |= _PAGE_GLOBAL;
+}
+
static void __init probe_page_size_mask(void)
{
/*
@@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void)
cr4_set_bits_and_update_boot(X86_CR4_PSE);
/* Enable PGE if available */
+ __supported_pte_mask &= ~_PAGE_GLOBAL;
if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4_set_bits_and_update_boot(X86_CR4_PGE);
- __supported_pte_mask |= _PAGE_GLOBAL;
- } else
- __supported_pte_mask &= ~_PAGE_GLOBAL;
+ enable_global_pages();
+ }
/* Enable 1 GB linear kernel mappings if available: */
if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
@@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void)
static void setup_pcid(void)
{
-#ifdef CONFIG_X86_64
- if (boot_cpu_has(X86_FEATURE_PCID)) {
- if (boot_cpu_has(X86_FEATURE_PGE)) {
- /*
- * This can't be cr4_set_bits_and_update_boot() --
- * the trampoline code can't handle CR4.PCIDE and
- * it wouldn't do any good anyway. Despite the name,
- * cr4_set_bits_and_update_boot() doesn't actually
- * cause the bits in question to remain set all the
- * way through the secondary boot asm.
- *
- * Instead, we brute-force it and set CR4.PCIDE
- * manually in start_secondary().
- */
- cr4_set_bits(X86_CR4_PCIDE);
- } else {
- /*
- * flush_tlb_all(), as currently implemented, won't
- * work if PCID is on but PGE is not. Since that
- * combination doesn't exist on real hardware, there's
- * no reason to try to fully support it, but it's
- * polite to avoid corrupting data if we're on
- * an improperly configured VM.
- */
- setup_clear_cpu_cap(X86_FEATURE_PCID);
- }
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ if (!boot_cpu_has(X86_FEATURE_PCID))
+ return;
+
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
+ /*
+ * This can't be cr4_set_bits_and_update_boot() -- the
+ * trampoline code can't handle CR4.PCIDE and it wouldn't
+ * do any good anyway. Despite the name,
+ * cr4_set_bits_and_update_boot() doesn't actually cause
+ * the bits in question to remain set all the way through
+ * the secondary boot asm.
+ *
+ * Instead, we brute-force it and set CR4.PCIDE manually in
+ * start_secondary().
+ */
+ cr4_set_bits(X86_CR4_PCIDE);
+
+ /*
+ * INVPCID's single-context modes (2/3) only work if we set
+ * X86_CR4_PCIDE, *and* we INVPCID support. It's unusable
+ * on systems that have X86_CR4_PCIDE clear, or that have
+ * no INVPCID support at all.
+ */
+ if (boot_cpu_has(X86_FEATURE_INVPCID))
+ setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't work if
+ * PCID is on but PGE is not. Since that combination
+ * doesn't exist on real hardware, there's no reason to try
+ * to fully support it, but it's polite to avoid corrupting
+ * data if we're on an improperly configured VM.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
}
-#endif
}
#ifdef CONFIG_X86_32
@@ -622,6 +639,7 @@ void __init init_mem_mapping(void)
{
unsigned long end;
+ pti_check_boottime_disable();
probe_page_size_mask();
setup_pcid();
@@ -845,7 +863,7 @@ void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.loaded_mm = &init_mm,
.next_asid = 1,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8a64a6f2848d..135c9a7898c7 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/page_types.h>
+#include <asm/cpu_entry_area.h>
#include <asm/init.h>
#include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
mem_init_print_info(NULL);
printk(KERN_INFO "virtual kernel memory layout:\n"
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
+ " cpu_entry : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
" pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
FIXADDR_START, FIXADDR_TOP,
(FIXADDR_TOP - FIXADDR_START) >> 10,
+ CPU_ENTRY_AREA_BASE,
+ CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
+ CPU_ENTRY_AREA_MAP_SIZE >> 10,
+
#ifdef CONFIG_HIGHMEM
PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
(LAST_PKMAP*PAGE_SIZE) >> 10,
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 6e4573b1da34..c45b6ec5357b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr)
return;
}
+ mmiotrace_iounmap(addr);
+
addr = (volatile void __iomem *)
(PAGE_MASK & (unsigned long __force)addr);
- mmiotrace_iounmap(addr);
-
/* Use the vm area unlocked, assuming the caller
ensures there isn't another iounmap for the same address
in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 99dfed6dfef8..47388f0c0e59 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -15,6 +15,7 @@
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
+#include <asm/cpu_entry_area.h>
extern struct range pfn_mapped[E820_MAX_ENTRIES];
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
void __init kasan_init(void)
{
int i;
+ void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
#ifdef CONFIG_KASAN_INLINE
register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
map_range(&pfn_mapped[i]);
}
+ shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
+ shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
+ shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
+ PAGE_SIZE);
+
+ shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
+ CPU_ENTRY_AREA_MAP_SIZE);
+ shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
+ shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
+ PAGE_SIZE);
+
kasan_populate_zero_shadow(
kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
- kasan_mem_to_shadow((void *)__START_KERNEL_map));
+ shadow_cpu_entry_begin);
+
+ kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
+ (unsigned long)shadow_cpu_entry_end, 0);
+
+ kasan_populate_zero_shadow(shadow_cpu_entry_end,
+ kasan_mem_to_shadow((void *)__START_KERNEL_map));
kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
(unsigned long)kasan_mem_to_shadow(_end),
early_pfn_to_nid(__pa(_stext)));
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
- (void *)KASAN_SHADOW_END);
+ (void *)KASAN_SHADOW_END);
load_cr3(init_top_pgt);
__flush_tlb_all();
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/error.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/error.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/opcode.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/opcode.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/pte.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/pte.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c
deleted file mode 100644
index cec594032515..000000000000
--- a/arch/x86/mm/kmemcheck/selftest.c
+++ /dev/null
@@ -1 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/selftest.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index c21c2ed04612..58477ec3d66d 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
unsigned long flags;
int ret = 0;
unsigned long size = 0;
+ unsigned long addr = p->addr & PAGE_MASK;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
unsigned int l;
pte_t *pte;
spin_lock_irqsave(&kmmio_lock, flags);
- if (get_kmmio_probe(p->addr)) {
+ if (get_kmmio_probe(addr)) {
ret = -EEXIST;
goto out;
}
- pte = lookup_address(p->addr, &l);
+ pte = lookup_address(addr, &l);
if (!pte) {
ret = -EINVAL;
goto out;
@@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
kmmio_count++;
list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) {
- if (add_kmmio_fault_page(p->addr + size))
+ if (add_kmmio_fault_page(addr + size))
pr_err("Unable to set page fault.\n");
size += page_level_size(l);
}
@@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
{
unsigned long flags;
unsigned long size = 0;
+ unsigned long addr = p->addr & PAGE_MASK;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
struct kmmio_fault_page *release_list = NULL;
struct kmmio_delayed_release *drelease;
unsigned int l;
pte_t *pte;
- pte = lookup_address(p->addr, &l);
+ pte = lookup_address(addr, &l);
if (!pte)
return;
spin_lock_irqsave(&kmmio_lock, flags);
while (size < size_lim) {
- release_kmmio_fault_page(p->addr + size, &release_list);
+ release_kmmio_fault_page(addr + size, &release_list);
size += page_level_size(l);
}
list_del_rcu(&p->list);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d9a9e9fc75dd..391b13402e40 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -405,13 +405,13 @@ bool sme_active(void)
{
return sme_me_mask && !sev_enabled;
}
-EXPORT_SYMBOL_GPL(sme_active);
+EXPORT_SYMBOL(sme_active);
bool sev_active(void)
{
return sme_me_mask && sev_enabled;
}
-EXPORT_SYMBOL_GPL(sev_active);
+EXPORT_SYMBOL(sev_active);
static const struct dma_map_ops sev_dma_ops = {
.alloc = sev_alloc,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 96d456a94b03..004abf9ebf12 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
kmem_cache_free(pgd_cache, pgd);
}
#else
+
static inline pgd_t *_pgd_alloc(void)
{
- return (pgd_t *)__get_free_page(PGALLOC_GFP);
+ return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
}
static inline void _pgd_free(pgd_t *pgd)
{
- free_page((unsigned long)pgd);
+ free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
}
#endif /* CONFIG_X86_PAE */
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6b9bf023a700..c3c5274410a9 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,6 +10,7 @@
#include <linux/pagemap.h>
#include <linux/spinlock.h>
+#include <asm/cpu_entry_area.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
new file mode 100644
index 000000000000..bce8aea65606
--- /dev/null
+++ b/arch/x86/mm/pti.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This code is based in part on work published here:
+ *
+ * https://github.com/IAIK/KAISER
+ *
+ * The original work was written by and and signed off by for the Linux
+ * kernel by:
+ *
+ * Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
+ * Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
+ * Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+ * Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+ *
+ * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
+ * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
+ * Andy Lutomirsky <luto@amacapital.net>
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+
+#include <asm/cpufeature.h>
+#include <asm/hypervisor.h>
+#include <asm/vsyscall.h>
+#include <asm/cmdline.h>
+#include <asm/pti.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
+
+/* Backporting helper */
+#ifndef __GFP_NOTRACK
+#define __GFP_NOTRACK 0
+#endif
+
+static void __init pti_print_if_insecure(const char *reason)
+{
+ if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+ pr_info("%s\n", reason);
+}
+
+static void __init pti_print_if_secure(const char *reason)
+{
+ if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+ pr_info("%s\n", reason);
+}
+
+void __init pti_check_boottime_disable(void)
+{
+ char arg[5];
+ int ret;
+
+ if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
+ pti_print_if_insecure("disabled on XEN PV.");
+ return;
+ }
+
+ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+ if (ret > 0) {
+ if (ret == 3 && !strncmp(arg, "off", 3)) {
+ pti_print_if_insecure("disabled on command line.");
+ return;
+ }
+ if (ret == 2 && !strncmp(arg, "on", 2)) {
+ pti_print_if_secure("force enabled on command line.");
+ goto enable;
+ }
+ if (ret == 4 && !strncmp(arg, "auto", 4))
+ goto autosel;
+ }
+
+ if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+ pti_print_if_insecure("disabled on command line.");
+ return;
+ }
+
+autosel:
+ if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+ return;
+enable:
+ setup_force_cpu_cap(X86_FEATURE_PTI);
+}
+
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ /*
+ * Changes to the high (kernel) portion of the kernelmode page
+ * tables are not automatically propagated to the usermode tables.
+ *
+ * Users should keep in mind that, unlike the kernelmode tables,
+ * there is no vmalloc_fault equivalent for the usermode tables.
+ * Top-level entries added to init_mm's usermode pgd after boot
+ * will not be automatically propagated to other mms.
+ */
+ if (!pgdp_maps_userspace(pgdp))
+ return pgd;
+
+ /*
+ * The user page tables get the full PGD, accessible from
+ * userspace:
+ */
+ kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
+
+ /*
+ * If this is normal user memory, make it NX in the kernel
+ * pagetables so that, if we somehow screw up and return to
+ * usermode with the kernel CR3 loaded, we'll get a page fault
+ * instead of allowing user code to execute with the wrong CR3.
+ *
+ * As exceptions, we don't set NX if:
+ * - _PAGE_USER is not set. This could be an executable
+ * EFI runtime mapping or something similar, and the kernel
+ * may execute from it
+ * - we don't have NX support
+ * - we're clearing the PGD (i.e. the new pgd is not present).
+ */
+ if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
+ (__supported_pte_mask & _PAGE_NX))
+ pgd.pgd |= _PAGE_NX;
+
+ /* return the copy of the PGD we want the kernel to use: */
+ return pgd;
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a P4D on success, or NULL on failure.
+ */
+static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+{
+ pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+ if (address < PAGE_OFFSET) {
+ WARN_ONCE(1, "attempt to walk user address\n");
+ return NULL;
+ }
+
+ if (pgd_none(*pgd)) {
+ unsigned long new_p4d_page = __get_free_page(gfp);
+ if (!new_p4d_page)
+ return NULL;
+
+ if (pgd_none(*pgd)) {
+ set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+ new_p4d_page = 0;
+ }
+ if (new_p4d_page)
+ free_page(new_p4d_page);
+ }
+ BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+ return p4d_offset(pgd, address);
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a PMD on success, or NULL on failure.
+ */
+static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+{
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
+ pud_t *pud;
+
+ BUILD_BUG_ON(p4d_large(*p4d) != 0);
+ if (p4d_none(*p4d)) {
+ unsigned long new_pud_page = __get_free_page(gfp);
+ if (!new_pud_page)
+ return NULL;
+
+ if (p4d_none(*p4d)) {
+ set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
+ new_pud_page = 0;
+ }
+ if (new_pud_page)
+ free_page(new_pud_page);
+ }
+
+ pud = pud_offset(p4d, address);
+ /* The user page tables do not use large mappings: */
+ if (pud_large(*pud)) {
+ WARN_ON(1);
+ return NULL;
+ }
+ if (pud_none(*pud)) {
+ unsigned long new_pmd_page = __get_free_page(gfp);
+ if (!new_pmd_page)
+ return NULL;
+
+ if (pud_none(*pud)) {
+ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+ new_pmd_page = 0;
+ }
+ if (new_pmd_page)
+ free_page(new_pmd_page);
+ }
+
+ return pmd_offset(pud, address);
+}
+
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+/*
+ * Walk the shadow copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down. Does not support large pages.
+ *
+ * Note: this is only used when mapping *new* kernel data into the
+ * user/shadow page tables. It is never used for userspace data.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+{
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+ pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
+ pte_t *pte;
+
+ /* We can't do anything sensible if we hit a large mapping. */
+ if (pmd_large(*pmd)) {
+ WARN_ON(1);
+ return NULL;
+ }
+
+ if (pmd_none(*pmd)) {
+ unsigned long new_pte_page = __get_free_page(gfp);
+ if (!new_pte_page)
+ return NULL;
+
+ if (pmd_none(*pmd)) {
+ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+ new_pte_page = 0;
+ }
+ if (new_pte_page)
+ free_page(new_pte_page);
+ }
+
+ pte = pte_offset_kernel(pmd, address);
+ if (pte_flags(*pte) & _PAGE_USER) {
+ WARN_ONCE(1, "attempt to walk to user pte\n");
+ return NULL;
+ }
+ return pte;
+}
+
+static void __init pti_setup_vsyscall(void)
+{
+ pte_t *pte, *target_pte;
+ unsigned int level;
+
+ pte = lookup_address(VSYSCALL_ADDR, &level);
+ if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
+ return;
+
+ target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
+ if (WARN_ON(!target_pte))
+ return;
+
+ *target_pte = *pte;
+ set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
+}
+#else
+static void __init pti_setup_vsyscall(void) { }
+#endif
+
+static void __init
+pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+{
+ unsigned long addr;
+
+ /*
+ * Clone the populated PMDs which cover start to end. These PMD areas
+ * can have holes.
+ */
+ for (addr = start; addr < end; addr += PMD_SIZE) {
+ pmd_t *pmd, *target_pmd;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+
+ pgd = pgd_offset_k(addr);
+ if (WARN_ON(pgd_none(*pgd)))
+ return;
+ p4d = p4d_offset(pgd, addr);
+ if (WARN_ON(p4d_none(*p4d)))
+ return;
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud))
+ continue;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ continue;
+
+ target_pmd = pti_user_pagetable_walk_pmd(addr);
+ if (WARN_ON(!target_pmd))
+ return;
+
+ /*
+ * Copy the PMD. That is, the kernelmode and usermode
+ * tables will share the last-level page tables of this
+ * address range
+ */
+ *target_pmd = pmd_clear_flags(*pmd, clear);
+ }
+}
+
+/*
+ * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
+ * next-level entry on 5-level systems.
+ */
+static void __init pti_clone_p4d(unsigned long addr)
+{
+ p4d_t *kernel_p4d, *user_p4d;
+ pgd_t *kernel_pgd;
+
+ user_p4d = pti_user_pagetable_walk_p4d(addr);
+ kernel_pgd = pgd_offset_k(addr);
+ kernel_p4d = p4d_offset(kernel_pgd, addr);
+ *user_p4d = *kernel_p4d;
+}
+
+/*
+ * Clone the CPU_ENTRY_AREA into the user space visible page table.
+ */
+static void __init pti_clone_user_shared(void)
+{
+ pti_clone_p4d(CPU_ENTRY_AREA_BASE);
+}
+
+/*
+ * Clone the ESPFIX P4D into the user space visinble page table
+ */
+static void __init pti_setup_espfix64(void)
+{
+#ifdef CONFIG_X86_ESPFIX64
+ pti_clone_p4d(ESPFIX_BASE_ADDR);
+#endif
+}
+
+/*
+ * Clone the populated PMDs of the entry and irqentry text and force it RO.
+ */
+static void __init pti_clone_entry_text(void)
+{
+ pti_clone_pmds((unsigned long) __entry_text_start,
+ (unsigned long) __irqentry_text_end, _PAGE_RW);
+}
+
+/*
+ * Initialize kernel page table isolation
+ */
+void __init pti_init(void)
+{
+ if (!static_cpu_has(X86_FEATURE_PTI))
+ return;
+
+ pr_info("enabled\n");
+
+ pti_clone_user_shared();
+ pti_clone_entry_text();
+ pti_setup_espfix64();
+ pti_setup_vsyscall();
+}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 3118392cdf75..a1561957dccb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,38 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
+/*
+ * We get here when we do something requiring a TLB invalidation
+ * but could not go invalidate all of the contexts. We do the
+ * necessary invalidation by clearing out the 'ctx_id' which
+ * forces a TLB flush when the context is loaded.
+ */
+void clear_asid_other(void)
+{
+ u16 asid;
+
+ /*
+ * This is only expected to be set if we have disabled
+ * kernel _PAGE_GLOBAL pages.
+ */
+ if (!static_cpu_has(X86_FEATURE_PTI)) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
+ /* Do not need to flush the current asid */
+ if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
+ continue;
+ /*
+ * Make sure the next time we go to switch to
+ * this asid, we do a flush:
+ */
+ this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
+ }
+ this_cpu_write(cpu_tlbstate.invalidate_other, false);
+}
+
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
@@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
return;
}
+ if (this_cpu_read(cpu_tlbstate.invalidate_other))
+ clear_asid_other();
+
for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
next->context.ctx_id)
@@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
*need_flush = true;
}
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+{
+ unsigned long new_mm_cr3;
+
+ if (need_flush) {
+ invalidate_user_asid(new_asid);
+ new_mm_cr3 = build_cr3(pgdir, new_asid);
+ } else {
+ new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+ }
+
+ /*
+ * Caution: many callers of this function expect
+ * that load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask writes.
+ */
+ write_cr3(new_mm_cr3);
+}
+
void leave_mm(int cpu)
{
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -128,7 +182,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* isn't free.
*/
#ifdef CONFIG_DEBUG_VM
- if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
/*
* If we were to BUG here, we'd be very likely to kill
* the system so hard that we don't see the call trace.
@@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- write_cr3(build_cr3(next, new_asid));
+ load_new_mm_cr3(next->pgd, new_asid, true);
/*
* NB: This gets called via leave_mm() in the idle path
@@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- write_cr3(build_cr3_noflush(next, new_asid));
+ load_new_mm_cr3(next->pgd, new_asid, false);
/* See above wrt _rcuidle. */
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +342,7 @@ void initialize_tlbstate_and_flush(void)
!(cr4_read_shadow() & X86_CR4_PCIDE));
/* Force ASID 0 and force a TLB flush. */
- write_cr3(build_cr3(mm, 0));
+ write_cr3(build_cr3(mm->pgd, 0));
/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +605,7 @@ static void do_kernel_range_flush(void *info)
/* flush range by one by one 'invlpg' */
for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
- __flush_tlb_single(addr);
+ __flush_tlb_one(addr);
}
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
diff --git a/arch/x86/pci/broadcom_bus.c b/arch/x86/pci/broadcom_bus.c
index bb461cfd01ab..526536c81ddc 100644
--- a/arch/x86/pci/broadcom_bus.c
+++ b/arch/x86/pci/broadcom_bus.c
@@ -97,7 +97,7 @@ static int __init broadcom_postcore_init(void)
* We should get host bridge information from ACPI unless the BIOS
* doesn't support it.
*/
- if (acpi_os_get_root_pointer())
+ if (!acpi_disabled && acpi_os_get_root_pointer())
return 0;
#endif
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 1e996df687a3..e663d6bf1328 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -665,6 +665,16 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
unsigned i;
u32 base, limit, high;
struct resource *res, *conflict;
+ struct pci_dev *other;
+
+ /* Check that we are the only device of that type */
+ other = pci_get_device(dev->vendor, dev->device, NULL);
+ if (other != dev ||
+ (other = pci_get_device(dev->vendor, dev->device, other))) {
+ /* This is a multi-socket system, don't touch it for now */
+ pci_dev_put(other);
+ return;
+ }
for (i = 0; i < 8; i++) {
pci_read_config_dword(dev, AMD_141b_MMIO_BASE(i), &base);
@@ -696,8 +706,13 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
res->end = 0xfd00000000ull - 1;
/* Just grab the free area behind system memory for this */
- while ((conflict = request_resource_conflict(&iomem_resource, res)))
+ while ((conflict = request_resource_conflict(&iomem_resource, res))) {
+ if (conflict->end >= res->end) {
+ kfree(res);
+ return;
+ }
res->start = conflict->end + 1;
+ }
dev_info(&dev->dev, "adding root bus resource %pR\n", res);
@@ -714,10 +729,10 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
pci_bus_add_resource(dev->bus, res, 0);
}
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
#endif
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6a151ce70e86..d87ac96e37ed 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -196,6 +196,9 @@ static pgd_t *efi_pgd;
* because we want to avoid inserting EFI region mappings (EFI_VA_END
* to EFI_VA_START) into the standard kernel page tables. Everything
* else can be shared, see efi_sync_low_kernel_mappings().
+ *
+ * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
+ * allocation.
*/
int __init efi_alloc_page_tables(void)
{
@@ -208,7 +211,7 @@ int __init efi_alloc_page_tables(void)
return 0;
gfp_mask = GFP_KERNEL | __GFP_ZERO;
- efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
+ efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
if (!efi_pgd)
return -ENOMEM;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index f44c0bc95aa2..8538a6723171 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
local_flush_tlb();
stat->d_alltlb++;
} else {
- __flush_tlb_one(msg->address);
+ __flush_tlb_single(msg->address);
stat->d_onetlb++;
}
stat->d_requestee++;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 5f6fd860820a..e4cb9f4cde8a 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
* on the specified blade to allow the sending of MSIs to the specified CPU.
*/
static int uv_domain_activate(struct irq_domain *domain,
- struct irq_data *irq_data, bool early)
+ struct irq_data *irq_data, bool reserve)
{
uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
return 0;
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index c34bd8233f7c..5f64f30873e2 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -905,7 +905,7 @@ static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master)
/*
* UV NMI handler
*/
-int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
+static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
{
struct uv_hub_nmi_s *hub_nmi = uv_hub_nmi;
int cpu = smp_processor_id();
@@ -1013,7 +1013,7 @@ void uv_nmi_init(void)
}
/* Setup HUB NMI info */
-void __init uv_nmi_setup_common(bool hubbed)
+static void __init uv_nmi_setup_common(bool hubbed)
{
int size = sizeof(void *) * (1 << NODES_SHIFT);
int cpu;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 84fcfde53f8f..a7d966964c6f 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* descriptor tables
*/
-#ifdef CONFIG_X86_32
store_idt(&ctxt->idt);
-#else
-/* CONFIG_X86_64 */
- store_idt((struct desc_ptr *)&ctxt->idt_limit);
-#endif
+
/*
* We save it here, but restore it only in the hibernate case.
* For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
@@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* segment registers
*/
-#ifdef CONFIG_X86_32
- savesegment(es, ctxt->es);
- savesegment(fs, ctxt->fs);
+#ifdef CONFIG_X86_32_LAZY_GS
savesegment(gs, ctxt->gs);
- savesegment(ss, ctxt->ss);
-#else
-/* CONFIG_X86_64 */
- asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
- asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
- asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
- asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
- asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
+#endif
+#ifdef CONFIG_X86_64
+ savesegment(gs, ctxt->gs);
+ savesegment(fs, ctxt->fs);
+ savesegment(ds, ctxt->ds);
+ savesegment(es, ctxt->es);
rdmsrl(MSR_FS_BASE, ctxt->fs_base);
- rdmsrl(MSR_GS_BASE, ctxt->gs_base);
- rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
+ rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
mtrr_save_fixed_ranges(NULL);
rdmsrl(MSR_EFER, ctxt->efer);
@@ -160,17 +152,19 @@ static void do_fpu_end(void)
static void fix_processor_context(void)
{
int cpu = smp_processor_id();
- struct tss_struct *t = &per_cpu(cpu_tss, cpu);
#ifdef CONFIG_X86_64
struct desc_struct *desc = get_cpu_gdt_rw(cpu);
tss_desc tss;
#endif
- set_tss_desc(cpu, t); /*
- * This just modifies memory; should not be
- * necessary. But... This is necessary, because
- * 386 hardware has concept of busy TSS or some
- * similar stupidity.
- */
+
+ /*
+ * We need to reload TR, which requires that we change the
+ * GDT entry to indicate "available" first.
+ *
+ * XXX: This could probably all be replaced by a call to
+ * force_reload_TR().
+ */
+ set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
#ifdef CONFIG_X86_64
memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -178,6 +172,9 @@ static void fix_processor_context(void)
write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
syscall_init(); /* This sets MSR_*STAR and related */
+#else
+ if (boot_cpu_has(X86_FEATURE_SEP))
+ enable_sep_cpu();
#endif
load_TR_desc(); /* This does ltr */
load_mm_ldt(current->active_mm); /* This does lldt */
@@ -190,9 +187,12 @@ static void fix_processor_context(void)
}
/**
- * __restore_processor_state - restore the contents of CPU registers saved
- * by __save_processor_state()
- * @ctxt - structure to load the registers contents from
+ * __restore_processor_state - restore the contents of CPU registers saved
+ * by __save_processor_state()
+ * @ctxt - structure to load the registers contents from
+ *
+ * The asm code that gets us here will have restored a usable GDT, although
+ * it will be pointing to the wrong alias.
*/
static void notrace __restore_processor_state(struct saved_context *ctxt)
{
@@ -215,46 +215,52 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
write_cr2(ctxt->cr2);
write_cr0(ctxt->cr0);
+ /* Restore the IDT. */
+ load_idt(&ctxt->idt);
+
/*
- * now restore the descriptor tables to their proper values
- * ltr is done i fix_processor_context().
+ * Just in case the asm code got us here with the SS, DS, or ES
+ * out of sync with the GDT, update them.
*/
-#ifdef CONFIG_X86_32
- load_idt(&ctxt->idt);
+ loadsegment(ss, __KERNEL_DS);
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+
+ /*
+ * Restore percpu access. Percpu access can happen in exception
+ * handlers or in complicated helpers like load_gs_index().
+ */
+#ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
#else
-/* CONFIG_X86_64 */
- load_idt((const struct desc_ptr *)&ctxt->idt_limit);
+ loadsegment(fs, __KERNEL_PERCPU);
+ loadsegment(gs, __KERNEL_STACK_CANARY);
#endif
+ /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
+ fix_processor_context();
+
/*
- * segment registers
+ * Now that we have descriptor tables fully restored and working
+ * exception handling, restore the usermode segments.
*/
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_64
+ loadsegment(ds, ctxt->es);
loadsegment(es, ctxt->es);
loadsegment(fs, ctxt->fs);
- loadsegment(gs, ctxt->gs);
- loadsegment(ss, ctxt->ss);
+ load_gs_index(ctxt->gs);
/*
- * sysenter MSRs
+ * Restore FSBASE and GSBASE after restoring the selectors, since
+ * restoring the selectors clobbers the bases. Keep in mind
+ * that MSR_KERNEL_GS_BASE is horribly misnamed.
*/
- if (boot_cpu_has(X86_FEATURE_SEP))
- enable_sep_cpu();
-#else
-/* CONFIG_X86_64 */
- asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
- asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
- asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
- load_gs_index(ctxt->gs);
- asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
-
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
- wrmsrl(MSR_GS_BASE, ctxt->gs_base);
- wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
+ wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
+#elif defined(CONFIG_X86_32_LAZY_GS)
+ loadsegment(gs, ctxt->gs);
#endif
- fix_processor_context();
-
do_fpu_end();
tsc_verify_tsc_adjust(true);
x86_platform.restore_sched_clock_state();
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 6b830d4cb4c8..de58533d3664 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -57,7 +57,7 @@ static u32 xen_apic_read(u32 reg)
return 0;
if (reg == APIC_LVR)
- return 0x10;
+ return 0x14;
#ifdef CONFIG_X86_32
if (reg == APIC_LDR)
return SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d669e9d89001..c9081c6671f0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,8 +1,12 @@
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+#include <linux/bootmem.h>
+#endif
#include <linux/cpu.h>
#include <linux/kexec.h>
#include <xen/features.h>
#include <xen/page.h>
+#include <xen/interface/memory.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
}
EXPORT_SYMBOL(xen_arch_unregister_cpu);
#endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+void __init arch_xen_balloon_init(struct resource *hostmem_resource)
+{
+ struct xen_memory_map memmap;
+ int rc;
+ unsigned int i, last_guest_ram;
+ phys_addr_t max_addr = PFN_PHYS(max_pfn);
+ struct e820_table *xen_e820_table;
+ const struct e820_entry *entry;
+ struct resource *res;
+
+ if (!xen_initial_domain())
+ return;
+
+ xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
+ if (!xen_e820_table)
+ return;
+
+ memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
+ set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
+ rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
+ if (rc) {
+ pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
+ goto out;
+ }
+
+ last_guest_ram = 0;
+ for (i = 0; i < memmap.nr_entries; i++) {
+ if (xen_e820_table->entries[i].addr >= max_addr)
+ break;
+ if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
+ last_guest_ram = i;
+ }
+
+ entry = &xen_e820_table->entries[last_guest_ram];
+ if (max_addr >= entry->addr + entry->size)
+ goto out; /* No unallocated host RAM. */
+
+ hostmem_resource->start = max_addr;
+ hostmem_resource->end = entry->addr + entry->size;
+
+ /*
+ * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
+ * as unavailable. The rest of that region can be used for hotplug-based
+ * ballooning.
+ */
+ for (; i < memmap.nr_entries; i++) {
+ entry = &xen_e820_table->entries[i];
+
+ if (entry->type == E820_TYPE_RAM)
+ continue;
+
+ if (entry->addr >= hostmem_resource->end)
+ break;
+
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ if (!res)
+ goto out;
+
+ res->name = "Unavailable host RAM";
+ res->start = entry->addr;
+ res->end = (entry->addr + entry->size < hostmem_resource->end) ?
+ entry->addr + entry->size : hostmem_resource->end;
+ rc = insert_resource(hostmem_resource, res);
+ if (rc) {
+ pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
+ __func__, res->start, res->end, rc);
+ kfree(res);
+ goto out;
+ }
+ }
+
+ out:
+ kfree(xen_e820_table);
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5b2b3f3f6531..c047f42552e1 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -88,6 +88,8 @@
#include "multicalls.h"
#include "pmu.h"
+#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
+
void *xen_initial_gdt;
static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -622,7 +624,7 @@ static struct trap_array_entry trap_array[] = {
{ simd_coprocessor_error, xen_simd_coprocessor_error, false },
};
-static bool get_trap_addr(void **addr, unsigned int ist)
+static bool __ref get_trap_addr(void **addr, unsigned int ist)
{
unsigned int nr;
bool ist_okay = false;
@@ -644,6 +646,14 @@ static bool get_trap_addr(void **addr, unsigned int ist)
}
}
+ if (nr == ARRAY_SIZE(trap_array) &&
+ *addr >= (void *)early_idt_handler_array[0] &&
+ *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
+ nr = (*addr - (void *)early_idt_handler_array[0]) /
+ EARLY_IDT_HANDLER_SIZE;
+ *addr = (void *)xen_early_idt_handler_array[nr];
+ }
+
if (WARN_ON(ist != 0 && !ist_okay))
return false;
@@ -818,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
- this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+ this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
void xen_set_iopl_mask(unsigned mask)
@@ -1250,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
/* Work out if we support NX */
+ get_cpu_cap(&boot_cpu_data);
x86_configure_nx();
/* Get mfn list */
@@ -1262,6 +1273,21 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_setup_gdt(0);
xen_init_irq_ops();
+
+ /* Let's presume PV guests always boot on vCPU with id 0. */
+ per_cpu(xen_vcpu_id, 0) = 0;
+
+ /*
+ * Setup xen_vcpu early because idt_setup_early_handler needs it for
+ * local_irq_disable(), irqs_disabled().
+ *
+ * Don't do the full vcpu_info placement stuff until we have
+ * the cpu_possible_mask and a non-dummy shared_info.
+ */
+ xen_vcpu_info_reset(0);
+
+ idt_setup_early_handler();
+
xen_init_capabilities();
#ifdef CONFIG_X86_LOCAL_APIC
@@ -1295,18 +1321,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
*/
acpi_numa = -1;
#endif
- /* Let's presume PV guests always boot on vCPU with id 0. */
- per_cpu(xen_vcpu_id, 0) = 0;
-
- /*
- * Setup xen_vcpu early because start_kernel needs it for
- * local_irq_disable(), irqs_disabled().
- *
- * Don't do the full vcpu_info placement stuff until we have
- * the cpu_possible_mask and a non-dummy shared_info.
- */
- xen_vcpu_info_reset(0);
-
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
local_irq_disable();
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index fc048ec686e7..4d62c071b166 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1902,6 +1902,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* Graft it onto L4[511][510] */
copy_page(level2_kernel_pgt, l2);
+ /*
+ * Zap execute permission from the ident map. Due to the sharing of
+ * L1 entries we need to do this in the L2.
+ */
+ if (__supported_pte_mask & _PAGE_NX) {
+ for (i = 0; i < PTRS_PER_PMD; ++i) {
+ if (pmd_none(level2_ident_pgt[i]))
+ continue;
+ level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
+ }
+ }
+
/* Copy the initial P->M table mappings if necessary. */
i = pgd_index(xen_start_info->mfn_list);
if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
switch (idx) {
case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
- case FIX_RO_IDT:
#ifdef CONFIG_X86_32
case FIX_WP_TEST:
# ifdef CONFIG_HIGHMEM
@@ -2272,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
- case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
/* All local page mappings */
pte = pfn_pte(phys, prot);
break;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c114ca767b3b..6e0d2086eacb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
addr = xen_e820_table.entries[0].addr;
size = xen_e820_table.entries[0].size;
while (i < xen_e820_table.nr_entries) {
- bool discard = false;
chunk_size = size;
type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
xen_add_extra_mem(pfn_s, n_pfns);
xen_max_p2m_pfn = pfn_s + n_pfns;
} else
- discard = true;
+ type = E820_TYPE_UNUSABLE;
}
- if (!discard)
- xen_align_and_add_e820_region(addr, chunk_size, type);
+ xen_align_and_add_e820_region(addr, chunk_size, type);
addr += chunk_size;
size -= chunk_size;
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 8a10c9a9e2b5..417b339e5c8e 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -15,6 +15,7 @@
#include <xen/interface/xen.h>
+#include <linux/init.h>
#include <linux/linkage.h>
.macro xen_pv_trap name
@@ -54,6 +55,19 @@ xen_pv_trap entry_INT80_compat
#endif
xen_pv_trap hypervisor_callback
+ __INIT
+ENTRY(xen_early_idt_handler_array)
+ i = 0
+ .rept NUM_EXCEPTION_VECTORS
+ pop %rcx
+ pop %r11
+ jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
+ i = i + 1
+ .fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
+ .endr
+END(xen_early_idt_handler_array)
+ __FINIT
+
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
* Xen64 iret frame:
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index a5bcdfb890f1..837d4dd76785 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -2,6 +2,7 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += bitsperlong.h
+generic-y += bpf_perf_event.h
generic-y += errno.h
generic-y += fcntl.h
generic-y += ioctl.h